mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-24 02:38:06 +02:00
Merge branch 'main' into devin/1779464281-mr-656-inline-query-strings
Resolve conflicts: keep query/mutate canonical CLI subcommands and top-level lint command (this branch) alongside the repo→graph terminology rename from main. Update test helpers (repo_path → graph_path, init_repo → init_graph, app_for_loaded_repo → app_for_loaded_graph) and align tempdir variable names so the merged tests compile. Drop the now- unused QueryCommand enum (Lint was promoted to a top-level Command). Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>
This commit is contained in:
commit
9ff4af47fb
79 changed files with 2780 additions and 1894 deletions
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "omnigraph-cli"
|
||||
version = "0.4.2"
|
||||
version = "0.6.0"
|
||||
edition = "2024"
|
||||
description = "CLI for the Omnigraph graph database."
|
||||
license = "MIT"
|
||||
|
|
@ -13,10 +13,10 @@ name = "omnigraph"
|
|||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.4.2" }
|
||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" }
|
||||
omnigraph-policy = { path = "../omnigraph-policy", version = "0.4.2" }
|
||||
omnigraph-server = { path = "../omnigraph-server", version = "0.4.2" }
|
||||
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.0" }
|
||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" }
|
||||
omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.0" }
|
||||
omnigraph-server = { path = "../omnigraph-server", version = "0.6.0" }
|
||||
clap = { workspace = true }
|
||||
color-eyre = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
|
|
@ -30,4 +30,5 @@ assert_cmd = "2"
|
|||
predicates = "3"
|
||||
serde_json = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
lance = { workspace = true }
|
||||
lance-index = { workspace = true }
|
||||
|
|
|
|||
|
|
@ -67,16 +67,16 @@ enum Command {
|
|||
Version,
|
||||
/// Generate, clean, or refresh explicit seed embeddings
|
||||
Embed(EmbedArgs),
|
||||
/// Initialize a new repo from a schema
|
||||
/// Initialize a new graph from a schema
|
||||
Init {
|
||||
#[arg(long)]
|
||||
schema: PathBuf,
|
||||
/// Repo URI (local path or s3://)
|
||||
/// Graph URI (local path or s3://)
|
||||
uri: String,
|
||||
},
|
||||
/// Load data into a repo
|
||||
/// Load data into a graph
|
||||
Load {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -93,7 +93,7 @@ enum Command {
|
|||
},
|
||||
/// Ingest data into a reviewable named branch
|
||||
Ingest {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -127,7 +127,7 @@ enum Command {
|
|||
/// printed and the invocation is rewritten to `omnigraph lint`).
|
||||
#[command(visible_alias = "check")]
|
||||
Lint {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -140,9 +140,9 @@ enum Command {
|
|||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
/// Show repo snapshot
|
||||
/// Show graph snapshot
|
||||
Snapshot {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -155,7 +155,7 @@ enum Command {
|
|||
},
|
||||
/// Export a full graph snapshot as JSONL
|
||||
Export {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -182,7 +182,7 @@ enum Command {
|
|||
/// when used. Pairs with `omnigraph mutate` on the write side.
|
||||
#[command(visible_alias = "read")]
|
||||
Query {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(hide = true)]
|
||||
|
|
@ -220,7 +220,7 @@ enum Command {
|
|||
/// warning when used. Pairs with `omnigraph query` on the read side.
|
||||
#[command(visible_alias = "change")]
|
||||
Mutate {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(hide = true)]
|
||||
|
|
@ -252,9 +252,9 @@ enum Command {
|
|||
#[command(subcommand)]
|
||||
command: PolicyCommand,
|
||||
},
|
||||
/// Compact small Lance fragments in every table of the repo
|
||||
/// Compact small Lance fragments in every table of the graph
|
||||
Optimize {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -263,9 +263,9 @@ enum Command {
|
|||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
/// Remove old Lance versions from every table of the repo (destructive)
|
||||
/// Remove old Lance versions from every table of the graph (destructive)
|
||||
Cleanup {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -291,7 +291,7 @@ enum Command {
|
|||
enum BranchCommand {
|
||||
/// Create a new branch
|
||||
Create {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
|
|
@ -306,7 +306,7 @@ enum BranchCommand {
|
|||
},
|
||||
/// List branches
|
||||
List {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
|
|
@ -318,7 +318,7 @@ enum BranchCommand {
|
|||
},
|
||||
/// Delete a branch
|
||||
Delete {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
|
|
@ -331,7 +331,7 @@ enum BranchCommand {
|
|||
},
|
||||
/// Merge a source branch into a target branch
|
||||
Merge {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
|
|
@ -350,7 +350,7 @@ enum BranchCommand {
|
|||
enum SchemaCommand {
|
||||
/// Plan a schema migration against the accepted persisted schema
|
||||
Plan {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -368,7 +368,7 @@ enum SchemaCommand {
|
|||
},
|
||||
/// Apply a supported schema migration
|
||||
Apply {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -393,7 +393,7 @@ enum SchemaCommand {
|
|||
/// Show the current accepted schema source
|
||||
#[command(alias = "get")]
|
||||
Show {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -405,10 +405,11 @@ enum SchemaCommand {
|
|||
}
|
||||
|
||||
#[derive(Debug, Subcommand)]
|
||||
|
||||
enum CommitCommand {
|
||||
/// List graph commits
|
||||
List {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -421,7 +422,7 @@ enum CommitCommand {
|
|||
},
|
||||
/// Show a graph commit
|
||||
Show {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
|
|
@ -594,7 +595,7 @@ fn finish_query_lint(output: &QueryLintOutput, json: bool) -> Result<()> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn ensure_local_repo_parent(uri: &str) -> Result<()> {
|
||||
fn ensure_local_graph_parent(uri: &str) -> Result<()> {
|
||||
if !uri.contains("://") {
|
||||
fs::create_dir_all(uri)?;
|
||||
}
|
||||
|
|
@ -706,10 +707,10 @@ fn resolve_policy_engine(config: &OmnigraphConfig) -> Result<PolicyEngine> {
|
|||
let policy_file = config
|
||||
.resolve_policy_file()
|
||||
.ok_or_else(|| color_eyre::eyre::eyre!("policy.file must be set in omnigraph.yaml"))?;
|
||||
PolicyEngine::load(&policy_file, &policy_repo_id(config))
|
||||
PolicyEngine::load(&policy_file, &policy_graph_id(config))
|
||||
}
|
||||
|
||||
/// Open a local-URI repo and, when `policy.file` is configured in
|
||||
/// Open a local-URI graph and, when `policy.file` is configured in
|
||||
/// `omnigraph.yaml`, install the resolved `PolicyEngine` on the engine
|
||||
/// handle so every direct-engine write goes through
|
||||
/// `Omnigraph::enforce(...)` (MR-722). Without a configured policy this
|
||||
|
|
@ -733,10 +734,7 @@ async fn open_local_db_with_policy(uri: &str, config: &OmnigraphConfig) -> Resul
|
|||
/// policy is configured and this returns `None`, the engine-layer
|
||||
/// footgun guard intentionally denies — silent bypass via "I forgot the
|
||||
/// actor" is what the guard prevents.
|
||||
fn resolve_cli_actor<'a>(
|
||||
cli_as: Option<&'a str>,
|
||||
config: &'a OmnigraphConfig,
|
||||
) -> Option<&'a str> {
|
||||
fn resolve_cli_actor<'a>(cli_as: Option<&'a str>, config: &'a OmnigraphConfig) -> Option<&'a str> {
|
||||
cli_as.or(config.cli.actor.as_deref())
|
||||
}
|
||||
|
||||
|
|
@ -748,7 +746,7 @@ fn resolve_policy_tests_path(config: &OmnigraphConfig) -> Result<PathBuf> {
|
|||
})
|
||||
}
|
||||
|
||||
fn policy_repo_id(config: &OmnigraphConfig) -> String {
|
||||
fn policy_graph_id(config: &OmnigraphConfig) -> String {
|
||||
if let Some(name) = &config.project.name {
|
||||
return name.clone();
|
||||
}
|
||||
|
|
@ -846,8 +844,15 @@ fn parse_duration_arg(s: &str) -> Result<std::time::Duration> {
|
|||
if s.is_empty() {
|
||||
bail!("duration is empty");
|
||||
}
|
||||
let (num_part, unit) = match s.char_indices().rev().find(|(_, c)| c.is_ascii_alphabetic()) {
|
||||
Some((i, _)) => (&s[..i + 1 - s[i..].chars().next().unwrap().len_utf8()], &s[i..]),
|
||||
let (num_part, unit) = match s
|
||||
.char_indices()
|
||||
.rev()
|
||||
.find(|(_, c)| c.is_ascii_alphabetic())
|
||||
{
|
||||
Some((i, _)) => (
|
||||
&s[..i + 1 - s[i..].chars().next().unwrap().len_utf8()],
|
||||
&s[i..],
|
||||
),
|
||||
None => (s, ""),
|
||||
};
|
||||
let n: u64 = num_part
|
||||
|
|
@ -873,7 +878,7 @@ fn resolve_local_uri(
|
|||
let uri = resolve_uri(config, cli_uri, cli_target)?;
|
||||
if is_remote_uri(&uri) {
|
||||
bail!(
|
||||
"{} is only supported against local repo URIs in this milestone",
|
||||
"{} is only supported against local graph URIs in this milestone",
|
||||
operation
|
||||
);
|
||||
}
|
||||
|
|
@ -1138,9 +1143,7 @@ fn render_schema_plan_step(step: &SchemaMigrationStep) -> String {
|
|||
type_name,
|
||||
drop_mode_label(*mode),
|
||||
),
|
||||
SchemaMigrationStep::UnsupportedChange {
|
||||
entity, reason, ..
|
||||
} => {
|
||||
SchemaMigrationStep::UnsupportedChange { entity, reason, .. } => {
|
||||
// When a schema-lint code is attached, render code + tier
|
||||
// so operators see at-a-glance the kind of risk (destructive
|
||||
// / validated / safe) — not just the rule identifier.
|
||||
|
|
@ -1550,10 +1553,10 @@ async fn execute_query_lint(
|
|||
));
|
||||
}
|
||||
|
||||
let has_repo_target =
|
||||
let has_graph_target =
|
||||
cli_uri.is_some() || cli_target.is_some() || config.cli_graph_name().is_some();
|
||||
if !has_repo_target {
|
||||
bail!("query lint requires --schema <schema.pg> or a resolvable repo target");
|
||||
if !has_graph_target {
|
||||
bail!("query lint requires --schema <schema.pg> or a resolvable graph target");
|
||||
}
|
||||
|
||||
let uri = resolve_local_uri(config, cli_uri, cli_target, "query lint")?;
|
||||
|
|
@ -1562,7 +1565,7 @@ async fn execute_query_lint(
|
|||
&db.catalog(),
|
||||
&query_source,
|
||||
query_path,
|
||||
QueryLintSchemaSource::repo(uri),
|
||||
QueryLintSchemaSource::graph(uri),
|
||||
))
|
||||
}
|
||||
|
||||
|
|
@ -1806,7 +1809,7 @@ async fn main() -> Result<()> {
|
|||
}
|
||||
Command::Init { schema, uri } => {
|
||||
let schema_source = fs::read_to_string(&schema)?;
|
||||
ensure_local_repo_parent(&uri)?;
|
||||
ensure_local_graph_parent(&uri)?;
|
||||
Omnigraph::init(&uri, &schema_source).await?;
|
||||
scaffold_config_if_missing(&uri)?;
|
||||
println!("initialized {}", uri);
|
||||
|
|
@ -2589,17 +2592,16 @@ async fn main() -> Result<()> {
|
|||
let config = load_cli_config(config.as_ref())?;
|
||||
let uri = resolve_uri(&config, uri, target.as_deref())?;
|
||||
|
||||
let older_than_dur = older_than
|
||||
.as_deref()
|
||||
.map(parse_duration_arg)
|
||||
.transpose()?;
|
||||
let older_than_dur = older_than.as_deref().map(parse_duration_arg).transpose()?;
|
||||
|
||||
if keep.is_none() && older_than_dur.is_none() {
|
||||
bail!("cleanup requires at least one of --keep or --older-than");
|
||||
}
|
||||
|
||||
let policy_desc = match (keep, older_than_dur) {
|
||||
(Some(k), Some(d)) => format!("keep {} versions, remove anything older than {:?}", k, d),
|
||||
(Some(k), Some(d)) => {
|
||||
format!("keep {} versions, remove anything older than {:?}", k, d)
|
||||
}
|
||||
(Some(k), None) => format!("keep {} versions", k),
|
||||
(None, Some(d)) => format!("remove anything older than {:?}", d),
|
||||
_ => unreachable!(),
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -52,7 +52,7 @@ pub fn fixture(name: &str) -> PathBuf {
|
|||
.join(name)
|
||||
}
|
||||
|
||||
pub fn repo_path(root: &Path) -> PathBuf {
|
||||
pub fn graph_path(root: &Path) -> PathBuf {
|
||||
root.join("demo.omni")
|
||||
}
|
||||
|
||||
|
|
@ -86,14 +86,14 @@ pub fn parse_stdout_json(output: &Output) -> Value {
|
|||
serde_json::from_slice(&output.stdout).unwrap()
|
||||
}
|
||||
|
||||
pub fn init_repo(repo: &Path) {
|
||||
pub fn init_graph(graph: &Path) {
|
||||
let schema = fixture("test.pg");
|
||||
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(repo));
|
||||
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(graph));
|
||||
}
|
||||
|
||||
pub fn load_fixture(repo: &Path) {
|
||||
pub fn load_fixture(graph: &Path) {
|
||||
let data = fixture("test.jsonl");
|
||||
output_success(cli().arg("load").arg("--data").arg(&data).arg(repo));
|
||||
output_success(cli().arg("load").arg("--data").arg(&data).arg(graph));
|
||||
}
|
||||
|
||||
pub fn write_jsonl(path: &Path, rows: &str) {
|
||||
|
|
@ -116,7 +116,7 @@ fn yaml_string(value: &str) -> String {
|
|||
format!("'{}'", value.replace('\'', "''"))
|
||||
}
|
||||
|
||||
pub fn local_yaml_config(repo: &Path) -> String {
|
||||
pub fn local_yaml_config(graph: &Path) -> String {
|
||||
format!(
|
||||
"\
|
||||
graphs:
|
||||
|
|
@ -130,7 +130,7 @@ query:
|
|||
- .
|
||||
policy: {{}}
|
||||
",
|
||||
yaml_string(&repo.to_string_lossy())
|
||||
yaml_string(&graph.to_string_lossy())
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -200,9 +200,9 @@ fn spawn_server_process(mut command: StdCommand) -> TestServer {
|
|||
panic!("server did not become healthy");
|
||||
}
|
||||
|
||||
pub fn spawn_server(repo: &Path) -> TestServer {
|
||||
pub fn spawn_server(graph: &Path) -> TestServer {
|
||||
let mut command = server_process();
|
||||
command.arg(repo);
|
||||
command.arg(graph);
|
||||
spawn_server_process(command)
|
||||
}
|
||||
|
||||
|
|
@ -221,58 +221,57 @@ pub fn spawn_server_with_config_env(config: &Path, envs: &[(&str, &str)]) -> Tes
|
|||
spawn_server_process(command)
|
||||
}
|
||||
|
||||
|
||||
pub struct SystemRepo {
|
||||
pub struct SystemGraph {
|
||||
_temp: TempDir,
|
||||
repo: PathBuf,
|
||||
graph: PathBuf,
|
||||
}
|
||||
|
||||
impl SystemRepo {
|
||||
impl SystemGraph {
|
||||
pub fn initialized() -> Self {
|
||||
let temp = tempdir().unwrap();
|
||||
let repo = repo_path(temp.path());
|
||||
init_repo(&repo);
|
||||
Self { _temp: temp, repo }
|
||||
let graph = graph_path(temp.path());
|
||||
init_graph(&graph);
|
||||
Self { _temp: temp, graph }
|
||||
}
|
||||
|
||||
pub fn loaded() -> Self {
|
||||
let temp = tempdir().unwrap();
|
||||
let repo = repo_path(temp.path());
|
||||
init_repo(&repo);
|
||||
load_fixture(&repo);
|
||||
Self { _temp: temp, repo }
|
||||
let graph = graph_path(temp.path());
|
||||
init_graph(&graph);
|
||||
load_fixture(&graph);
|
||||
Self { _temp: temp, graph }
|
||||
}
|
||||
|
||||
pub fn path(&self) -> &Path {
|
||||
&self.repo
|
||||
&self.graph
|
||||
}
|
||||
|
||||
pub fn write_query(&self, name: &str, source: &str) -> PathBuf {
|
||||
let path = self.repo.parent().unwrap().join(name);
|
||||
let path = self.graph.parent().unwrap().join(name);
|
||||
write_query_file(&path, source);
|
||||
path
|
||||
}
|
||||
|
||||
pub fn write_jsonl(&self, name: &str, rows: &str) -> PathBuf {
|
||||
let path = self.repo.parent().unwrap().join(name);
|
||||
let path = self.graph.parent().unwrap().join(name);
|
||||
write_jsonl(&path, rows);
|
||||
path
|
||||
}
|
||||
|
||||
pub fn write_config(&self, name: &str, source: &str) -> PathBuf {
|
||||
let path = self.repo.parent().unwrap().join(name);
|
||||
let path = self.graph.parent().unwrap().join(name);
|
||||
write_config(&path, source);
|
||||
path
|
||||
}
|
||||
|
||||
pub fn write_file(&self, name: &str, source: &str) -> PathBuf {
|
||||
let path = self.repo.parent().unwrap().join(name);
|
||||
let path = self.graph.parent().unwrap().join(name);
|
||||
write_file(&path, source);
|
||||
path
|
||||
}
|
||||
|
||||
pub fn spawn_server(&self) -> TestServer {
|
||||
spawn_server(&self.repo)
|
||||
spawn_server(&self.graph)
|
||||
}
|
||||
|
||||
pub fn spawn_server_with_config(&self, config: &Path) -> TestServer {
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ fn yaml_string(value: &str) -> String {
|
|||
format!("'{}'", value.replace('\'', "''"))
|
||||
}
|
||||
|
||||
fn local_policy_config(repo: &SystemRepo) -> String {
|
||||
fn local_policy_config(graph: &SystemGraph) -> String {
|
||||
format!(
|
||||
"\
|
||||
project:
|
||||
|
|
@ -83,12 +83,12 @@ query:
|
|||
policy:
|
||||
file: ./policy.yaml
|
||||
",
|
||||
yaml_string(&repo.path().to_string_lossy())
|
||||
yaml_string(&graph.path().to_string_lossy())
|
||||
)
|
||||
}
|
||||
|
||||
fn insert_person_query(repo: &SystemRepo, name: &str) -> std::path::PathBuf {
|
||||
repo.write_query(
|
||||
fn insert_person_query(graph: &SystemGraph, name: &str) -> std::path::PathBuf {
|
||||
graph.write_query(
|
||||
name,
|
||||
r#"
|
||||
query insert_person($name: String, $age: I32) {
|
||||
|
|
@ -98,8 +98,8 @@ query insert_person($name: String, $age: I32) {
|
|||
)
|
||||
}
|
||||
|
||||
fn add_friend_query(repo: &SystemRepo, name: &str) -> std::path::PathBuf {
|
||||
repo.write_query(
|
||||
fn add_friend_query(graph: &SystemGraph, name: &str) -> std::path::PathBuf {
|
||||
graph.write_query(
|
||||
name,
|
||||
r#"
|
||||
query add_friend($from: String, $to: String) {
|
||||
|
|
@ -109,13 +109,13 @@ query add_friend($from: String, $to: String) {
|
|||
)
|
||||
}
|
||||
|
||||
fn snapshot_table_row_count(repo: &SystemRepo, table_key: &str) -> u64 {
|
||||
snapshot_table_row_count_at(repo.path(), table_key)
|
||||
fn snapshot_table_row_count(graph: &SystemGraph, table_key: &str) -> u64 {
|
||||
snapshot_table_row_count_at(graph.path(), table_key)
|
||||
}
|
||||
|
||||
fn snapshot_table_row_count_at(repo: &std::path::Path, table_key: &str) -> u64 {
|
||||
fn snapshot_table_row_count_at(graph: &std::path::Path, table_key: &str) -> u64 {
|
||||
let payload = parse_stdout_json(&output_success(
|
||||
cli().arg("snapshot").arg(repo).arg("--json"),
|
||||
cli().arg("snapshot").arg(graph).arg("--json"),
|
||||
));
|
||||
payload["tables"]
|
||||
.as_array()
|
||||
|
|
@ -178,7 +178,7 @@ fn format_vector(values: &[f32]) -> String {
|
|||
.join(", ")
|
||||
}
|
||||
|
||||
fn s3_test_repo_uri(suite: &str) -> Option<String> {
|
||||
fn s3_test_graph_uri(suite: &str) -> Option<String> {
|
||||
let bucket = env::var("OMNIGRAPH_S3_TEST_BUCKET").ok()?;
|
||||
let prefix = env::var("OMNIGRAPH_S3_TEST_PREFIX")
|
||||
.ok()
|
||||
|
|
@ -193,21 +193,21 @@ fn s3_test_repo_uri(suite: &str) -> Option<String> {
|
|||
|
||||
#[test]
|
||||
fn local_cli_end_to_end_init_load_read_change_read_flow() {
|
||||
let repo = SystemRepo::initialized();
|
||||
let mutation_file = insert_person_query(&repo, "system-local-init-change.gq");
|
||||
let graph = SystemGraph::initialized();
|
||||
let mutation_file = insert_person_query(&graph, "system-local-init-change.gq");
|
||||
|
||||
output_success(
|
||||
cli()
|
||||
.arg("load")
|
||||
.arg("--data")
|
||||
.arg(fixture("test.jsonl"))
|
||||
.arg(repo.path()),
|
||||
.arg(graph.path()),
|
||||
);
|
||||
|
||||
let read_before = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -222,7 +222,7 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() {
|
|||
let change_payload = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("change")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(&mutation_file)
|
||||
.arg("--params")
|
||||
|
|
@ -235,7 +235,7 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() {
|
|||
let read_after = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -253,7 +253,7 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() {
|
|||
let inline_change = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("change")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("-e")
|
||||
.arg("query add($name: String, $age: I32) { insert Person { name: $name, age: $age } }")
|
||||
.arg("--params")
|
||||
|
|
@ -267,7 +267,7 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() {
|
|||
let inline_read = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query-string")
|
||||
.arg("query find($name: String) { match { $p: Person { name: $name } } return { $p.name, $p.age } }")
|
||||
.arg("--params")
|
||||
|
|
@ -281,15 +281,15 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_end_to_end_branch_change_merge_flow() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let mutation_file = insert_person_query(&repo, "system-local-change.gq");
|
||||
let graph = SystemGraph::loaded();
|
||||
let mutation_file = insert_person_query(&graph, "system-local-change.gq");
|
||||
|
||||
output_success(
|
||||
cli()
|
||||
.arg("branch")
|
||||
.arg("create")
|
||||
.arg("--uri")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--from")
|
||||
.arg("main")
|
||||
.arg("feature"),
|
||||
|
|
@ -298,7 +298,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
|
|||
let change_payload = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("change")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(&mutation_file)
|
||||
.arg("--branch")
|
||||
|
|
@ -313,7 +313,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
|
|||
let feature_read = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -332,7 +332,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
|
|||
.arg("branch")
|
||||
.arg("merge")
|
||||
.arg("--uri")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("feature")
|
||||
.arg("--json"),
|
||||
));
|
||||
|
|
@ -341,7 +341,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
|
|||
let main_read = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -358,7 +358,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
|
|||
cli()
|
||||
.arg("commit")
|
||||
.arg("list")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--branch")
|
||||
.arg("main")
|
||||
.arg("--json"),
|
||||
|
|
@ -368,8 +368,8 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let ingest_data = repo.write_jsonl(
|
||||
let graph = SystemGraph::loaded();
|
||||
let ingest_data = graph.write_jsonl(
|
||||
"system-local-ingest.jsonl",
|
||||
r#"{"type":"Person","data":{"name":"Zoe","age":33}}
|
||||
{"type":"Person","data":{"name":"Bob","age":26}}"#,
|
||||
|
|
@ -382,7 +382,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
|
|||
.arg(&ingest_data)
|
||||
.arg("--branch")
|
||||
.arg("feature-ingest")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--json"),
|
||||
));
|
||||
assert_eq!(ingest_payload["branch"], "feature-ingest");
|
||||
|
|
@ -395,7 +395,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
|
|||
let feature_snapshot = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("snapshot")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--branch")
|
||||
.arg("feature-ingest")
|
||||
.arg("--json"),
|
||||
|
|
@ -405,7 +405,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
|
|||
let zoe = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -422,7 +422,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
|
|||
let bob = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -439,20 +439,20 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_export_round_trips_full_branch_graph() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let graph = SystemGraph::loaded();
|
||||
|
||||
output_success(
|
||||
cli()
|
||||
.arg("branch")
|
||||
.arg("create")
|
||||
.arg("--uri")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--from")
|
||||
.arg("main")
|
||||
.arg("feature"),
|
||||
);
|
||||
|
||||
let feature_data = repo.write_jsonl(
|
||||
let feature_data = graph.write_jsonl(
|
||||
"system-local-export-feature.jsonl",
|
||||
r#"{"type":"Person","data":{"name":"Eve","age":29}}
|
||||
{"edge":"Knows","from":"Alice","to":"Eve"}"#,
|
||||
|
|
@ -466,53 +466,56 @@ fn local_cli_export_round_trips_full_branch_graph() {
|
|||
.arg("feature")
|
||||
.arg("--mode")
|
||||
.arg("append")
|
||||
.arg(repo.path()),
|
||||
.arg(graph.path()),
|
||||
);
|
||||
|
||||
let exported = stdout_string(&output_success(
|
||||
cli()
|
||||
.arg("export")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--branch")
|
||||
.arg("feature")
|
||||
.arg("--jsonl"),
|
||||
));
|
||||
let export_path = repo.write_jsonl("system-local-exported.jsonl", &exported);
|
||||
let imported_repo = repo.path().parent().unwrap().join("imported-export.omni");
|
||||
let export_path = graph.write_jsonl("system-local-exported.jsonl", &exported);
|
||||
let imported_graph = graph.path().parent().unwrap().join("imported-export.omni");
|
||||
|
||||
output_success(
|
||||
cli()
|
||||
.arg("init")
|
||||
.arg("--schema")
|
||||
.arg(fixture("test.pg"))
|
||||
.arg(&imported_repo),
|
||||
.arg(&imported_graph),
|
||||
);
|
||||
output_success(
|
||||
cli()
|
||||
.arg("load")
|
||||
.arg("--data")
|
||||
.arg(&export_path)
|
||||
.arg(&imported_repo),
|
||||
.arg(&imported_graph),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
snapshot_table_row_count_at(&imported_repo, "node:Person"),
|
||||
snapshot_table_row_count_at(&imported_graph, "node:Person"),
|
||||
5
|
||||
);
|
||||
assert_eq!(
|
||||
snapshot_table_row_count_at(&imported_repo, "node:Company"),
|
||||
snapshot_table_row_count_at(&imported_graph, "node:Company"),
|
||||
2
|
||||
);
|
||||
assert_eq!(snapshot_table_row_count_at(&imported_repo, "edge:Knows"), 4);
|
||||
assert_eq!(
|
||||
snapshot_table_row_count_at(&imported_repo, "edge:WorksAt"),
|
||||
snapshot_table_row_count_at(&imported_graph, "edge:Knows"),
|
||||
4
|
||||
);
|
||||
assert_eq!(
|
||||
snapshot_table_row_count_at(&imported_graph, "edge:WorksAt"),
|
||||
2
|
||||
);
|
||||
|
||||
let eve = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(&imported_repo)
|
||||
.arg(&imported_graph)
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -527,7 +530,7 @@ fn local_cli_export_round_trips_full_branch_graph() {
|
|||
let friends = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(&imported_repo)
|
||||
.arg(&imported_graph)
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -541,7 +544,7 @@ fn local_cli_export_round_trips_full_branch_graph() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_s3_end_to_end_init_load_read_flow() {
|
||||
let Some(repo_uri) = s3_test_repo_uri("cli-local") else {
|
||||
let Some(graph_uri) = s3_test_graph_uri("cli-local") else {
|
||||
eprintln!("skipping s3 cli test: OMNIGRAPH_S3_TEST_BUCKET is not set");
|
||||
return;
|
||||
};
|
||||
|
|
@ -566,7 +569,7 @@ query:
|
|||
- .
|
||||
policy: {{}}
|
||||
",
|
||||
repo_uri
|
||||
graph_uri
|
||||
),
|
||||
);
|
||||
|
||||
|
|
@ -575,14 +578,14 @@ policy: {{}}
|
|||
.arg("init")
|
||||
.arg("--schema")
|
||||
.arg(fixture("test.pg"))
|
||||
.arg(&repo_uri),
|
||||
.arg(&graph_uri),
|
||||
);
|
||||
output_success(
|
||||
cli()
|
||||
.arg("load")
|
||||
.arg("--data")
|
||||
.arg(fixture("test.jsonl"))
|
||||
.arg(&repo_uri),
|
||||
.arg(&graph_uri),
|
||||
);
|
||||
|
||||
let read = parse_stdout_json(&output_success(
|
||||
|
|
@ -615,13 +618,13 @@ policy: {{}}
|
|||
|
||||
#[test]
|
||||
fn local_cli_failed_load_keeps_target_state_unchanged() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let bad_data = repo.write_jsonl(
|
||||
let graph = SystemGraph::loaded();
|
||||
let bad_data = graph.write_jsonl(
|
||||
"system-bad-load.jsonl",
|
||||
r#"{"edge":"Knows","from":"Alice","to":"Missing"}"#,
|
||||
);
|
||||
let person_rows_before = snapshot_table_row_count(&repo, "node:Person");
|
||||
let knows_rows_before = snapshot_table_row_count(&repo, "edge:Knows");
|
||||
let person_rows_before = snapshot_table_row_count(&graph, "node:Person");
|
||||
let knows_rows_before = snapshot_table_row_count(&graph, "edge:Knows");
|
||||
|
||||
let output = output_failure(
|
||||
cli()
|
||||
|
|
@ -630,17 +633,17 @@ fn local_cli_failed_load_keeps_target_state_unchanged() {
|
|||
.arg(&bad_data)
|
||||
.arg("--mode")
|
||||
.arg("append")
|
||||
.arg(repo.path()),
|
||||
.arg(graph.path()),
|
||||
);
|
||||
let stderr = String::from_utf8(output.stderr).unwrap();
|
||||
assert!(stderr.contains("not found") || stderr.contains("Missing"));
|
||||
|
||||
assert_eq!(
|
||||
snapshot_table_row_count(&repo, "node:Person"),
|
||||
snapshot_table_row_count(&graph, "node:Person"),
|
||||
person_rows_before
|
||||
);
|
||||
assert_eq!(
|
||||
snapshot_table_row_count(&repo, "edge:Knows"),
|
||||
snapshot_table_row_count(&graph, "edge:Knows"),
|
||||
knows_rows_before
|
||||
);
|
||||
// Failed loads leave no run record (the run lifecycle has been
|
||||
|
|
@ -649,13 +652,13 @@ fn local_cli_failed_load_keeps_target_state_unchanged() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_failed_change_keeps_target_state_unchanged() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let mutation_file = add_friend_query(&repo, "system-invalid-change.gq");
|
||||
let graph = SystemGraph::loaded();
|
||||
let mutation_file = add_friend_query(&graph, "system-invalid-change.gq");
|
||||
|
||||
let output = output_failure(
|
||||
cli()
|
||||
.arg("change")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(&mutation_file)
|
||||
.arg("--params")
|
||||
|
|
@ -667,7 +670,7 @@ fn local_cli_failed_change_keeps_target_state_unchanged() {
|
|||
let friends_payload = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -683,8 +686,8 @@ fn local_cli_failed_change_keeps_target_state_unchanged() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_resolves_relative_query_against_config_base_dir() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let root = repo.path().parent().unwrap();
|
||||
let graph = SystemGraph::loaded();
|
||||
let root = graph.path().parent().unwrap();
|
||||
let config_dir = root.join("config");
|
||||
let query_dir = config_dir.join("queries");
|
||||
let ambient_dir = root.join("ambient");
|
||||
|
|
@ -707,7 +710,7 @@ query:
|
|||
- queries
|
||||
policy: {{}}
|
||||
",
|
||||
repo.path().display()
|
||||
graph.path().display()
|
||||
),
|
||||
);
|
||||
write_query_file(
|
||||
|
|
@ -761,7 +764,7 @@ query get_person($name: String) {
|
|||
#[test]
|
||||
fn local_cli_datetime_and_list_types_round_trip_through_load_read_and_change() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
let repo = repo_path(temp.path());
|
||||
let graph = graph_path(temp.path());
|
||||
let schema = temp.path().join("datatypes.pg");
|
||||
let data = temp.path().join("datatypes.jsonl");
|
||||
let queries = temp.path().join("datatypes.gq");
|
||||
|
|
@ -836,13 +839,13 @@ query get_task($slug: String) {
|
|||
"#,
|
||||
);
|
||||
|
||||
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&repo));
|
||||
output_success(cli().arg("load").arg("--data").arg(&data).arg(&repo));
|
||||
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&graph));
|
||||
output_success(cli().arg("load").arg("--data").arg(&data).arg(&graph));
|
||||
|
||||
let filtered = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(&repo)
|
||||
.arg(&graph)
|
||||
.arg("--query")
|
||||
.arg(&queries)
|
||||
.arg("--name")
|
||||
|
|
@ -867,7 +870,7 @@ query get_task($slug: String) {
|
|||
let insert_payload = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("change")
|
||||
.arg(&repo)
|
||||
.arg(&graph)
|
||||
.arg("--query")
|
||||
.arg(&queries)
|
||||
.arg("--name")
|
||||
|
|
@ -883,7 +886,7 @@ query get_task($slug: String) {
|
|||
let update_payload = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("change")
|
||||
.arg(&repo)
|
||||
.arg(&graph)
|
||||
.arg("--query")
|
||||
.arg(&queries)
|
||||
.arg("--name")
|
||||
|
|
@ -897,7 +900,7 @@ query get_task($slug: String) {
|
|||
let gamma = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(&repo)
|
||||
.arg(&graph)
|
||||
.arg("--query")
|
||||
.arg(&queries)
|
||||
.arg("--name")
|
||||
|
|
@ -924,7 +927,7 @@ query get_task($slug: String) {
|
|||
#[ignore = "requires GEMINI_API_KEY and network access"]
|
||||
fn local_cli_real_gemini_string_nearest_query_returns_expected_match() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
let repo = repo_path(temp.path());
|
||||
let graph = graph_path(temp.path());
|
||||
let schema = temp.path().join("gemini.pg");
|
||||
let data = temp.path().join("gemini.jsonl");
|
||||
let queries = temp.path().join("gemini.gq");
|
||||
|
|
@ -966,13 +969,13 @@ query vector_search($q: String) {
|
|||
"#,
|
||||
);
|
||||
|
||||
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&repo));
|
||||
output_success(cli().arg("load").arg("--data").arg(&data).arg(&repo));
|
||||
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&graph));
|
||||
output_success(cli().arg("load").arg("--data").arg(&data).arg(&graph));
|
||||
|
||||
let result = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(&repo)
|
||||
.arg(&graph)
|
||||
.arg("--query")
|
||||
.arg(&queries)
|
||||
.arg("--name")
|
||||
|
|
@ -999,10 +1002,10 @@ fn local_cli_policy_tooling_is_end_to_end() {
|
|||
// Sanity check for the read-only policy CLI surfaces. These don't
|
||||
// mutate the graph — they just parse and evaluate the policy file —
|
||||
// so they don't depend on PR #4's engine-side enforcement.
|
||||
let repo = SystemRepo::loaded();
|
||||
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
|
||||
repo.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
repo.write_config("policy.tests.yaml", POLICY_E2E_TESTS_YAML);
|
||||
let graph = SystemGraph::loaded();
|
||||
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
|
||||
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
graph.write_config("policy.tests.yaml", POLICY_E2E_TESTS_YAML);
|
||||
|
||||
let validate = output_success(
|
||||
cli()
|
||||
|
|
@ -1053,10 +1056,10 @@ fn local_cli_change_enforces_engine_layer_policy() {
|
|||
// 3. Policy installed, `--as act-ragnor`, change on main →
|
||||
// Cedar permits (admins-write rule). Write succeeds and the
|
||||
// inserted row is readable.
|
||||
let repo = SystemRepo::loaded();
|
||||
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
|
||||
repo.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let mutation_file = insert_person_query(&repo, "system-local-policy-change.gq");
|
||||
let graph = SystemGraph::loaded();
|
||||
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
|
||||
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let mutation_file = insert_person_query(&graph, "system-local-policy-change.gq");
|
||||
|
||||
// Case 1: policy configured, no actor threaded → footgun guard.
|
||||
let no_actor = output_failure(
|
||||
|
|
@ -1119,7 +1122,7 @@ fn local_cli_change_enforces_engine_layer_policy() {
|
|||
let verify = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -1145,10 +1148,10 @@ fn local_cli_change_enforces_engine_layer_policy() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_load_enforces_engine_layer_policy() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
|
||||
repo.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let data = repo.write_jsonl(
|
||||
let graph = SystemGraph::loaded();
|
||||
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
|
||||
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let data = graph.write_jsonl(
|
||||
"system-local-policy-load.jsonl",
|
||||
r#"{"type":"Person","data":{"name":"LoadPolicy","age":11}}"#,
|
||||
);
|
||||
|
|
@ -1189,10 +1192,10 @@ fn local_cli_load_enforces_engine_layer_policy() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_ingest_enforces_engine_layer_policy() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
|
||||
repo.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let data = repo.write_jsonl(
|
||||
let graph = SystemGraph::loaded();
|
||||
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
|
||||
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let data = graph.write_jsonl(
|
||||
"system-local-policy-ingest.jsonl",
|
||||
r#"{"type":"Person","data":{"name":"IngestPolicy","age":12}}"#,
|
||||
);
|
||||
|
|
@ -1242,16 +1245,19 @@ fn local_cli_ingest_enforces_engine_layer_policy() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_schema_apply_enforces_engine_layer_policy() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
|
||||
repo.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let graph = SystemGraph::loaded();
|
||||
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
|
||||
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
|
||||
// Additive: add a nullable property; SDK-compatible with the fixture
|
||||
// schema. Uses the schema-apply scope (TargetBranch("main")).
|
||||
let new_schema = std::fs::read_to_string(fixture("test.pg"))
|
||||
.unwrap()
|
||||
.replace(" age: I32?\n}", " age: I32?\n nickname: String?\n}");
|
||||
let schema_path = repo.path().join("policy-additive.pg");
|
||||
.replace(
|
||||
" age: I32?\n}",
|
||||
" age: I32?\n nickname: String?\n}",
|
||||
);
|
||||
let schema_path = graph.path().join("policy-additive.pg");
|
||||
std::fs::write(&schema_path, &new_schema).unwrap();
|
||||
|
||||
let denied = output_failure(
|
||||
|
|
@ -1289,9 +1295,9 @@ fn local_cli_schema_apply_enforces_engine_layer_policy() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_branch_create_enforces_engine_layer_policy() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
|
||||
repo.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let graph = SystemGraph::loaded();
|
||||
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
|
||||
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
|
||||
let denied = output_failure(
|
||||
cli()
|
||||
|
|
@ -1327,9 +1333,9 @@ fn local_cli_branch_create_enforces_engine_layer_policy() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_branch_delete_enforces_engine_layer_policy() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
|
||||
repo.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let graph = SystemGraph::loaded();
|
||||
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
|
||||
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
|
||||
// Pre-create the branch as ragnor so there's something to delete.
|
||||
output_success(
|
||||
|
|
@ -1375,9 +1381,9 @@ fn local_cli_branch_delete_enforces_engine_layer_policy() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_branch_merge_enforces_engine_layer_policy() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
|
||||
repo.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let graph = SystemGraph::loaded();
|
||||
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
|
||||
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
|
||||
// Pre-create a feature branch as ragnor (admins-branch-ops covers it).
|
||||
output_success(
|
||||
|
|
@ -1431,7 +1437,7 @@ fn local_cli_branch_merge_enforces_engine_layer_policy() {
|
|||
// pin the precedence rule that `main.rs::resolve_cli_actor` implements:
|
||||
// `--as` flag > `cli.actor` from `omnigraph.yaml` > None.
|
||||
|
||||
fn local_policy_config_with_actor(repo: &SystemRepo, actor: &str) -> String {
|
||||
fn local_policy_config_with_actor(graph: &SystemGraph, actor: &str) -> String {
|
||||
// Mirrors `local_policy_config` but adds `cli.actor` so the
|
||||
// config-only precedence path is exercised. The `cli:` block
|
||||
// already has `graph` and `branch`; appending `actor` here.
|
||||
|
|
@ -1452,7 +1458,7 @@ query:
|
|||
policy:
|
||||
file: ./policy.yaml
|
||||
",
|
||||
yaml_string(&repo.path().to_string_lossy()),
|
||||
yaml_string(&graph.path().to_string_lossy()),
|
||||
actor,
|
||||
)
|
||||
}
|
||||
|
|
@ -1462,13 +1468,13 @@ fn local_cli_actor_from_config_used_when_no_flag() {
|
|||
// cli.actor: act-ragnor in omnigraph.yaml, no --as flag → change
|
||||
// permitted via admins-write rule. Proves the config-only path
|
||||
// works; previously the only proof was structural.
|
||||
let repo = SystemRepo::loaded();
|
||||
let config = repo.write_config(
|
||||
let graph = SystemGraph::loaded();
|
||||
let config = graph.write_config(
|
||||
"omnigraph-policy.yaml",
|
||||
&local_policy_config_with_actor(&repo, "act-ragnor"),
|
||||
&local_policy_config_with_actor(&graph, "act-ragnor"),
|
||||
);
|
||||
repo.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let mutation_file = insert_person_query(&repo, "system-local-cli-actor.gq");
|
||||
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let mutation_file = insert_person_query(&graph, "system-local-cli-actor.gq");
|
||||
|
||||
let allowed = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
|
|
@ -1490,13 +1496,13 @@ fn local_cli_actor_flag_overrides_config_actor() {
|
|||
// cli.actor: act-ragnor in config + --as act-bruno on CLI → change
|
||||
// denied. Flag wins per the precedence rule. Without this test, a
|
||||
// future change that reverses precedence would ride through silently.
|
||||
let repo = SystemRepo::loaded();
|
||||
let config = repo.write_config(
|
||||
let graph = SystemGraph::loaded();
|
||||
let config = graph.write_config(
|
||||
"omnigraph-policy.yaml",
|
||||
&local_policy_config_with_actor(&repo, "act-ragnor"),
|
||||
&local_policy_config_with_actor(&graph, "act-ragnor"),
|
||||
);
|
||||
repo.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let mutation_file = insert_person_query(&repo, "system-local-cli-actor-override.gq");
|
||||
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let mutation_file = insert_person_query(&graph, "system-local-cli-actor-override.gq");
|
||||
|
||||
let denied = output_failure(
|
||||
cli()
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ fn yaml_string(value: &str) -> String {
|
|||
format!("'{}'", value.replace('\'', "''"))
|
||||
}
|
||||
|
||||
fn remote_policy_server_config(repo: &SystemRepo) -> String {
|
||||
fn remote_policy_server_config(graph: &SystemGraph) -> String {
|
||||
format!(
|
||||
"\
|
||||
project:
|
||||
|
|
@ -54,7 +54,7 @@ server:
|
|||
policy:
|
||||
file: ./policy.yaml
|
||||
",
|
||||
yaml_string(&repo.path().to_string_lossy())
|
||||
yaml_string(&graph.path().to_string_lossy())
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -81,10 +81,10 @@ auth:
|
|||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_server_and_cli_end_to_end_flow() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let server = repo.spawn_server();
|
||||
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let mutation_file = repo.write_query(
|
||||
let graph = SystemGraph::loaded();
|
||||
let server = graph.spawn_server();
|
||||
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let mutation_file = graph.write_query(
|
||||
"system-remote-change.gq",
|
||||
r#"
|
||||
query insert_person($name: String, $age: I32) {
|
||||
|
|
@ -105,7 +105,7 @@ query insert_person($name: String, $age: I32) {
|
|||
assert_eq!(health["status"], "ok");
|
||||
|
||||
let local_snapshot = parse_stdout_json(&output_success(
|
||||
cli().arg("snapshot").arg(repo.path()).arg("--json"),
|
||||
cli().arg("snapshot").arg(graph.path()).arg("--json"),
|
||||
));
|
||||
let snapshot = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
|
|
@ -120,7 +120,7 @@ query insert_person($name: String, $age: I32) {
|
|||
let local_read = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -180,7 +180,7 @@ query insert_person($name: String, $age: I32) {
|
|||
let local_verify = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -260,11 +260,11 @@ query insert_person($name: String, $age: I32) {
|
|||
|
||||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_schema_apply_via_cli_updates_repo() {
|
||||
let repo = SystemRepo::initialized();
|
||||
let server = repo.spawn_server();
|
||||
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let next_schema = repo.write_file(
|
||||
fn remote_schema_apply_via_cli_updates_graph() {
|
||||
let graph = SystemGraph::initialized();
|
||||
let server = graph.spawn_server();
|
||||
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let next_schema = graph.write_file(
|
||||
"next.pg",
|
||||
&fs::read_to_string(fixture("test.pg")).unwrap().replace(
|
||||
" age: I32?\n}",
|
||||
|
|
@ -286,7 +286,7 @@ fn remote_schema_apply_via_cli_updates_repo() {
|
|||
|
||||
let db = tokio::runtime::Runtime::new()
|
||||
.unwrap()
|
||||
.block_on(Omnigraph::open(repo.path().to_string_lossy().as_ref()))
|
||||
.block_on(Omnigraph::open(graph.path().to_string_lossy().as_ref()))
|
||||
.unwrap();
|
||||
assert!(
|
||||
db.catalog().node_types["Person"]
|
||||
|
|
@ -298,10 +298,10 @@ fn remote_schema_apply_via_cli_updates_repo() {
|
|||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_schema_apply_rejects_unsupported_plan() {
|
||||
let repo = SystemRepo::initialized();
|
||||
let server = repo.spawn_server();
|
||||
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let breaking_schema = repo.write_file(
|
||||
let graph = SystemGraph::initialized();
|
||||
let server = graph.spawn_server();
|
||||
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let breaking_schema = graph.write_file(
|
||||
"breaking.pg",
|
||||
&fs::read_to_string(fixture("test.pg"))
|
||||
.unwrap()
|
||||
|
|
@ -324,7 +324,7 @@ fn remote_schema_apply_rejects_unsupported_plan() {
|
|||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_schema_apply_rejects_when_non_main_branch_exists() {
|
||||
let repo = SystemRepo::initialized();
|
||||
let graph = SystemGraph::initialized();
|
||||
output_success(
|
||||
cli()
|
||||
.arg("branch")
|
||||
|
|
@ -332,12 +332,12 @@ fn remote_schema_apply_rejects_when_non_main_branch_exists() {
|
|||
.arg("--from")
|
||||
.arg("main")
|
||||
.arg("--uri")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("feature"),
|
||||
);
|
||||
let server = repo.spawn_server();
|
||||
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let next_schema = repo.write_file(
|
||||
let server = graph.spawn_server();
|
||||
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let next_schema = graph.write_file(
|
||||
"next.pg",
|
||||
&fs::read_to_string(fixture("test.pg")).unwrap().replace(
|
||||
" age: I32?\n}",
|
||||
|
|
@ -355,16 +355,16 @@ fn remote_schema_apply_rejects_when_non_main_branch_exists() {
|
|||
.arg(&next_schema),
|
||||
);
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
assert!(stderr.contains("schema apply requires a repo with only main"));
|
||||
assert!(stderr.contains("schema apply requires a graph with only main"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_read_preserves_projection_order_in_json_and_csv() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let server = repo.spawn_server();
|
||||
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let ordered_query = repo.write_query(
|
||||
let graph = SystemGraph::loaded();
|
||||
let server = graph.spawn_server();
|
||||
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let ordered_query = graph.write_query(
|
||||
"ordered-remote.gq",
|
||||
r#"
|
||||
query ordered_person($name: String) {
|
||||
|
|
@ -419,10 +419,10 @@ query ordered_person($name: String) {
|
|||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_branch_create_list_merge_flow() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let server = repo.spawn_server();
|
||||
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let mutation_file = repo.write_query(
|
||||
let graph = SystemGraph::loaded();
|
||||
let server = graph.spawn_server();
|
||||
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let mutation_file = graph.write_query(
|
||||
"system-remote-branch-change.gq",
|
||||
r#"
|
||||
query insert_person($name: String, $age: I32) {
|
||||
|
|
@ -516,9 +516,9 @@ query insert_person($name: String, $age: I32) {
|
|||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_branch_delete_removes_branch() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let server = repo.spawn_server();
|
||||
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let graph = SystemGraph::loaded();
|
||||
let server = graph.spawn_server();
|
||||
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
|
||||
parse_stdout_json(&output_success(
|
||||
cli()
|
||||
|
|
@ -557,10 +557,10 @@ fn remote_branch_delete_removes_branch() {
|
|||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_export_round_trips_full_branch_graph() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let server = repo.spawn_server();
|
||||
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let mutation_file = repo.write_query(
|
||||
let graph = SystemGraph::loaded();
|
||||
let server = graph.spawn_server();
|
||||
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let mutation_file = graph.write_query(
|
||||
"system-remote-export-change.gq",
|
||||
r#"
|
||||
query insert_person($name: String, $age: I32) {
|
||||
|
|
@ -624,8 +624,8 @@ query add_friend($from: String, $to: String) {
|
|||
.arg("feature")
|
||||
.arg("--jsonl"),
|
||||
));
|
||||
let export_path = repo.write_jsonl("system-remote-exported.jsonl", &exported);
|
||||
let imported_repo = repo
|
||||
let export_path = graph.write_jsonl("system-remote-exported.jsonl", &exported);
|
||||
let imported_graph = graph
|
||||
.path()
|
||||
.parent()
|
||||
.unwrap()
|
||||
|
|
@ -636,18 +636,18 @@ query add_friend($from: String, $to: String) {
|
|||
.arg("init")
|
||||
.arg("--schema")
|
||||
.arg(fixture("test.pg"))
|
||||
.arg(&imported_repo),
|
||||
.arg(&imported_graph),
|
||||
);
|
||||
output_success(
|
||||
cli()
|
||||
.arg("load")
|
||||
.arg("--data")
|
||||
.arg(&export_path)
|
||||
.arg(&imported_repo),
|
||||
.arg(&imported_graph),
|
||||
);
|
||||
|
||||
let snapshot = parse_stdout_json(&output_success(
|
||||
cli().arg("snapshot").arg(&imported_repo).arg("--json"),
|
||||
cli().arg("snapshot").arg(&imported_graph).arg("--json"),
|
||||
));
|
||||
assert_eq!(
|
||||
snapshot["tables"]
|
||||
|
|
@ -671,7 +671,7 @@ query add_friend($from: String, $to: String) {
|
|||
let eve = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(&imported_repo)
|
||||
.arg(&imported_graph)
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -687,10 +687,10 @@ query add_friend($from: String, $to: String) {
|
|||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_ingest_creates_review_branch_and_keeps_it_readable() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let server = repo.spawn_server();
|
||||
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let ingest_data = repo.write_jsonl(
|
||||
let graph = SystemGraph::loaded();
|
||||
let server = graph.spawn_server();
|
||||
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let ingest_data = graph.write_jsonl(
|
||||
"system-remote-ingest.jsonl",
|
||||
r#"{"type":"Person","data":{"name":"Zoe","age":33}}
|
||||
{"type":"Person","data":{"name":"Bob","age":26}}"#,
|
||||
|
|
@ -747,9 +747,9 @@ fn remote_ingest_creates_review_branch_and_keeps_it_readable() {
|
|||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_ingest_reuses_existing_branch_and_merges_updates() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let server = repo.spawn_server();
|
||||
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let graph = SystemGraph::loaded();
|
||||
let server = graph.spawn_server();
|
||||
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
|
||||
output_success(
|
||||
cli()
|
||||
|
|
@ -762,7 +762,7 @@ fn remote_ingest_reuses_existing_branch_and_merges_updates() {
|
|||
.arg("feature-ingest"),
|
||||
);
|
||||
|
||||
let ingest_data = repo.write_jsonl(
|
||||
let ingest_data = graph.write_jsonl(
|
||||
"system-remote-ingest-merge.jsonl",
|
||||
r#"{"type":"Person","data":{"name":"Bob","age":26}}
|
||||
{"type":"Person","data":{"name":"Zoe","age":33}}"#,
|
||||
|
|
@ -828,23 +828,23 @@ fn remote_ingest_reuses_existing_branch_and_merges_updates() {
|
|||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_policy_enforces_branch_first_cli_workflow() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let graph = SystemGraph::loaded();
|
||||
let server_config =
|
||||
repo.write_config("server-policy.yaml", &remote_policy_server_config(&repo));
|
||||
repo.write_config("policy.yaml", REMOTE_POLICY_E2E_YAML);
|
||||
let server = repo.spawn_server_with_config_env(
|
||||
graph.write_config("server-policy.yaml", &remote_policy_server_config(&graph));
|
||||
graph.write_config("policy.yaml", REMOTE_POLICY_E2E_YAML);
|
||||
let server = graph.spawn_server_with_config_env(
|
||||
&server_config,
|
||||
&[(
|
||||
"OMNIGRAPH_SERVER_BEARER_TOKENS_JSON",
|
||||
r#"{"act-bruno":"team-token","act-ragnor":"admin-token"}"#,
|
||||
)],
|
||||
);
|
||||
let client_config = repo.write_config(
|
||||
let client_config = graph.write_config(
|
||||
"omnigraph-policy.yaml",
|
||||
&remote_policy_client_config(&server.base_url),
|
||||
);
|
||||
repo.write_config(".env.omni", "POLICY_TEST_TOKEN=team-token\n");
|
||||
let mutation_file = repo.write_query(
|
||||
graph.write_config(".env.omni", "POLICY_TEST_TOKEN=team-token\n");
|
||||
let mutation_file = graph.write_query(
|
||||
"system-remote-policy-change.gq",
|
||||
r#"
|
||||
query insert_person($name: String, $age: I32) {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "omnigraph-compiler"
|
||||
version = "0.4.2"
|
||||
version = "0.6.0"
|
||||
edition = "2024"
|
||||
description = "Schema/query compiler for Omnigraph. Zero Lance dependency."
|
||||
license = "MIT"
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ pub enum QueryLintQueryKind {
|
|||
#[serde(rename_all = "lowercase")]
|
||||
pub enum QueryLintSchemaSourceKind {
|
||||
File,
|
||||
Repo,
|
||||
Graph,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
|
||||
|
|
@ -59,9 +59,9 @@ impl QueryLintSchemaSource {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn repo(uri: impl Into<String>) -> Self {
|
||||
pub fn graph(uri: impl Into<String>) -> Self {
|
||||
Self {
|
||||
kind: QueryLintSchemaSourceKind::Repo,
|
||||
kind: QueryLintSchemaSourceKind::Graph,
|
||||
path: None,
|
||||
uri: Some(uri.into()),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "omnigraph-policy"
|
||||
version = "0.4.2"
|
||||
version = "0.6.0"
|
||||
edition = "2024"
|
||||
description = "Policy / authorization layer for Omnigraph — Cedar-backed PolicyEngine, PolicyChecker trait, ResourceScope enum."
|
||||
license = "MIT"
|
||||
|
|
|
|||
|
|
@ -172,7 +172,7 @@ pub struct PolicyCompiler;
|
|||
|
||||
#[derive(Clone)]
|
||||
pub struct PolicyEngine {
|
||||
repo_id: String,
|
||||
graph_id: String,
|
||||
protected_branches: BTreeSet<String>,
|
||||
known_actors: BTreeSet<String>,
|
||||
schema: Schema,
|
||||
|
|
@ -291,7 +291,7 @@ impl PolicyTestConfig {
|
|||
}
|
||||
|
||||
impl PolicyCompiler {
|
||||
pub fn compile(config: &PolicyConfig, repo_id: &str) -> Result<PolicyEngine> {
|
||||
pub fn compile(config: &PolicyConfig, graph_id: &str) -> Result<PolicyEngine> {
|
||||
config.validate()?;
|
||||
let (schema, schema_warnings) = Schema::from_cedarschema_str(policy_schema_source())?;
|
||||
let schema_warnings = schema_warnings
|
||||
|
|
@ -300,8 +300,8 @@ impl PolicyCompiler {
|
|||
if !schema_warnings.is_empty() {
|
||||
bail!("policy schema warnings:\n{}", schema_warnings.join("\n"));
|
||||
}
|
||||
let entities = compile_entities(config, repo_id, &schema)?;
|
||||
let (policies, policy_to_rule) = compile_policies(config, repo_id)?;
|
||||
let entities = compile_entities(config, graph_id, &schema)?;
|
||||
let (policies, policy_to_rule) = compile_policies(config, graph_id)?;
|
||||
let validator = Validator::new(schema.clone());
|
||||
let validation = validator.validate(&policies, ValidationMode::Strict);
|
||||
let errors = validation
|
||||
|
|
@ -318,7 +318,7 @@ impl PolicyCompiler {
|
|||
.flat_map(|members| members.iter().cloned())
|
||||
.collect();
|
||||
Ok(PolicyEngine {
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_id: graph_id.to_string(),
|
||||
protected_branches: config.protected_branches.iter().cloned().collect(),
|
||||
known_actors,
|
||||
schema,
|
||||
|
|
@ -330,9 +330,9 @@ impl PolicyCompiler {
|
|||
}
|
||||
|
||||
impl PolicyEngine {
|
||||
pub fn load(path: &Path, repo_id: &str) -> Result<Self> {
|
||||
pub fn load(path: &Path, graph_id: &str) -> Result<Self> {
|
||||
let config = PolicyConfig::load(path)?;
|
||||
PolicyCompiler::compile(&config, repo_id)
|
||||
PolicyCompiler::compile(&config, graph_id)
|
||||
}
|
||||
|
||||
pub fn authorize(&self, request: &PolicyRequest) -> Result<PolicyDecision> {
|
||||
|
|
@ -349,7 +349,7 @@ impl PolicyEngine {
|
|||
|
||||
let principal = entity_uid("Actor", &request.actor_id)?;
|
||||
let action = entity_uid("Action", request.action.as_str())?;
|
||||
let resource = entity_uid("Repo", &self.repo_id)?;
|
||||
let resource = entity_uid("Graph", &self.graph_id)?;
|
||||
let context_value = json!({
|
||||
"has_branch": request.branch.is_some(),
|
||||
"branch": request.branch.clone().unwrap_or_default(),
|
||||
|
|
@ -462,7 +462,7 @@ impl PolicyEngine {
|
|||
}
|
||||
}
|
||||
|
||||
fn compile_entities(config: &PolicyConfig, repo_id: &str, schema: &Schema) -> Result<Entities> {
|
||||
fn compile_entities(config: &PolicyConfig, graph_id: &str, schema: &Schema) -> Result<Entities> {
|
||||
let mut group_entities = Vec::new();
|
||||
for group in config.groups.keys() {
|
||||
group_entities.push(Entity::new(
|
||||
|
|
@ -495,8 +495,8 @@ fn compile_entities(config: &PolicyConfig, repo_id: &str, schema: &Schema) -> Re
|
|||
)?);
|
||||
}
|
||||
|
||||
let repo_entity = Entity::new(
|
||||
entity_uid("Repo", repo_id)?,
|
||||
let graph_entity = Entity::new(
|
||||
entity_uid("Graph", graph_id)?,
|
||||
HashMap::new(),
|
||||
HashSet::<EntityUid>::new(),
|
||||
)?;
|
||||
|
|
@ -504,13 +504,13 @@ fn compile_entities(config: &PolicyConfig, repo_id: &str, schema: &Schema) -> Re
|
|||
let mut entities = Vec::new();
|
||||
entities.extend(group_entities);
|
||||
entities.extend(actor_entities);
|
||||
entities.push(repo_entity);
|
||||
entities.push(graph_entity);
|
||||
Ok(Entities::from_entities(entities, Some(schema))?)
|
||||
}
|
||||
|
||||
fn compile_policies(
|
||||
config: &PolicyConfig,
|
||||
repo_id: &str,
|
||||
graph_id: &str,
|
||||
) -> Result<(PolicySet, HashMap<String, String>)> {
|
||||
let mut policies = Vec::new();
|
||||
let mut policy_to_rule = HashMap::new();
|
||||
|
|
@ -518,7 +518,7 @@ fn compile_policies(
|
|||
for rule in &config.rules {
|
||||
for action in &rule.allow.actions {
|
||||
let policy_id = PolicyId::new(format!("{}:{}", rule.id, action.as_str()));
|
||||
let source = compile_policy_source(rule, action, repo_id);
|
||||
let source = compile_policy_source(rule, action, graph_id);
|
||||
let policy = Policy::parse(Some(policy_id.clone()), source.as_str())?;
|
||||
policy_to_rule.insert(policy_id.to_string(), rule.id.clone());
|
||||
policies.push(policy);
|
||||
|
|
@ -528,7 +528,7 @@ fn compile_policies(
|
|||
Ok((PolicySet::from_policies(policies)?, policy_to_rule))
|
||||
}
|
||||
|
||||
fn compile_policy_source(rule: &PolicyRule, action: &PolicyAction, repo_id: &str) -> String {
|
||||
fn compile_policy_source(rule: &PolicyRule, action: &PolicyAction, graph_id: &str) -> String {
|
||||
let mut conditions = Vec::new();
|
||||
if let Some(scope) = rule.allow.branch_scope {
|
||||
conditions.push(branch_scope_condition(scope));
|
||||
|
|
@ -547,11 +547,11 @@ fn compile_policy_source(rule: &PolicyRule, action: &PolicyAction, repo_id: &str
|
|||
r#"permit (
|
||||
principal in Omnigraph::Group::{group},
|
||||
action == Omnigraph::Action::{action},
|
||||
resource == Omnigraph::Repo::{repo}
|
||||
resource == Omnigraph::Graph::{graph}
|
||||
){when};"#,
|
||||
group = cedar_literal(&rule.allow.actors.group),
|
||||
action = cedar_literal(action.as_str()),
|
||||
repo = cedar_literal(repo_id),
|
||||
graph = cedar_literal(graph_id),
|
||||
when = when,
|
||||
)
|
||||
}
|
||||
|
|
@ -594,16 +594,16 @@ namespace Omnigraph {
|
|||
|
||||
entity Actor in [Group];
|
||||
entity Group;
|
||||
entity Repo;
|
||||
entity Graph;
|
||||
|
||||
action "read" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
|
||||
action "export" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
|
||||
action "change" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
|
||||
action "schema_apply" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
|
||||
action "branch_create" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
|
||||
action "branch_delete" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
|
||||
action "branch_merge" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
|
||||
action "admin" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
|
||||
action "read" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
||||
action "export" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
||||
action "change" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
||||
action "schema_apply" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
||||
action "branch_create" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
||||
action "branch_delete" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
||||
action "branch_merge" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
||||
action "admin" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
||||
}
|
||||
"#
|
||||
}
|
||||
|
|
@ -881,7 +881,7 @@ rules:
|
|||
)
|
||||
.unwrap();
|
||||
|
||||
let engine = PolicyCompiler::compile(&policy, "repo").unwrap();
|
||||
let engine = PolicyCompiler::compile(&policy, "graph").unwrap();
|
||||
let allow = engine
|
||||
.authorize(&PolicyRequest {
|
||||
actor_id: "act-bruno".to_string(),
|
||||
|
|
@ -932,7 +932,7 @@ rules:
|
|||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
let engine = PolicyCompiler::compile(&policy, "repo").unwrap();
|
||||
let engine = PolicyCompiler::compile(&policy, "graph").unwrap();
|
||||
let tests = PolicyTestConfig {
|
||||
version: 1,
|
||||
cases: vec![
|
||||
|
|
@ -976,7 +976,7 @@ rules:
|
|||
)
|
||||
.unwrap();
|
||||
|
||||
let engine = PolicyCompiler::compile(&policy, "repo").unwrap();
|
||||
let engine = PolicyCompiler::compile(&policy, "graph").unwrap();
|
||||
let allow = engine
|
||||
.authorize(&PolicyRequest {
|
||||
actor_id: "act-ragnor".to_string(),
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "omnigraph-server"
|
||||
version = "0.4.2"
|
||||
version = "0.6.0"
|
||||
edition = "2024"
|
||||
description = "HTTP server for the Omnigraph graph database."
|
||||
license = "MIT"
|
||||
|
|
@ -19,9 +19,9 @@ default = []
|
|||
aws = ["dep:aws-config", "dep:aws-sdk-secretsmanager"]
|
||||
|
||||
[dependencies]
|
||||
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.4.2" }
|
||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" }
|
||||
omnigraph-policy = { path = "../omnigraph-policy", version = "0.4.2" }
|
||||
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.0" }
|
||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" }
|
||||
omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.0" }
|
||||
axum = { workspace = true }
|
||||
clap = { workspace = true }
|
||||
color-eyre = { workspace = true }
|
||||
|
|
@ -45,4 +45,5 @@ aws-sdk-secretsmanager = { version = "1", optional = true, default-features = fa
|
|||
tempfile = { workspace = true }
|
||||
tower = { workspace = true }
|
||||
serial_test = "3"
|
||||
lance = { workspace = true }
|
||||
lance-index = { workspace = true }
|
||||
|
|
|
|||
|
|
@ -259,10 +259,10 @@ async fn main() {
|
|||
}
|
||||
|
||||
let temp = tempfile::tempdir().expect("tempdir");
|
||||
let repo = temp.path().join("bench.omni");
|
||||
Omnigraph::init(repo.to_str().unwrap(), SCHEMA)
|
||||
let graph = temp.path().join("bench.omni");
|
||||
Omnigraph::init(graph.to_str().unwrap(), SCHEMA)
|
||||
.await
|
||||
.expect("init repo");
|
||||
.expect("init graph");
|
||||
|
||||
// Build bearer tokens: one for the heavy actor + one per light actor.
|
||||
let mut tokens: Vec<(String, String)> =
|
||||
|
|
@ -270,21 +270,17 @@ async fn main() {
|
|||
for i in 0..args.light_actors {
|
||||
tokens.push((format!("act-light-{i}"), format!("light-token-{i}")));
|
||||
}
|
||||
let db = Omnigraph::open(repo.to_str().unwrap())
|
||||
let db = Omnigraph::open(graph.to_str().unwrap())
|
||||
.await
|
||||
.expect("open repo");
|
||||
.expect("open graph");
|
||||
// Construct a custom WorkloadController with the requested caps and
|
||||
// pass it through `AppState::new_with_workload`. Avoids the
|
||||
// `unsafe { std::env::set_var(...) }` antipattern that violates
|
||||
// `setenv`'s thread-safety precondition once the multi-thread tokio
|
||||
// runtime is up.
|
||||
let workload = WorkloadController::new(args.inflight_cap, args.byte_cap);
|
||||
let state = AppState::new_with_workload(
|
||||
repo.to_string_lossy().to_string(),
|
||||
db,
|
||||
tokens,
|
||||
workload,
|
||||
);
|
||||
let state =
|
||||
AppState::new_with_workload(graph.to_string_lossy().to_string(), db, tokens, workload);
|
||||
let app = build_app(state);
|
||||
|
||||
eprintln!(
|
||||
|
|
|
|||
|
|
@ -152,7 +152,9 @@ async fn drive_actor(
|
|||
errors += 1;
|
||||
// Drain body for logging on the first few failures.
|
||||
if errors <= 3 {
|
||||
let body = to_bytes(response.into_body(), 64 * 1024).await.unwrap_or_default();
|
||||
let body = to_bytes(response.into_body(), 64 * 1024)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
eprintln!(
|
||||
"actor {actor_idx} op {op_idx} status {status} body {}",
|
||||
String::from_utf8_lossy(&body)
|
||||
|
|
@ -173,13 +175,13 @@ async fn main() {
|
|||
}
|
||||
|
||||
let temp = tempfile::tempdir().expect("tempdir");
|
||||
let repo = temp.path().join("bench.omni");
|
||||
let graph = temp.path().join("bench.omni");
|
||||
let schema = build_schema(args.tables);
|
||||
Omnigraph::init(repo.to_str().unwrap(), &schema)
|
||||
Omnigraph::init(graph.to_str().unwrap(), &schema)
|
||||
.await
|
||||
.expect("init repo");
|
||||
.expect("init graph");
|
||||
|
||||
let state = AppState::open(repo.to_string_lossy().to_string())
|
||||
let state = AppState::open(graph.to_string_lossy().to_string())
|
||||
.await
|
||||
.expect("open AppState");
|
||||
let app = build_app(state);
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ use api::{
|
|||
IngestRequest, QueryRequest, ReadOutput, ReadRequest, SchemaApplyOutput, SchemaApplyRequest,
|
||||
SchemaOutput, SnapshotQuery, ingest_output, schema_apply_output, snapshot_payload,
|
||||
};
|
||||
pub use auth::{AWS_SECRET_ENV, EnvOrFileTokenSource, TokenSource, resolve_token_source};
|
||||
use axum::body::{Body, Bytes};
|
||||
use axum::extract::DefaultBodyLimit;
|
||||
use axum::extract::{Extension, Path, Query, Request, State};
|
||||
|
|
@ -39,7 +40,6 @@ use omnigraph::error::{ManifestConflictDetails, ManifestErrorKind, OmniError};
|
|||
use omnigraph_compiler::json_params_to_param_map;
|
||||
use omnigraph_compiler::query::parser::parse_query;
|
||||
use omnigraph_compiler::{JsonParamMode, ParamMap};
|
||||
pub use auth::{AWS_SECRET_ENV, EnvOrFileTokenSource, TokenSource, resolve_token_source};
|
||||
pub use policy::{
|
||||
PolicyAction, PolicyCompiler, PolicyConfig, PolicyDecision, PolicyEngine, PolicyExpectation,
|
||||
PolicyRequest, PolicyTestConfig,
|
||||
|
|
@ -439,10 +439,7 @@ impl ApiError {
|
|||
}
|
||||
}
|
||||
|
||||
fn manifest_version_conflict(
|
||||
message: String,
|
||||
details: api::ManifestConflictOutput,
|
||||
) -> Self {
|
||||
fn manifest_version_conflict(message: String, details: api::ManifestConflictOutput) -> Self {
|
||||
Self {
|
||||
status: StatusCode::CONFLICT,
|
||||
code: ErrorCode::Conflict,
|
||||
|
|
@ -2112,12 +2109,12 @@ server:
|
|||
("OMNIGRAPH_UNAUTHENTICATED", None),
|
||||
]);
|
||||
let temp = tempdir().unwrap();
|
||||
// Repo path doesn't need to exist — classifier fires before
|
||||
// Graph path doesn't need to exist — classifier fires before
|
||||
// `AppState::open_with_bearer_tokens_and_policy`.
|
||||
let config = ServerConfig {
|
||||
uri: temp
|
||||
.path()
|
||||
.join("repo.omni")
|
||||
.join("graph.omni")
|
||||
.to_string_lossy()
|
||||
.into_owned(),
|
||||
bind: "127.0.0.1:0".to_string(),
|
||||
|
|
@ -2125,7 +2122,8 @@ server:
|
|||
allow_unauthenticated: false,
|
||||
};
|
||||
let result = serve(config).await;
|
||||
let err = result.expect_err("serve should refuse to start in State 1 without --unauthenticated");
|
||||
let err =
|
||||
result.expect_err("serve should refuse to start in State 1 without --unauthenticated");
|
||||
let msg = format!("{:?}", err);
|
||||
assert!(
|
||||
msg.contains("no bearer tokens") || msg.contains("policy file"),
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ use omnigraph_server::{ServerConfig, init_tracing, load_server_settings, serve};
|
|||
#[command(name = "omnigraph-server")]
|
||||
#[command(about = "HTTP server for the Omnigraph graph database")]
|
||||
struct Cli {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
|
|||
|
|
@ -19,42 +19,42 @@ fn fixture(name: &str) -> PathBuf {
|
|||
.join(name)
|
||||
}
|
||||
|
||||
fn repo_path(root: &Path) -> PathBuf {
|
||||
fn graph_path(root: &Path) -> PathBuf {
|
||||
root.join("openapi_test.omni")
|
||||
}
|
||||
|
||||
async fn init_loaded_repo() -> tempfile::TempDir {
|
||||
async fn init_loaded_graph() -> tempfile::TempDir {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
let repo = repo_path(temp.path());
|
||||
fs::create_dir_all(&repo).unwrap();
|
||||
let graph = graph_path(temp.path());
|
||||
fs::create_dir_all(&graph).unwrap();
|
||||
let schema = fs::read_to_string(fixture("test.pg")).unwrap();
|
||||
let data = fs::read_to_string(fixture("test.jsonl")).unwrap();
|
||||
Omnigraph::init(repo.to_str().unwrap(), &schema)
|
||||
Omnigraph::init(graph.to_str().unwrap(), &schema)
|
||||
.await
|
||||
.unwrap();
|
||||
let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap();
|
||||
let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
|
||||
load_jsonl(&mut db, &data, LoadMode::Overwrite)
|
||||
.await
|
||||
.unwrap();
|
||||
temp
|
||||
}
|
||||
|
||||
async fn app_for_loaded_repo() -> (tempfile::TempDir, Router) {
|
||||
let temp = init_loaded_repo().await;
|
||||
let repo = repo_path(temp.path());
|
||||
let state = AppState::open(repo.to_string_lossy().to_string())
|
||||
async fn app_for_loaded_graph() -> (tempfile::TempDir, Router) {
|
||||
let temp = init_loaded_graph().await;
|
||||
let graph = graph_path(temp.path());
|
||||
let state = AppState::open(graph.to_string_lossy().to_string())
|
||||
.await
|
||||
.unwrap();
|
||||
let app = build_app(state);
|
||||
(temp, app)
|
||||
}
|
||||
|
||||
async fn app_for_loaded_repo_with_auth(token: &str) -> (tempfile::TempDir, Router) {
|
||||
let temp = init_loaded_repo().await;
|
||||
let repo = repo_path(temp.path());
|
||||
let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap();
|
||||
async fn app_for_loaded_graph_with_auth(token: &str) -> (tempfile::TempDir, Router) {
|
||||
let temp = init_loaded_graph().await;
|
||||
let graph = graph_path(temp.path());
|
||||
let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
|
||||
let state = AppState::new_with_bearer_token(
|
||||
repo.to_string_lossy().to_string(),
|
||||
graph.to_string_lossy().to_string(),
|
||||
db,
|
||||
Some(token.to_string()),
|
||||
);
|
||||
|
|
@ -84,7 +84,7 @@ fn openapi_json() -> Value {
|
|||
|
||||
#[tokio::test]
|
||||
async fn openapi_endpoint_returns_200_with_valid_json() {
|
||||
let (_temp, app) = app_for_loaded_repo().await;
|
||||
let (_temp, app) = app_for_loaded_graph().await;
|
||||
let request = Request::builder()
|
||||
.method(Method::GET)
|
||||
.uri("/openapi.json")
|
||||
|
|
@ -97,7 +97,7 @@ async fn openapi_endpoint_returns_200_with_valid_json() {
|
|||
|
||||
#[tokio::test]
|
||||
async fn openapi_endpoint_returns_openapi_31_version() {
|
||||
let (_temp, app) = app_for_loaded_repo().await;
|
||||
let (_temp, app) = app_for_loaded_graph().await;
|
||||
let request = Request::builder()
|
||||
.method(Method::GET)
|
||||
.uri("/openapi.json")
|
||||
|
|
@ -113,11 +113,11 @@ async fn openapi_endpoint_returns_openapi_31_version() {
|
|||
|
||||
#[tokio::test]
|
||||
async fn openapi_endpoint_does_not_require_auth() {
|
||||
let temp = init_loaded_repo().await;
|
||||
let repo = repo_path(temp.path());
|
||||
let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap();
|
||||
let temp = init_loaded_graph().await;
|
||||
let graph = graph_path(temp.path());
|
||||
let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
|
||||
let state = AppState::new_with_bearer_token(
|
||||
repo.to_string_lossy().to_string(),
|
||||
graph.to_string_lossy().to_string(),
|
||||
db,
|
||||
Some("secret-token".to_string()),
|
||||
);
|
||||
|
|
@ -129,7 +129,11 @@ async fn openapi_endpoint_does_not_require_auth() {
|
|||
.body(Body::empty())
|
||||
.unwrap();
|
||||
let (status, _) = json_response(&app, request).await;
|
||||
assert_eq!(status, StatusCode::OK, "/openapi.json should not require auth");
|
||||
assert_eq!(
|
||||
status,
|
||||
StatusCode::OK,
|
||||
"/openapi.json should not require auth"
|
||||
);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -739,10 +743,13 @@ fn branch_delete_has_branch_path_parameter() {
|
|||
let params = doc["paths"]["/branches/{branch}"]["delete"]["parameters"]
|
||||
.as_array()
|
||||
.unwrap();
|
||||
let has_branch = params.iter().any(|p| {
|
||||
p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("path")
|
||||
});
|
||||
assert!(has_branch, "DELETE /branches/{{branch}} must have 'branch' path parameter");
|
||||
let has_branch = params
|
||||
.iter()
|
||||
.any(|p| p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("path"));
|
||||
assert!(
|
||||
has_branch,
|
||||
"DELETE /branches/{{branch}} must have 'branch' path parameter"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -751,10 +758,13 @@ fn commit_show_has_commit_id_path_parameter() {
|
|||
let params = doc["paths"]["/commits/{commit_id}"]["get"]["parameters"]
|
||||
.as_array()
|
||||
.unwrap();
|
||||
let has_commit_id = params.iter().any(|p| {
|
||||
p["name"].as_str() == Some("commit_id") && p["in"].as_str() == Some("path")
|
||||
});
|
||||
assert!(has_commit_id, "GET /commits/{{commit_id}} must have 'commit_id' path parameter");
|
||||
let has_commit_id = params
|
||||
.iter()
|
||||
.any(|p| p["name"].as_str() == Some("commit_id") && p["in"].as_str() == Some("path"));
|
||||
assert!(
|
||||
has_commit_id,
|
||||
"GET /commits/{{commit_id}} must have 'commit_id' path parameter"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -763,10 +773,13 @@ fn snapshot_has_branch_query_parameter() {
|
|||
let params = doc["paths"]["/snapshot"]["get"]["parameters"]
|
||||
.as_array()
|
||||
.unwrap();
|
||||
let has_branch = params.iter().any(|p| {
|
||||
p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query")
|
||||
});
|
||||
assert!(has_branch, "GET /snapshot must have 'branch' query parameter");
|
||||
let has_branch = params
|
||||
.iter()
|
||||
.any(|p| p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query"));
|
||||
assert!(
|
||||
has_branch,
|
||||
"GET /snapshot must have 'branch' query parameter"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -775,10 +788,13 @@ fn commits_has_branch_query_parameter() {
|
|||
let params = doc["paths"]["/commits"]["get"]["parameters"]
|
||||
.as_array()
|
||||
.unwrap();
|
||||
let has_branch = params.iter().any(|p| {
|
||||
p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query")
|
||||
});
|
||||
assert!(has_branch, "GET /commits must have 'branch' query parameter");
|
||||
let has_branch = params
|
||||
.iter()
|
||||
.any(|p| p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query"));
|
||||
assert!(
|
||||
has_branch,
|
||||
"GET /commits must have 'branch' query parameter"
|
||||
);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -854,8 +870,7 @@ fn error_responses_reference_error_output_schema() {
|
|||
];
|
||||
|
||||
for (path, method, status) in paths_with_errors {
|
||||
let content =
|
||||
&doc["paths"][path][method]["responses"][status]["content"];
|
||||
let content = &doc["paths"][path][method]["responses"][status]["content"];
|
||||
let schema = &content["application/json"]["schema"];
|
||||
let ref_path = schema["$ref"].as_str().unwrap();
|
||||
assert!(
|
||||
|
|
@ -917,7 +932,7 @@ fn openapi_spec_round_trips_through_json() {
|
|||
|
||||
#[tokio::test]
|
||||
async fn open_mode_spec_has_no_security_schemes() {
|
||||
let (_temp, app) = app_for_loaded_repo().await;
|
||||
let (_temp, app) = app_for_loaded_graph().await;
|
||||
let request = Request::builder()
|
||||
.method(Method::GET)
|
||||
.uri("/openapi.json")
|
||||
|
|
@ -933,7 +948,7 @@ async fn open_mode_spec_has_no_security_schemes() {
|
|||
|
||||
#[tokio::test]
|
||||
async fn open_mode_spec_has_no_operation_security() {
|
||||
let (_temp, app) = app_for_loaded_repo().await;
|
||||
let (_temp, app) = app_for_loaded_graph().await;
|
||||
let request = Request::builder()
|
||||
.method(Method::GET)
|
||||
.uri("/openapi.json")
|
||||
|
|
@ -954,7 +969,7 @@ async fn open_mode_spec_has_no_operation_security() {
|
|||
|
||||
#[tokio::test]
|
||||
async fn auth_mode_spec_includes_bearer_token_security_scheme() {
|
||||
let (_temp, app) = app_for_loaded_repo_with_auth("secret").await;
|
||||
let (_temp, app) = app_for_loaded_graph_with_auth("secret").await;
|
||||
let request = Request::builder()
|
||||
.method(Method::GET)
|
||||
.uri("/openapi.json")
|
||||
|
|
@ -968,7 +983,7 @@ async fn auth_mode_spec_includes_bearer_token_security_scheme() {
|
|||
|
||||
#[tokio::test]
|
||||
async fn auth_mode_spec_has_security_on_protected_operations() {
|
||||
let (_temp, app) = app_for_loaded_repo_with_auth("secret").await;
|
||||
let (_temp, app) = app_for_loaded_graph_with_auth("secret").await;
|
||||
let request = Request::builder()
|
||||
.method(Method::GET)
|
||||
.uri("/openapi.json")
|
||||
|
|
@ -999,7 +1014,7 @@ async fn auth_mode_spec_has_security_on_protected_operations() {
|
|||
|
||||
#[tokio::test]
|
||||
async fn auth_mode_spec_matches_static_generation() {
|
||||
let (_temp, app) = app_for_loaded_repo_with_auth("secret").await;
|
||||
let (_temp, app) = app_for_loaded_graph_with_auth("secret").await;
|
||||
let request = Request::builder()
|
||||
.method(Method::GET)
|
||||
.uri("/openapi.json")
|
||||
|
|
@ -1015,7 +1030,7 @@ async fn auth_mode_spec_matches_static_generation() {
|
|||
|
||||
#[tokio::test]
|
||||
async fn auth_mode_healthz_still_has_no_security() {
|
||||
let (_temp, app) = app_for_loaded_repo_with_auth("secret").await;
|
||||
let (_temp, app) = app_for_loaded_graph_with_auth("secret").await;
|
||||
let request = Request::builder()
|
||||
.method(Method::GET)
|
||||
.uri("/openapi.json")
|
||||
|
|
@ -1031,8 +1046,7 @@ async fn auth_mode_healthz_still_has_no_security() {
|
|||
|
||||
#[test]
|
||||
fn openapi_spec_is_up_to_date() {
|
||||
let spec_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.join("../../openapi.json");
|
||||
let spec_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../openapi.json");
|
||||
|
||||
let generated = serde_json::to_string_pretty(&openapi_doc()).unwrap() + "\n";
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "omnigraph-engine"
|
||||
version = "0.4.2"
|
||||
version = "0.6.0"
|
||||
edition = "2024"
|
||||
description = "Runtime engine for the Omnigraph graph database."
|
||||
license = "MIT"
|
||||
|
|
@ -16,8 +16,8 @@ default = []
|
|||
failpoints = ["dep:fail", "fail/failpoints"]
|
||||
|
||||
[dependencies]
|
||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" }
|
||||
omnigraph-policy = { path = "../omnigraph-policy", version = "0.4.2" }
|
||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" }
|
||||
omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.0" }
|
||||
lance = { workspace = true }
|
||||
lance-datafusion = { workspace = true }
|
||||
datafusion = { workspace = true }
|
||||
|
|
@ -51,7 +51,7 @@ chrono = { workspace = true }
|
|||
arc-swap = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" }
|
||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" }
|
||||
tokio = { workspace = true }
|
||||
lance-namespace-impls = { workspace = true }
|
||||
serial_test = "3"
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ use lance::Dataset;
|
|||
use lance_namespace::models::CreateTableVersionRequest;
|
||||
use omnigraph_compiler::catalog::Catalog;
|
||||
|
||||
#[path = "manifest/graph.rs"]
|
||||
mod graph;
|
||||
#[path = "manifest/layout.rs"]
|
||||
mod layout;
|
||||
#[path = "manifest/metadata.rs"]
|
||||
|
|
@ -18,11 +20,10 @@ mod namespace;
|
|||
mod publisher;
|
||||
#[path = "manifest/recovery.rs"]
|
||||
mod recovery;
|
||||
#[path = "manifest/repo.rs"]
|
||||
mod repo;
|
||||
#[path = "manifest/state.rs"]
|
||||
mod state;
|
||||
|
||||
use graph::{init_manifest_graph, open_manifest_graph, snapshot_state_at};
|
||||
use layout::{manifest_uri, open_manifest_dataset, type_name_hash};
|
||||
pub(crate) use metadata::TableVersionMetadata;
|
||||
#[cfg(test)]
|
||||
|
|
@ -33,11 +34,10 @@ pub(crate) use namespace::open_table_head_for_write;
|
|||
use namespace::{branch_manifest_namespace, staged_table_namespace};
|
||||
use publisher::{GraphNamespacePublisher, ManifestBatchPublisher};
|
||||
pub(crate) use recovery::{
|
||||
delete_sidecar, has_schema_apply_sidecar, new_sidecar, recover_manifest_drift, write_sidecar,
|
||||
RecoveryMode, RecoverySidecar, RecoverySidecarHandle, SidecarKind, SidecarTablePin,
|
||||
SidecarTableRegistration, SidecarTombstone,
|
||||
SidecarTableRegistration, SidecarTombstone, delete_sidecar, has_schema_apply_sidecar,
|
||||
new_sidecar, recover_manifest_drift, write_sidecar,
|
||||
};
|
||||
use repo::{init_manifest_repo, open_manifest_repo, snapshot_state_at};
|
||||
pub use state::SubTableEntry;
|
||||
#[cfg(test)]
|
||||
use state::string_column;
|
||||
|
|
@ -215,12 +215,12 @@ impl ManifestCoordinator {
|
|||
self
|
||||
}
|
||||
|
||||
/// Create a new repo at `root_uri` from a catalog.
|
||||
/// Create a new graph at `root_uri` from a catalog.
|
||||
///
|
||||
/// Creates per-type Lance datasets and the namespace `__manifest` table.
|
||||
pub async fn init(root_uri: &str, catalog: &Catalog) -> Result<Self> {
|
||||
let root = root_uri.trim_end_matches('/');
|
||||
let (dataset, known_state) = init_manifest_repo(root, catalog).await?;
|
||||
let (dataset, known_state) = init_manifest_graph(root, catalog).await?;
|
||||
|
||||
Ok(Self::from_parts_with_default_publisher(
|
||||
root,
|
||||
|
|
@ -230,10 +230,10 @@ impl ManifestCoordinator {
|
|||
))
|
||||
}
|
||||
|
||||
/// Open an existing repo's manifest.
|
||||
/// Open an existing graph's manifest.
|
||||
pub async fn open(root_uri: &str) -> Result<Self> {
|
||||
let root = root_uri.trim_end_matches('/');
|
||||
let (dataset, known_state) = open_manifest_repo(root, None).await?;
|
||||
let (dataset, known_state) = open_manifest_graph(root, None).await?;
|
||||
Ok(Self::from_parts_with_default_publisher(
|
||||
root,
|
||||
dataset,
|
||||
|
|
@ -242,14 +242,14 @@ impl ManifestCoordinator {
|
|||
))
|
||||
}
|
||||
|
||||
/// Open an existing repo's manifest at a specific branch.
|
||||
/// Open an existing graph's manifest at a specific branch.
|
||||
pub async fn open_at_branch(root_uri: &str, branch: &str) -> Result<Self> {
|
||||
if branch == "main" {
|
||||
return Self::open(root_uri).await;
|
||||
}
|
||||
|
||||
let root = root_uri.trim_end_matches('/');
|
||||
let (dataset, known_state) = open_manifest_repo(root, Some(branch)).await?;
|
||||
let (dataset, known_state) = open_manifest_graph(root, Some(branch)).await?;
|
||||
Ok(Self::from_parts_with_default_publisher(
|
||||
root,
|
||||
dataset,
|
||||
|
|
@ -410,7 +410,7 @@ impl ManifestCoordinator {
|
|||
Ok(descendants)
|
||||
}
|
||||
|
||||
/// Root URI of the repo.
|
||||
/// Root URI of the graph.
|
||||
pub fn root_uri(&self) -> &str {
|
||||
&self.root_uri
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ use super::state::{
|
|||
ManifestState, SubTableEntry, entries_to_batch, manifest_schema, read_manifest_state,
|
||||
};
|
||||
|
||||
pub(super) async fn init_manifest_repo(
|
||||
pub(super) async fn init_manifest_graph(
|
||||
root_uri: &str,
|
||||
catalog: &Catalog,
|
||||
) -> Result<(Dataset, ManifestState)> {
|
||||
|
|
@ -47,7 +47,7 @@ pub(super) async fn init_manifest_repo(
|
|||
Ok((dataset, known_state))
|
||||
}
|
||||
|
||||
pub(super) async fn open_manifest_repo(
|
||||
pub(super) async fn open_manifest_graph(
|
||||
root_uri: &str,
|
||||
branch: Option<&str>,
|
||||
) -> Result<(Dataset, ManifestState)> {
|
||||
|
|
@ -24,8 +24,8 @@
|
|||
//! Only on open-for-write paths (the publisher's `load_publish_state`).
|
||||
//! Reads are side-effect-free by contract; an old-shape `__manifest` reads
|
||||
//! fine, it just lacks the protections introduced by later versions.
|
||||
//! `init_manifest_repo` stamps the current version at creation, so newly
|
||||
//! initialized repos never need migration.
|
||||
//! `init_manifest_graph` stamps the current version at creation, so newly
|
||||
//! initialized graphs never need migration.
|
||||
//!
|
||||
//! ## Forward-version protection
|
||||
//!
|
||||
|
|
@ -78,7 +78,7 @@ pub(super) async fn migrate_internal_schema(dataset: &mut Dataset) -> Result<()>
|
|||
if current > INTERNAL_MANIFEST_SCHEMA_VERSION {
|
||||
return Err(OmniError::manifest(format!(
|
||||
"__manifest is stamped at internal schema v{} but this binary expects v{} \
|
||||
— upgrade omnigraph before opening this repo for writes",
|
||||
— upgrade omnigraph before opening this graph for writes",
|
||||
current, INTERNAL_MANIFEST_SCHEMA_VERSION,
|
||||
)));
|
||||
}
|
||||
|
|
@ -112,7 +112,10 @@ pub(super) async fn migrate_internal_schema(dataset: &mut Dataset) -> Result<()>
|
|||
async fn migrate_v1_to_v2(dataset: &mut Dataset) -> Result<()> {
|
||||
dataset
|
||||
.update_field_metadata()
|
||||
.update("object_id", [(OBJECT_ID_PK_KEY.to_string(), "true".to_string())])
|
||||
.update(
|
||||
"object_id",
|
||||
[(OBJECT_ID_PK_KEY.to_string(), "true".to_string())],
|
||||
)
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
|
|
@ -121,10 +124,7 @@ async fn migrate_v1_to_v2(dataset: &mut Dataset) -> Result<()> {
|
|||
|
||||
async fn set_stamp(dataset: &mut Dataset, version: u32) -> Result<()> {
|
||||
dataset
|
||||
.update_schema_metadata([(
|
||||
INTERNAL_SCHEMA_VERSION_KEY.to_string(),
|
||||
version.to_string(),
|
||||
)])
|
||||
.update_schema_metadata([(INTERNAL_SCHEMA_VERSION_KEY.to_string(), version.to_string())])
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
Ok(())
|
||||
|
|
|
|||
|
|
@ -230,6 +230,11 @@ impl LanceNamespace for BranchManifestNamespace {
|
|||
metadata: None,
|
||||
properties: None,
|
||||
managed_versioning: Some(true),
|
||||
// Every table we return from describe_table is physically
|
||||
// materialized (open_manifest_dataset succeeds), never just
|
||||
// "declared." See lance-namespace 6.0.1 DescribeTableResponse
|
||||
// field docs.
|
||||
is_only_declared: Some(false),
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -373,6 +378,11 @@ impl LanceNamespace for StagedTableNamespace {
|
|||
metadata: None,
|
||||
properties: None,
|
||||
managed_versioning: Some(true),
|
||||
// Every table we return from describe_table is physically
|
||||
// materialized (open_manifest_dataset succeeds), never just
|
||||
// "declared." See lance-namespace 6.0.1 DescribeTableResponse
|
||||
// field docs.
|
||||
is_only_declared: Some(false),
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ use super::{ManifestChange, SubTableUpdate, TableRegistration, TableTombstone};
|
|||
/// into the audit row's `recovery_for_actor` field.
|
||||
pub(crate) const RECOVERY_ACTOR: &str = "omnigraph:recovery";
|
||||
|
||||
/// Subdirectory under the repo root holding sidecar files.
|
||||
/// Subdirectory under the graph root holding sidecar files.
|
||||
pub(crate) const RECOVERY_DIR_NAME: &str = "__recovery";
|
||||
|
||||
/// Current sidecar JSON shape version. Bumping this is a breaking change:
|
||||
|
|
@ -142,7 +142,7 @@ pub(crate) struct SidecarTablePin {
|
|||
pub(crate) struct SidecarTableRegistration {
|
||||
/// Stable identifier (`node:Tag`, `edge:WorksAt`, etc.).
|
||||
pub table_key: String,
|
||||
/// Repo-relative path the manifest will register
|
||||
/// Graph-relative path the manifest will register
|
||||
/// (e.g. `nodes/{fnv1a64-hex}`); recovery joins this with `root_uri`
|
||||
/// to open the dataset Lance HEAD when constructing the
|
||||
/// accompanying `Update`.
|
||||
|
|
@ -295,7 +295,7 @@ pub(crate) enum SidecarDecision {
|
|||
Abort,
|
||||
}
|
||||
|
||||
/// Build the `__recovery/` directory URI under a repo root.
|
||||
/// Build the `__recovery/` directory URI under a graph root.
|
||||
pub(crate) fn recovery_dir_uri(root_uri: &str) -> String {
|
||||
let trimmed = root_uri.trim_end_matches('/');
|
||||
format!("{}/{}", trimmed, RECOVERY_DIR_NAME)
|
||||
|
|
@ -1122,7 +1122,7 @@ async fn record_audit(
|
|||
/// the rename so the recovery sweep's roll-forward step sees the new
|
||||
/// catalog. Without this, the disambiguation logic deletes the staging
|
||||
/// files (since manifest still pins the old table set) and leaves the
|
||||
/// repo with new-schema data on disk but the old `_schema.pg` live —
|
||||
/// graph with new-schema data on disk but the old `_schema.pg` live —
|
||||
/// real corruption.
|
||||
pub(crate) async fn has_schema_apply_sidecar(
|
||||
root_uri: &str,
|
||||
|
|
|
|||
|
|
@ -1393,7 +1393,10 @@ async fn test_concurrent_publish_with_overlapping_expected_versions_one_succeeds
|
|||
// version (no duplicate version rows).
|
||||
let mc = ManifestCoordinator::open(uri).await.unwrap();
|
||||
let entry = mc.snapshot().entry("node:Person").unwrap().clone();
|
||||
assert!(entry.table_version > 1, "Person should have advanced past v=1");
|
||||
assert!(
|
||||
entry.table_version > 1,
|
||||
"Person should have advanced past v=1"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -1418,7 +1421,7 @@ async fn test_publish_migrates_pre_stamp_manifest_to_current_version() {
|
|||
let catalog = build_test_catalog();
|
||||
let mc = ManifestCoordinator::init(uri, &catalog).await.unwrap();
|
||||
|
||||
// Simulate a v1 (pre-stamp) repo by removing the schema-level stamp on disk.
|
||||
// Simulate a v1 (pre-stamp) graph by removing the schema-level stamp on disk.
|
||||
{
|
||||
let mut ds = open_manifest_dataset(uri, None).await.unwrap();
|
||||
ds.update_schema_metadata([(
|
||||
|
|
@ -1449,7 +1452,7 @@ async fn test_publish_migrates_pre_stamp_manifest_to_current_version() {
|
|||
assert_eq!(
|
||||
super::migrations::read_stamp(&post),
|
||||
super::migrations::INTERNAL_MANIFEST_SCHEMA_VERSION,
|
||||
"publish on a v1 repo should leave the manifest stamped at the current version",
|
||||
"publish on a v1 graph should leave the manifest stamped at the current version",
|
||||
);
|
||||
|
||||
// Manifest should still serve correctly post-migration.
|
||||
|
|
|
|||
|
|
@ -166,7 +166,7 @@ pub enum OpenMode {
|
|||
}
|
||||
|
||||
impl Omnigraph {
|
||||
/// Create a new repo at `uri` from schema source.
|
||||
/// Create a new graph at `uri` from schema source.
|
||||
///
|
||||
/// Creates `_schema.pg`, per-type Lance datasets, and `__manifest`.
|
||||
pub async fn init(uri: &str, schema_source: &str) -> Result<Self> {
|
||||
|
|
@ -205,7 +205,7 @@ impl Omnigraph {
|
|||
})
|
||||
}
|
||||
|
||||
/// Open an existing repo (read-write).
|
||||
/// Open an existing graph (read-write).
|
||||
///
|
||||
/// Reads `_schema.pg`, parses it, builds the catalog, and opens `__manifest`.
|
||||
/// Runs the open-time recovery sweep before returning — see [`OpenMode`].
|
||||
|
|
@ -213,7 +213,7 @@ impl Omnigraph {
|
|||
Self::open_with_storage_and_mode(uri, storage_for_uri(uri)?, OpenMode::ReadWrite).await
|
||||
}
|
||||
|
||||
/// Open an existing repo for read-only consumers (NDJSON export,
|
||||
/// Open an existing graph for read-only consumers (NDJSON export,
|
||||
/// `commit list`, etc.). Skips the recovery sweep — see [`OpenMode`].
|
||||
pub async fn open_read_only(uri: &str) -> Result<Self> {
|
||||
Self::open_with_storage_and_mode(uri, storage_for_uri(uri)?, OpenMode::ReadOnly).await
|
||||
|
|
@ -397,7 +397,8 @@ impl Omnigraph {
|
|||
desired_schema_source: &str,
|
||||
options: SchemaApplyOptions,
|
||||
) -> Result<SchemaApplyResult> {
|
||||
self.apply_schema_as(desired_schema_source, options, None).await
|
||||
self.apply_schema_as(desired_schema_source, options, None)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Apply a schema migration with an explicit actor for engine-layer
|
||||
|
|
@ -470,7 +471,7 @@ impl Omnigraph {
|
|||
Arc::clone(&self.merge_exclusive)
|
||||
}
|
||||
|
||||
/// Engine-level access to the repo's normalized root URI. Used by
|
||||
/// Engine-level access to the graph's normalized root URI. Used by
|
||||
/// the recovery sidecar protocol to compute `__recovery/` paths.
|
||||
pub(crate) fn root_uri(&self) -> &str {
|
||||
&self.root_uri
|
||||
|
|
@ -510,9 +511,10 @@ impl Omnigraph {
|
|||
let normalized = normalize_branch_name(branch.unwrap_or("main"))?;
|
||||
let coord = self.coordinator.read().await;
|
||||
if normalized.as_deref() == coord.current_branch() {
|
||||
let snapshot_id = coord.head_commit_id().await?.unwrap_or_else(|| {
|
||||
SnapshotId::synthetic(coord.current_branch(), coord.version())
|
||||
});
|
||||
let snapshot_id = coord
|
||||
.head_commit_id()
|
||||
.await?
|
||||
.unwrap_or_else(|| SnapshotId::synthetic(coord.current_branch(), coord.version()));
|
||||
return Ok(ResolvedTarget {
|
||||
requested,
|
||||
branch: coord.current_branch().map(str::to_string),
|
||||
|
|
@ -587,7 +589,7 @@ impl Omnigraph {
|
|||
/// exist. Required BEFORE manifest-drift recovery so a
|
||||
/// SchemaApply roll-forward doesn't publish the manifest while
|
||||
/// the staging files remain unrenamed (which would corrupt the
|
||||
/// repo: data on new schema, catalog on old).
|
||||
/// graph: data on new schema, catalog on old).
|
||||
/// 3. `recover_manifest_drift(... RollForwardOnly)` — close the
|
||||
/// finalize→publisher residual via roll-forward; defer rollback
|
||||
/// work to next ReadWrite open.
|
||||
|
|
@ -668,7 +670,11 @@ impl Omnigraph {
|
|||
|
||||
pub async fn resolve_snapshot(&self, branch: &str) -> Result<SnapshotId> {
|
||||
self.ensure_schema_state_valid().await?;
|
||||
self.coordinator.read().await.resolve_snapshot_id(branch).await
|
||||
self.coordinator
|
||||
.read()
|
||||
.await
|
||||
.resolve_snapshot_id(branch)
|
||||
.await
|
||||
}
|
||||
|
||||
pub(crate) async fn resolved_target(
|
||||
|
|
@ -676,7 +682,11 @@ impl Omnigraph {
|
|||
target: impl Into<ReadTarget>,
|
||||
) -> Result<ResolvedTarget> {
|
||||
self.ensure_schema_state_valid().await?;
|
||||
self.coordinator.read().await.resolve_target(&target.into()).await
|
||||
self.coordinator
|
||||
.read()
|
||||
.await
|
||||
.resolve_target(&target.into())
|
||||
.await
|
||||
}
|
||||
|
||||
// ─── Change detection ────────────────────────────────────────────────
|
||||
|
|
@ -708,7 +718,9 @@ impl Omnigraph {
|
|||
filter: &crate::changes::ChangeFilter,
|
||||
) -> Result<crate::changes::ChangeSet> {
|
||||
let coord = self.coordinator.read().await;
|
||||
let from_commit = coord.resolve_commit(&SnapshotId::new(from_commit_id)).await?;
|
||||
let from_commit = coord
|
||||
.resolve_commit(&SnapshotId::new(from_commit_id))
|
||||
.await?;
|
||||
let to_commit = coord.resolve_commit(&SnapshotId::new(to_commit_id)).await?;
|
||||
let from_snap = coord
|
||||
.resolve_target(&ReadTarget::Snapshot(SnapshotId::new(
|
||||
|
|
@ -753,7 +765,11 @@ impl Omnigraph {
|
|||
/// Create a Snapshot at any historical manifest version.
|
||||
pub async fn snapshot_at_version(&self, version: u64) -> Result<Snapshot> {
|
||||
self.ensure_schema_state_valid().await?;
|
||||
self.coordinator.read().await.snapshot_at_version(version).await
|
||||
self.coordinator
|
||||
.read()
|
||||
.await
|
||||
.snapshot_at_version(version)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn export_jsonl(
|
||||
|
|
@ -894,11 +910,20 @@ impl Omnigraph {
|
|||
}
|
||||
|
||||
pub(crate) async fn active_branch(&self) -> Option<String> {
|
||||
self.coordinator.read().await.current_branch().map(str::to_string)
|
||||
self.coordinator
|
||||
.read()
|
||||
.await
|
||||
.current_branch()
|
||||
.map(str::to_string)
|
||||
}
|
||||
|
||||
async fn ensure_branch_delete_safe(&self, branch: &str, branches: &[String]) -> Result<()> {
|
||||
let descendants = self.coordinator.read().await.branch_descendants(branch).await?;
|
||||
let descendants = self
|
||||
.coordinator
|
||||
.read()
|
||||
.await
|
||||
.branch_descendants(branch)
|
||||
.await?;
|
||||
if let Some(descendant) = descendants.first() {
|
||||
return Err(OmniError::manifest_conflict(format!(
|
||||
"cannot delete branch '{}' because descendant branch '{}' still depends on it",
|
||||
|
|
@ -954,7 +979,12 @@ impl Omnigraph {
|
|||
}
|
||||
|
||||
async fn delete_branch_storage_only(&self, branch: &str) -> Result<()> {
|
||||
let active = self.coordinator.read().await.current_branch().map(str::to_string);
|
||||
let active = self
|
||||
.coordinator
|
||||
.read()
|
||||
.await
|
||||
.current_branch()
|
||||
.map(str::to_string);
|
||||
if active.as_deref() == Some(branch) {
|
||||
return Err(OmniError::manifest_conflict(format!(
|
||||
"cannot delete currently active branch '{}'",
|
||||
|
|
@ -1013,11 +1043,7 @@ impl Omnigraph {
|
|||
self.coordinator.write().await.branch_create(name).await
|
||||
}
|
||||
|
||||
pub async fn branch_create_from(
|
||||
&self,
|
||||
from: impl Into<ReadTarget>,
|
||||
name: &str,
|
||||
) -> Result<()> {
|
||||
pub async fn branch_create_from(&self, from: impl Into<ReadTarget>, name: &str) -> Result<()> {
|
||||
self.branch_create_from_as(from, name, None).await
|
||||
}
|
||||
|
||||
|
|
@ -1134,7 +1160,9 @@ impl Omnigraph {
|
|||
|
||||
pub async fn get_commit(&self, commit_id: &str) -> Result<GraphCommit> {
|
||||
self.ensure_schema_state_valid().await?;
|
||||
self.coordinator.read().await
|
||||
self.coordinator
|
||||
.read()
|
||||
.await
|
||||
.resolve_commit(&SnapshotId::new(commit_id))
|
||||
.await
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
//! Lance compaction + version cleanup exposed at the graph level.
|
||||
//!
|
||||
//! Lance accumulates many small `.lance` fragment files per table over the
|
||||
//! life of a repo: each `write`, `load`, and `change` op appends one or more
|
||||
//! life of a graph: each `write`, `load`, and `change` op appends one or more
|
||||
//! fragments and a new manifest. Over long timescales this hurts open times
|
||||
//! and S3 object counts without improving anything.
|
||||
//!
|
||||
|
|
@ -176,10 +176,9 @@ pub async fn cleanup_all_tables(
|
|||
clean_referenced_branches: false,
|
||||
delete_rate_limit: None,
|
||||
};
|
||||
let removed: RemovalStats =
|
||||
lance::dataset::cleanup::cleanup_old_versions(&ds, policy)
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
let removed: RemovalStats = lance::dataset::cleanup::cleanup_old_versions(&ds, policy)
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
Ok(TableCleanupStats {
|
||||
table_key,
|
||||
bytes_removed: removed.bytes_removed,
|
||||
|
|
@ -198,12 +197,7 @@ fn all_table_keys(catalog: &omnigraph_compiler::catalog::Catalog) -> Vec<String>
|
|||
.node_types
|
||||
.keys()
|
||||
.map(|n| format!("node:{}", n))
|
||||
.chain(
|
||||
catalog
|
||||
.edge_types
|
||||
.keys()
|
||||
.map(|n| format!("edge:{}", n)),
|
||||
)
|
||||
.chain(catalog.edge_types.keys().map(|n| format!("edge:{}", n)))
|
||||
.collect();
|
||||
keys.sort();
|
||||
keys
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ pub(super) async fn apply_schema_with_lock(
|
|||
// Skip `main` and internal system branches. The schema-apply lock branch
|
||||
// is excluded because it is the cluster-wide schema-apply serializer.
|
||||
// `__run__*` branches are no longer created; the filter remains as
|
||||
// defense-in-depth for legacy repos with leftover staging branches.
|
||||
// defense-in-depth for legacy graphs with leftover staging branches.
|
||||
// A future production sweep will let this guard go.
|
||||
let blocking_branches = branches
|
||||
.into_iter()
|
||||
|
|
@ -105,7 +105,7 @@ pub(super) async fn apply_schema_with_lock(
|
|||
.collect::<Vec<_>>();
|
||||
if !blocking_branches.is_empty() {
|
||||
return Err(OmniError::manifest_conflict(format!(
|
||||
"schema apply requires a repo with only main; found non-main branches: {}",
|
||||
"schema apply requires a graph with only main; found non-main branches: {}",
|
||||
blocking_branches.join(", ")
|
||||
)));
|
||||
}
|
||||
|
|
@ -780,7 +780,7 @@ pub(super) async fn acquire_schema_apply_lock(db: &Omnigraph) -> Result<()> {
|
|||
if !blocking_branches.is_empty() {
|
||||
let _ = release_schema_apply_lock(db).await;
|
||||
return Err(OmniError::manifest_conflict(format!(
|
||||
"schema apply requires a repo with only main; found non-main branches: {}",
|
||||
"schema apply requires a graph with only main; found non-main branches: {}",
|
||||
blocking_branches.join(", ")
|
||||
)));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ pub(crate) struct RecoveryAudit {
|
|||
}
|
||||
|
||||
impl RecoveryAudit {
|
||||
/// Open the recovery-audit dataset for the repo, or return a handle
|
||||
/// Open the recovery-audit dataset for the graph, or return a handle
|
||||
/// with no dataset yet (created on first append). Mirrors the
|
||||
/// optional-dataset pattern from `_graph_commit_actors.lance`.
|
||||
pub(crate) async fn open(root_uri: &str) -> Result<Self> {
|
||||
|
|
@ -205,9 +205,7 @@ fn recovery_record_to_batch(record: &RecoveryAuditRecord) -> Result<RecordBatch>
|
|||
vec![
|
||||
Arc::new(StringArray::from(vec![record.graph_commit_id.clone()])),
|
||||
Arc::new(StringArray::from(vec![record.recovery_kind.as_str()])),
|
||||
Arc::new(StringArray::from(vec![record
|
||||
.recovery_for_actor
|
||||
.clone()])),
|
||||
Arc::new(StringArray::from(vec![record.recovery_for_actor.clone()])),
|
||||
Arc::new(StringArray::from(vec![record.operation_id.clone()])),
|
||||
Arc::new(StringArray::from(vec![record.sidecar_writer_kind.clone()])),
|
||||
Arc::new(StringArray::from(vec![outcomes_json])),
|
||||
|
|
@ -221,10 +219,14 @@ fn decode_row(batch: &RecordBatch, row: usize) -> Result<RecoveryAuditRecord> {
|
|||
let str_col = |name: &str| -> Result<&StringArray> {
|
||||
batch
|
||||
.column_by_name(name)
|
||||
.ok_or_else(|| OmniError::manifest_internal(format!("missing column '{}' in recovery audit", name)))?
|
||||
.ok_or_else(|| {
|
||||
OmniError::manifest_internal(format!("missing column '{}' in recovery audit", name))
|
||||
})?
|
||||
.as_any()
|
||||
.downcast_ref::<StringArray>()
|
||||
.ok_or_else(|| OmniError::manifest_internal(format!("column '{}' has wrong type", name)))
|
||||
.ok_or_else(|| {
|
||||
OmniError::manifest_internal(format!("column '{}' has wrong type", name))
|
||||
})
|
||||
};
|
||||
let ts_col = batch
|
||||
.column_by_name("created_at")
|
||||
|
|
@ -269,9 +271,7 @@ pub(crate) fn now_micros() -> Result<i64> {
|
|||
SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.map(|d| d.as_micros() as i64)
|
||||
.map_err(|e| {
|
||||
OmniError::manifest_internal(format!("system clock before unix epoch: {}", e))
|
||||
})
|
||||
.map_err(|e| OmniError::manifest_internal(format!("system clock before unix epoch: {}", e)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -307,7 +307,7 @@ mod tests {
|
|||
let root = dir.path().to_str().unwrap();
|
||||
|
||||
let mut audit = RecoveryAudit::open(root).await.unwrap();
|
||||
// Empty repo: list returns empty.
|
||||
// Empty graph: list returns empty.
|
||||
assert!(audit.list().await.unwrap().is_empty());
|
||||
|
||||
// Append + list.
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ pub(crate) async fn load_or_bootstrap_schema_contract(
|
|||
.collect::<Vec<_>>();
|
||||
if !public_non_main.is_empty() {
|
||||
return Err(schema_lock_conflict(format!(
|
||||
"repo is missing persisted schema state and has public branches ({}); public branches block schema evolution entirely",
|
||||
"graph is missing persisted schema state and has public branches ({}); public branches block schema evolution entirely",
|
||||
public_non_main.join(", ")
|
||||
)));
|
||||
}
|
||||
|
|
@ -70,7 +70,7 @@ pub(crate) async fn load_or_bootstrap_schema_contract(
|
|||
Ok((current_source_ir.clone(), state))
|
||||
}
|
||||
SchemaContractRead::PartialMissing => Err(schema_lock_conflict(
|
||||
"repo schema state is incomplete (_schema.ir.json and __schema_state.json must either both exist or both be absent)",
|
||||
"graph schema state is incomplete (_schema.ir.json and __schema_state.json must either both exist or both be absent)",
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
|
@ -84,7 +84,7 @@ pub(crate) async fn validate_schema_contract(
|
|||
SchemaContractRead::Present { ir, state } => (ir, state),
|
||||
SchemaContractRead::MissingAll | SchemaContractRead::PartialMissing => {
|
||||
return Err(schema_lock_conflict(
|
||||
"repo is missing persisted schema state; manual coordination is required before schema changes are allowed",
|
||||
"graph is missing persisted schema state; manual coordination is required before schema changes are allowed",
|
||||
));
|
||||
}
|
||||
};
|
||||
|
|
@ -163,7 +163,7 @@ pub(crate) async fn read_accepted_schema_ir(
|
|||
}
|
||||
SchemaContractRead::MissingAll | SchemaContractRead::PartialMissing => {
|
||||
Err(schema_lock_conflict(
|
||||
"repo is missing persisted schema state; manual coordination is required before schema changes are allowed",
|
||||
"graph is missing persisted schema state; manual coordination is required before schema changes are allowed",
|
||||
))
|
||||
}
|
||||
}
|
||||
|
|
@ -221,7 +221,7 @@ async fn read_schema_contract(
|
|||
})?;
|
||||
let state = serde_json::from_str::<SchemaState>(&state_json).map_err(|err| {
|
||||
schema_lock_conflict(format!(
|
||||
"repo schema state in {} is invalid: {}",
|
||||
"graph schema state in {} is invalid: {}",
|
||||
SCHEMA_STATE_FILENAME, err
|
||||
))
|
||||
})?;
|
||||
|
|
@ -234,7 +234,7 @@ async fn read_schema_contract(
|
|||
fn validate_persisted_schema_contract(ir: &SchemaIR, state: &SchemaState) -> Result<()> {
|
||||
if state.format_version != SCHEMA_STATE_FORMAT_VERSION {
|
||||
return Err(schema_lock_conflict(format!(
|
||||
"repo schema state format {} is unsupported",
|
||||
"graph schema state format {} is unsupported",
|
||||
state.format_version
|
||||
)));
|
||||
}
|
||||
|
|
@ -344,7 +344,7 @@ pub(crate) async fn recover_schema_state_files(
|
|||
// to the new Lance HEADs; we MUST also rename the staging files
|
||||
// forward so the catalog matches. Without this, the disambiguation
|
||||
// logic below sees actual_keys == live_keys (manifest didn't move)
|
||||
// and deletes the staging files, leaving the repo with new-schema
|
||||
// and deletes the staging files, leaving the graph with new-schema
|
||||
// data on disk but the old `_schema.pg` live — corruption.
|
||||
if crate::db::manifest::has_schema_apply_sidecar(root_uri, storage.as_ref()).await? {
|
||||
warn!(
|
||||
|
|
|
|||
|
|
@ -1037,8 +1037,16 @@ async fn execute_node_scan(
|
|||
let table_key = format!("node:{}", type_name);
|
||||
let ds = snapshot.open(&table_key).await?;
|
||||
|
||||
// Build Lance SQL filter string from non-search IR filters
|
||||
let filter_sql = build_lance_filter(filters, params);
|
||||
// Lower the IR filters to a DataFusion `Expr` and apply via
|
||||
// `Scanner::filter_expr` inside the configure closure. The string
|
||||
// pushdown path (`build_lance_filter` → `scanner.filter(&str)`) is
|
||||
// gone for node scans — structured Expr unlocks `CompOp::Contains`
|
||||
// pushdown (via `array_has`) and lets DF 53's optimizer rules
|
||||
// (vectorized IN-list, PhysicalExprSimplifier, CASE-NULL shortcut)
|
||||
// reach our predicates. Other call sites that still take string SQL
|
||||
// (hydrate_nodes for the Expand pushdown, count_rows, the mutation
|
||||
// delete path) migrate in follow-up MRs.
|
||||
let filter_expr = build_lance_filter_expr(filters, params);
|
||||
|
||||
// Blob columns must be excluded from scan when a filter is present
|
||||
// (Lance bug: BlobsDescriptions + filter triggers a projection assertion).
|
||||
|
|
@ -1056,10 +1064,15 @@ async fn execute_node_scan(
|
|||
let batches = crate::table_store::TableStore::scan_stream_with(
|
||||
&ds,
|
||||
projection,
|
||||
filter_sql.as_deref(),
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
|scanner| {
|
||||
// Apply the structured IR filter via Lance's Expr pushdown.
|
||||
if let Some(ref expr) = filter_expr {
|
||||
scanner.filter_expr(expr.clone());
|
||||
}
|
||||
|
||||
// Apply FTS queries from hoisted search filters (search/fuzzy/match_text in match clause)
|
||||
for filter in filters {
|
||||
if is_search_filter(filter) {
|
||||
|
|
@ -1288,6 +1301,125 @@ pub(super) fn literal_to_sql(lit: &Literal) -> String {
|
|||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Structured DataFusion-Expr pushdown
|
||||
//
|
||||
// Parallel to the `ir_*_to_sql` family above, these helpers lower the same
|
||||
// IR filter shapes to `datafusion::prelude::Expr` so we can call
|
||||
// `Scanner::filter_expr(Expr)` instead of `Scanner::filter(&str)`. The
|
||||
// structured form unlocks two things the string path could not express:
|
||||
//
|
||||
// 1. `CompOp::Contains` against list-typed columns (lowered to
|
||||
// `array_has(col, value)` — requires the `nested_expressions`
|
||||
// feature on the `datafusion` crate, enabled in the workspace).
|
||||
// 2. Optimizer rules in DataFusion 53 that act on `Expr` shapes
|
||||
// (vectorized `IN`-list eq kernel, `PhysicalExprSimplifier`, the
|
||||
// `CASE WHEN x THEN y ELSE NULL` shortcut, etc.).
|
||||
//
|
||||
// Search predicates (`is_search_filter`) are still handled separately via
|
||||
// `scanner.full_text_search(...)`, not via filter_expr — they stay None
|
||||
// here just like in `ir_filter_to_sql`. The `literal_to_sql` path remains
|
||||
// because the mutation/update layer (`exec/mutation.rs`) still produces
|
||||
// SQL strings for `Dataset::delete(&str)`; that migration is MR-A's
|
||||
// territory (Lance #6658 + delete two-phase).
|
||||
|
||||
/// Convert IR filters to a single DataFusion `Expr` (AND-joined), or
|
||||
/// `None` if no filter is pushable.
|
||||
pub(super) fn build_lance_filter_expr(
|
||||
filters: &[IRFilter],
|
||||
params: &ParamMap,
|
||||
) -> Option<datafusion::prelude::Expr> {
|
||||
use datafusion::logical_expr::Operator;
|
||||
use datafusion::prelude::Expr;
|
||||
|
||||
let mut acc: Option<Expr> = None;
|
||||
for f in filters {
|
||||
let Some(e) = ir_filter_to_expr(f, params) else {
|
||||
continue;
|
||||
};
|
||||
acc = Some(match acc {
|
||||
None => e,
|
||||
Some(prev) => Expr::BinaryExpr(datafusion::logical_expr::BinaryExpr::new(
|
||||
Box::new(prev),
|
||||
Operator::And,
|
||||
Box::new(e),
|
||||
)),
|
||||
});
|
||||
}
|
||||
acc
|
||||
}
|
||||
|
||||
/// Convert a single IR filter to a DataFusion `Expr`. Returns `None` for
|
||||
/// search-mode filters (handled via `scanner.full_text_search`) or any
|
||||
/// expression shape we can't pushdown.
|
||||
pub(super) fn ir_filter_to_expr(
|
||||
filter: &IRFilter,
|
||||
params: &ParamMap,
|
||||
) -> Option<datafusion::prelude::Expr> {
|
||||
use datafusion::functions_nested::expr_fn::array_has;
|
||||
|
||||
if is_search_filter(filter) {
|
||||
return None;
|
||||
}
|
||||
|
||||
// List-contains: `prop CONTAINS value` lowers to `array_has(prop, value)`.
|
||||
// This is the case `ir_filter_to_sql` had to return None for ("Can't
|
||||
// pushdown list contains"); with structured Expr it pushes down fine.
|
||||
if matches!(filter.op, CompOp::Contains) {
|
||||
let left = ir_expr_to_expr(&filter.left, params)?;
|
||||
let right = ir_expr_to_expr(&filter.right, params)?;
|
||||
return Some(array_has(left, right));
|
||||
}
|
||||
|
||||
let left = ir_expr_to_expr(&filter.left, params)?;
|
||||
let right = ir_expr_to_expr(&filter.right, params)?;
|
||||
Some(match filter.op {
|
||||
CompOp::Eq => left.eq(right),
|
||||
CompOp::Ne => left.not_eq(right),
|
||||
CompOp::Gt => left.gt(right),
|
||||
CompOp::Lt => left.lt(right),
|
||||
CompOp::Ge => left.gt_eq(right),
|
||||
CompOp::Le => left.lt_eq(right),
|
||||
CompOp::Contains => unreachable!("handled above"),
|
||||
})
|
||||
}
|
||||
|
||||
/// Convert an IR expression to a DataFusion `Expr`. Returns `None` for
|
||||
/// shapes we don't support in pushdown (search funcs, RRF, aggregates,
|
||||
/// variable refs that aren't a property access).
|
||||
pub(super) fn ir_expr_to_expr(
|
||||
expr: &IRExpr,
|
||||
params: &ParamMap,
|
||||
) -> Option<datafusion::prelude::Expr> {
|
||||
use datafusion::prelude::{col, lit};
|
||||
match expr {
|
||||
IRExpr::PropAccess { property, .. } => Some(col(property)),
|
||||
IRExpr::Literal(l) => literal_to_expr(l),
|
||||
IRExpr::Param(name) => params.get(name).and_then(literal_to_expr),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a Literal to a DataFusion `Expr`. Returns `None` for List
|
||||
/// (which the existing SQL path also can't pushdown — falls through to
|
||||
/// post-scan in-memory application).
|
||||
fn literal_to_expr(lit: &Literal) -> Option<datafusion::prelude::Expr> {
|
||||
use datafusion::prelude::lit as df_lit;
|
||||
Some(match lit {
|
||||
Literal::Null => df_lit(datafusion::scalar::ScalarValue::Null),
|
||||
Literal::String(s) => df_lit(s.clone()),
|
||||
Literal::Integer(n) => df_lit(*n),
|
||||
Literal::Float(f) => df_lit(*f),
|
||||
Literal::Bool(b) => df_lit(*b),
|
||||
// Date/DateTime stored as strings; pass through as string literals
|
||||
// — Lance/DataFusion handles the comparison against typed columns
|
||||
// via implicit cast, matching the existing string-SQL behavior.
|
||||
Literal::Date(s) => df_lit(s.clone()),
|
||||
Literal::DateTime(s) => df_lit(s.clone()),
|
||||
Literal::List(_) => return None,
|
||||
})
|
||||
}
|
||||
|
||||
fn prefix_batch(batch: &RecordBatch, variable: &str) -> Result<RecordBatch> {
|
||||
let fields: Vec<Field> = batch.schema().fields().iter().map(|f| {
|
||||
Field::new(format!("{}.{}", variable, f.name()), f.data_type().clone(), f.is_nullable())
|
||||
|
|
|
|||
|
|
@ -26,10 +26,10 @@ use arrow_schema::SchemaRef;
|
|||
use lance::Dataset;
|
||||
use omnigraph_compiler::catalog::EdgeType;
|
||||
|
||||
use crate::db::{MutationOpKind, SubTableUpdate};
|
||||
use crate::db::manifest::{
|
||||
new_sidecar, write_sidecar, RecoverySidecarHandle, SidecarKind, SidecarTablePin,
|
||||
RecoverySidecarHandle, SidecarKind, SidecarTablePin, new_sidecar, write_sidecar,
|
||||
};
|
||||
use crate::db::{MutationOpKind, SubTableUpdate};
|
||||
use crate::error::{OmniError, Result};
|
||||
|
||||
/// Whether the per-table accumulator should commit via `stage_append`
|
||||
|
|
@ -119,10 +119,12 @@ impl MutationStaging {
|
|||
expected_version: u64,
|
||||
op_kind: MutationOpKind,
|
||||
) {
|
||||
self.paths.entry(table_key.to_string()).or_insert(StagedTablePath {
|
||||
full_path,
|
||||
table_branch,
|
||||
});
|
||||
self.paths
|
||||
.entry(table_key.to_string())
|
||||
.or_insert(StagedTablePath {
|
||||
full_path,
|
||||
table_branch,
|
||||
});
|
||||
self.expected_versions
|
||||
.entry(table_key.to_string())
|
||||
.or_insert(expected_version);
|
||||
|
|
@ -202,7 +204,8 @@ impl MutationStaging {
|
|||
|
||||
/// Record a delete that already inline-committed at the Lance layer.
|
||||
pub(crate) fn record_inline(&mut self, update: SubTableUpdate) {
|
||||
self.inline_committed.insert(update.table_key.clone(), update);
|
||||
self.inline_committed
|
||||
.insert(update.table_key.clone(), update);
|
||||
}
|
||||
|
||||
/// Read-your-writes accessor: the accumulated pending batches for
|
||||
|
|
@ -308,18 +311,13 @@ impl MutationStaging {
|
|||
// mode is exempt because no-key node and edge inserts use
|
||||
// ULID-generated ids that are unique within a query.
|
||||
let combined = match table.mode {
|
||||
PendingMode::Merge => {
|
||||
dedupe_merge_batches_by_id(&table.schema, table.batches)?
|
||||
}
|
||||
PendingMode::Merge => dedupe_merge_batches_by_id(&table.schema, table.batches)?,
|
||||
PendingMode::Append => {
|
||||
if table.batches.len() == 1 {
|
||||
table.batches.into_iter().next().unwrap()
|
||||
} else {
|
||||
arrow_select::concat::concat_batches(
|
||||
&table.schema,
|
||||
&table.batches,
|
||||
)
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?
|
||||
arrow_select::concat::concat_batches(&table.schema, &table.batches)
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
@ -327,9 +325,7 @@ impl MutationStaging {
|
|||
// Stage produces uncommitted fragments + transaction. No
|
||||
// Lance HEAD advance until `commit_all` runs `commit_staged`.
|
||||
let staged = match table.mode {
|
||||
PendingMode::Append => {
|
||||
db.table_store().stage_append(&ds, combined, &[]).await?
|
||||
}
|
||||
PendingMode::Append => db.table_store().stage_append(&ds, combined, &[]).await?,
|
||||
PendingMode::Merge => {
|
||||
db.table_store()
|
||||
.stage_merge_insert(
|
||||
|
|
@ -420,7 +416,7 @@ impl StagedMutation {
|
|||
///
|
||||
/// Revalidation: between `stage_all` and `commit_all`, another
|
||||
/// writer (in the same process or another process sharing the
|
||||
/// repo) may have committed to one of our touched tables, advancing
|
||||
/// graph) may have committed to one of our touched tables, advancing
|
||||
/// the manifest pin past our `expected_version`. We revalidate
|
||||
/// under the queue and fail-fast with `manifest_conflict` before
|
||||
/// any `commit_staged` so the orphaned uncommitted fragments stay
|
||||
|
|
@ -462,9 +458,8 @@ impl StagedMutation {
|
|||
// from interleaving between our delete and our publish, which
|
||||
// would otherwise leave a Lance-HEAD-ahead residual the
|
||||
// delete-only sidecar (added below) would have to recover.
|
||||
let mut queue_keys: Vec<(String, Option<String>)> = Vec::with_capacity(
|
||||
staged.len() + inline_committed.len(),
|
||||
);
|
||||
let mut queue_keys: Vec<(String, Option<String>)> =
|
||||
Vec::with_capacity(staged.len() + inline_committed.len());
|
||||
for entry in &staged {
|
||||
queue_keys.push((entry.table_key.clone(), entry.path.table_branch.clone()));
|
||||
}
|
||||
|
|
@ -565,9 +560,8 @@ impl StagedMutation {
|
|||
// Finding 3 hazard: delete-only mutations would otherwise skip
|
||||
// the sidecar, leaving any commit→publish residual unreachable
|
||||
// by recovery.
|
||||
let mut pins: Vec<SidecarTablePin> = Vec::with_capacity(
|
||||
staged.len() + inline_committed.len(),
|
||||
);
|
||||
let mut pins: Vec<SidecarTablePin> =
|
||||
Vec::with_capacity(staged.len() + inline_committed.len());
|
||||
for entry in &staged {
|
||||
pins.push(SidecarTablePin {
|
||||
table_key: entry.table_key.clone(),
|
||||
|
|
@ -899,10 +893,7 @@ pub(crate) async fn count_src_per_edge(
|
|||
/// Count pending edges per `src` with NO dedup. Correct when caller
|
||||
/// guarantees pending rows have unique primary keys (engine inserts via
|
||||
/// fresh ULID; loader Append mode).
|
||||
fn count_pending_src_naive(
|
||||
pending_batches: &[RecordBatch],
|
||||
counts: &mut HashMap<String, u32>,
|
||||
) {
|
||||
fn count_pending_src_naive(pending_batches: &[RecordBatch], counts: &mut HashMap<String, u32>) {
|
||||
for batch in pending_batches {
|
||||
let Some(col) = batch.column_by_name("src") else {
|
||||
continue;
|
||||
|
|
@ -947,12 +938,15 @@ fn count_pending_src_with_dedupe(
|
|||
dedupe_key_column
|
||||
)));
|
||||
};
|
||||
let key_arr = key_col.as_any().downcast_ref::<StringArray>().ok_or_else(|| {
|
||||
OmniError::Lance(format!(
|
||||
"count_src_per_edge: pending '{}' column is not Utf8",
|
||||
dedupe_key_column
|
||||
))
|
||||
})?;
|
||||
let key_arr = key_col
|
||||
.as_any()
|
||||
.downcast_ref::<StringArray>()
|
||||
.ok_or_else(|| {
|
||||
OmniError::Lance(format!(
|
||||
"count_src_per_edge: pending '{}' column is not Utf8",
|
||||
dedupe_key_column
|
||||
))
|
||||
})?;
|
||||
let src_arr = batch
|
||||
.column_by_name("src")
|
||||
.and_then(|c| c.as_any().downcast_ref::<StringArray>());
|
||||
|
|
|
|||
|
|
@ -1,3 +1,12 @@
|
|||
// Lance 6's trait surface (heavier futures/streams nesting around the
|
||||
// staged-write API in `storage_layer.rs`) pushes us past the default
|
||||
// trait-resolution recursion limit of 128 on Linux builds. Raising to
|
||||
// 256 here is the upstream-suggested fix from rustc itself
|
||||
// ("consider increasing the recursion limit"). macOS happens to short-
|
||||
// circuit before tripping the limit; CI on Linux does not. Revisit if
|
||||
// future Lance bumps stop needing this.
|
||||
#![recursion_limit = "256"]
|
||||
|
||||
pub mod changes;
|
||||
pub mod db;
|
||||
pub mod embedding;
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ impl StorageAdapter for LocalStorageAdapter {
|
|||
// Ensure parent directory exists. S3 has no equivalent (PutObject
|
||||
// is path-agnostic). For local fs, callers like the recovery
|
||||
// sidecar protocol expect transparent directory creation under
|
||||
// the repo root (the `__recovery/` directory doesn't pre-exist;
|
||||
// the graph root (the `__recovery/` directory doesn't pre-exist;
|
||||
// first sidecar write creates it).
|
||||
if let Some(parent) = path.parent() {
|
||||
if !parent.as_os_str().is_empty() {
|
||||
|
|
@ -398,10 +398,13 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn storage_backend_selection_is_scheme_aware() {
|
||||
assert_eq!(storage_kind_for_uri("/tmp/repo"), StorageKind::Local);
|
||||
assert_eq!(storage_kind_for_uri("file:///tmp/repo"), StorageKind::Local);
|
||||
assert_eq!(storage_kind_for_uri("/tmp/graph"), StorageKind::Local);
|
||||
assert_eq!(
|
||||
storage_kind_for_uri("s3://omnigraph-preview/repo"),
|
||||
storage_kind_for_uri("file:///tmp/graph"),
|
||||
StorageKind::Local
|
||||
);
|
||||
assert_eq!(
|
||||
storage_kind_for_uri("s3://omnigraph-preview/graph"),
|
||||
StorageKind::S3
|
||||
);
|
||||
}
|
||||
|
|
@ -440,8 +443,8 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn parse_s3_uri_splits_bucket_and_key() {
|
||||
let location = parse_s3_uri("s3://bucket/repo/_schema.pg").unwrap();
|
||||
let location = parse_s3_uri("s3://bucket/graph/_schema.pg").unwrap();
|
||||
assert_eq!(location.bucket, "bucket");
|
||||
assert_eq!(location.key, "repo/_schema.pg");
|
||||
assert_eq!(location.key, "graph/_schema.pg");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,11 +10,15 @@
|
|||
//! ## Transitional residuals on the trait
|
||||
//!
|
||||
//! Several inline-commit methods remain on the trait surface as
|
||||
//! documented residuals: `delete_where` (Lance 4.0.0's `DeleteJob` is
|
||||
//! `pub(crate)` — see [#6658](https://github.com/lance-format/lance/issues/6658)),
|
||||
//! documented residuals: `delete_where`
|
||||
//! ([#6658](https://github.com/lance-format/lance/issues/6658) closed
|
||||
//! 2026-05-14, but the public `DeleteBuilder::execute_uncommitted` API
|
||||
//! did not backport to the 6.x release line — it first ships in
|
||||
//! `v7.0.0-beta.10`. Migration to staged two-phase delete is tracked as
|
||||
//! MR-A and is gated on the Lance v7.x bump, not the current v6.0.1 pin),
|
||||
//! `create_vector_index` (segment-commit-path requires
|
||||
//! `build_index_metadata_from_segments` which is `pub(crate)` — see
|
||||
//! [#6666](https://github.com/lance-format/lance/issues/6666)), and the
|
||||
//! [#6666](https://github.com/lance-format/lance/issues/6666), still open), and the
|
||||
//! legacy `append_batch` / `merge_insert_batches` / `overwrite_batch` /
|
||||
//! `create_btree_index` / `create_inverted_index` paths kept while
|
||||
//! engine call sites finish migrating off of them (Phase 1b / Phase 9
|
||||
|
|
|
|||
|
|
@ -8,15 +8,17 @@ use lance::Dataset;
|
|||
use lance::blob::BlobArrayBuilder;
|
||||
use lance::dataset::scanner::{ColumnOrdering, DatasetRecordBatchStream, Scanner};
|
||||
use lance::dataset::transaction::{Operation, Transaction, TransactionBuilder};
|
||||
use lance::dataset::write::merge_insert::SourceDedupeBehavior;
|
||||
use lance::dataset::{
|
||||
CommitBuilder, InsertBuilder, MergeInsertBuilder, WhenMatched, WhenNotMatched, WriteMode,
|
||||
WriteParams,
|
||||
};
|
||||
use lance::datatypes::BlobKind;
|
||||
use lance::index::DatasetIndexExt;
|
||||
use lance::index::scalar::IndexDetails;
|
||||
use lance_file::version::LanceFileVersion;
|
||||
use lance_index::scalar::{InvertedIndexParams, ScalarIndexParams};
|
||||
use lance_index::{DatasetIndexExt, IndexType, is_system_index};
|
||||
use lance_index::{IndexType, is_system_index};
|
||||
use lance_linalg::distance::MetricType;
|
||||
use lance_table::format::{Fragment, IndexMetadata, RowIdMeta};
|
||||
use lance_table::rowids::{RowIdSequence, write_row_ids};
|
||||
|
|
@ -651,15 +653,58 @@ impl TableStore {
|
|||
return self.table_state(dataset_uri, &ds).await;
|
||||
}
|
||||
|
||||
// Precondition for the FirstSeen workaround below: every caller of
|
||||
// this primitive must hand in a source batch that is unique by
|
||||
// `key_columns`. Without this check, `SourceDedupeBehavior::FirstSeen`
|
||||
// would silently collapse genuine duplicates instead of erroring.
|
||||
check_batch_unique_by_keys(&batch, &key_columns, "merge_insert_batch")?;
|
||||
|
||||
// TODO(lance-upstream): MergeInsertBuilder does not accept WriteParams,
|
||||
// so allow_external_blob_outside_bases cannot be set here. External URI
|
||||
// blobs via merge_insert (LoadMode::Merge, mutations) are unsupported
|
||||
// until Lance exposes WriteParams on MergeInsertBuilder.
|
||||
let ds = Arc::new(ds);
|
||||
let job = MergeInsertBuilder::try_new(ds, key_columns)
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?
|
||||
.when_matched(when_matched)
|
||||
.when_not_matched(when_not_matched)
|
||||
let mut builder = MergeInsertBuilder::try_new(ds, key_columns)
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
builder.when_matched(when_matched);
|
||||
builder.when_not_matched(when_not_matched);
|
||||
// Workaround for a Lance 4.0.x bug class where sequential
|
||||
// merge_insert calls against rows previously rewritten by
|
||||
// merge_insert produce a spurious "Ambiguous merge inserts:
|
||||
// multiple source rows match the same target row on (id = ...)"
|
||||
// error. Lance's `processed_row_ids: Mutex<HashSet<u64>>`
|
||||
// (lance-4.0.0 `src/dataset/write/merge_insert.rs:2099`)
|
||||
// double-processes the same source/target match against
|
||||
// datasets previously rewritten by merge_insert, and the default
|
||||
// `SourceDedupeBehavior::Fail` errors on the second insertion.
|
||||
// `FirstSeen` makes Lance skip the duplicate match instead.
|
||||
//
|
||||
// Covers both observed surfaces:
|
||||
// - PR #98 (sequential `load --mode merge` against same keys).
|
||||
// - MR-920 (sequential `update T set {f} where x=y` on same row).
|
||||
//
|
||||
// Correctness-preserving for OmniGraph because every call path
|
||||
// that reaches this primitive either pre-dedupes the source batch
|
||||
// by id, or surfaces a real source dup via the
|
||||
// `check_batch_unique_by_keys` precondition above (which fires
|
||||
// before the FirstSeen setter has a chance to silently collapse
|
||||
// anything):
|
||||
// - Load path: `enforce_unique_constraints_intra_batch`
|
||||
// (`loader/mod.rs:1453`) errors on intra-batch `@key` dups.
|
||||
// - Mutate path: `MutationStaging::finalize` (`exec/staging.rs`)
|
||||
// accumulates and dedupes by `id`.
|
||||
// - Branch-merge path: `compute_source_delta` /
|
||||
// `compute_three_way_delta` (`exec/merge.rs`) walk via
|
||||
// `OrderedTableCursor` and `push_row` each id at most once.
|
||||
// So FirstSeen only suppresses the spurious Lance behavior, never
|
||||
// user data. Pinned by `loader_rejects_intra_batch_duplicate_keys`
|
||||
// in `tests/consistency.rs` plus the
|
||||
// `check_batch_unique_by_keys` precondition.
|
||||
//
|
||||
// Retire when upstream Lance fixes the bug class. Tracked at
|
||||
// MR-957; upstream: lance-format/lance#6877.
|
||||
builder.source_dedupe_behavior(SourceDedupeBehavior::FirstSeen);
|
||||
let job = builder
|
||||
.try_build()
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
|
||||
|
|
@ -870,11 +915,26 @@ impl TableStore {
|
|||
"stage_merge_insert called with empty batch".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
// Precondition for FirstSeen below. See the comment on
|
||||
// `merge_insert_batch` for why this check is here, not on the caller:
|
||||
// every call path that reaches stage_merge_insert (load,
|
||||
// MutationStaging::finalize, branch_merge::publish_rewritten_merge_table)
|
||||
// must hand in a source batch that is unique by `key_columns`.
|
||||
check_batch_unique_by_keys(&batch, &key_columns, "stage_merge_insert")?;
|
||||
|
||||
let ds = Arc::new(ds);
|
||||
let job = MergeInsertBuilder::try_new(ds, key_columns)
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?
|
||||
.when_matched(when_matched)
|
||||
.when_not_matched(when_not_matched)
|
||||
let mut builder = MergeInsertBuilder::try_new(ds, key_columns)
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
builder.when_matched(when_matched);
|
||||
builder.when_not_matched(when_not_matched);
|
||||
// See `merge_insert_batch` for the FirstSeen rationale. Workaround
|
||||
// for the Lance 4.0.x bug class where sequential merge_insert /
|
||||
// update against rows previously rewritten by merge_insert trips
|
||||
// Lance's `processed_row_ids` HashSet and errors under the default
|
||||
// `SourceDedupeBehavior::Fail`. Retire when upstream Lance is fixed.
|
||||
builder.source_dedupe_behavior(SourceDedupeBehavior::FirstSeen);
|
||||
let job = builder
|
||||
.try_build()
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
let schema = batch.schema();
|
||||
|
|
@ -1651,3 +1711,107 @@ fn combine_committed_with_staged(ds: &Dataset, staged: &[StagedWrite]) -> Vec<Fr
|
|||
}
|
||||
combined
|
||||
}
|
||||
|
||||
/// Precondition guard for `merge_insert_batch` and `stage_merge_insert`.
|
||||
/// Both opt into `SourceDedupeBehavior::FirstSeen` to suppress the Lance
|
||||
/// `processed_row_ids` bug (MR-957). FirstSeen would *also* silently
|
||||
/// collapse genuine duplicate source keys; this check restores fail-fast
|
||||
/// behavior on real dups by erroring before the builder gets a chance to
|
||||
/// silently skip them.
|
||||
///
|
||||
/// Today only single-column string keys are used at the call sites
|
||||
/// (`vec!["id".to_string()]`). The check restricts itself to that shape
|
||||
/// and surfaces an internal error if a future caller passes anything
|
||||
/// else — keeping the assumption explicit instead of silently degrading.
|
||||
fn check_batch_unique_by_keys(
|
||||
batch: &RecordBatch,
|
||||
key_columns: &[String],
|
||||
context: &'static str,
|
||||
) -> Result<()> {
|
||||
if key_columns.len() != 1 {
|
||||
return Err(OmniError::manifest_internal(format!(
|
||||
"{}: check_batch_unique_by_keys currently supports single-column keys only, got {:?}",
|
||||
context, key_columns
|
||||
)));
|
||||
}
|
||||
let key_col_name = &key_columns[0];
|
||||
let column = batch.column_by_name(key_col_name).ok_or_else(|| {
|
||||
OmniError::manifest_internal(format!(
|
||||
"{}: source batch missing key column '{}'",
|
||||
context, key_col_name
|
||||
))
|
||||
})?;
|
||||
let strs = column
|
||||
.as_any()
|
||||
.downcast_ref::<StringArray>()
|
||||
.ok_or_else(|| {
|
||||
OmniError::manifest_internal(format!(
|
||||
"{}: key column '{}' is not a StringArray (got {:?})",
|
||||
context,
|
||||
key_col_name,
|
||||
column.data_type()
|
||||
))
|
||||
})?;
|
||||
|
||||
let mut seen: std::collections::HashSet<&str> =
|
||||
std::collections::HashSet::with_capacity(batch.num_rows());
|
||||
for i in 0..strs.len() {
|
||||
if !strs.is_valid(i) {
|
||||
continue;
|
||||
}
|
||||
let v = strs.value(i);
|
||||
if !seen.insert(v) {
|
||||
return Err(OmniError::manifest(format!(
|
||||
"{}: duplicate source row for key '{}' (column '{}'); \
|
||||
callers must hand in a batch unique by `key_columns` \
|
||||
— see MR-957",
|
||||
context, v, key_col_name
|
||||
)));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use arrow_array::StringArray;
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
|
||||
fn batch_with_ids(ids: &[&str]) -> RecordBatch {
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Utf8, false)]));
|
||||
let col = Arc::new(StringArray::from(ids.to_vec())) as ArrayRef;
|
||||
RecordBatch::try_new(schema, vec![col]).unwrap()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn check_batch_unique_by_keys_passes_when_all_unique() {
|
||||
let batch = batch_with_ids(&["a", "b", "c"]);
|
||||
check_batch_unique_by_keys(&batch, &["id".to_string()], "test").unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn check_batch_unique_by_keys_errors_on_duplicate_id() {
|
||||
let batch = batch_with_ids(&["a", "b", "a"]);
|
||||
let err =
|
||||
check_batch_unique_by_keys(&batch, &["id".to_string()], "test").unwrap_err();
|
||||
let msg = err.to_string();
|
||||
assert!(
|
||||
msg.contains("duplicate source row for key 'a'"),
|
||||
"unexpected error: {msg}"
|
||||
);
|
||||
assert!(msg.contains("MR-957"), "error should reference MR-957: {msg}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn check_batch_unique_by_keys_rejects_multi_column_keys() {
|
||||
let batch = batch_with_ids(&["a"]);
|
||||
let err = check_batch_unique_by_keys(
|
||||
&batch,
|
||||
&["id".to_string(), "other".to_string()],
|
||||
"test",
|
||||
)
|
||||
.unwrap_err();
|
||||
assert!(err.to_string().contains("single-column keys only"));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@ use std::fs;
|
|||
|
||||
use arrow_array::{Array, Int32Array, UInt64Array};
|
||||
use futures::TryStreamExt;
|
||||
use lance_index::{DatasetIndexExt, is_system_index};
|
||||
use lance::index::DatasetIndexExt;
|
||||
use lance_index::is_system_index;
|
||||
|
||||
use omnigraph::db::commit_graph::CommitGraph;
|
||||
use omnigraph::db::{MergeOutcome, Omnigraph, ReadTarget};
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ async fn composite_flow_canonical_lifecycle() {
|
|||
let uri = dir.path().to_str().unwrap();
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// Step 1: init a fresh repo with the standard test schema.
|
||||
// Step 1: init a fresh graph with the standard test schema.
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
||||
let v_init = version_branch(&db, "main").await.unwrap();
|
||||
|
|
@ -70,7 +70,9 @@ async fn composite_flow_canonical_lifecycle() {
|
|||
// Step 2: load JSONL seed data (Person + Company nodes,
|
||||
// Knows + WorksAt edges).
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
load_jsonl(&mut db, TEST_DATA, LoadMode::Append).await.unwrap();
|
||||
load_jsonl(&mut db, TEST_DATA, LoadMode::Append)
|
||||
.await
|
||||
.unwrap();
|
||||
let v_after_load = version_branch(&db, "main").await.unwrap();
|
||||
assert!(
|
||||
v_after_load > v_init,
|
||||
|
|
@ -119,19 +121,13 @@ async fn composite_flow_canonical_lifecycle() {
|
|||
"feature",
|
||||
MUTATION_QUERIES,
|
||||
"insert_person_and_friend",
|
||||
&mixed_params(
|
||||
&[("$name", "Frank"), ("$friend", "Eve")],
|
||||
&[("$age", 33)],
|
||||
),
|
||||
&mixed_params(&[("$name", "Frank"), ("$friend", "Eve")], &[("$age", 33)]),
|
||||
)
|
||||
.await
|
||||
.expect("multi-statement insert+edge on feature");
|
||||
|
||||
// After: feature has 4 + Eve + Frank = 6 Persons.
|
||||
let snap = db
|
||||
.snapshot_of(ReadTarget::branch("feature"))
|
||||
.await
|
||||
.unwrap();
|
||||
let snap = db.snapshot_of(ReadTarget::branch("feature")).await.unwrap();
|
||||
let person_ds = snap.open("node:Person").await.unwrap();
|
||||
assert_eq!(
|
||||
person_ds.count_rows(None).await.unwrap(),
|
||||
|
|
@ -321,14 +317,10 @@ async fn composite_flow_canonical_lifecycle() {
|
|||
);
|
||||
|
||||
// Re-run a query to verify post-optimize correctness.
|
||||
let post_optimize_total = query_main(
|
||||
&mut db,
|
||||
TEST_QUERIES,
|
||||
"total_people",
|
||||
&ParamMap::default(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let post_optimize_total =
|
||||
query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default())
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(
|
||||
!post_optimize_total.batches().is_empty(),
|
||||
"queries must still work after optimize"
|
||||
|
|
@ -385,14 +377,9 @@ async fn composite_flow_canonical_lifecycle() {
|
|||
// post-cleanup. Post-cleanup mutation is omitted here pending
|
||||
// resolution of the optimize-vs-manifest-pin interaction documented
|
||||
// in Step 10.
|
||||
let final_total = query_main(
|
||||
&mut db,
|
||||
TEST_QUERIES,
|
||||
"total_people",
|
||||
&ParamMap::default(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let final_total = query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default())
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(!final_total.batches().is_empty());
|
||||
}
|
||||
|
||||
|
|
@ -431,10 +418,12 @@ async fn composite_flow_schema_apply_then_branch_ops_no_deadlock_in_refresh() {
|
|||
|
||||
// Step 1: init + load on handle A.
|
||||
let mut db_a = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
||||
load_jsonl(&mut db_a, TEST_DATA, LoadMode::Append).await.unwrap();
|
||||
load_jsonl(&mut db_a, TEST_DATA, LoadMode::Append)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(count_rows(&db_a, "node:Person").await, 4);
|
||||
|
||||
// Step 2: open handle B on the same repo. B's in-memory schema_source
|
||||
// Step 2: open handle B on the same graph. B's in-memory schema_source
|
||||
// cache is now a snapshot of `_schema.pg` at open time.
|
||||
let db_b = Omnigraph::open(uri).await.unwrap();
|
||||
|
||||
|
|
@ -444,7 +433,7 @@ async fn composite_flow_schema_apply_then_branch_ops_no_deadlock_in_refresh() {
|
|||
// to disk.
|
||||
const TEST_SCHEMA_V2: &str = "node Person {\n name: String @key\n age: I32?\n nickname: String?\n}\n\nnode Company {\n name: String @key\n}\n\nedge Knows: Person -> Person {\n since: Date?\n}\n\nedge WorksAt: Person -> Company\n";
|
||||
let plan = db_a.apply_schema(TEST_SCHEMA_V2).await.unwrap();
|
||||
assert!(plan.applied, "apply_schema must succeed on a clean repo");
|
||||
assert!(plan.applied, "apply_schema must succeed on a clean graph");
|
||||
assert!(
|
||||
!plan.steps.is_empty(),
|
||||
"apply_schema must record the AddProperty step"
|
||||
|
|
@ -561,7 +550,9 @@ async fn composite_flow_multi_branch_sequential_merges() {
|
|||
// edges from test.jsonl).
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
||||
load_jsonl(&mut db, TEST_DATA, LoadMode::Append).await.unwrap();
|
||||
load_jsonl(&mut db, TEST_DATA, LoadMode::Append)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(count_rows(&db, "node:Person").await, 4);
|
||||
assert_eq!(count_rows(&db, "edge:Knows").await, 3);
|
||||
|
||||
|
|
@ -687,10 +678,7 @@ async fn composite_flow_multi_branch_sequential_merges() {
|
|||
"feat-a",
|
||||
MUTATION_QUERIES,
|
||||
"insert_person_and_friend",
|
||||
&mixed_params(
|
||||
&[("$name", "Grace"), ("$friend", "Eve")],
|
||||
&[("$age", 28)],
|
||||
),
|
||||
&mixed_params(&[("$name", "Grace"), ("$friend", "Eve")], &[("$age", 28)]),
|
||||
)
|
||||
.await
|
||||
.expect("insert Grace + Knows(Grace → Eve) on feat-a");
|
||||
|
|
@ -821,15 +809,14 @@ async fn composite_flow_multi_branch_sequential_merges() {
|
|||
// `total_people` returns count(Person) = 10. Catches regressions in
|
||||
// group-by/count execution against a multi-fragment table whose
|
||||
// current shape was produced by two sequential merges.
|
||||
let total_post_merges = query_main(
|
||||
&mut db,
|
||||
TEST_QUERIES,
|
||||
"total_people",
|
||||
&ParamMap::default(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_total(&total_post_merges, 10, "post both merges, main must total 10 Persons");
|
||||
let total_post_merges = query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default())
|
||||
.await
|
||||
.unwrap();
|
||||
assert_total(
|
||||
&total_post_merges,
|
||||
10,
|
||||
"post both merges, main must total 10 Persons",
|
||||
);
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// Step 14: time-travel to pre-merge-a-version. Reads must return
|
||||
|
|
@ -1021,14 +1008,9 @@ async fn composite_flow_multi_branch_sequential_merges() {
|
|||
// correctly to disk but the reopened catalog can't bind them.
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
let mut db = db;
|
||||
let post_reopen_total = query_main(
|
||||
&mut db,
|
||||
TEST_QUERIES,
|
||||
"total_people",
|
||||
&ParamMap::default(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let post_reopen_total = query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default())
|
||||
.await
|
||||
.unwrap();
|
||||
assert_total(
|
||||
&post_reopen_total,
|
||||
10,
|
||||
|
|
|
|||
|
|
@ -119,6 +119,187 @@ async fn load_merge_upserts_existing_and_inserts_new() {
|
|||
}
|
||||
}
|
||||
|
||||
/// Regression: two sequential `LoadMode::Merge` invocations against the
|
||||
/// same set of keys must both succeed. Pre-fix, the second one failed
|
||||
/// with `Ambiguous merge inserts are prohibited: multiple source rows
|
||||
/// match the same target row on (id = "TEST-1")` even though every
|
||||
/// source batch had one row per key.
|
||||
///
|
||||
/// Triggered by Lance's `processed_row_ids: Mutex<HashSet<u64>>`
|
||||
/// (lance-4.0.0 `src/dataset/write/merge_insert.rs:2099`) double-
|
||||
/// processing the same source/target match against datasets previously
|
||||
/// rewritten by merge_insert. Worked around by opting
|
||||
/// `MergeInsertBuilder` into `SourceDedupeBehavior::FirstSeen` in
|
||||
/// `crates/omnigraph/src/table_store.rs` — see that file for the full
|
||||
/// rationale and the safety pin (`loader_rejects_intra_batch_duplicate_keys`).
|
||||
/// Tracked at MR-957; upstream: lance-format/lance#6877.
|
||||
#[tokio::test]
|
||||
async fn load_merge_repeated_against_overlapping_keys_succeeds() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let schema = r#"
|
||||
node Thing {
|
||||
key: String @key
|
||||
required_val: String
|
||||
optional_val: String?
|
||||
}
|
||||
"#;
|
||||
let mut db = Omnigraph::init(uri, schema).await.unwrap();
|
||||
|
||||
// Seed with 50 fully-populated rows (id + required + optional).
|
||||
let mut seed = String::new();
|
||||
for i in 1..=50 {
|
||||
seed.push_str(&format!(
|
||||
r#"{{"type":"Thing","data":{{"key":"TEST-{i}","required_val":"required {i}","optional_val":"optional {i}"}}}}
|
||||
"#,
|
||||
));
|
||||
}
|
||||
load_jsonl(&mut db, &seed, LoadMode::Overwrite)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Partial-schema delta — mirrors the bug report exactly: omits
|
||||
// `optional_val`. 25 existing keys + 5 new keys, one row per key.
|
||||
let mut delta = String::new();
|
||||
for i in (1..=25).chain(51..=55) {
|
||||
delta.push_str(&format!(
|
||||
r#"{{"type":"Thing","data":{{"key":"TEST-{i}","required_val":"required {i} UPDATED"}}}}
|
||||
"#,
|
||||
));
|
||||
}
|
||||
|
||||
load_jsonl(&mut db, &delta, LoadMode::Merge)
|
||||
.await
|
||||
.expect("first merge must succeed");
|
||||
assert_eq!(count_rows(&db, "node:Thing").await, 55);
|
||||
|
||||
load_jsonl(&mut db, &delta, LoadMode::Merge)
|
||||
.await
|
||||
.expect("second merge against same keys must succeed");
|
||||
assert_eq!(count_rows(&db, "node:Thing").await, 55);
|
||||
}
|
||||
|
||||
/// Safety pin for the `SourceDedupeBehavior::FirstSeen` workaround in
|
||||
/// `crates/omnigraph/src/table_store.rs`. FirstSeen tells Lance to
|
||||
/// silently skip a duplicate source row instead of erroring. Our use of
|
||||
/// it depends on user-provided duplicates being rejected *before* the
|
||||
/// batch reaches Lance — otherwise FirstSeen could silently drop user
|
||||
/// data.
|
||||
///
|
||||
/// Defense in depth:
|
||||
/// 1. The loader's `enforce_unique_constraints_intra_batch`
|
||||
/// (`loader/mod.rs:1453`), invoked unconditionally on any node type
|
||||
/// with a `@key`, errors on intra-batch duplicate `@key` values at
|
||||
/// intake — pinned by this test across every `LoadMode`.
|
||||
/// 2. The `check_batch_unique_by_keys` precondition at the top of
|
||||
/// `merge_insert_batch` and `stage_merge_insert` is the final
|
||||
/// fail-fast guard: even if a future caller bypasses the loader path
|
||||
/// (e.g. branch-merge's `publish_rewritten_merge_table` builds its
|
||||
/// own source batch directly), a real duplicate id reaches Lance
|
||||
/// only after surfacing as an `OmniError::Manifest`, never silently
|
||||
/// via FirstSeen. Pinned by the unit tests in `table_store::tests`.
|
||||
#[tokio::test]
|
||||
async fn loader_rejects_intra_batch_duplicate_keys() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let schema = r#"
|
||||
node Thing {
|
||||
key: String @key
|
||||
value: String
|
||||
}
|
||||
"#;
|
||||
let mut db = Omnigraph::init(uri, schema).await.unwrap();
|
||||
|
||||
let dupes = r#"{"type":"Thing","data":{"key":"DUP","value":"first"}}
|
||||
{"type":"Thing","data":{"key":"DUP","value":"second"}}
|
||||
"#;
|
||||
|
||||
for mode in [LoadMode::Overwrite, LoadMode::Append, LoadMode::Merge] {
|
||||
let err = load_jsonl(&mut db, dupes, mode).await.unwrap_err();
|
||||
let msg = err.to_string();
|
||||
assert!(
|
||||
msg.contains("@unique violation") && msg.contains("DUP"),
|
||||
"load mode {mode:?} must reject intra-batch duplicate @key (got: {msg})"
|
||||
);
|
||||
assert_eq!(
|
||||
count_rows(&db, "node:Thing").await,
|
||||
0,
|
||||
"load mode {mode:?} must not persist any rows when the batch is rejected"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Canary for the upstream Lance gap that the `FirstSeen` workaround
|
||||
/// in `table_store.rs` masks. The bug class is "Window 2": load →
|
||||
/// indices built explicitly → merge → merge. Even with the engine
|
||||
/// fully aligned to the "indexes are derived state" invariant
|
||||
/// (MR-848), as long as an `id` index has been built between the
|
||||
/// first and second merge_insert, the Lance internal that triggers
|
||||
/// the bug remains reachable.
|
||||
///
|
||||
/// This test runs the Window-2 sequence under the FirstSeen workaround.
|
||||
/// It is expected to pass today. If a future Lance upgrade or local
|
||||
/// change makes it START failing, the workaround has lost effectiveness
|
||||
/// (upstream Lance changed something, or the FirstSeen setter was
|
||||
/// dropped from `table_store.rs`). If a future Lance upgrade fixes the
|
||||
/// bug class, this test continues to pass and the FirstSeen setter can
|
||||
/// be retired.
|
||||
///
|
||||
/// Tracked at MR-957; upstream: lance-format/lance#6877.
|
||||
#[tokio::test]
|
||||
async fn load_merge_window_2_documents_upstream_lance_gap() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let schema = r#"
|
||||
node Thing {
|
||||
key: String @key
|
||||
required_val: String
|
||||
optional_val: String?
|
||||
}
|
||||
"#;
|
||||
let mut db = Omnigraph::init(uri, schema).await.unwrap();
|
||||
|
||||
let mut seed = String::new();
|
||||
for i in 1..=50 {
|
||||
seed.push_str(&format!(
|
||||
r#"{{"type":"Thing","data":{{"key":"TEST-{i}","required_val":"required {i}","optional_val":"optional {i}"}}}}
|
||||
"#,
|
||||
));
|
||||
}
|
||||
load_jsonl(&mut db, &seed, LoadMode::Overwrite)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Explicit ensure_indices between seed and the merges — the Window
|
||||
// 2 trigger. The eager-build behavior (MR-583) means the BTREE on
|
||||
// `id` is already present here, but calling explicitly pins the
|
||||
// invariant for the post-MR-848 future where the eager build is
|
||||
// gone.
|
||||
db.ensure_indices().await.unwrap();
|
||||
|
||||
let mut delta = String::new();
|
||||
for i in (1..=25).chain(51..=55) {
|
||||
delta.push_str(&format!(
|
||||
r#"{{"type":"Thing","data":{{"key":"TEST-{i}","required_val":"required {i} UPDATED"}}}}
|
||||
"#,
|
||||
));
|
||||
}
|
||||
|
||||
// Both merges must succeed under the FirstSeen workaround.
|
||||
// `processed_row_ids` re-processes the same target row_id under
|
||||
// the default `SourceDedupeBehavior::Fail`; FirstSeen tolerates it.
|
||||
load_jsonl(&mut db, &delta, LoadMode::Merge)
|
||||
.await
|
||||
.expect("first merge after ensure_indices must succeed");
|
||||
db.ensure_indices().await.unwrap();
|
||||
load_jsonl(&mut db, &delta, LoadMode::Merge).await.expect(
|
||||
"second merge after ensure_indices must succeed \
|
||||
(Window 2 canary: drop the FirstSeen setter in table_store.rs \
|
||||
only when this stays green WITHOUT it)",
|
||||
);
|
||||
assert_eq!(count_rows(&db, "node:Thing").await, 55);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn cross_type_traversal_deduplicates_duplicate_edges() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
|
|
@ -163,7 +344,7 @@ async fn explicit_target_query_sees_other_writer_commits_without_refresh() {
|
|||
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
|
||||
// Two independent handles to the same repo
|
||||
// Two independent handles to the same graph
|
||||
let mut db1 = Omnigraph::open(uri).await.unwrap();
|
||||
let mut db2 = Omnigraph::open(uri).await.unwrap();
|
||||
|
||||
|
|
|
|||
|
|
@ -1866,3 +1866,65 @@ async fn ensure_indices_does_not_error_on_repeated_call() {
|
|||
let ds = snap.open("node:Person").await.unwrap();
|
||||
assert_eq!(ds.count_rows(None).await.unwrap(), 4);
|
||||
}
|
||||
|
||||
// ─── DataFusion-Expr filter pushdown (Tier-1 follow-up to the Lance v6 bump) ──
|
||||
|
||||
/// Regression for `CompOp::Contains` pushdown via `array_has` in
|
||||
/// `ir_filter_to_expr`. Before the Expr-pushdown refactor, the
|
||||
/// `ir_filter_to_sql` family returned `None` for list-contains (the
|
||||
/// comment said *"Can't pushdown list contains"*) and the predicate was
|
||||
/// applied post-scan in memory. With `Scanner::filter_expr(Expr)` and
|
||||
/// DF's `array_has` builtin, the contains predicate now pushes down to
|
||||
/// Lance — the test confirms results are correct AND the pushdown path
|
||||
/// is exercised (a regression on the pushdown would land all rows in
|
||||
/// the scan, then be filtered post-hoc; that still produces the right
|
||||
/// count so this test pins correctness, while `lance_surface_guards.rs`
|
||||
/// is the structural pin for the surface itself).
|
||||
#[tokio::test]
|
||||
async fn ir_filter_with_list_contains_pushes_down() {
|
||||
let schema = r#"
|
||||
node Doc {
|
||||
slug: String @key
|
||||
tags: [String]
|
||||
}
|
||||
"#;
|
||||
let data = r#"{"type":"Doc","data":{"slug":"alpha","tags":["red","blue"]}}
|
||||
{"type":"Doc","data":{"slug":"bravo","tags":["green"]}}
|
||||
{"type":"Doc","data":{"slug":"charlie","tags":["red","green"]}}
|
||||
{"type":"Doc","data":{"slug":"delta","tags":[]}}"#;
|
||||
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let mut db = Omnigraph::init(dir.path().to_str().unwrap(), schema)
|
||||
.await
|
||||
.unwrap();
|
||||
load_jsonl(&mut db, data, LoadMode::Overwrite)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let queries = r#"
|
||||
query docs_with_tag($tag: String) {
|
||||
match {
|
||||
$d: Doc
|
||||
$d.tags contains $tag
|
||||
}
|
||||
return { $d.slug }
|
||||
}
|
||||
"#;
|
||||
let result = query_main(&mut db, queries, "docs_with_tag", ¶ms(&[("$tag", "red")]))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let batch = result.concat_batches().unwrap();
|
||||
let slugs = batch
|
||||
.column(0)
|
||||
.as_any()
|
||||
.downcast_ref::<StringArray>()
|
||||
.unwrap();
|
||||
let mut got: Vec<&str> = (0..slugs.len()).map(|i| slugs.value(i)).collect();
|
||||
got.sort();
|
||||
assert_eq!(
|
||||
got,
|
||||
vec!["alpha", "charlie"],
|
||||
"contains-pushdown should return exactly the rows whose tags list contains 'red'"
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ async fn graph_publish_failpoint_triggers_before_commit_append() {
|
|||
|
||||
// Atomic schema apply: schema apply writes staging files first, then commits
|
||||
// the manifest, then renames staging → final. Tests below inject crashes at
|
||||
// the two boundaries and assert that reopening the repo yields a consistent
|
||||
// the two boundaries and assert that reopening the graph yields a consistent
|
||||
// state.
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -303,14 +303,10 @@ async fn inline_delete_conflict_writes_sidecar_before_rejecting() {
|
|||
let person_uri = node_table_uri(&uri, "Person");
|
||||
|
||||
{
|
||||
let _pause_delete = ScopedFailPoint::new("mutation.delete_node_pre_primary_delete", "pause");
|
||||
let _pause_delete =
|
||||
ScopedFailPoint::new("mutation.delete_node_pre_primary_delete", "pause");
|
||||
let delete_params = helpers::params(&[("$name", "Alice")]);
|
||||
let delete = db.mutate(
|
||||
"main",
|
||||
MUTATION_QUERIES,
|
||||
"remove_person",
|
||||
&delete_params,
|
||||
);
|
||||
let delete = db.mutate("main", MUTATION_QUERIES, "remove_person", &delete_params);
|
||||
tokio::pin!(delete);
|
||||
|
||||
let mut concurrent_update_succeeded = false;
|
||||
|
|
@ -325,15 +321,18 @@ async fn inline_delete_conflict_writes_sidecar_before_rejecting() {
|
|||
"set_age",
|
||||
&mixed_params(&[("$name", "Bob")], &[("$age", 26)]),
|
||||
)
|
||||
.await
|
||||
.is_ok()
|
||||
.await
|
||||
.is_ok()
|
||||
{
|
||||
concurrent_update_succeeded = true;
|
||||
break;
|
||||
}
|
||||
tokio::time::sleep(std::time::Duration::from_millis(20)).await;
|
||||
}
|
||||
assert!(concurrent_update_succeeded, "concurrent update must land while delete is paused");
|
||||
assert!(
|
||||
concurrent_update_succeeded,
|
||||
"concurrent update must land while delete is paused"
|
||||
);
|
||||
fail::remove("mutation.delete_node_pre_primary_delete");
|
||||
|
||||
let err = delete.await.unwrap_err();
|
||||
|
|
@ -464,7 +463,7 @@ async fn recovery_rolls_forward_load_on_feature_branch() {
|
|||
|
||||
#[tokio::test]
|
||||
async fn recovery_rolls_forward_ensure_indices_on_feature_branch() {
|
||||
use lance_index::DatasetIndexExt;
|
||||
use lance::index::DatasetIndexExt;
|
||||
use omnigraph::loader::{LoadMode, load_jsonl};
|
||||
use omnigraph::table_store::TableStore;
|
||||
|
||||
|
|
@ -925,13 +924,13 @@ async fn ensure_indices_stage_btree_failure_leaves_existing_tables_writable() {
|
|||
.expect("Person mutation must succeed after the failed schema apply — existing tables are not drifted");
|
||||
}
|
||||
|
||||
fn assert_no_staging_files(repo: &std::path::Path) {
|
||||
fn assert_no_staging_files(graph: &std::path::Path) {
|
||||
for name in [
|
||||
"_schema.pg.staging",
|
||||
"_schema.ir.json.staging",
|
||||
"__schema_state.json.staging",
|
||||
] {
|
||||
let path = repo.join(name);
|
||||
let path = graph.join(name);
|
||||
assert!(
|
||||
!path.exists(),
|
||||
"staging file {} still exists after recovery",
|
||||
|
|
@ -1164,7 +1163,7 @@ edge WorksAt: Person -> Company
|
|||
// NEW schema (city column on Person, Tag node type) — not the old.
|
||||
// Without the schema-staging coordination, the schema-state
|
||||
// recovery would have deleted the staging files (because manifest
|
||||
// hadn't advanced when it ran), leaving a corrupt repo with new-
|
||||
// hadn't advanced when it ran), leaving a corrupt graph with new-
|
||||
// schema data on disk but old-schema catalog.
|
||||
let live_schema = std::fs::read_to_string(dir.path().join("_schema.pg")).unwrap();
|
||||
assert!(
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ query insert_person_and_friend($name: String, $age: I32, $friend: String) {
|
|||
}
|
||||
"#;
|
||||
|
||||
/// Init a repo and load the standard test data.
|
||||
/// Init a graph and load the standard test data.
|
||||
pub async fn init_and_load(dir: &tempfile::TempDir) -> Omnigraph {
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
||||
|
|
@ -249,7 +249,7 @@ pub fn vector_and_string_params(
|
|||
map
|
||||
}
|
||||
|
||||
pub fn s3_test_repo_uri(suite: &str) -> Option<String> {
|
||||
pub fn s3_test_graph_uri(suite: &str) -> Option<String> {
|
||||
let bucket = std::env::var("OMNIGRAPH_S3_TEST_BUCKET").ok()?;
|
||||
let prefix = std::env::var("OMNIGRAPH_S3_TEST_PREFIX")
|
||||
.ok()
|
||||
|
|
|
|||
|
|
@ -110,8 +110,8 @@ impl FollowUpMutation {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn single_sidecar_operation_id(repo_root: &Path) -> String {
|
||||
let ids = sidecar_operation_ids(repo_root);
|
||||
pub fn single_sidecar_operation_id(graph_root: &Path) -> String {
|
||||
let ids = sidecar_operation_ids(graph_root);
|
||||
assert_eq!(
|
||||
ids.len(),
|
||||
1,
|
||||
|
|
@ -121,8 +121,8 @@ pub fn single_sidecar_operation_id(repo_root: &Path) -> String {
|
|||
ids.into_iter().next().unwrap()
|
||||
}
|
||||
|
||||
pub fn sidecar_operation_ids(repo_root: &Path) -> Vec<String> {
|
||||
let dir = repo_root.join("__recovery");
|
||||
pub fn sidecar_operation_ids(graph_root: &Path) -> Vec<String> {
|
||||
let dir = graph_root.join("__recovery");
|
||||
if !dir.exists() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
|
@ -143,10 +143,10 @@ pub fn sidecar_operation_ids(repo_root: &Path) -> Vec<String> {
|
|||
ids
|
||||
}
|
||||
|
||||
pub async fn branch_head_commit_id(repo_root: &Path, branch: &str) -> Result<String> {
|
||||
pub async fn branch_head_commit_id(graph_root: &Path, branch: &str) -> Result<String> {
|
||||
let graph = match branch {
|
||||
"main" => CommitGraph::open(&repo_uri(repo_root)).await?,
|
||||
branch => CommitGraph::open_at_branch(&repo_uri(repo_root), branch).await?,
|
||||
"main" => CommitGraph::open(&graph_uri(graph_root)).await?,
|
||||
branch => CommitGraph::open_at_branch(&graph_uri(graph_root), branch).await?,
|
||||
};
|
||||
graph.head_commit_id().await?.ok_or_else(|| {
|
||||
OmniError::manifest_internal(format!("commit graph for branch {branch} has no head"))
|
||||
|
|
@ -154,52 +154,52 @@ pub async fn branch_head_commit_id(repo_root: &Path, branch: &str) -> Result<Str
|
|||
}
|
||||
|
||||
pub async fn assert_post_recovery_invariants(
|
||||
repo_root: &Path,
|
||||
graph_root: &Path,
|
||||
operation_id: &str,
|
||||
expectation: RecoveryExpectation,
|
||||
) -> Result<()> {
|
||||
match expectation {
|
||||
RecoveryExpectation::RolledForward { tables } => {
|
||||
assert_sidecar_absent(repo_root, operation_id);
|
||||
let audit = read_audit_row(repo_root, operation_id).await?;
|
||||
assert_sidecar_absent(graph_root, operation_id);
|
||||
let audit = read_audit_row(graph_root, operation_id).await?;
|
||||
assert_eq!(
|
||||
audit.recovery_kind, "RolledForward",
|
||||
"audit row for {operation_id} recorded the wrong recovery_kind",
|
||||
);
|
||||
assert_manifest_pins_match_lance_heads(repo_root, &tables).await?;
|
||||
assert_audit_to_versions_match_lance_heads(repo_root, &audit, &tables).await?;
|
||||
assert_recovery_commit_shape(repo_root, &audit, &tables).await?;
|
||||
assert_non_main_did_not_move_main(repo_root, &tables).await?;
|
||||
assert_idempotent_reopen(repo_root, operation_id).await?;
|
||||
run_follow_up_mutations(repo_root, tables).await?;
|
||||
assert_manifest_pins_match_lance_heads(graph_root, &tables).await?;
|
||||
assert_audit_to_versions_match_lance_heads(graph_root, &audit, &tables).await?;
|
||||
assert_recovery_commit_shape(graph_root, &audit, &tables).await?;
|
||||
assert_non_main_did_not_move_main(graph_root, &tables).await?;
|
||||
assert_idempotent_reopen(graph_root, operation_id).await?;
|
||||
run_follow_up_mutations(graph_root, tables).await?;
|
||||
}
|
||||
RecoveryExpectation::RolledBack { tables } => {
|
||||
assert_sidecar_absent(repo_root, operation_id);
|
||||
let audit = read_audit_row(repo_root, operation_id).await?;
|
||||
assert_sidecar_absent(graph_root, operation_id);
|
||||
let audit = read_audit_row(graph_root, operation_id).await?;
|
||||
assert_eq!(
|
||||
audit.recovery_kind, "RolledBack",
|
||||
"audit row for {operation_id} recorded the wrong recovery_kind",
|
||||
);
|
||||
assert_rollback_outcomes_record_drift(&audit);
|
||||
assert_recovery_commit_shape(repo_root, &audit, &tables).await?;
|
||||
assert_non_main_did_not_move_main(repo_root, &tables).await?;
|
||||
assert_idempotent_reopen(repo_root, operation_id).await?;
|
||||
run_follow_up_mutations(repo_root, tables).await?;
|
||||
assert_recovery_commit_shape(graph_root, &audit, &tables).await?;
|
||||
assert_non_main_did_not_move_main(graph_root, &tables).await?;
|
||||
assert_idempotent_reopen(graph_root, operation_id).await?;
|
||||
run_follow_up_mutations(graph_root, tables).await?;
|
||||
}
|
||||
RecoveryExpectation::Deferred => {
|
||||
assert!(
|
||||
sidecar_path(repo_root, operation_id).exists(),
|
||||
sidecar_path(graph_root, operation_id).exists(),
|
||||
"deferred recovery must leave sidecar {operation_id} on disk",
|
||||
);
|
||||
assert!(
|
||||
read_audit_row(repo_root, operation_id).await.is_err(),
|
||||
read_audit_row(graph_root, operation_id).await.is_err(),
|
||||
"deferred recovery must not record an audit row for {operation_id}",
|
||||
);
|
||||
}
|
||||
RecoveryExpectation::NoOp => {
|
||||
assert_sidecar_absent(repo_root, operation_id);
|
||||
assert_sidecar_absent(graph_root, operation_id);
|
||||
assert!(
|
||||
read_audit_row(repo_root, operation_id).await.is_err(),
|
||||
read_audit_row(graph_root, operation_id).await.is_err(),
|
||||
"no-op recovery must not record an audit row for {operation_id}",
|
||||
);
|
||||
}
|
||||
|
|
@ -216,24 +216,24 @@ fn branch_context(tables: &[TableExpectation]) -> Option<String> {
|
|||
.map(str::to_string)
|
||||
}
|
||||
|
||||
fn sidecar_path(repo_root: &Path, operation_id: &str) -> PathBuf {
|
||||
repo_root
|
||||
fn sidecar_path(graph_root: &Path, operation_id: &str) -> PathBuf {
|
||||
graph_root
|
||||
.join("__recovery")
|
||||
.join(format!("{operation_id}.json"))
|
||||
}
|
||||
|
||||
fn assert_sidecar_absent(repo_root: &Path, operation_id: &str) {
|
||||
fn assert_sidecar_absent(graph_root: &Path, operation_id: &str) {
|
||||
assert!(
|
||||
!sidecar_path(repo_root, operation_id).exists(),
|
||||
!sidecar_path(graph_root, operation_id).exists(),
|
||||
"recovery sidecar {operation_id} must be deleted after successful recovery",
|
||||
);
|
||||
}
|
||||
|
||||
async fn assert_manifest_pins_match_lance_heads(
|
||||
repo_root: &Path,
|
||||
graph_root: &Path,
|
||||
tables: &[TableExpectation],
|
||||
) -> Result<()> {
|
||||
let uri = repo_uri(repo_root);
|
||||
let uri = graph_uri(graph_root);
|
||||
let db = Omnigraph::open(&uri).await?;
|
||||
for table in tables {
|
||||
let (entry, lance_head) = entry_and_lance_head(&db, &uri, table).await?;
|
||||
|
|
@ -254,11 +254,11 @@ async fn assert_manifest_pins_match_lance_heads(
|
|||
}
|
||||
|
||||
async fn assert_audit_to_versions_match_lance_heads(
|
||||
repo_root: &Path,
|
||||
graph_root: &Path,
|
||||
audit: &RecoveryAuditRow,
|
||||
tables: &[TableExpectation],
|
||||
) -> Result<()> {
|
||||
let uri = repo_uri(repo_root);
|
||||
let uri = graph_uri(graph_root);
|
||||
let db = Omnigraph::open(&uri).await?;
|
||||
for table in tables {
|
||||
let (_, lance_head) = entry_and_lance_head(&db, &uri, table).await?;
|
||||
|
|
@ -301,10 +301,10 @@ fn assert_rollback_outcomes_record_drift(audit: &RecoveryAuditRow) {
|
|||
}
|
||||
|
||||
async fn assert_non_main_did_not_move_main(
|
||||
repo_root: &Path,
|
||||
graph_root: &Path,
|
||||
tables: &[TableExpectation],
|
||||
) -> Result<()> {
|
||||
let uri = repo_uri(repo_root);
|
||||
let uri = graph_uri(graph_root);
|
||||
let db = Omnigraph::open(&uri).await?;
|
||||
let main = db.snapshot_of(ReadTarget::branch("main")).await?;
|
||||
for table in tables {
|
||||
|
|
@ -327,14 +327,14 @@ async fn assert_non_main_did_not_move_main(
|
|||
}
|
||||
|
||||
async fn assert_recovery_commit_shape(
|
||||
repo_root: &Path,
|
||||
graph_root: &Path,
|
||||
audit: &RecoveryAuditRow,
|
||||
tables: &[TableExpectation],
|
||||
) -> Result<()> {
|
||||
let branch = branch_context(tables);
|
||||
let expected_parent = expected_recovery_parent(tables)?;
|
||||
let branch = branch.as_deref();
|
||||
let commit = read_recovery_commit(repo_root, audit, branch).await?;
|
||||
let commit = read_recovery_commit(graph_root, audit, branch).await?;
|
||||
|
||||
assert_eq!(
|
||||
commit.actor_id.as_deref(),
|
||||
|
|
@ -362,7 +362,7 @@ async fn assert_recovery_commit_shape(
|
|||
);
|
||||
|
||||
if let Some(branch) = branch {
|
||||
let graph = CommitGraph::open_at_branch(&repo_uri(repo_root), branch).await?;
|
||||
let graph = CommitGraph::open_at_branch(&graph_uri(graph_root), branch).await?;
|
||||
let commits = graph.load_commits().await?;
|
||||
let parent = commit.parent_commit_id.as_deref().ok_or_else(|| {
|
||||
OmniError::manifest_internal(format!(
|
||||
|
|
@ -403,12 +403,12 @@ fn expected_recovery_parent(tables: &[TableExpectation]) -> Result<Option<String
|
|||
Ok(expected)
|
||||
}
|
||||
|
||||
async fn assert_idempotent_reopen(repo_root: &Path, operation_id: &str) -> Result<()> {
|
||||
let before = matching_audit_rows(repo_root, operation_id).await?;
|
||||
let uri = repo_uri(repo_root);
|
||||
async fn assert_idempotent_reopen(graph_root: &Path, operation_id: &str) -> Result<()> {
|
||||
let before = matching_audit_rows(graph_root, operation_id).await?;
|
||||
let uri = graph_uri(graph_root);
|
||||
let _db = Omnigraph::open(&uri).await?;
|
||||
assert_sidecar_absent(repo_root, operation_id);
|
||||
let after = matching_audit_rows(repo_root, operation_id).await?;
|
||||
assert_sidecar_absent(graph_root, operation_id);
|
||||
let after = matching_audit_rows(graph_root, operation_id).await?;
|
||||
assert_eq!(
|
||||
after.len(),
|
||||
before.len(),
|
||||
|
|
@ -417,14 +417,14 @@ async fn assert_idempotent_reopen(repo_root: &Path, operation_id: &str) -> Resul
|
|||
Ok(())
|
||||
}
|
||||
|
||||
async fn run_follow_up_mutations(repo_root: &Path, tables: Vec<TableExpectation>) -> Result<()> {
|
||||
async fn run_follow_up_mutations(graph_root: &Path, tables: Vec<TableExpectation>) -> Result<()> {
|
||||
let mut db: Option<Omnigraph> = None;
|
||||
for table in tables {
|
||||
let Some(mutation) = table.follow_up_mutation else {
|
||||
continue;
|
||||
};
|
||||
if db.is_none() {
|
||||
db = Some(Omnigraph::open(&repo_uri(repo_root)).await?);
|
||||
db = Some(Omnigraph::open(&graph_uri(graph_root)).await?);
|
||||
}
|
||||
let db = db.as_mut().unwrap();
|
||||
db.mutate(
|
||||
|
|
@ -480,11 +480,11 @@ async fn lance_head_for_entry(root_uri: &str, entry: &SubTableEntry) -> Result<u
|
|||
}
|
||||
|
||||
async fn read_recovery_commit(
|
||||
repo_root: &Path,
|
||||
graph_root: &Path,
|
||||
audit: &RecoveryAuditRow,
|
||||
branch: Option<&str>,
|
||||
) -> Result<GraphCommit> {
|
||||
let uri = repo_uri(repo_root);
|
||||
let uri = graph_uri(graph_root);
|
||||
let graph = match branch {
|
||||
Some(branch) => CommitGraph::open_at_branch(&uri, branch).await?,
|
||||
None => CommitGraph::open(&uri).await?,
|
||||
|
|
@ -502,8 +502,8 @@ async fn read_recovery_commit(
|
|||
})
|
||||
}
|
||||
|
||||
async fn read_audit_row(repo_root: &Path, operation_id: &str) -> Result<RecoveryAuditRow> {
|
||||
let mut rows = matching_audit_rows(repo_root, operation_id).await?;
|
||||
async fn read_audit_row(graph_root: &Path, operation_id: &str) -> Result<RecoveryAuditRow> {
|
||||
let mut rows = matching_audit_rows(graph_root, operation_id).await?;
|
||||
if rows.len() != 1 {
|
||||
return Err(OmniError::manifest_internal(format!(
|
||||
"expected exactly one recovery audit row for {operation_id}, got {}",
|
||||
|
|
@ -514,10 +514,10 @@ async fn read_audit_row(repo_root: &Path, operation_id: &str) -> Result<Recovery
|
|||
}
|
||||
|
||||
async fn matching_audit_rows(
|
||||
repo_root: &Path,
|
||||
graph_root: &Path,
|
||||
operation_id: &str,
|
||||
) -> Result<Vec<RecoveryAuditRow>> {
|
||||
let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance");
|
||||
let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance");
|
||||
if !recoveries_dir.exists() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
|
@ -575,6 +575,6 @@ fn string_column<'a>(batch: &'a RecordBatch, name: &str) -> Result<&'a StringArr
|
|||
})
|
||||
}
|
||||
|
||||
fn repo_uri(repo_root: &Path) -> String {
|
||||
repo_root.to_str().unwrap().to_string()
|
||||
fn graph_uri(graph_root: &Path) -> String {
|
||||
graph_root.to_str().unwrap().to_string()
|
||||
}
|
||||
|
|
|
|||
244
crates/omnigraph/tests/lance_surface_guards.rs
Normal file
244
crates/omnigraph/tests/lance_surface_guards.rs
Normal file
|
|
@ -0,0 +1,244 @@
|
|||
//! Lance API surface guards.
|
||||
//!
|
||||
//! Each guard pins a Lance API surface that OmniGraph relies on. If a future
|
||||
//! Lance bump silently renames a variant, restructures a public struct, or
|
||||
//! flips a method to async, the corresponding guard either fails to compile
|
||||
//! (compile-time guards) or fails at runtime (runtime guards). The purpose
|
||||
//! is to turn silent-break risks into red CI bars on the *next* Lance bump,
|
||||
//! rather than into wrong-state recovery in production.
|
||||
//!
|
||||
//! Pair this file with `docs/dev/lance.md`'s alignment audit stanza: any
|
||||
//! Lance bump runs `cargo test -p omnigraph-engine --test lance_surface_guards`
|
||||
//! first as the smoke check.
|
||||
//!
|
||||
//! ## Compile-only guards
|
||||
//!
|
||||
//! Functions prefixed with `_compile_` are gated with a broad `#[allow(...)]`
|
||||
//! and never called. They exist to make `cargo build -p omnigraph-engine --tests`
|
||||
//! enforce the API shape. Using `unimplemented!()` as a placeholder lets type
|
||||
//! inference proceed without running anything.
|
||||
//!
|
||||
//! ## Runtime guards
|
||||
//!
|
||||
//! Functions decorated `#[tokio::test]` actually run; they construct real
|
||||
//! values and assert field shapes / types.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, StringArray};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use lance::Dataset;
|
||||
use lance::dataset::builder::DatasetBuilder;
|
||||
use lance::dataset::optimize::{CompactionOptions, compact_files};
|
||||
use lance::dataset::write::delete::DeleteResult;
|
||||
use lance::dataset::{MergeInsertBuilder, WhenMatched, WhenNotMatched, WriteMode, WriteParams};
|
||||
use lance_file::version::LanceFileVersion;
|
||||
use lance_namespace::LanceNamespace;
|
||||
use lance_table::io::commit::ManifestNamingScheme;
|
||||
|
||||
/// Helper: build a small fresh dataset in a tempdir. Pinned at V2_2 to match
|
||||
/// production write paths (blob v2 requires V2_2; see `docs/dev/lance.md`).
|
||||
async fn fresh_dataset(uri: &str) -> Dataset {
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Utf8, false),
|
||||
Field::new("value", DataType::Int32, false),
|
||||
]));
|
||||
let batch = RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(StringArray::from(vec!["alice", "bob"])),
|
||||
Arc::new(Int32Array::from(vec![1, 2])),
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
|
||||
let params = WriteParams {
|
||||
mode: WriteMode::Create,
|
||||
enable_stable_row_ids: true,
|
||||
data_storage_version: Some(LanceFileVersion::V2_2),
|
||||
..Default::default()
|
||||
};
|
||||
Dataset::write(reader, uri, Some(params)).await.unwrap()
|
||||
}
|
||||
|
||||
// --- Guard 1: LanceError::TooMuchWriteContention variant exists ------------
|
||||
//
|
||||
// `db/manifest/publisher.rs::map_lance_publish_error` pattern-matches on this
|
||||
// variant to surface typed `OmniError::ManifestRowLevelCasContention`. If
|
||||
// Lance renames the variant or removes the builder, this guard fails.
|
||||
|
||||
#[tokio::test]
|
||||
async fn lance_error_too_much_write_contention_variant_exists() {
|
||||
let err = lance::Error::too_much_write_contention("guard");
|
||||
assert!(
|
||||
matches!(err, lance::Error::TooMuchWriteContention { .. }),
|
||||
"Lance::Error::TooMuchWriteContention variant missing or renamed; \
|
||||
update db/manifest/publisher.rs::map_lance_publish_error and \
|
||||
this guard, then re-pin docs/dev/lance.md."
|
||||
);
|
||||
}
|
||||
|
||||
// --- Guard 2: ManifestLocation field shape ---------------------------------
|
||||
//
|
||||
// `db/manifest/metadata.rs:84-88` reads `.path`, `.size`, `.e_tag`,
|
||||
// `.naming_scheme` off `dataset.manifest_location()`. If any field renames
|
||||
// or changes type, this guard fails to compile.
|
||||
|
||||
#[tokio::test]
|
||||
async fn manifest_location_field_shape() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().join("guard.lance");
|
||||
let ds = fresh_dataset(uri.to_str().unwrap()).await;
|
||||
|
||||
let loc = ds.manifest_location();
|
||||
// Explicit type bindings — these are the load-bearing assertions. If a
|
||||
// type drifts (e.g. .size: Option<u64> → .size: u64), this fails to
|
||||
// compile.
|
||||
let _path: &object_store::path::Path = &loc.path;
|
||||
let _size: Option<u64> = loc.size;
|
||||
let _e_tag: Option<String> = loc.e_tag.clone();
|
||||
let _scheme: ManifestNamingScheme = loc.naming_scheme;
|
||||
// Runtime sanity — naming_scheme should produce a Debug string we use
|
||||
// verbatim in `TableVersionMetadata::naming_scheme`.
|
||||
assert!(!format!("{:?}", loc.naming_scheme).is_empty());
|
||||
}
|
||||
|
||||
// --- Guard 3: checkout_version + restore async chain -----------------------
|
||||
//
|
||||
// `db/manifest/recovery.rs:505-522` chains `Dataset::open(...).await?
|
||||
// .checkout_version(N).await?.restore().await?` as the recovery rollback
|
||||
// hammer. Compile-only — never runs.
|
||||
|
||||
#[allow(
|
||||
dead_code,
|
||||
unreachable_code,
|
||||
unused_variables,
|
||||
unused_mut,
|
||||
clippy::diverging_sub_expression
|
||||
)]
|
||||
async fn _compile_checkout_version_then_restore_signature() -> lance::Result<()> {
|
||||
let ds: Dataset = unimplemented!();
|
||||
let mut ds: Dataset = ds.checkout_version(1u64).await?;
|
||||
// `restore()` takes `&mut self` and returns `Result<()>`; the dataset
|
||||
// mutates in place. If Lance flips this to return a fresh `Dataset`
|
||||
// (consuming `self`), this guard fails to compile.
|
||||
let _: () = ds.restore().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// --- Guard 4: DatasetBuilder::from_namespace fluent chain ------------------
|
||||
//
|
||||
// `db/manifest/namespace.rs:162-174` chains
|
||||
// `DatasetBuilder::from_namespace(ns, vec![id]).await?.with_branch(...).with_version(...).load().await?`.
|
||||
// Compile-only.
|
||||
|
||||
#[allow(
|
||||
dead_code,
|
||||
unreachable_code,
|
||||
unused_variables,
|
||||
unused_mut,
|
||||
clippy::diverging_sub_expression
|
||||
)]
|
||||
async fn _compile_dataset_builder_from_namespace_signature(
|
||||
ns: Arc<dyn LanceNamespace>,
|
||||
) -> lance::Result<()> {
|
||||
let builder: DatasetBuilder =
|
||||
DatasetBuilder::from_namespace(ns, vec!["table".to_string()]).await?;
|
||||
let builder: DatasetBuilder = builder.with_branch("b", None);
|
||||
let builder: DatasetBuilder = builder.with_version(1u64);
|
||||
let _ds: Dataset = builder.load().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// --- Guard 5: MergeInsertBuilder fluent chain ------------------------------
|
||||
//
|
||||
// `db/manifest/publisher.rs:370-391` is the manifest CAS. If any method on
|
||||
// the builder renames or changes signature, the publisher silently breaks.
|
||||
// Compile-only.
|
||||
|
||||
#[allow(
|
||||
dead_code,
|
||||
unreachable_code,
|
||||
unused_variables,
|
||||
unused_mut,
|
||||
clippy::diverging_sub_expression
|
||||
)]
|
||||
async fn _compile_merge_insert_builder_method_chain() -> lance::Result<()> {
|
||||
use lance::dataset::MergeStats;
|
||||
|
||||
let ds: Arc<Dataset> = unimplemented!();
|
||||
let job = MergeInsertBuilder::try_new(ds, vec!["object_id".to_string()])?
|
||||
.when_matched(WhenMatched::UpdateAll)
|
||||
.when_not_matched(WhenNotMatched::InsertAll)
|
||||
.conflict_retries(0)
|
||||
.use_index(false)
|
||||
.try_build()?;
|
||||
|
||||
// execute_reader takes `impl StreamingWriteSource` (lance trait), which
|
||||
// RecordBatchIterator implements. Pin the return shape
|
||||
// `(Arc<Dataset>, MergeStats)` — the publisher's CAS loop depends on
|
||||
// both: the new Dataset to advance HEAD, the stats for the audit row.
|
||||
let source: RecordBatchIterator<Vec<Result<RecordBatch, arrow_schema::ArrowError>>> =
|
||||
unimplemented!();
|
||||
let result: (Arc<Dataset>, MergeStats) = job.execute_reader(source).await?;
|
||||
let _ds: Arc<Dataset> = result.0;
|
||||
let _stats: MergeStats = result.1;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// --- Guard 6: WriteParams::default() leaves data_storage_version = None ----
|
||||
//
|
||||
// Our V2_2 pin is load-bearing for blob v2 (verified earlier this session
|
||||
// when V2_1 produced "Blob v2 requires file version >= 2.2" on 13 blob
|
||||
// tests). If Lance changes the default to pin some version itself, audit
|
||||
// every `data_storage_version: Some(LanceFileVersion::V2_2)` site.
|
||||
|
||||
#[test]
|
||||
fn write_params_default_does_not_set_storage_version() {
|
||||
let params = WriteParams::default();
|
||||
assert_eq!(
|
||||
params.data_storage_version, None,
|
||||
"WriteParams::default().data_storage_version is no longer None; \
|
||||
audit every explicit V2_2 pin (see rg 'LanceFileVersion::V2_2')."
|
||||
);
|
||||
}
|
||||
|
||||
// --- Guard 7: compact_files signature --------------------------------------
|
||||
//
|
||||
// `db/omnigraph/optimize.rs:107` calls `compact_files(&mut ds, options, None)`.
|
||||
// Compile-only.
|
||||
|
||||
#[allow(
|
||||
dead_code,
|
||||
unreachable_code,
|
||||
unused_variables,
|
||||
unused_mut,
|
||||
clippy::diverging_sub_expression
|
||||
)]
|
||||
async fn _compile_compact_files_signature() -> lance::Result<()> {
|
||||
let mut ds: Dataset = unimplemented!();
|
||||
let options: CompactionOptions = CompactionOptions::default();
|
||||
let _metrics = compact_files(&mut ds, options, None).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// --- Guard 8: Dataset::delete returns DeleteResult { new_dataset, num_deleted_rows } ---
|
||||
//
|
||||
// `table_store.rs::delete_where` consumes both fields. When MR-A migrates
|
||||
// `delete_where` to two-phase via `DeleteBuilder::execute_uncommitted`, this
|
||||
// guard updates to pin the staged path. Compile-only.
|
||||
|
||||
#[allow(
|
||||
dead_code,
|
||||
unreachable_code,
|
||||
unused_variables,
|
||||
unused_mut,
|
||||
clippy::diverging_sub_expression
|
||||
)]
|
||||
async fn _compile_delete_result_field_shape() -> lance::Result<()> {
|
||||
let mut ds: Dataset = unimplemented!();
|
||||
let result: DeleteResult = ds.delete("x = 1").await?;
|
||||
let _new_dataset: Arc<Dataset> = result.new_dataset;
|
||||
let _num_deleted: u64 = result.num_deleted_rows;
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -3,13 +3,13 @@ mod helpers;
|
|||
use std::fs;
|
||||
|
||||
use omnigraph::db::{Omnigraph, ReadTarget};
|
||||
use omnigraph_compiler::{build_schema_ir, schema_ir_pretty_json};
|
||||
use omnigraph_compiler::schema::parser::parse_schema;
|
||||
use omnigraph_compiler::{build_schema_ir, schema_ir_pretty_json};
|
||||
|
||||
use helpers::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn init_creates_repo() {
|
||||
async fn init_creates_graph() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
|
||||
|
|
@ -34,7 +34,7 @@ async fn init_creates_repo() {
|
|||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn open_reads_existing_repo() {
|
||||
async fn open_reads_existing_graph() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
|
||||
|
|
@ -49,7 +49,7 @@ async fn open_reads_existing_repo() {
|
|||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn open_bootstraps_legacy_schema_state_for_main_only_repo() {
|
||||
async fn open_bootstraps_legacy_schema_state_for_main_only_graph() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
||||
|
|
@ -64,7 +64,7 @@ async fn open_bootstraps_legacy_schema_state_for_main_only_repo() {
|
|||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn open_rejects_legacy_repo_with_public_branch() {
|
||||
async fn open_rejects_legacy_graph_with_public_branch() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
||||
|
|
@ -74,7 +74,7 @@ async fn open_rejects_legacy_repo_with_public_branch() {
|
|||
fs::remove_file(dir.path().join("__schema_state.json")).unwrap();
|
||||
|
||||
let err = match Omnigraph::open(uri).await {
|
||||
Ok(_) => panic!("expected legacy repo with public branch to fail schema bootstrap"),
|
||||
Ok(_) => panic!("expected legacy graph with public branch to fail schema bootstrap"),
|
||||
Err(err) => err,
|
||||
};
|
||||
assert!(
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
// Maintenance tests: `optimize` (Lance compact_files) and `cleanup`
|
||||
// (Lance cleanup_old_versions) at the graph level. Covers no-op edges
|
||||
// (empty repo, already-optimized repo), the policy-validation contract on
|
||||
// (empty graph, already-optimized graph), the policy-validation contract on
|
||||
// `cleanup`, and the keep-versions cap that protects head.
|
||||
|
||||
mod helpers;
|
||||
|
|
@ -13,7 +13,7 @@ use omnigraph::loader::{LoadMode, load_jsonl};
|
|||
use helpers::{TEST_DATA, TEST_SCHEMA, count_rows, init_and_load};
|
||||
|
||||
#[tokio::test]
|
||||
async fn optimize_on_empty_repo_returns_stats_per_table_with_no_changes() {
|
||||
async fn optimize_on_empty_graph_returns_stats_per_table_with_no_changes() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
||||
|
|
@ -37,7 +37,7 @@ async fn optimize_after_load_then_again_is_idempotent() {
|
|||
// First pass may compact (load wrote real fragments).
|
||||
let _first = db.optimize().await.unwrap();
|
||||
|
||||
// Second pass should be a no-op: already-compacted repo produces no
|
||||
// Second pass should be a no-op: already-compacted graph produces no
|
||||
// fragments_removed / fragments_added.
|
||||
let second = db.optimize().await.unwrap();
|
||||
for s in &second {
|
||||
|
|
@ -119,7 +119,9 @@ async fn cleanup_older_than_zero_preserves_head() {
|
|||
|
||||
// Smoke test: after aggressive cleanup, we can still read and write the
|
||||
// graph — head wasn't pruned.
|
||||
load_jsonl(&mut db, TEST_DATA, LoadMode::Merge).await.unwrap();
|
||||
load_jsonl(&mut db, TEST_DATA, LoadMode::Merge)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -151,6 +153,8 @@ async fn cleanup_then_optimize_preserves_rows_and_table_remains_writable() {
|
|||
assert_eq!(count_rows(&db, "node:Company").await, companies_before);
|
||||
|
||||
// Table is still writable after the cleanup+optimize sequence.
|
||||
load_jsonl(&mut db, TEST_DATA, LoadMode::Merge).await.unwrap();
|
||||
load_jsonl(&mut db, TEST_DATA, LoadMode::Merge)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(count_rows(&db, "node:Person").await, people_before);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,16 +22,16 @@ use helpers::recovery::{RecoveryExpectation, TableExpectation, assert_post_recov
|
|||
|
||||
const TEST_SCHEMA: &str = include_str!("fixtures/test.pg");
|
||||
|
||||
fn write_sidecar_file(repo_root: &Path, operation_id: &str, json: &str) {
|
||||
let dir = repo_root.join("__recovery");
|
||||
fn write_sidecar_file(graph_root: &Path, operation_id: &str, json: &str) {
|
||||
let dir = graph_root.join("__recovery");
|
||||
if !dir.exists() {
|
||||
std::fs::create_dir(&dir).unwrap();
|
||||
}
|
||||
std::fs::write(dir.join(format!("{}.json", operation_id)), json).unwrap();
|
||||
}
|
||||
|
||||
fn list_recovery_dir(repo_root: &Path) -> Vec<String> {
|
||||
let dir = repo_root.join("__recovery");
|
||||
fn list_recovery_dir(graph_root: &Path) -> Vec<String> {
|
||||
let dir = graph_root.join("__recovery");
|
||||
if !dir.exists() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
|
@ -41,7 +41,7 @@ fn list_recovery_dir(repo_root: &Path) -> Vec<String> {
|
|||
.collect()
|
||||
}
|
||||
|
||||
/// Full URI of a node-type Lance dataset under a fresh Omnigraph repo.
|
||||
/// Full URI of a node-type Lance dataset under a fresh Omnigraph graph.
|
||||
/// Mirrors the `nodes/{fnv1a64-hex(type_name)}` layout in `db/manifest/layout.rs`.
|
||||
fn node_table_uri(root: &str, type_name: &str) -> String {
|
||||
let h: u64 = fnv1a64(type_name.as_bytes());
|
||||
|
|
@ -283,8 +283,8 @@ async fn recovery_rolls_back_synthetic_drift_on_open() {
|
|||
// =====================================================================
|
||||
|
||||
/// Helper: count rows in `_graph_commit_recoveries.lance` at the given root.
|
||||
async fn count_recovery_audit_rows(repo_root: &Path) -> usize {
|
||||
let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance");
|
||||
async fn count_recovery_audit_rows(graph_root: &Path) -> usize {
|
||||
let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance");
|
||||
if !recoveries_dir.exists() {
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -306,9 +306,9 @@ async fn count_recovery_audit_rows(repo_root: &Path) -> usize {
|
|||
/// Helper: read the most recent recovery audit row's `recovery_kind`,
|
||||
/// `recovery_for_actor`, and `operation_id`. Returns `None` if no rows.
|
||||
async fn read_latest_recovery_audit(
|
||||
repo_root: &Path,
|
||||
graph_root: &Path,
|
||||
) -> Option<(String, Option<String>, String, String)> {
|
||||
let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance");
|
||||
let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance");
|
||||
if !recoveries_dir.exists() {
|
||||
return None;
|
||||
}
|
||||
|
|
@ -357,8 +357,8 @@ async fn read_latest_recovery_audit(
|
|||
/// storage order (multiple batches concatenated). Used by the
|
||||
/// multi-sidecar fresh-snapshot test as a diagnostic alongside the
|
||||
/// post-recovery Lance HEAD assertion.
|
||||
async fn list_recovery_audit_kinds(repo_root: &Path) -> Vec<String> {
|
||||
let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance");
|
||||
async fn list_recovery_audit_kinds(graph_root: &Path) -> Vec<String> {
|
||||
let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance");
|
||||
if !recoveries_dir.exists() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
|
@ -391,8 +391,8 @@ async fn list_recovery_audit_kinds(repo_root: &Path) -> Vec<String> {
|
|||
}
|
||||
|
||||
/// Helper: count `_graph_commits.lance` rows tagged with the recovery actor.
|
||||
async fn count_recovery_actor_commits(repo_root: &Path) -> usize {
|
||||
let actors_dir = repo_root.join("_graph_commit_actors.lance");
|
||||
async fn count_recovery_actor_commits(graph_root: &Path) -> usize {
|
||||
let actors_dir = graph_root.join("_graph_commit_actors.lance");
|
||||
if !actors_dir.exists() {
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -908,7 +908,7 @@ async fn recovery_ensure_indices_steady_state_no_sidecar() {
|
|||
/// ran) and rolls back any sibling table's legitimate index work.
|
||||
///
|
||||
/// Integration verification: after a real init + ensure_indices on a
|
||||
/// repo where every table is empty, the recovery sweep must complete
|
||||
/// graph where every table is empty, the recovery sweep must complete
|
||||
/// cleanly (no leftover sidecar) AND the next ensure_indices must also
|
||||
/// leave no sidecar — proving the empty-table-scoping behavior lets
|
||||
/// steady-state runs incur zero sidecar I/O. The
|
||||
|
|
@ -930,7 +930,7 @@ async fn recovery_ensure_indices_handles_empty_tables() {
|
|||
db.ensure_indices().await.unwrap();
|
||||
assert!(
|
||||
list_recovery_dir(dir.path()).is_empty(),
|
||||
"ensure_indices on an all-empty repo must not leave a sidecar"
|
||||
"ensure_indices on an all-empty graph must not leave a sidecar"
|
||||
);
|
||||
// Reopen + ensure_indices — still steady state, still no sidecar.
|
||||
drop(db);
|
||||
|
|
@ -938,7 +938,7 @@ async fn recovery_ensure_indices_handles_empty_tables() {
|
|||
db.ensure_indices().await.unwrap();
|
||||
assert!(
|
||||
list_recovery_dir(dir.path()).is_empty(),
|
||||
"second ensure_indices on an all-empty repo must also not leave a sidecar"
|
||||
"second ensure_indices on an all-empty graph must also not leave a sidecar"
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -521,6 +521,10 @@ query delete_two_persons($first: String, $second: String) {
|
|||
delete Person where name = $first
|
||||
delete Person where name = $second
|
||||
}
|
||||
|
||||
query update_age_by_name($name: String, $age: I32) {
|
||||
update Person set { age: $age } where name = $name
|
||||
}
|
||||
"#;
|
||||
|
||||
/// D₂: a query mixing inserts/updates with deletes is rejected at parse
|
||||
|
|
@ -1362,3 +1366,85 @@ query insert_then_update_note(
|
|||
.unwrap();
|
||||
assert_eq!(qr.num_rows(), 0, "letter must not be visible after early error");
|
||||
}
|
||||
|
||||
/// MR-920 regression: two sequential `update T set {f:v} where x=y`
|
||||
/// invocations against the same row must both succeed. Pre-fix, the
|
||||
/// second one failed with `Ambiguous merge inserts are prohibited:
|
||||
/// multiple source rows match the same target row on (id = "Alice")`
|
||||
/// even though the scan returned exactly one row.
|
||||
///
|
||||
/// Root cause hypothesis (per MR-920): Lance's
|
||||
/// `processed_row_ids: Mutex<HashSet<u64>>`
|
||||
/// (`src/dataset/write/merge_insert.rs:2099`) double-processes the
|
||||
/// same target row_id against datasets previously rewritten by
|
||||
/// merge_insert. `SourceDedupeBehavior::FirstSeen` makes Lance skip
|
||||
/// rather than error.
|
||||
///
|
||||
/// Companion to `consistency.rs::load_merge_repeated_against_overlapping_keys_succeeds`
|
||||
/// (PR #98 / Window 1 of the bug class via the load surface).
|
||||
#[tokio::test]
|
||||
async fn second_sequential_update_on_same_row_succeeds() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let mut db = init_and_load(&dir).await;
|
||||
|
||||
db.mutate(
|
||||
"main",
|
||||
STAGED_QUERIES,
|
||||
"update_age_by_name",
|
||||
&mixed_params(&[("$name", "Alice")], &[("$age", 99)]),
|
||||
)
|
||||
.await
|
||||
.expect("first sequential update on Alice must succeed");
|
||||
|
||||
let batches = read_table(&db, "node:Person").await;
|
||||
let alice_count: usize = batches
|
||||
.iter()
|
||||
.map(|b| {
|
||||
let names = b
|
||||
.column_by_name("name")
|
||||
.unwrap()
|
||||
.as_any()
|
||||
.downcast_ref::<arrow_array::StringArray>()
|
||||
.unwrap();
|
||||
(0..b.num_rows())
|
||||
.filter(|i| names.is_valid(*i) && names.value(*i) == "Alice")
|
||||
.count()
|
||||
})
|
||||
.sum();
|
||||
assert_eq!(
|
||||
alice_count, 1,
|
||||
"after first update, exactly one Alice row should be visible"
|
||||
);
|
||||
|
||||
db.mutate(
|
||||
"main",
|
||||
STAGED_QUERIES,
|
||||
"update_age_by_name",
|
||||
&mixed_params(&[("$name", "Alice")], &[("$age", 42)]),
|
||||
)
|
||||
.await
|
||||
.expect("second sequential update on Alice must succeed");
|
||||
|
||||
let batches = read_table(&db, "node:Person").await;
|
||||
let mut alice_age: Option<i32> = None;
|
||||
for batch in &batches {
|
||||
let names = batch
|
||||
.column_by_name("name")
|
||||
.unwrap()
|
||||
.as_any()
|
||||
.downcast_ref::<arrow_array::StringArray>()
|
||||
.unwrap();
|
||||
let ages = batch
|
||||
.column_by_name("age")
|
||||
.unwrap()
|
||||
.as_any()
|
||||
.downcast_ref::<arrow_array::Int32Array>()
|
||||
.unwrap();
|
||||
for i in 0..batch.num_rows() {
|
||||
if names.is_valid(i) && names.value(i) == "Alice" && ages.is_valid(i) {
|
||||
alice_age = Some(ages.value(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
assert_eq!(alice_age, Some(42), "Alice's age must reflect the second update");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ use omnigraph::loader::{LoadMode, load_jsonl};
|
|||
use helpers::*;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn s3_compatible_repo_lifecycle_works() {
|
||||
let Some(uri) = s3_test_repo_uri("omnigraph-runtime") else {
|
||||
async fn s3_compatible_graph_lifecycle_works() {
|
||||
let Some(uri) = s3_test_graph_uri("omnigraph-runtime") else {
|
||||
eprintln!("skipping s3 runtime test: OMNIGRAPH_S3_TEST_BUCKET is not set");
|
||||
return;
|
||||
};
|
||||
|
|
@ -81,7 +81,7 @@ async fn s3_compatible_repo_lifecycle_works() {
|
|||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn s3_branch_change_merge_flow_works() {
|
||||
let Some(uri) = s3_test_repo_uri("omnigraph-branching") else {
|
||||
let Some(uri) = s3_test_graph_uri("omnigraph-branching") else {
|
||||
eprintln!("skipping s3 branch test: OMNIGRAPH_S3_TEST_BUCKET is not set");
|
||||
return;
|
||||
};
|
||||
|
|
@ -135,7 +135,7 @@ async fn s3_branch_change_merge_flow_works() {
|
|||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn s3_public_load_uses_hidden_run_and_publishes() {
|
||||
let Some(uri) = s3_test_repo_uri("omnigraph-public-load") else {
|
||||
let Some(uri) = s3_test_graph_uri("omnigraph-public-load") else {
|
||||
eprintln!("skipping s3 public load test: OMNIGRAPH_S3_TEST_BUCKET is not set");
|
||||
return;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ async fn apply_schema_rejects_when_non_main_branch_exists() {
|
|||
let err = db.apply_schema(&desired).await.unwrap_err();
|
||||
assert!(
|
||||
err.to_string()
|
||||
.contains("schema apply requires a repo with only main")
|
||||
.contains("schema apply requires a graph with only main")
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -402,10 +402,7 @@ async fn apply_schema_rejects_adding_a_required_property_without_backfill() {
|
|||
|
||||
// Add `email: String` (required, non-nullable, no @rename_from). Existing
|
||||
// rows have no value to fill in, so this is unsupported in v1.
|
||||
let desired = TEST_SCHEMA.replace(
|
||||
" age: I32?\n}",
|
||||
" age: I32?\n email: String\n}",
|
||||
);
|
||||
let desired = TEST_SCHEMA.replace(" age: I32?\n}", " age: I32?\n email: String\n}");
|
||||
let err = db.apply_schema(&desired).await.unwrap_err();
|
||||
let msg = err.to_string();
|
||||
assert!(
|
||||
|
|
@ -437,7 +434,10 @@ async fn plan_schema_for_property_type_narrowing_is_not_supported() {
|
|||
.unwrap();
|
||||
|
||||
let plan = db.plan_schema(TEST_SCHEMA).await.unwrap();
|
||||
assert!(!plan.supported, "narrowing I64 -> I32 must not be supported");
|
||||
assert!(
|
||||
!plan.supported,
|
||||
"narrowing I64 -> I32 must not be supported"
|
||||
);
|
||||
assert!(plan.steps.iter().any(|step| matches!(
|
||||
step,
|
||||
SchemaMigrationStep::UnsupportedChange { code, .. }
|
||||
|
|
|
|||
|
|
@ -3,7 +3,8 @@ mod helpers;
|
|||
use std::env;
|
||||
|
||||
use arrow_array::{Array, StringArray};
|
||||
use lance_index::{DatasetIndexExt, is_system_index};
|
||||
use lance::index::DatasetIndexExt;
|
||||
use lance_index::is_system_index;
|
||||
use serial_test::serial;
|
||||
|
||||
use omnigraph::db::Omnigraph;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue