Merge branch 'main' into devin/1779464281-mr-656-inline-query-strings

Resolve conflicts: keep query/mutate canonical CLI subcommands and
top-level lint command (this branch) alongside the repo→graph terminology
rename from main. Update test helpers (repo_path → graph_path,
init_repo → init_graph, app_for_loaded_repo → app_for_loaded_graph) and
align tempdir variable names so the merged tests compile. Drop the now-
unused QueryCommand enum (Lint was promoted to a top-level Command).

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>
This commit is contained in:
Devin AI 2026-05-24 17:27:48 +00:00
commit 9ff4af47fb
79 changed files with 2780 additions and 1894 deletions

View file

@ -1,6 +1,6 @@
[package]
name = "omnigraph-cli"
version = "0.4.2"
version = "0.6.0"
edition = "2024"
description = "CLI for the Omnigraph graph database."
license = "MIT"
@ -13,10 +13,10 @@ name = "omnigraph"
path = "src/main.rs"
[dependencies]
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.4.2" }
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" }
omnigraph-policy = { path = "../omnigraph-policy", version = "0.4.2" }
omnigraph-server = { path = "../omnigraph-server", version = "0.4.2" }
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.0" }
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" }
omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.0" }
omnigraph-server = { path = "../omnigraph-server", version = "0.6.0" }
clap = { workspace = true }
color-eyre = { workspace = true }
serde = { workspace = true }
@ -30,4 +30,5 @@ assert_cmd = "2"
predicates = "3"
serde_json = { workspace = true }
tempfile = { workspace = true }
lance = { workspace = true }
lance-index = { workspace = true }

View file

@ -67,16 +67,16 @@ enum Command {
Version,
/// Generate, clean, or refresh explicit seed embeddings
Embed(EmbedArgs),
/// Initialize a new repo from a schema
/// Initialize a new graph from a schema
Init {
#[arg(long)]
schema: PathBuf,
/// Repo URI (local path or s3://)
/// Graph URI (local path or s3://)
uri: String,
},
/// Load data into a repo
/// Load data into a graph
Load {
/// Repo URI
/// Graph URI
uri: Option<String>,
#[arg(long)]
target: Option<String>,
@ -93,7 +93,7 @@ enum Command {
},
/// Ingest data into a reviewable named branch
Ingest {
/// Repo URI
/// Graph URI
uri: Option<String>,
#[arg(long)]
target: Option<String>,
@ -127,7 +127,7 @@ enum Command {
/// printed and the invocation is rewritten to `omnigraph lint`).
#[command(visible_alias = "check")]
Lint {
/// Repo URI
/// Graph URI
uri: Option<String>,
#[arg(long)]
target: Option<String>,
@ -140,9 +140,9 @@ enum Command {
#[arg(long)]
json: bool,
},
/// Show repo snapshot
/// Show graph snapshot
Snapshot {
/// Repo URI
/// Graph URI
uri: Option<String>,
#[arg(long)]
target: Option<String>,
@ -155,7 +155,7 @@ enum Command {
},
/// Export a full graph snapshot as JSONL
Export {
/// Repo URI
/// Graph URI
uri: Option<String>,
#[arg(long)]
target: Option<String>,
@ -182,7 +182,7 @@ enum Command {
/// when used. Pairs with `omnigraph mutate` on the write side.
#[command(visible_alias = "read")]
Query {
/// Repo URI
/// Graph URI
#[arg(long)]
uri: Option<String>,
#[arg(hide = true)]
@ -220,7 +220,7 @@ enum Command {
/// warning when used. Pairs with `omnigraph query` on the read side.
#[command(visible_alias = "change")]
Mutate {
/// Repo URI
/// Graph URI
#[arg(long)]
uri: Option<String>,
#[arg(hide = true)]
@ -252,9 +252,9 @@ enum Command {
#[command(subcommand)]
command: PolicyCommand,
},
/// Compact small Lance fragments in every table of the repo
/// Compact small Lance fragments in every table of the graph
Optimize {
/// Repo URI
/// Graph URI
uri: Option<String>,
#[arg(long)]
target: Option<String>,
@ -263,9 +263,9 @@ enum Command {
#[arg(long)]
json: bool,
},
/// Remove old Lance versions from every table of the repo (destructive)
/// Remove old Lance versions from every table of the graph (destructive)
Cleanup {
/// Repo URI
/// Graph URI
uri: Option<String>,
#[arg(long)]
target: Option<String>,
@ -291,7 +291,7 @@ enum Command {
enum BranchCommand {
/// Create a new branch
Create {
/// Repo URI
/// Graph URI
#[arg(long)]
uri: Option<String>,
#[arg(long)]
@ -306,7 +306,7 @@ enum BranchCommand {
},
/// List branches
List {
/// Repo URI
/// Graph URI
#[arg(long)]
uri: Option<String>,
#[arg(long)]
@ -318,7 +318,7 @@ enum BranchCommand {
},
/// Delete a branch
Delete {
/// Repo URI
/// Graph URI
#[arg(long)]
uri: Option<String>,
#[arg(long)]
@ -331,7 +331,7 @@ enum BranchCommand {
},
/// Merge a source branch into a target branch
Merge {
/// Repo URI
/// Graph URI
#[arg(long)]
uri: Option<String>,
#[arg(long)]
@ -350,7 +350,7 @@ enum BranchCommand {
enum SchemaCommand {
/// Plan a schema migration against the accepted persisted schema
Plan {
/// Repo URI
/// Graph URI
uri: Option<String>,
#[arg(long)]
target: Option<String>,
@ -368,7 +368,7 @@ enum SchemaCommand {
},
/// Apply a supported schema migration
Apply {
/// Repo URI
/// Graph URI
uri: Option<String>,
#[arg(long)]
target: Option<String>,
@ -393,7 +393,7 @@ enum SchemaCommand {
/// Show the current accepted schema source
#[command(alias = "get")]
Show {
/// Repo URI
/// Graph URI
uri: Option<String>,
#[arg(long)]
target: Option<String>,
@ -405,10 +405,11 @@ enum SchemaCommand {
}
#[derive(Debug, Subcommand)]
enum CommitCommand {
/// List graph commits
List {
/// Repo URI
/// Graph URI
uri: Option<String>,
#[arg(long)]
target: Option<String>,
@ -421,7 +422,7 @@ enum CommitCommand {
},
/// Show a graph commit
Show {
/// Repo URI
/// Graph URI
#[arg(long)]
uri: Option<String>,
#[arg(long)]
@ -594,7 +595,7 @@ fn finish_query_lint(output: &QueryLintOutput, json: bool) -> Result<()> {
Ok(())
}
fn ensure_local_repo_parent(uri: &str) -> Result<()> {
fn ensure_local_graph_parent(uri: &str) -> Result<()> {
if !uri.contains("://") {
fs::create_dir_all(uri)?;
}
@ -706,10 +707,10 @@ fn resolve_policy_engine(config: &OmnigraphConfig) -> Result<PolicyEngine> {
let policy_file = config
.resolve_policy_file()
.ok_or_else(|| color_eyre::eyre::eyre!("policy.file must be set in omnigraph.yaml"))?;
PolicyEngine::load(&policy_file, &policy_repo_id(config))
PolicyEngine::load(&policy_file, &policy_graph_id(config))
}
/// Open a local-URI repo and, when `policy.file` is configured in
/// Open a local-URI graph and, when `policy.file` is configured in
/// `omnigraph.yaml`, install the resolved `PolicyEngine` on the engine
/// handle so every direct-engine write goes through
/// `Omnigraph::enforce(...)` (MR-722). Without a configured policy this
@ -733,10 +734,7 @@ async fn open_local_db_with_policy(uri: &str, config: &OmnigraphConfig) -> Resul
/// policy is configured and this returns `None`, the engine-layer
/// footgun guard intentionally denies — silent bypass via "I forgot the
/// actor" is what the guard prevents.
fn resolve_cli_actor<'a>(
cli_as: Option<&'a str>,
config: &'a OmnigraphConfig,
) -> Option<&'a str> {
fn resolve_cli_actor<'a>(cli_as: Option<&'a str>, config: &'a OmnigraphConfig) -> Option<&'a str> {
cli_as.or(config.cli.actor.as_deref())
}
@ -748,7 +746,7 @@ fn resolve_policy_tests_path(config: &OmnigraphConfig) -> Result<PathBuf> {
})
}
fn policy_repo_id(config: &OmnigraphConfig) -> String {
fn policy_graph_id(config: &OmnigraphConfig) -> String {
if let Some(name) = &config.project.name {
return name.clone();
}
@ -846,8 +844,15 @@ fn parse_duration_arg(s: &str) -> Result<std::time::Duration> {
if s.is_empty() {
bail!("duration is empty");
}
let (num_part, unit) = match s.char_indices().rev().find(|(_, c)| c.is_ascii_alphabetic()) {
Some((i, _)) => (&s[..i + 1 - s[i..].chars().next().unwrap().len_utf8()], &s[i..]),
let (num_part, unit) = match s
.char_indices()
.rev()
.find(|(_, c)| c.is_ascii_alphabetic())
{
Some((i, _)) => (
&s[..i + 1 - s[i..].chars().next().unwrap().len_utf8()],
&s[i..],
),
None => (s, ""),
};
let n: u64 = num_part
@ -873,7 +878,7 @@ fn resolve_local_uri(
let uri = resolve_uri(config, cli_uri, cli_target)?;
if is_remote_uri(&uri) {
bail!(
"{} is only supported against local repo URIs in this milestone",
"{} is only supported against local graph URIs in this milestone",
operation
);
}
@ -1138,9 +1143,7 @@ fn render_schema_plan_step(step: &SchemaMigrationStep) -> String {
type_name,
drop_mode_label(*mode),
),
SchemaMigrationStep::UnsupportedChange {
entity, reason, ..
} => {
SchemaMigrationStep::UnsupportedChange { entity, reason, .. } => {
// When a schema-lint code is attached, render code + tier
// so operators see at-a-glance the kind of risk (destructive
// / validated / safe) — not just the rule identifier.
@ -1550,10 +1553,10 @@ async fn execute_query_lint(
));
}
let has_repo_target =
let has_graph_target =
cli_uri.is_some() || cli_target.is_some() || config.cli_graph_name().is_some();
if !has_repo_target {
bail!("query lint requires --schema <schema.pg> or a resolvable repo target");
if !has_graph_target {
bail!("query lint requires --schema <schema.pg> or a resolvable graph target");
}
let uri = resolve_local_uri(config, cli_uri, cli_target, "query lint")?;
@ -1562,7 +1565,7 @@ async fn execute_query_lint(
&db.catalog(),
&query_source,
query_path,
QueryLintSchemaSource::repo(uri),
QueryLintSchemaSource::graph(uri),
))
}
@ -1806,7 +1809,7 @@ async fn main() -> Result<()> {
}
Command::Init { schema, uri } => {
let schema_source = fs::read_to_string(&schema)?;
ensure_local_repo_parent(&uri)?;
ensure_local_graph_parent(&uri)?;
Omnigraph::init(&uri, &schema_source).await?;
scaffold_config_if_missing(&uri)?;
println!("initialized {}", uri);
@ -2589,17 +2592,16 @@ async fn main() -> Result<()> {
let config = load_cli_config(config.as_ref())?;
let uri = resolve_uri(&config, uri, target.as_deref())?;
let older_than_dur = older_than
.as_deref()
.map(parse_duration_arg)
.transpose()?;
let older_than_dur = older_than.as_deref().map(parse_duration_arg).transpose()?;
if keep.is_none() && older_than_dur.is_none() {
bail!("cleanup requires at least one of --keep or --older-than");
}
let policy_desc = match (keep, older_than_dur) {
(Some(k), Some(d)) => format!("keep {} versions, remove anything older than {:?}", k, d),
(Some(k), Some(d)) => {
format!("keep {} versions, remove anything older than {:?}", k, d)
}
(Some(k), None) => format!("keep {} versions", k),
(None, Some(d)) => format!("remove anything older than {:?}", d),
_ => unreachable!(),

File diff suppressed because it is too large Load diff

View file

@ -52,7 +52,7 @@ pub fn fixture(name: &str) -> PathBuf {
.join(name)
}
/// Returns the location of the test graph under `root`: `root` joined with
/// the fixed directory name `demo.omni`. Nothing is created on disk here.
pub fn repo_path(root: &Path) -> PathBuf {
pub fn graph_path(root: &Path) -> PathBuf {
root.join("demo.omni")
}
@ -86,14 +86,14 @@ pub fn parse_stdout_json(output: &Output) -> Value {
serde_json::from_slice(&output.stdout).unwrap()
}
/// Initializes a graph at the given path by invoking the CLI under test as
/// `init --schema <test.pg fixture> <path>`.
///
/// NOTE(review): `output_success` is presumably what fails the test when the
/// command exits unsuccessfully — its body is not visible here; confirm.
pub fn init_repo(repo: &Path) {
pub fn init_graph(graph: &Path) {
// The schema always comes from the shared `test.pg` fixture.
let schema = fixture("test.pg");
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(repo));
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(graph));
}
/// Loads the shared `test.jsonl` fixture into the graph at the given path by
/// invoking the CLI under test as `load --data <test.jsonl fixture> <path>`.
///
/// NOTE(review): assumes the graph was already initialized (e.g. by the init
/// helper in this module); nothing in this function checks that.
pub fn load_fixture(repo: &Path) {
pub fn load_fixture(graph: &Path) {
let data = fixture("test.jsonl");
output_success(cli().arg("load").arg("--data").arg(&data).arg(repo));
output_success(cli().arg("load").arg("--data").arg(&data).arg(graph));
}
pub fn write_jsonl(path: &Path, rows: &str) {
@ -116,7 +116,7 @@ fn yaml_string(value: &str) -> String {
format!("'{}'", value.replace('\'', "''"))
}
pub fn local_yaml_config(repo: &Path) -> String {
pub fn local_yaml_config(graph: &Path) -> String {
format!(
"\
graphs:
@ -130,7 +130,7 @@ query:
- .
policy: {{}}
",
yaml_string(&repo.to_string_lossy())
yaml_string(&graph.to_string_lossy())
)
}
@ -200,9 +200,9 @@ fn spawn_server_process(mut command: StdCommand) -> TestServer {
panic!("server did not become healthy");
}
/// Starts a test server for the graph at the given path.
///
/// Builds the base command with `server_process()`, appends the graph path as
/// an argument, and hands the command to `spawn_server_process`, which returns
/// the `TestServer` handle.
pub fn spawn_server(repo: &Path) -> TestServer {
pub fn spawn_server(graph: &Path) -> TestServer {
let mut command = server_process();
command.arg(repo);
command.arg(graph);
spawn_server_process(command)
}
@ -221,58 +221,57 @@ pub fn spawn_server_with_config_env(config: &Path, envs: &[(&str, &str)]) -> Tes
spawn_server_process(command)
}
/// Test fixture that pairs a temporary directory with the path of the graph
/// created inside it.
pub struct SystemRepo {
pub struct SystemGraph {
// Never read (hence the leading underscore); presumably held only so the
// temporary directory stays alive as long as the fixture — TODO confirm
// against the `TempDir` drop behaviour.
_temp: TempDir,
// Path of the graph inside the temporary directory.
repo: PathBuf,
graph: PathBuf,
}
impl SystemRepo {
impl SystemGraph {
pub fn initialized() -> Self {
let temp = tempdir().unwrap();
let repo = repo_path(temp.path());
init_repo(&repo);
Self { _temp: temp, repo }
let graph = graph_path(temp.path());
init_graph(&graph);
Self { _temp: temp, graph }
}
pub fn loaded() -> Self {
let temp = tempdir().unwrap();
let repo = repo_path(temp.path());
init_repo(&repo);
load_fixture(&repo);
Self { _temp: temp, repo }
let graph = graph_path(temp.path());
init_graph(&graph);
load_fixture(&graph);
Self { _temp: temp, graph }
}
pub fn path(&self) -> &Path {
&self.repo
&self.graph
}
pub fn write_query(&self, name: &str, source: &str) -> PathBuf {
let path = self.repo.parent().unwrap().join(name);
let path = self.graph.parent().unwrap().join(name);
write_query_file(&path, source);
path
}
pub fn write_jsonl(&self, name: &str, rows: &str) -> PathBuf {
let path = self.repo.parent().unwrap().join(name);
let path = self.graph.parent().unwrap().join(name);
write_jsonl(&path, rows);
path
}
pub fn write_config(&self, name: &str, source: &str) -> PathBuf {
let path = self.repo.parent().unwrap().join(name);
let path = self.graph.parent().unwrap().join(name);
write_config(&path, source);
path
}
pub fn write_file(&self, name: &str, source: &str) -> PathBuf {
let path = self.repo.parent().unwrap().join(name);
let path = self.graph.parent().unwrap().join(name);
write_file(&path, source);
path
}
pub fn spawn_server(&self) -> TestServer {
spawn_server(&self.repo)
spawn_server(&self.graph)
}
pub fn spawn_server_with_config(&self, config: &Path) -> TestServer {

View file

@ -66,7 +66,7 @@ fn yaml_string(value: &str) -> String {
format!("'{}'", value.replace('\'', "''"))
}
fn local_policy_config(repo: &SystemRepo) -> String {
fn local_policy_config(graph: &SystemGraph) -> String {
format!(
"\
project:
@ -83,12 +83,12 @@ query:
policy:
file: ./policy.yaml
",
yaml_string(&repo.path().to_string_lossy())
yaml_string(&graph.path().to_string_lossy())
)
}
fn insert_person_query(repo: &SystemRepo, name: &str) -> std::path::PathBuf {
repo.write_query(
fn insert_person_query(graph: &SystemGraph, name: &str) -> std::path::PathBuf {
graph.write_query(
name,
r#"
query insert_person($name: String, $age: I32) {
@ -98,8 +98,8 @@ query insert_person($name: String, $age: I32) {
)
}
fn add_friend_query(repo: &SystemRepo, name: &str) -> std::path::PathBuf {
repo.write_query(
fn add_friend_query(graph: &SystemGraph, name: &str) -> std::path::PathBuf {
graph.write_query(
name,
r#"
query add_friend($from: String, $to: String) {
@ -109,13 +109,13 @@ query add_friend($from: String, $to: String) {
)
}
/// Convenience wrapper around `snapshot_table_row_count_at`: resolves the
/// fixture's graph path via `path()` and forwards `table_key` unchanged.
///
/// NOTE(review): per its name the returned `u64` is the row count reported for
/// `table_key` in the CLI snapshot output; the extraction logic lives in
/// `snapshot_table_row_count_at` and is only partly visible here.
fn snapshot_table_row_count(repo: &SystemRepo, table_key: &str) -> u64 {
snapshot_table_row_count_at(repo.path(), table_key)
fn snapshot_table_row_count(graph: &SystemGraph, table_key: &str) -> u64 {
snapshot_table_row_count_at(graph.path(), table_key)
}
fn snapshot_table_row_count_at(repo: &std::path::Path, table_key: &str) -> u64 {
fn snapshot_table_row_count_at(graph: &std::path::Path, table_key: &str) -> u64 {
let payload = parse_stdout_json(&output_success(
cli().arg("snapshot").arg(repo).arg("--json"),
cli().arg("snapshot").arg(graph).arg("--json"),
));
payload["tables"]
.as_array()
@ -178,7 +178,7 @@ fn format_vector(values: &[f32]) -> String {
.join(", ")
}
fn s3_test_repo_uri(suite: &str) -> Option<String> {
fn s3_test_graph_uri(suite: &str) -> Option<String> {
let bucket = env::var("OMNIGRAPH_S3_TEST_BUCKET").ok()?;
let prefix = env::var("OMNIGRAPH_S3_TEST_PREFIX")
.ok()
@ -193,21 +193,21 @@ fn s3_test_repo_uri(suite: &str) -> Option<String> {
#[test]
fn local_cli_end_to_end_init_load_read_change_read_flow() {
let repo = SystemRepo::initialized();
let mutation_file = insert_person_query(&repo, "system-local-init-change.gq");
let graph = SystemGraph::initialized();
let mutation_file = insert_person_query(&graph, "system-local-init-change.gq");
output_success(
cli()
.arg("load")
.arg("--data")
.arg(fixture("test.jsonl"))
.arg(repo.path()),
.arg(graph.path()),
);
let read_before = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -222,7 +222,7 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() {
let change_payload = parse_stdout_json(&output_success(
cli()
.arg("change")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(&mutation_file)
.arg("--params")
@ -235,7 +235,7 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() {
let read_after = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -253,7 +253,7 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() {
let inline_change = parse_stdout_json(&output_success(
cli()
.arg("change")
.arg(repo.path())
.arg(graph.path())
.arg("-e")
.arg("query add($name: String, $age: I32) { insert Person { name: $name, age: $age } }")
.arg("--params")
@ -267,7 +267,7 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() {
let inline_read = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(repo.path())
.arg(graph.path())
.arg("--query-string")
.arg("query find($name: String) { match { $p: Person { name: $name } } return { $p.name, $p.age } }")
.arg("--params")
@ -281,15 +281,15 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() {
#[test]
fn local_cli_end_to_end_branch_change_merge_flow() {
let repo = SystemRepo::loaded();
let mutation_file = insert_person_query(&repo, "system-local-change.gq");
let graph = SystemGraph::loaded();
let mutation_file = insert_person_query(&graph, "system-local-change.gq");
output_success(
cli()
.arg("branch")
.arg("create")
.arg("--uri")
.arg(repo.path())
.arg(graph.path())
.arg("--from")
.arg("main")
.arg("feature"),
@ -298,7 +298,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
let change_payload = parse_stdout_json(&output_success(
cli()
.arg("change")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(&mutation_file)
.arg("--branch")
@ -313,7 +313,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
let feature_read = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -332,7 +332,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
.arg("branch")
.arg("merge")
.arg("--uri")
.arg(repo.path())
.arg(graph.path())
.arg("feature")
.arg("--json"),
));
@ -341,7 +341,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
let main_read = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -358,7 +358,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
cli()
.arg("commit")
.arg("list")
.arg(repo.path())
.arg(graph.path())
.arg("--branch")
.arg("main")
.arg("--json"),
@ -368,8 +368,8 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
#[test]
fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
let repo = SystemRepo::loaded();
let ingest_data = repo.write_jsonl(
let graph = SystemGraph::loaded();
let ingest_data = graph.write_jsonl(
"system-local-ingest.jsonl",
r#"{"type":"Person","data":{"name":"Zoe","age":33}}
{"type":"Person","data":{"name":"Bob","age":26}}"#,
@ -382,7 +382,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
.arg(&ingest_data)
.arg("--branch")
.arg("feature-ingest")
.arg(repo.path())
.arg(graph.path())
.arg("--json"),
));
assert_eq!(ingest_payload["branch"], "feature-ingest");
@ -395,7 +395,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
let feature_snapshot = parse_stdout_json(&output_success(
cli()
.arg("snapshot")
.arg(repo.path())
.arg(graph.path())
.arg("--branch")
.arg("feature-ingest")
.arg("--json"),
@ -405,7 +405,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
let zoe = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -422,7 +422,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
let bob = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -439,20 +439,20 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
#[test]
fn local_cli_export_round_trips_full_branch_graph() {
let repo = SystemRepo::loaded();
let graph = SystemGraph::loaded();
output_success(
cli()
.arg("branch")
.arg("create")
.arg("--uri")
.arg(repo.path())
.arg(graph.path())
.arg("--from")
.arg("main")
.arg("feature"),
);
let feature_data = repo.write_jsonl(
let feature_data = graph.write_jsonl(
"system-local-export-feature.jsonl",
r#"{"type":"Person","data":{"name":"Eve","age":29}}
{"edge":"Knows","from":"Alice","to":"Eve"}"#,
@ -466,53 +466,56 @@ fn local_cli_export_round_trips_full_branch_graph() {
.arg("feature")
.arg("--mode")
.arg("append")
.arg(repo.path()),
.arg(graph.path()),
);
let exported = stdout_string(&output_success(
cli()
.arg("export")
.arg(repo.path())
.arg(graph.path())
.arg("--branch")
.arg("feature")
.arg("--jsonl"),
));
let export_path = repo.write_jsonl("system-local-exported.jsonl", &exported);
let imported_repo = repo.path().parent().unwrap().join("imported-export.omni");
let export_path = graph.write_jsonl("system-local-exported.jsonl", &exported);
let imported_graph = graph.path().parent().unwrap().join("imported-export.omni");
output_success(
cli()
.arg("init")
.arg("--schema")
.arg(fixture("test.pg"))
.arg(&imported_repo),
.arg(&imported_graph),
);
output_success(
cli()
.arg("load")
.arg("--data")
.arg(&export_path)
.arg(&imported_repo),
.arg(&imported_graph),
);
assert_eq!(
snapshot_table_row_count_at(&imported_repo, "node:Person"),
snapshot_table_row_count_at(&imported_graph, "node:Person"),
5
);
assert_eq!(
snapshot_table_row_count_at(&imported_repo, "node:Company"),
snapshot_table_row_count_at(&imported_graph, "node:Company"),
2
);
assert_eq!(snapshot_table_row_count_at(&imported_repo, "edge:Knows"), 4);
assert_eq!(
snapshot_table_row_count_at(&imported_repo, "edge:WorksAt"),
snapshot_table_row_count_at(&imported_graph, "edge:Knows"),
4
);
assert_eq!(
snapshot_table_row_count_at(&imported_graph, "edge:WorksAt"),
2
);
let eve = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(&imported_repo)
.arg(&imported_graph)
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -527,7 +530,7 @@ fn local_cli_export_round_trips_full_branch_graph() {
let friends = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(&imported_repo)
.arg(&imported_graph)
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -541,7 +544,7 @@ fn local_cli_export_round_trips_full_branch_graph() {
#[test]
fn local_cli_s3_end_to_end_init_load_read_flow() {
let Some(repo_uri) = s3_test_repo_uri("cli-local") else {
let Some(graph_uri) = s3_test_graph_uri("cli-local") else {
eprintln!("skipping s3 cli test: OMNIGRAPH_S3_TEST_BUCKET is not set");
return;
};
@ -566,7 +569,7 @@ query:
- .
policy: {{}}
",
repo_uri
graph_uri
),
);
@ -575,14 +578,14 @@ policy: {{}}
.arg("init")
.arg("--schema")
.arg(fixture("test.pg"))
.arg(&repo_uri),
.arg(&graph_uri),
);
output_success(
cli()
.arg("load")
.arg("--data")
.arg(fixture("test.jsonl"))
.arg(&repo_uri),
.arg(&graph_uri),
);
let read = parse_stdout_json(&output_success(
@ -615,13 +618,13 @@ policy: {{}}
#[test]
fn local_cli_failed_load_keeps_target_state_unchanged() {
let repo = SystemRepo::loaded();
let bad_data = repo.write_jsonl(
let graph = SystemGraph::loaded();
let bad_data = graph.write_jsonl(
"system-bad-load.jsonl",
r#"{"edge":"Knows","from":"Alice","to":"Missing"}"#,
);
let person_rows_before = snapshot_table_row_count(&repo, "node:Person");
let knows_rows_before = snapshot_table_row_count(&repo, "edge:Knows");
let person_rows_before = snapshot_table_row_count(&graph, "node:Person");
let knows_rows_before = snapshot_table_row_count(&graph, "edge:Knows");
let output = output_failure(
cli()
@ -630,17 +633,17 @@ fn local_cli_failed_load_keeps_target_state_unchanged() {
.arg(&bad_data)
.arg("--mode")
.arg("append")
.arg(repo.path()),
.arg(graph.path()),
);
let stderr = String::from_utf8(output.stderr).unwrap();
assert!(stderr.contains("not found") || stderr.contains("Missing"));
assert_eq!(
snapshot_table_row_count(&repo, "node:Person"),
snapshot_table_row_count(&graph, "node:Person"),
person_rows_before
);
assert_eq!(
snapshot_table_row_count(&repo, "edge:Knows"),
snapshot_table_row_count(&graph, "edge:Knows"),
knows_rows_before
);
// Failed loads leave no run record (the run lifecycle has been
@ -649,13 +652,13 @@ fn local_cli_failed_load_keeps_target_state_unchanged() {
#[test]
fn local_cli_failed_change_keeps_target_state_unchanged() {
let repo = SystemRepo::loaded();
let mutation_file = add_friend_query(&repo, "system-invalid-change.gq");
let graph = SystemGraph::loaded();
let mutation_file = add_friend_query(&graph, "system-invalid-change.gq");
let output = output_failure(
cli()
.arg("change")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(&mutation_file)
.arg("--params")
@ -667,7 +670,7 @@ fn local_cli_failed_change_keeps_target_state_unchanged() {
let friends_payload = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -683,8 +686,8 @@ fn local_cli_failed_change_keeps_target_state_unchanged() {
#[test]
fn local_cli_resolves_relative_query_against_config_base_dir() {
let repo = SystemRepo::loaded();
let root = repo.path().parent().unwrap();
let graph = SystemGraph::loaded();
let root = graph.path().parent().unwrap();
let config_dir = root.join("config");
let query_dir = config_dir.join("queries");
let ambient_dir = root.join("ambient");
@ -707,7 +710,7 @@ query:
- queries
policy: {{}}
",
repo.path().display()
graph.path().display()
),
);
write_query_file(
@ -761,7 +764,7 @@ query get_person($name: String) {
#[test]
fn local_cli_datetime_and_list_types_round_trip_through_load_read_and_change() {
let temp = tempfile::tempdir().unwrap();
let repo = repo_path(temp.path());
let graph = graph_path(temp.path());
let schema = temp.path().join("datatypes.pg");
let data = temp.path().join("datatypes.jsonl");
let queries = temp.path().join("datatypes.gq");
@ -836,13 +839,13 @@ query get_task($slug: String) {
"#,
);
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&repo));
output_success(cli().arg("load").arg("--data").arg(&data).arg(&repo));
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&graph));
output_success(cli().arg("load").arg("--data").arg(&data).arg(&graph));
let filtered = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(&repo)
.arg(&graph)
.arg("--query")
.arg(&queries)
.arg("--name")
@ -867,7 +870,7 @@ query get_task($slug: String) {
let insert_payload = parse_stdout_json(&output_success(
cli()
.arg("change")
.arg(&repo)
.arg(&graph)
.arg("--query")
.arg(&queries)
.arg("--name")
@ -883,7 +886,7 @@ query get_task($slug: String) {
let update_payload = parse_stdout_json(&output_success(
cli()
.arg("change")
.arg(&repo)
.arg(&graph)
.arg("--query")
.arg(&queries)
.arg("--name")
@ -897,7 +900,7 @@ query get_task($slug: String) {
let gamma = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(&repo)
.arg(&graph)
.arg("--query")
.arg(&queries)
.arg("--name")
@ -924,7 +927,7 @@ query get_task($slug: String) {
#[ignore = "requires GEMINI_API_KEY and network access"]
fn local_cli_real_gemini_string_nearest_query_returns_expected_match() {
let temp = tempfile::tempdir().unwrap();
let repo = repo_path(temp.path());
let graph = graph_path(temp.path());
let schema = temp.path().join("gemini.pg");
let data = temp.path().join("gemini.jsonl");
let queries = temp.path().join("gemini.gq");
@ -966,13 +969,13 @@ query vector_search($q: String) {
"#,
);
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&repo));
output_success(cli().arg("load").arg("--data").arg(&data).arg(&repo));
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&graph));
output_success(cli().arg("load").arg("--data").arg(&data).arg(&graph));
let result = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(&repo)
.arg(&graph)
.arg("--query")
.arg(&queries)
.arg("--name")
@ -999,10 +1002,10 @@ fn local_cli_policy_tooling_is_end_to_end() {
// Sanity check for the read-only policy CLI surfaces. These don't
// mutate the graph — they just parse and evaluate the policy file —
// so they don't depend on PR #4's engine-side enforcement.
let repo = SystemRepo::loaded();
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
repo.write_config("policy.yaml", POLICY_E2E_YAML);
repo.write_config("policy.tests.yaml", POLICY_E2E_TESTS_YAML);
let graph = SystemGraph::loaded();
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
graph.write_config("policy.yaml", POLICY_E2E_YAML);
graph.write_config("policy.tests.yaml", POLICY_E2E_TESTS_YAML);
let validate = output_success(
cli()
@ -1053,10 +1056,10 @@ fn local_cli_change_enforces_engine_layer_policy() {
// 3. Policy installed, `--as act-ragnor`, change on main →
// Cedar permits (admins-write rule). Write succeeds and the
// inserted row is readable.
let repo = SystemRepo::loaded();
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
repo.write_config("policy.yaml", POLICY_E2E_YAML);
let mutation_file = insert_person_query(&repo, "system-local-policy-change.gq");
let graph = SystemGraph::loaded();
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
graph.write_config("policy.yaml", POLICY_E2E_YAML);
let mutation_file = insert_person_query(&graph, "system-local-policy-change.gq");
// Case 1: policy configured, no actor threaded → footgun guard.
let no_actor = output_failure(
@ -1119,7 +1122,7 @@ fn local_cli_change_enforces_engine_layer_policy() {
let verify = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -1145,10 +1148,10 @@ fn local_cli_change_enforces_engine_layer_policy() {
#[test]
fn local_cli_load_enforces_engine_layer_policy() {
let repo = SystemRepo::loaded();
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
repo.write_config("policy.yaml", POLICY_E2E_YAML);
let data = repo.write_jsonl(
let graph = SystemGraph::loaded();
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
graph.write_config("policy.yaml", POLICY_E2E_YAML);
let data = graph.write_jsonl(
"system-local-policy-load.jsonl",
r#"{"type":"Person","data":{"name":"LoadPolicy","age":11}}"#,
);
@ -1189,10 +1192,10 @@ fn local_cli_load_enforces_engine_layer_policy() {
#[test]
fn local_cli_ingest_enforces_engine_layer_policy() {
let repo = SystemRepo::loaded();
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
repo.write_config("policy.yaml", POLICY_E2E_YAML);
let data = repo.write_jsonl(
let graph = SystemGraph::loaded();
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
graph.write_config("policy.yaml", POLICY_E2E_YAML);
let data = graph.write_jsonl(
"system-local-policy-ingest.jsonl",
r#"{"type":"Person","data":{"name":"IngestPolicy","age":12}}"#,
);
@ -1242,16 +1245,19 @@ fn local_cli_ingest_enforces_engine_layer_policy() {
#[test]
fn local_cli_schema_apply_enforces_engine_layer_policy() {
let repo = SystemRepo::loaded();
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
repo.write_config("policy.yaml", POLICY_E2E_YAML);
let graph = SystemGraph::loaded();
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
graph.write_config("policy.yaml", POLICY_E2E_YAML);
// Additive: add a nullable property; SDK-compatible with the fixture
// schema. Uses the schema-apply scope (TargetBranch("main")).
let new_schema = std::fs::read_to_string(fixture("test.pg"))
.unwrap()
.replace(" age: I32?\n}", " age: I32?\n nickname: String?\n}");
let schema_path = repo.path().join("policy-additive.pg");
.replace(
" age: I32?\n}",
" age: I32?\n nickname: String?\n}",
);
let schema_path = graph.path().join("policy-additive.pg");
std::fs::write(&schema_path, &new_schema).unwrap();
let denied = output_failure(
@ -1289,9 +1295,9 @@ fn local_cli_schema_apply_enforces_engine_layer_policy() {
#[test]
fn local_cli_branch_create_enforces_engine_layer_policy() {
let repo = SystemRepo::loaded();
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
repo.write_config("policy.yaml", POLICY_E2E_YAML);
let graph = SystemGraph::loaded();
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
graph.write_config("policy.yaml", POLICY_E2E_YAML);
let denied = output_failure(
cli()
@ -1327,9 +1333,9 @@ fn local_cli_branch_create_enforces_engine_layer_policy() {
#[test]
fn local_cli_branch_delete_enforces_engine_layer_policy() {
let repo = SystemRepo::loaded();
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
repo.write_config("policy.yaml", POLICY_E2E_YAML);
let graph = SystemGraph::loaded();
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
graph.write_config("policy.yaml", POLICY_E2E_YAML);
// Pre-create the branch as ragnor so there's something to delete.
output_success(
@ -1375,9 +1381,9 @@ fn local_cli_branch_delete_enforces_engine_layer_policy() {
#[test]
fn local_cli_branch_merge_enforces_engine_layer_policy() {
let repo = SystemRepo::loaded();
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
repo.write_config("policy.yaml", POLICY_E2E_YAML);
let graph = SystemGraph::loaded();
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
graph.write_config("policy.yaml", POLICY_E2E_YAML);
// Pre-create a feature branch as ragnor (admins-branch-ops covers it).
output_success(
@ -1431,7 +1437,7 @@ fn local_cli_branch_merge_enforces_engine_layer_policy() {
// pin the precedence rule that `main.rs::resolve_cli_actor` implements:
// `--as` flag > `cli.actor` from `omnigraph.yaml` > None.
fn local_policy_config_with_actor(repo: &SystemRepo, actor: &str) -> String {
fn local_policy_config_with_actor(graph: &SystemGraph, actor: &str) -> String {
// Mirrors `local_policy_config` but adds `cli.actor` so the
// config-only precedence path is exercised. The `cli:` block
// already has `graph` and `branch`; appending `actor` here.
@ -1452,7 +1458,7 @@ query:
policy:
file: ./policy.yaml
",
yaml_string(&repo.path().to_string_lossy()),
yaml_string(&graph.path().to_string_lossy()),
actor,
)
}
@ -1462,13 +1468,13 @@ fn local_cli_actor_from_config_used_when_no_flag() {
// cli.actor: act-ragnor in omnigraph.yaml, no --as flag → change
// permitted via admins-write rule. Proves the config-only path
// works; previously the only proof was structural.
let repo = SystemRepo::loaded();
let config = repo.write_config(
let graph = SystemGraph::loaded();
let config = graph.write_config(
"omnigraph-policy.yaml",
&local_policy_config_with_actor(&repo, "act-ragnor"),
&local_policy_config_with_actor(&graph, "act-ragnor"),
);
repo.write_config("policy.yaml", POLICY_E2E_YAML);
let mutation_file = insert_person_query(&repo, "system-local-cli-actor.gq");
graph.write_config("policy.yaml", POLICY_E2E_YAML);
let mutation_file = insert_person_query(&graph, "system-local-cli-actor.gq");
let allowed = parse_stdout_json(&output_success(
cli()
@ -1490,13 +1496,13 @@ fn local_cli_actor_flag_overrides_config_actor() {
// cli.actor: act-ragnor in config + --as act-bruno on CLI → change
// denied. Flag wins per the precedence rule. Without this test, a
// future change that reverses precedence would ride through silently.
let repo = SystemRepo::loaded();
let config = repo.write_config(
let graph = SystemGraph::loaded();
let config = graph.write_config(
"omnigraph-policy.yaml",
&local_policy_config_with_actor(&repo, "act-ragnor"),
&local_policy_config_with_actor(&graph, "act-ragnor"),
);
repo.write_config("policy.yaml", POLICY_E2E_YAML);
let mutation_file = insert_person_query(&repo, "system-local-cli-actor-override.gq");
graph.write_config("policy.yaml", POLICY_E2E_YAML);
let mutation_file = insert_person_query(&graph, "system-local-cli-actor-override.gq");
let denied = output_failure(
cli()

View file

@ -41,7 +41,7 @@ fn yaml_string(value: &str) -> String {
format!("'{}'", value.replace('\'', "''"))
}
fn remote_policy_server_config(repo: &SystemRepo) -> String {
fn remote_policy_server_config(graph: &SystemGraph) -> String {
format!(
"\
project:
@ -54,7 +54,7 @@ server:
policy:
file: ./policy.yaml
",
yaml_string(&repo.path().to_string_lossy())
yaml_string(&graph.path().to_string_lossy())
)
}
@ -81,10 +81,10 @@ auth:
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_server_and_cli_end_to_end_flow() {
let repo = SystemRepo::loaded();
let server = repo.spawn_server();
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let mutation_file = repo.write_query(
let graph = SystemGraph::loaded();
let server = graph.spawn_server();
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let mutation_file = graph.write_query(
"system-remote-change.gq",
r#"
query insert_person($name: String, $age: I32) {
@ -105,7 +105,7 @@ query insert_person($name: String, $age: I32) {
assert_eq!(health["status"], "ok");
let local_snapshot = parse_stdout_json(&output_success(
cli().arg("snapshot").arg(repo.path()).arg("--json"),
cli().arg("snapshot").arg(graph.path()).arg("--json"),
));
let snapshot = parse_stdout_json(&output_success(
cli()
@ -120,7 +120,7 @@ query insert_person($name: String, $age: I32) {
let local_read = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -180,7 +180,7 @@ query insert_person($name: String, $age: I32) {
let local_verify = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -260,11 +260,11 @@ query insert_person($name: String, $age: I32) {
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_schema_apply_via_cli_updates_repo() {
let repo = SystemRepo::initialized();
let server = repo.spawn_server();
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let next_schema = repo.write_file(
fn remote_schema_apply_via_cli_updates_graph() {
let graph = SystemGraph::initialized();
let server = graph.spawn_server();
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let next_schema = graph.write_file(
"next.pg",
&fs::read_to_string(fixture("test.pg")).unwrap().replace(
" age: I32?\n}",
@ -286,7 +286,7 @@ fn remote_schema_apply_via_cli_updates_repo() {
let db = tokio::runtime::Runtime::new()
.unwrap()
.block_on(Omnigraph::open(repo.path().to_string_lossy().as_ref()))
.block_on(Omnigraph::open(graph.path().to_string_lossy().as_ref()))
.unwrap();
assert!(
db.catalog().node_types["Person"]
@ -298,10 +298,10 @@ fn remote_schema_apply_via_cli_updates_repo() {
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_schema_apply_rejects_unsupported_plan() {
let repo = SystemRepo::initialized();
let server = repo.spawn_server();
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let breaking_schema = repo.write_file(
let graph = SystemGraph::initialized();
let server = graph.spawn_server();
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let breaking_schema = graph.write_file(
"breaking.pg",
&fs::read_to_string(fixture("test.pg"))
.unwrap()
@ -324,7 +324,7 @@ fn remote_schema_apply_rejects_unsupported_plan() {
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_schema_apply_rejects_when_non_main_branch_exists() {
let repo = SystemRepo::initialized();
let graph = SystemGraph::initialized();
output_success(
cli()
.arg("branch")
@ -332,12 +332,12 @@ fn remote_schema_apply_rejects_when_non_main_branch_exists() {
.arg("--from")
.arg("main")
.arg("--uri")
.arg(repo.path())
.arg(graph.path())
.arg("feature"),
);
let server = repo.spawn_server();
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let next_schema = repo.write_file(
let server = graph.spawn_server();
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let next_schema = graph.write_file(
"next.pg",
&fs::read_to_string(fixture("test.pg")).unwrap().replace(
" age: I32?\n}",
@ -355,16 +355,16 @@ fn remote_schema_apply_rejects_when_non_main_branch_exists() {
.arg(&next_schema),
);
let stderr = String::from_utf8_lossy(&output.stderr);
assert!(stderr.contains("schema apply requires a repo with only main"));
assert!(stderr.contains("schema apply requires a graph with only main"));
}
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_read_preserves_projection_order_in_json_and_csv() {
let repo = SystemRepo::loaded();
let server = repo.spawn_server();
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let ordered_query = repo.write_query(
let graph = SystemGraph::loaded();
let server = graph.spawn_server();
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let ordered_query = graph.write_query(
"ordered-remote.gq",
r#"
query ordered_person($name: String) {
@ -419,10 +419,10 @@ query ordered_person($name: String) {
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_branch_create_list_merge_flow() {
let repo = SystemRepo::loaded();
let server = repo.spawn_server();
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let mutation_file = repo.write_query(
let graph = SystemGraph::loaded();
let server = graph.spawn_server();
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let mutation_file = graph.write_query(
"system-remote-branch-change.gq",
r#"
query insert_person($name: String, $age: I32) {
@ -516,9 +516,9 @@ query insert_person($name: String, $age: I32) {
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_branch_delete_removes_branch() {
let repo = SystemRepo::loaded();
let server = repo.spawn_server();
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let graph = SystemGraph::loaded();
let server = graph.spawn_server();
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
parse_stdout_json(&output_success(
cli()
@ -557,10 +557,10 @@ fn remote_branch_delete_removes_branch() {
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_export_round_trips_full_branch_graph() {
let repo = SystemRepo::loaded();
let server = repo.spawn_server();
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let mutation_file = repo.write_query(
let graph = SystemGraph::loaded();
let server = graph.spawn_server();
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let mutation_file = graph.write_query(
"system-remote-export-change.gq",
r#"
query insert_person($name: String, $age: I32) {
@ -624,8 +624,8 @@ query add_friend($from: String, $to: String) {
.arg("feature")
.arg("--jsonl"),
));
let export_path = repo.write_jsonl("system-remote-exported.jsonl", &exported);
let imported_repo = repo
let export_path = graph.write_jsonl("system-remote-exported.jsonl", &exported);
let imported_graph = graph
.path()
.parent()
.unwrap()
@ -636,18 +636,18 @@ query add_friend($from: String, $to: String) {
.arg("init")
.arg("--schema")
.arg(fixture("test.pg"))
.arg(&imported_repo),
.arg(&imported_graph),
);
output_success(
cli()
.arg("load")
.arg("--data")
.arg(&export_path)
.arg(&imported_repo),
.arg(&imported_graph),
);
let snapshot = parse_stdout_json(&output_success(
cli().arg("snapshot").arg(&imported_repo).arg("--json"),
cli().arg("snapshot").arg(&imported_graph).arg("--json"),
));
assert_eq!(
snapshot["tables"]
@ -671,7 +671,7 @@ query add_friend($from: String, $to: String) {
let eve = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(&imported_repo)
.arg(&imported_graph)
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -687,10 +687,10 @@ query add_friend($from: String, $to: String) {
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_ingest_creates_review_branch_and_keeps_it_readable() {
let repo = SystemRepo::loaded();
let server = repo.spawn_server();
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let ingest_data = repo.write_jsonl(
let graph = SystemGraph::loaded();
let server = graph.spawn_server();
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let ingest_data = graph.write_jsonl(
"system-remote-ingest.jsonl",
r#"{"type":"Person","data":{"name":"Zoe","age":33}}
{"type":"Person","data":{"name":"Bob","age":26}}"#,
@ -747,9 +747,9 @@ fn remote_ingest_creates_review_branch_and_keeps_it_readable() {
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_ingest_reuses_existing_branch_and_merges_updates() {
let repo = SystemRepo::loaded();
let server = repo.spawn_server();
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let graph = SystemGraph::loaded();
let server = graph.spawn_server();
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
output_success(
cli()
@ -762,7 +762,7 @@ fn remote_ingest_reuses_existing_branch_and_merges_updates() {
.arg("feature-ingest"),
);
let ingest_data = repo.write_jsonl(
let ingest_data = graph.write_jsonl(
"system-remote-ingest-merge.jsonl",
r#"{"type":"Person","data":{"name":"Bob","age":26}}
{"type":"Person","data":{"name":"Zoe","age":33}}"#,
@ -828,23 +828,23 @@ fn remote_ingest_reuses_existing_branch_and_merges_updates() {
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_policy_enforces_branch_first_cli_workflow() {
let repo = SystemRepo::loaded();
let graph = SystemGraph::loaded();
let server_config =
repo.write_config("server-policy.yaml", &remote_policy_server_config(&repo));
repo.write_config("policy.yaml", REMOTE_POLICY_E2E_YAML);
let server = repo.spawn_server_with_config_env(
graph.write_config("server-policy.yaml", &remote_policy_server_config(&graph));
graph.write_config("policy.yaml", REMOTE_POLICY_E2E_YAML);
let server = graph.spawn_server_with_config_env(
&server_config,
&[(
"OMNIGRAPH_SERVER_BEARER_TOKENS_JSON",
r#"{"act-bruno":"team-token","act-ragnor":"admin-token"}"#,
)],
);
let client_config = repo.write_config(
let client_config = graph.write_config(
"omnigraph-policy.yaml",
&remote_policy_client_config(&server.base_url),
);
repo.write_config(".env.omni", "POLICY_TEST_TOKEN=team-token\n");
let mutation_file = repo.write_query(
graph.write_config(".env.omni", "POLICY_TEST_TOKEN=team-token\n");
let mutation_file = graph.write_query(
"system-remote-policy-change.gq",
r#"
query insert_person($name: String, $age: I32) {

View file

@ -1,6 +1,6 @@
[package]
name = "omnigraph-compiler"
version = "0.4.2"
version = "0.6.0"
edition = "2024"
description = "Schema/query compiler for Omnigraph. Zero Lance dependency."
license = "MIT"

View file

@ -38,7 +38,7 @@ pub enum QueryLintQueryKind {
#[serde(rename_all = "lowercase")]
pub enum QueryLintSchemaSourceKind {
File,
Repo,
Graph,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
@ -59,9 +59,9 @@ impl QueryLintSchemaSource {
}
}
pub fn repo(uri: impl Into<String>) -> Self {
pub fn graph(uri: impl Into<String>) -> Self {
Self {
kind: QueryLintSchemaSourceKind::Repo,
kind: QueryLintSchemaSourceKind::Graph,
path: None,
uri: Some(uri.into()),
}

View file

@ -1,6 +1,6 @@
[package]
name = "omnigraph-policy"
version = "0.4.2"
version = "0.6.0"
edition = "2024"
description = "Policy / authorization layer for Omnigraph — Cedar-backed PolicyEngine, PolicyChecker trait, ResourceScope enum."
license = "MIT"

View file

@ -172,7 +172,7 @@ pub struct PolicyCompiler;
#[derive(Clone)]
pub struct PolicyEngine {
repo_id: String,
graph_id: String,
protected_branches: BTreeSet<String>,
known_actors: BTreeSet<String>,
schema: Schema,
@ -291,7 +291,7 @@ impl PolicyTestConfig {
}
impl PolicyCompiler {
pub fn compile(config: &PolicyConfig, repo_id: &str) -> Result<PolicyEngine> {
pub fn compile(config: &PolicyConfig, graph_id: &str) -> Result<PolicyEngine> {
config.validate()?;
let (schema, schema_warnings) = Schema::from_cedarschema_str(policy_schema_source())?;
let schema_warnings = schema_warnings
@ -300,8 +300,8 @@ impl PolicyCompiler {
if !schema_warnings.is_empty() {
bail!("policy schema warnings:\n{}", schema_warnings.join("\n"));
}
let entities = compile_entities(config, repo_id, &schema)?;
let (policies, policy_to_rule) = compile_policies(config, repo_id)?;
let entities = compile_entities(config, graph_id, &schema)?;
let (policies, policy_to_rule) = compile_policies(config, graph_id)?;
let validator = Validator::new(schema.clone());
let validation = validator.validate(&policies, ValidationMode::Strict);
let errors = validation
@ -318,7 +318,7 @@ impl PolicyCompiler {
.flat_map(|members| members.iter().cloned())
.collect();
Ok(PolicyEngine {
repo_id: repo_id.to_string(),
graph_id: graph_id.to_string(),
protected_branches: config.protected_branches.iter().cloned().collect(),
known_actors,
schema,
@ -330,9 +330,9 @@ impl PolicyCompiler {
}
impl PolicyEngine {
pub fn load(path: &Path, repo_id: &str) -> Result<Self> {
pub fn load(path: &Path, graph_id: &str) -> Result<Self> {
let config = PolicyConfig::load(path)?;
PolicyCompiler::compile(&config, repo_id)
PolicyCompiler::compile(&config, graph_id)
}
pub fn authorize(&self, request: &PolicyRequest) -> Result<PolicyDecision> {
@ -349,7 +349,7 @@ impl PolicyEngine {
let principal = entity_uid("Actor", &request.actor_id)?;
let action = entity_uid("Action", request.action.as_str())?;
let resource = entity_uid("Repo", &self.repo_id)?;
let resource = entity_uid("Graph", &self.graph_id)?;
let context_value = json!({
"has_branch": request.branch.is_some(),
"branch": request.branch.clone().unwrap_or_default(),
@ -462,7 +462,7 @@ impl PolicyEngine {
}
}
fn compile_entities(config: &PolicyConfig, repo_id: &str, schema: &Schema) -> Result<Entities> {
fn compile_entities(config: &PolicyConfig, graph_id: &str, schema: &Schema) -> Result<Entities> {
let mut group_entities = Vec::new();
for group in config.groups.keys() {
group_entities.push(Entity::new(
@ -495,8 +495,8 @@ fn compile_entities(config: &PolicyConfig, repo_id: &str, schema: &Schema) -> Re
)?);
}
let repo_entity = Entity::new(
entity_uid("Repo", repo_id)?,
let graph_entity = Entity::new(
entity_uid("Graph", graph_id)?,
HashMap::new(),
HashSet::<EntityUid>::new(),
)?;
@ -504,13 +504,13 @@ fn compile_entities(config: &PolicyConfig, repo_id: &str, schema: &Schema) -> Re
let mut entities = Vec::new();
entities.extend(group_entities);
entities.extend(actor_entities);
entities.push(repo_entity);
entities.push(graph_entity);
Ok(Entities::from_entities(entities, Some(schema))?)
}
fn compile_policies(
config: &PolicyConfig,
repo_id: &str,
graph_id: &str,
) -> Result<(PolicySet, HashMap<String, String>)> {
let mut policies = Vec::new();
let mut policy_to_rule = HashMap::new();
@ -518,7 +518,7 @@ fn compile_policies(
for rule in &config.rules {
for action in &rule.allow.actions {
let policy_id = PolicyId::new(format!("{}:{}", rule.id, action.as_str()));
let source = compile_policy_source(rule, action, repo_id);
let source = compile_policy_source(rule, action, graph_id);
let policy = Policy::parse(Some(policy_id.clone()), source.as_str())?;
policy_to_rule.insert(policy_id.to_string(), rule.id.clone());
policies.push(policy);
@ -528,7 +528,7 @@ fn compile_policies(
Ok((PolicySet::from_policies(policies)?, policy_to_rule))
}
fn compile_policy_source(rule: &PolicyRule, action: &PolicyAction, repo_id: &str) -> String {
fn compile_policy_source(rule: &PolicyRule, action: &PolicyAction, graph_id: &str) -> String {
let mut conditions = Vec::new();
if let Some(scope) = rule.allow.branch_scope {
conditions.push(branch_scope_condition(scope));
@ -547,11 +547,11 @@ fn compile_policy_source(rule: &PolicyRule, action: &PolicyAction, repo_id: &str
r#"permit (
principal in Omnigraph::Group::{group},
action == Omnigraph::Action::{action},
resource == Omnigraph::Repo::{repo}
resource == Omnigraph::Graph::{graph}
){when};"#,
group = cedar_literal(&rule.allow.actors.group),
action = cedar_literal(action.as_str()),
repo = cedar_literal(repo_id),
graph = cedar_literal(graph_id),
when = when,
)
}
@ -594,16 +594,16 @@ namespace Omnigraph {
entity Actor in [Group];
entity Group;
entity Repo;
entity Graph;
action "read" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
action "export" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
action "change" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
action "schema_apply" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
action "branch_create" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
action "branch_delete" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
action "branch_merge" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
action "admin" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
action "read" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
action "export" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
action "change" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
action "schema_apply" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
action "branch_create" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
action "branch_delete" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
action "branch_merge" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
action "admin" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
}
"#
}
@ -881,7 +881,7 @@ rules:
)
.unwrap();
let engine = PolicyCompiler::compile(&policy, "repo").unwrap();
let engine = PolicyCompiler::compile(&policy, "graph").unwrap();
let allow = engine
.authorize(&PolicyRequest {
actor_id: "act-bruno".to_string(),
@ -932,7 +932,7 @@ rules:
"#,
)
.unwrap();
let engine = PolicyCompiler::compile(&policy, "repo").unwrap();
let engine = PolicyCompiler::compile(&policy, "graph").unwrap();
let tests = PolicyTestConfig {
version: 1,
cases: vec![
@ -976,7 +976,7 @@ rules:
)
.unwrap();
let engine = PolicyCompiler::compile(&policy, "repo").unwrap();
let engine = PolicyCompiler::compile(&policy, "graph").unwrap();
let allow = engine
.authorize(&PolicyRequest {
actor_id: "act-ragnor".to_string(),

View file

@ -1,6 +1,6 @@
[package]
name = "omnigraph-server"
version = "0.4.2"
version = "0.6.0"
edition = "2024"
description = "HTTP server for the Omnigraph graph database."
license = "MIT"
@ -19,9 +19,9 @@ default = []
aws = ["dep:aws-config", "dep:aws-sdk-secretsmanager"]
[dependencies]
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.4.2" }
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" }
omnigraph-policy = { path = "../omnigraph-policy", version = "0.4.2" }
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.0" }
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" }
omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.0" }
axum = { workspace = true }
clap = { workspace = true }
color-eyre = { workspace = true }
@ -45,4 +45,5 @@ aws-sdk-secretsmanager = { version = "1", optional = true, default-features = fa
tempfile = { workspace = true }
tower = { workspace = true }
serial_test = "3"
lance = { workspace = true }
lance-index = { workspace = true }

View file

@ -259,10 +259,10 @@ async fn main() {
}
let temp = tempfile::tempdir().expect("tempdir");
let repo = temp.path().join("bench.omni");
Omnigraph::init(repo.to_str().unwrap(), SCHEMA)
let graph = temp.path().join("bench.omni");
Omnigraph::init(graph.to_str().unwrap(), SCHEMA)
.await
.expect("init repo");
.expect("init graph");
// Build bearer tokens: one for the heavy actor + one per light actor.
let mut tokens: Vec<(String, String)> =
@ -270,21 +270,17 @@ async fn main() {
for i in 0..args.light_actors {
tokens.push((format!("act-light-{i}"), format!("light-token-{i}")));
}
let db = Omnigraph::open(repo.to_str().unwrap())
let db = Omnigraph::open(graph.to_str().unwrap())
.await
.expect("open repo");
.expect("open graph");
// Construct a custom WorkloadController with the requested caps and
// pass it through `AppState::new_with_workload`. Avoids the
// `unsafe { std::env::set_var(...) }` antipattern that violates
// `setenv`'s thread-safety precondition once the multi-thread tokio
// runtime is up.
let workload = WorkloadController::new(args.inflight_cap, args.byte_cap);
let state = AppState::new_with_workload(
repo.to_string_lossy().to_string(),
db,
tokens,
workload,
);
let state =
AppState::new_with_workload(graph.to_string_lossy().to_string(), db, tokens, workload);
let app = build_app(state);
eprintln!(

View file

@ -152,7 +152,9 @@ async fn drive_actor(
errors += 1;
// Drain body for logging on the first few failures.
if errors <= 3 {
let body = to_bytes(response.into_body(), 64 * 1024).await.unwrap_or_default();
let body = to_bytes(response.into_body(), 64 * 1024)
.await
.unwrap_or_default();
eprintln!(
"actor {actor_idx} op {op_idx} status {status} body {}",
String::from_utf8_lossy(&body)
@ -173,13 +175,13 @@ async fn main() {
}
let temp = tempfile::tempdir().expect("tempdir");
let repo = temp.path().join("bench.omni");
let graph = temp.path().join("bench.omni");
let schema = build_schema(args.tables);
Omnigraph::init(repo.to_str().unwrap(), &schema)
Omnigraph::init(graph.to_str().unwrap(), &schema)
.await
.expect("init repo");
.expect("init graph");
let state = AppState::open(repo.to_string_lossy().to_string())
let state = AppState::open(graph.to_string_lossy().to_string())
.await
.expect("open AppState");
let app = build_app(state);

View file

@ -18,6 +18,7 @@ use api::{
IngestRequest, QueryRequest, ReadOutput, ReadRequest, SchemaApplyOutput, SchemaApplyRequest,
SchemaOutput, SnapshotQuery, ingest_output, schema_apply_output, snapshot_payload,
};
pub use auth::{AWS_SECRET_ENV, EnvOrFileTokenSource, TokenSource, resolve_token_source};
use axum::body::{Body, Bytes};
use axum::extract::DefaultBodyLimit;
use axum::extract::{Extension, Path, Query, Request, State};
@ -39,7 +40,6 @@ use omnigraph::error::{ManifestConflictDetails, ManifestErrorKind, OmniError};
use omnigraph_compiler::json_params_to_param_map;
use omnigraph_compiler::query::parser::parse_query;
use omnigraph_compiler::{JsonParamMode, ParamMap};
pub use auth::{AWS_SECRET_ENV, EnvOrFileTokenSource, TokenSource, resolve_token_source};
pub use policy::{
PolicyAction, PolicyCompiler, PolicyConfig, PolicyDecision, PolicyEngine, PolicyExpectation,
PolicyRequest, PolicyTestConfig,
@ -439,10 +439,7 @@ impl ApiError {
}
}
fn manifest_version_conflict(
message: String,
details: api::ManifestConflictOutput,
) -> Self {
fn manifest_version_conflict(message: String, details: api::ManifestConflictOutput) -> Self {
Self {
status: StatusCode::CONFLICT,
code: ErrorCode::Conflict,
@ -2112,12 +2109,12 @@ server:
("OMNIGRAPH_UNAUTHENTICATED", None),
]);
let temp = tempdir().unwrap();
// Repo path doesn't need to exist — classifier fires before
// Graph path doesn't need to exist — classifier fires before
// `AppState::open_with_bearer_tokens_and_policy`.
let config = ServerConfig {
uri: temp
.path()
.join("repo.omni")
.join("graph.omni")
.to_string_lossy()
.into_owned(),
bind: "127.0.0.1:0".to_string(),
@ -2125,7 +2122,8 @@ server:
allow_unauthenticated: false,
};
let result = serve(config).await;
let err = result.expect_err("serve should refuse to start in State 1 without --unauthenticated");
let err =
result.expect_err("serve should refuse to start in State 1 without --unauthenticated");
let msg = format!("{:?}", err);
assert!(
msg.contains("no bearer tokens") || msg.contains("policy file"),

View file

@ -8,7 +8,7 @@ use omnigraph_server::{ServerConfig, init_tracing, load_server_settings, serve};
#[command(name = "omnigraph-server")]
#[command(about = "HTTP server for the Omnigraph graph database")]
struct Cli {
/// Repo URI
/// Graph URI
uri: Option<String>,
#[arg(long)]
target: Option<String>,

View file

@ -19,42 +19,42 @@ fn fixture(name: &str) -> PathBuf {
.join(name)
}
fn repo_path(root: &Path) -> PathBuf {
fn graph_path(root: &Path) -> PathBuf {
root.join("openapi_test.omni")
}
async fn init_loaded_repo() -> tempfile::TempDir {
async fn init_loaded_graph() -> tempfile::TempDir {
let temp = tempfile::tempdir().unwrap();
let repo = repo_path(temp.path());
fs::create_dir_all(&repo).unwrap();
let graph = graph_path(temp.path());
fs::create_dir_all(&graph).unwrap();
let schema = fs::read_to_string(fixture("test.pg")).unwrap();
let data = fs::read_to_string(fixture("test.jsonl")).unwrap();
Omnigraph::init(repo.to_str().unwrap(), &schema)
Omnigraph::init(graph.to_str().unwrap(), &schema)
.await
.unwrap();
let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap();
let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
load_jsonl(&mut db, &data, LoadMode::Overwrite)
.await
.unwrap();
temp
}
async fn app_for_loaded_repo() -> (tempfile::TempDir, Router) {
let temp = init_loaded_repo().await;
let repo = repo_path(temp.path());
let state = AppState::open(repo.to_string_lossy().to_string())
async fn app_for_loaded_graph() -> (tempfile::TempDir, Router) {
let temp = init_loaded_graph().await;
let graph = graph_path(temp.path());
let state = AppState::open(graph.to_string_lossy().to_string())
.await
.unwrap();
let app = build_app(state);
(temp, app)
}
async fn app_for_loaded_repo_with_auth(token: &str) -> (tempfile::TempDir, Router) {
let temp = init_loaded_repo().await;
let repo = repo_path(temp.path());
let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap();
async fn app_for_loaded_graph_with_auth(token: &str) -> (tempfile::TempDir, Router) {
let temp = init_loaded_graph().await;
let graph = graph_path(temp.path());
let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
let state = AppState::new_with_bearer_token(
repo.to_string_lossy().to_string(),
graph.to_string_lossy().to_string(),
db,
Some(token.to_string()),
);
@ -84,7 +84,7 @@ fn openapi_json() -> Value {
#[tokio::test]
async fn openapi_endpoint_returns_200_with_valid_json() {
let (_temp, app) = app_for_loaded_repo().await;
let (_temp, app) = app_for_loaded_graph().await;
let request = Request::builder()
.method(Method::GET)
.uri("/openapi.json")
@ -97,7 +97,7 @@ async fn openapi_endpoint_returns_200_with_valid_json() {
#[tokio::test]
async fn openapi_endpoint_returns_openapi_31_version() {
let (_temp, app) = app_for_loaded_repo().await;
let (_temp, app) = app_for_loaded_graph().await;
let request = Request::builder()
.method(Method::GET)
.uri("/openapi.json")
@ -113,11 +113,11 @@ async fn openapi_endpoint_returns_openapi_31_version() {
#[tokio::test]
async fn openapi_endpoint_does_not_require_auth() {
let temp = init_loaded_repo().await;
let repo = repo_path(temp.path());
let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap();
let temp = init_loaded_graph().await;
let graph = graph_path(temp.path());
let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
let state = AppState::new_with_bearer_token(
repo.to_string_lossy().to_string(),
graph.to_string_lossy().to_string(),
db,
Some("secret-token".to_string()),
);
@ -129,7 +129,11 @@ async fn openapi_endpoint_does_not_require_auth() {
.body(Body::empty())
.unwrap();
let (status, _) = json_response(&app, request).await;
assert_eq!(status, StatusCode::OK, "/openapi.json should not require auth");
assert_eq!(
status,
StatusCode::OK,
"/openapi.json should not require auth"
);
}
// ---------------------------------------------------------------------------
@ -739,10 +743,13 @@ fn branch_delete_has_branch_path_parameter() {
let params = doc["paths"]["/branches/{branch}"]["delete"]["parameters"]
.as_array()
.unwrap();
let has_branch = params.iter().any(|p| {
p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("path")
});
assert!(has_branch, "DELETE /branches/{{branch}} must have 'branch' path parameter");
let has_branch = params
.iter()
.any(|p| p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("path"));
assert!(
has_branch,
"DELETE /branches/{{branch}} must have 'branch' path parameter"
);
}
#[test]
@ -751,10 +758,13 @@ fn commit_show_has_commit_id_path_parameter() {
let params = doc["paths"]["/commits/{commit_id}"]["get"]["parameters"]
.as_array()
.unwrap();
let has_commit_id = params.iter().any(|p| {
p["name"].as_str() == Some("commit_id") && p["in"].as_str() == Some("path")
});
assert!(has_commit_id, "GET /commits/{{commit_id}} must have 'commit_id' path parameter");
let has_commit_id = params
.iter()
.any(|p| p["name"].as_str() == Some("commit_id") && p["in"].as_str() == Some("path"));
assert!(
has_commit_id,
"GET /commits/{{commit_id}} must have 'commit_id' path parameter"
);
}
#[test]
@ -763,10 +773,13 @@ fn snapshot_has_branch_query_parameter() {
let params = doc["paths"]["/snapshot"]["get"]["parameters"]
.as_array()
.unwrap();
let has_branch = params.iter().any(|p| {
p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query")
});
assert!(has_branch, "GET /snapshot must have 'branch' query parameter");
let has_branch = params
.iter()
.any(|p| p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query"));
assert!(
has_branch,
"GET /snapshot must have 'branch' query parameter"
);
}
#[test]
@ -775,10 +788,13 @@ fn commits_has_branch_query_parameter() {
let params = doc["paths"]["/commits"]["get"]["parameters"]
.as_array()
.unwrap();
let has_branch = params.iter().any(|p| {
p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query")
});
assert!(has_branch, "GET /commits must have 'branch' query parameter");
let has_branch = params
.iter()
.any(|p| p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query"));
assert!(
has_branch,
"GET /commits must have 'branch' query parameter"
);
}
// ---------------------------------------------------------------------------
@ -854,8 +870,7 @@ fn error_responses_reference_error_output_schema() {
];
for (path, method, status) in paths_with_errors {
let content =
&doc["paths"][path][method]["responses"][status]["content"];
let content = &doc["paths"][path][method]["responses"][status]["content"];
let schema = &content["application/json"]["schema"];
let ref_path = schema["$ref"].as_str().unwrap();
assert!(
@ -917,7 +932,7 @@ fn openapi_spec_round_trips_through_json() {
#[tokio::test]
async fn open_mode_spec_has_no_security_schemes() {
let (_temp, app) = app_for_loaded_repo().await;
let (_temp, app) = app_for_loaded_graph().await;
let request = Request::builder()
.method(Method::GET)
.uri("/openapi.json")
@ -933,7 +948,7 @@ async fn open_mode_spec_has_no_security_schemes() {
#[tokio::test]
async fn open_mode_spec_has_no_operation_security() {
let (_temp, app) = app_for_loaded_repo().await;
let (_temp, app) = app_for_loaded_graph().await;
let request = Request::builder()
.method(Method::GET)
.uri("/openapi.json")
@ -954,7 +969,7 @@ async fn open_mode_spec_has_no_operation_security() {
#[tokio::test]
async fn auth_mode_spec_includes_bearer_token_security_scheme() {
let (_temp, app) = app_for_loaded_repo_with_auth("secret").await;
let (_temp, app) = app_for_loaded_graph_with_auth("secret").await;
let request = Request::builder()
.method(Method::GET)
.uri("/openapi.json")
@ -968,7 +983,7 @@ async fn auth_mode_spec_includes_bearer_token_security_scheme() {
#[tokio::test]
async fn auth_mode_spec_has_security_on_protected_operations() {
let (_temp, app) = app_for_loaded_repo_with_auth("secret").await;
let (_temp, app) = app_for_loaded_graph_with_auth("secret").await;
let request = Request::builder()
.method(Method::GET)
.uri("/openapi.json")
@ -999,7 +1014,7 @@ async fn auth_mode_spec_has_security_on_protected_operations() {
#[tokio::test]
async fn auth_mode_spec_matches_static_generation() {
let (_temp, app) = app_for_loaded_repo_with_auth("secret").await;
let (_temp, app) = app_for_loaded_graph_with_auth("secret").await;
let request = Request::builder()
.method(Method::GET)
.uri("/openapi.json")
@ -1015,7 +1030,7 @@ async fn auth_mode_spec_matches_static_generation() {
#[tokio::test]
async fn auth_mode_healthz_still_has_no_security() {
let (_temp, app) = app_for_loaded_repo_with_auth("secret").await;
let (_temp, app) = app_for_loaded_graph_with_auth("secret").await;
let request = Request::builder()
.method(Method::GET)
.uri("/openapi.json")
@ -1031,8 +1046,7 @@ async fn auth_mode_healthz_still_has_no_security() {
#[test]
fn openapi_spec_is_up_to_date() {
let spec_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("../../openapi.json");
let spec_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../openapi.json");
let generated = serde_json::to_string_pretty(&openapi_doc()).unwrap() + "\n";

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,6 @@
[package]
name = "omnigraph-engine"
version = "0.4.2"
version = "0.6.0"
edition = "2024"
description = "Runtime engine for the Omnigraph graph database."
license = "MIT"
@ -16,8 +16,8 @@ default = []
failpoints = ["dep:fail", "fail/failpoints"]
[dependencies]
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" }
omnigraph-policy = { path = "../omnigraph-policy", version = "0.4.2" }
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" }
omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.0" }
lance = { workspace = true }
lance-datafusion = { workspace = true }
datafusion = { workspace = true }
@ -51,7 +51,7 @@ chrono = { workspace = true }
arc-swap = { workspace = true }
[dev-dependencies]
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" }
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" }
tokio = { workspace = true }
lance-namespace-impls = { workspace = true }
serial_test = "3"

View file

@ -6,6 +6,8 @@ use lance::Dataset;
use lance_namespace::models::CreateTableVersionRequest;
use omnigraph_compiler::catalog::Catalog;
#[path = "manifest/graph.rs"]
mod graph;
#[path = "manifest/layout.rs"]
mod layout;
#[path = "manifest/metadata.rs"]
@ -18,11 +20,10 @@ mod namespace;
mod publisher;
#[path = "manifest/recovery.rs"]
mod recovery;
#[path = "manifest/repo.rs"]
mod repo;
#[path = "manifest/state.rs"]
mod state;
use graph::{init_manifest_graph, open_manifest_graph, snapshot_state_at};
use layout::{manifest_uri, open_manifest_dataset, type_name_hash};
pub(crate) use metadata::TableVersionMetadata;
#[cfg(test)]
@ -33,11 +34,10 @@ pub(crate) use namespace::open_table_head_for_write;
use namespace::{branch_manifest_namespace, staged_table_namespace};
use publisher::{GraphNamespacePublisher, ManifestBatchPublisher};
pub(crate) use recovery::{
delete_sidecar, has_schema_apply_sidecar, new_sidecar, recover_manifest_drift, write_sidecar,
RecoveryMode, RecoverySidecar, RecoverySidecarHandle, SidecarKind, SidecarTablePin,
SidecarTableRegistration, SidecarTombstone,
SidecarTableRegistration, SidecarTombstone, delete_sidecar, has_schema_apply_sidecar,
new_sidecar, recover_manifest_drift, write_sidecar,
};
use repo::{init_manifest_repo, open_manifest_repo, snapshot_state_at};
pub use state::SubTableEntry;
#[cfg(test)]
use state::string_column;
@ -215,12 +215,12 @@ impl ManifestCoordinator {
self
}
/// Create a new repo at `root_uri` from a catalog.
/// Create a new graph at `root_uri` from a catalog.
///
/// Creates per-type Lance datasets and the namespace `__manifest` table.
pub async fn init(root_uri: &str, catalog: &Catalog) -> Result<Self> {
let root = root_uri.trim_end_matches('/');
let (dataset, known_state) = init_manifest_repo(root, catalog).await?;
let (dataset, known_state) = init_manifest_graph(root, catalog).await?;
Ok(Self::from_parts_with_default_publisher(
root,
@ -230,10 +230,10 @@ impl ManifestCoordinator {
))
}
/// Open an existing repo's manifest.
/// Open an existing graph's manifest.
pub async fn open(root_uri: &str) -> Result<Self> {
let root = root_uri.trim_end_matches('/');
let (dataset, known_state) = open_manifest_repo(root, None).await?;
let (dataset, known_state) = open_manifest_graph(root, None).await?;
Ok(Self::from_parts_with_default_publisher(
root,
dataset,
@ -242,14 +242,14 @@ impl ManifestCoordinator {
))
}
/// Open an existing repo's manifest at a specific branch.
/// Open an existing graph's manifest at a specific branch.
pub async fn open_at_branch(root_uri: &str, branch: &str) -> Result<Self> {
if branch == "main" {
return Self::open(root_uri).await;
}
let root = root_uri.trim_end_matches('/');
let (dataset, known_state) = open_manifest_repo(root, Some(branch)).await?;
let (dataset, known_state) = open_manifest_graph(root, Some(branch)).await?;
Ok(Self::from_parts_with_default_publisher(
root,
dataset,
@ -410,7 +410,7 @@ impl ManifestCoordinator {
Ok(descendants)
}
/// Root URI of the repo.
/// Root URI of the graph.
pub fn root_uri(&self) -> &str {
&self.root_uri
}

View file

@ -17,7 +17,7 @@ use super::state::{
ManifestState, SubTableEntry, entries_to_batch, manifest_schema, read_manifest_state,
};
pub(super) async fn init_manifest_repo(
pub(super) async fn init_manifest_graph(
root_uri: &str,
catalog: &Catalog,
) -> Result<(Dataset, ManifestState)> {
@ -47,7 +47,7 @@ pub(super) async fn init_manifest_repo(
Ok((dataset, known_state))
}
pub(super) async fn open_manifest_repo(
pub(super) async fn open_manifest_graph(
root_uri: &str,
branch: Option<&str>,
) -> Result<(Dataset, ManifestState)> {

View file

@ -24,8 +24,8 @@
//! Only on open-for-write paths (the publisher's `load_publish_state`).
//! Reads are side-effect-free by contract; an old-shape `__manifest` reads
//! fine, it just lacks the protections introduced by later versions.
//! `init_manifest_repo` stamps the current version at creation, so newly
//! initialized repos never need migration.
//! `init_manifest_graph` stamps the current version at creation, so newly
//! initialized graphs never need migration.
//!
//! ## Forward-version protection
//!
@ -78,7 +78,7 @@ pub(super) async fn migrate_internal_schema(dataset: &mut Dataset) -> Result<()>
if current > INTERNAL_MANIFEST_SCHEMA_VERSION {
return Err(OmniError::manifest(format!(
"__manifest is stamped at internal schema v{} but this binary expects v{} \
upgrade omnigraph before opening this repo for writes",
upgrade omnigraph before opening this graph for writes",
current, INTERNAL_MANIFEST_SCHEMA_VERSION,
)));
}
@ -112,7 +112,10 @@ pub(super) async fn migrate_internal_schema(dataset: &mut Dataset) -> Result<()>
async fn migrate_v1_to_v2(dataset: &mut Dataset) -> Result<()> {
dataset
.update_field_metadata()
.update("object_id", [(OBJECT_ID_PK_KEY.to_string(), "true".to_string())])
.update(
"object_id",
[(OBJECT_ID_PK_KEY.to_string(), "true".to_string())],
)
.map_err(|e| OmniError::Lance(e.to_string()))?
.await
.map_err(|e| OmniError::Lance(e.to_string()))?;
@ -121,10 +124,7 @@ async fn migrate_v1_to_v2(dataset: &mut Dataset) -> Result<()> {
async fn set_stamp(dataset: &mut Dataset, version: u32) -> Result<()> {
dataset
.update_schema_metadata([(
INTERNAL_SCHEMA_VERSION_KEY.to_string(),
version.to_string(),
)])
.update_schema_metadata([(INTERNAL_SCHEMA_VERSION_KEY.to_string(), version.to_string())])
.await
.map_err(|e| OmniError::Lance(e.to_string()))?;
Ok(())

View file

@ -230,6 +230,11 @@ impl LanceNamespace for BranchManifestNamespace {
metadata: None,
properties: None,
managed_versioning: Some(true),
// Every table we return from describe_table is physically
// materialized (open_manifest_dataset succeeds), never just
// "declared." See lance-namespace 6.0.1 DescribeTableResponse
// field docs.
is_only_declared: Some(false),
})
}
@ -373,6 +378,11 @@ impl LanceNamespace for StagedTableNamespace {
metadata: None,
properties: None,
managed_versioning: Some(true),
// Every table we return from describe_table is physically
// materialized (open_manifest_dataset succeeds), never just
// "declared." See lance-namespace 6.0.1 DescribeTableResponse
// field docs.
is_only_declared: Some(false),
})
}

View file

@ -58,7 +58,7 @@ use super::{ManifestChange, SubTableUpdate, TableRegistration, TableTombstone};
/// into the audit row's `recovery_for_actor` field.
pub(crate) const RECOVERY_ACTOR: &str = "omnigraph:recovery";
/// Subdirectory under the repo root holding sidecar files.
/// Subdirectory under the graph root holding sidecar files.
pub(crate) const RECOVERY_DIR_NAME: &str = "__recovery";
/// Current sidecar JSON shape version. Bumping this is a breaking change:
@ -142,7 +142,7 @@ pub(crate) struct SidecarTablePin {
pub(crate) struct SidecarTableRegistration {
/// Stable identifier (`node:Tag`, `edge:WorksAt`, etc.).
pub table_key: String,
/// Repo-relative path the manifest will register
/// Graph-relative path the manifest will register
/// (e.g. `nodes/{fnv1a64-hex}`); recovery joins this with `root_uri`
/// to open the dataset Lance HEAD when constructing the
/// accompanying `Update`.
@ -295,7 +295,7 @@ pub(crate) enum SidecarDecision {
Abort,
}
/// Build the `__recovery/` directory URI under a repo root.
/// Build the `__recovery/` directory URI under a graph root.
pub(crate) fn recovery_dir_uri(root_uri: &str) -> String {
let trimmed = root_uri.trim_end_matches('/');
format!("{}/{}", trimmed, RECOVERY_DIR_NAME)
@ -1122,7 +1122,7 @@ async fn record_audit(
/// the rename so the recovery sweep's roll-forward step sees the new
/// catalog. Without this, the disambiguation logic deletes the staging
/// files (since manifest still pins the old table set) and leaves the
/// repo with new-schema data on disk but the old `_schema.pg` live —
/// graph with new-schema data on disk but the old `_schema.pg` live —
/// real corruption.
pub(crate) async fn has_schema_apply_sidecar(
root_uri: &str,

View file

@ -1393,7 +1393,10 @@ async fn test_concurrent_publish_with_overlapping_expected_versions_one_succeeds
// version (no duplicate version rows).
let mc = ManifestCoordinator::open(uri).await.unwrap();
let entry = mc.snapshot().entry("node:Person").unwrap().clone();
assert!(entry.table_version > 1, "Person should have advanced past v=1");
assert!(
entry.table_version > 1,
"Person should have advanced past v=1"
);
}
#[tokio::test]
@ -1418,7 +1421,7 @@ async fn test_publish_migrates_pre_stamp_manifest_to_current_version() {
let catalog = build_test_catalog();
let mc = ManifestCoordinator::init(uri, &catalog).await.unwrap();
// Simulate a v1 (pre-stamp) repo by removing the schema-level stamp on disk.
// Simulate a v1 (pre-stamp) graph by removing the schema-level stamp on disk.
{
let mut ds = open_manifest_dataset(uri, None).await.unwrap();
ds.update_schema_metadata([(
@ -1449,7 +1452,7 @@ async fn test_publish_migrates_pre_stamp_manifest_to_current_version() {
assert_eq!(
super::migrations::read_stamp(&post),
super::migrations::INTERNAL_MANIFEST_SCHEMA_VERSION,
"publish on a v1 repo should leave the manifest stamped at the current version",
"publish on a v1 graph should leave the manifest stamped at the current version",
);
// Manifest should still serve correctly post-migration.

View file

@ -166,7 +166,7 @@ pub enum OpenMode {
}
impl Omnigraph {
/// Create a new repo at `uri` from schema source.
/// Create a new graph at `uri` from schema source.
///
/// Creates `_schema.pg`, per-type Lance datasets, and `__manifest`.
pub async fn init(uri: &str, schema_source: &str) -> Result<Self> {
@ -205,7 +205,7 @@ impl Omnigraph {
})
}
/// Open an existing repo (read-write).
/// Open an existing graph (read-write).
///
/// Reads `_schema.pg`, parses it, builds the catalog, and opens `__manifest`.
/// Runs the open-time recovery sweep before returning — see [`OpenMode`].
@ -213,7 +213,7 @@ impl Omnigraph {
Self::open_with_storage_and_mode(uri, storage_for_uri(uri)?, OpenMode::ReadWrite).await
}
/// Open an existing repo for read-only consumers (NDJSON export,
/// Open an existing graph for read-only consumers (NDJSON export,
/// `commit list`, etc.). Skips the recovery sweep — see [`OpenMode`].
pub async fn open_read_only(uri: &str) -> Result<Self> {
Self::open_with_storage_and_mode(uri, storage_for_uri(uri)?, OpenMode::ReadOnly).await
@ -397,7 +397,8 @@ impl Omnigraph {
desired_schema_source: &str,
options: SchemaApplyOptions,
) -> Result<SchemaApplyResult> {
self.apply_schema_as(desired_schema_source, options, None).await
self.apply_schema_as(desired_schema_source, options, None)
.await
}
/// Apply a schema migration with an explicit actor for engine-layer
@ -470,7 +471,7 @@ impl Omnigraph {
Arc::clone(&self.merge_exclusive)
}
/// Engine-level access to the repo's normalized root URI. Used by
/// Engine-level access to the graph's normalized root URI. Used by
/// the recovery sidecar protocol to compute `__recovery/` paths.
pub(crate) fn root_uri(&self) -> &str {
&self.root_uri
@ -510,9 +511,10 @@ impl Omnigraph {
let normalized = normalize_branch_name(branch.unwrap_or("main"))?;
let coord = self.coordinator.read().await;
if normalized.as_deref() == coord.current_branch() {
let snapshot_id = coord.head_commit_id().await?.unwrap_or_else(|| {
SnapshotId::synthetic(coord.current_branch(), coord.version())
});
let snapshot_id = coord
.head_commit_id()
.await?
.unwrap_or_else(|| SnapshotId::synthetic(coord.current_branch(), coord.version()));
return Ok(ResolvedTarget {
requested,
branch: coord.current_branch().map(str::to_string),
@ -587,7 +589,7 @@ impl Omnigraph {
/// exist. Required BEFORE manifest-drift recovery so a
/// SchemaApply roll-forward doesn't publish the manifest while
/// the staging files remain unrenamed (which would corrupt the
/// repo: data on new schema, catalog on old).
/// graph: data on new schema, catalog on old).
/// 3. `recover_manifest_drift(... RollForwardOnly)` — close the
/// finalize→publisher residual via roll-forward; defer rollback
/// work to next ReadWrite open.
@ -668,7 +670,11 @@ impl Omnigraph {
/// Resolve `branch` to a `SnapshotId` through the coordinator.
///
/// Schema state is checked first with `ensure_schema_state_valid`; a failure
/// there is returned to the caller via `?` before the coordinator is consulted.
// NOTE(review): the single-line call and the multi-line chain below are the
// same expression before and after reformatting (both sides are shown here).
pub async fn resolve_snapshot(&self, branch: &str) -> Result<SnapshotId> {
self.ensure_schema_state_valid().await?;
self.coordinator.read().await.resolve_snapshot_id(branch).await
self.coordinator
.read()
.await
.resolve_snapshot_id(branch)
.await
}
pub(crate) async fn resolved_target(
@ -676,7 +682,11 @@ impl Omnigraph {
target: impl Into<ReadTarget>,
) -> Result<ResolvedTarget> {
self.ensure_schema_state_valid().await?;
self.coordinator.read().await.resolve_target(&target.into()).await
self.coordinator
.read()
.await
.resolve_target(&target.into())
.await
}
// ─── Change detection ────────────────────────────────────────────────
@ -708,7 +718,9 @@ impl Omnigraph {
filter: &crate::changes::ChangeFilter,
) -> Result<crate::changes::ChangeSet> {
let coord = self.coordinator.read().await;
let from_commit = coord.resolve_commit(&SnapshotId::new(from_commit_id)).await?;
let from_commit = coord
.resolve_commit(&SnapshotId::new(from_commit_id))
.await?;
let to_commit = coord.resolve_commit(&SnapshotId::new(to_commit_id)).await?;
let from_snap = coord
.resolve_target(&ReadTarget::Snapshot(SnapshotId::new(
@ -753,7 +765,11 @@ impl Omnigraph {
/// Create a Snapshot at any historical manifest version.
///
/// Checks schema state with `ensure_schema_state_valid` first (errors
/// propagate via `?`), then delegates to the coordinator's
/// `snapshot_at_version` obtained through `coordinator.read()`.
// NOTE(review): the single-line call and the multi-line chain below are the
// same expression before and after reformatting (both sides are shown here).
pub async fn snapshot_at_version(&self, version: u64) -> Result<Snapshot> {
self.ensure_schema_state_valid().await?;
self.coordinator.read().await.snapshot_at_version(version).await
self.coordinator
.read()
.await
.snapshot_at_version(version)
.await
}
pub async fn export_jsonl(
@ -894,11 +910,20 @@ impl Omnigraph {
}
/// Return the coordinator's current branch as an owned `String`, or `None`
/// when `current_branch()` yields `None`.
// NOTE(review): the single-line call and the multi-line chain below are the
// same expression before and after reformatting (both sides are shown here).
pub(crate) async fn active_branch(&self) -> Option<String> {
self.coordinator.read().await.current_branch().map(str::to_string)
self.coordinator
.read()
.await
.current_branch()
.map(str::to_string)
}
async fn ensure_branch_delete_safe(&self, branch: &str, branches: &[String]) -> Result<()> {
let descendants = self.coordinator.read().await.branch_descendants(branch).await?;
let descendants = self
.coordinator
.read()
.await
.branch_descendants(branch)
.await?;
if let Some(descendant) = descendants.first() {
return Err(OmniError::manifest_conflict(format!(
"cannot delete branch '{}' because descendant branch '{}' still depends on it",
@ -954,7 +979,12 @@ impl Omnigraph {
}
async fn delete_branch_storage_only(&self, branch: &str) -> Result<()> {
let active = self.coordinator.read().await.current_branch().map(str::to_string);
let active = self
.coordinator
.read()
.await
.current_branch()
.map(str::to_string);
if active.as_deref() == Some(branch) {
return Err(OmniError::manifest_conflict(format!(
"cannot delete currently active branch '{}'",
@ -1013,11 +1043,7 @@ impl Omnigraph {
self.coordinator.write().await.branch_create(name).await
}
/// Create branch `name` starting from the read target `from`.
///
/// Thin wrapper: forwards to `branch_create_from_as` with `None` as the third
/// argument.
// NOTE(review): the third argument is presumably the optional actor — confirm
// against `branch_create_from_as`.
// NOTE(review): the multi-line signature and the single-line signature below
// are the same declaration before and after reformatting; they share one body.
pub async fn branch_create_from(
&self,
from: impl Into<ReadTarget>,
name: &str,
) -> Result<()> {
pub async fn branch_create_from(&self, from: impl Into<ReadTarget>, name: &str) -> Result<()> {
self.branch_create_from_as(from, name, None).await
}
@ -1134,7 +1160,9 @@ impl Omnigraph {
/// Look up the `GraphCommit` identified by `commit_id`.
///
/// Checks schema state with `ensure_schema_state_valid` first (errors
/// propagate via `?`), then wraps `commit_id` in `SnapshotId::new` and resolves
/// it through the coordinator's `resolve_commit`.
// NOTE(review): `self.coordinator.read().await` on one line is the
// pre-reformat start of the chain; the three lines after it are its
// reformatted replacement. Both continue into `.resolve_commit(...)`.
pub async fn get_commit(&self, commit_id: &str) -> Result<GraphCommit> {
self.ensure_schema_state_valid().await?;
self.coordinator.read().await
self.coordinator
.read()
.await
.resolve_commit(&SnapshotId::new(commit_id))
.await
}

View file

@ -1,7 +1,7 @@
//! Lance compaction + version cleanup exposed at the graph level.
//!
//! Lance accumulates many small `.lance` fragment files per table over the
//! life of a repo: each `write`, `load`, and `change` op appends one or more
//! life of a graph: each `write`, `load`, and `change` op appends one or more
//! fragments and a new manifest. Over long timescales this hurts open times
//! and S3 object counts without improving anything.
//!
@ -176,10 +176,9 @@ pub async fn cleanup_all_tables(
clean_referenced_branches: false,
delete_rate_limit: None,
};
let removed: RemovalStats =
lance::dataset::cleanup::cleanup_old_versions(&ds, policy)
.await
.map_err(|e| OmniError::Lance(e.to_string()))?;
let removed: RemovalStats = lance::dataset::cleanup::cleanup_old_versions(&ds, policy)
.await
.map_err(|e| OmniError::Lance(e.to_string()))?;
Ok(TableCleanupStats {
table_key,
bytes_removed: removed.bytes_removed,
@ -198,12 +197,7 @@ fn all_table_keys(catalog: &omnigraph_compiler::catalog::Catalog) -> Vec<String>
.node_types
.keys()
.map(|n| format!("node:{}", n))
.chain(
catalog
.edge_types
.keys()
.map(|n| format!("edge:{}", n)),
)
.chain(catalog.edge_types.keys().map(|n| format!("edge:{}", n)))
.collect();
keys.sort();
keys

View file

@ -97,7 +97,7 @@ pub(super) async fn apply_schema_with_lock(
// Skip `main` and internal system branches. The schema-apply lock branch
// is excluded because it is the cluster-wide schema-apply serializer.
// `__run__*` branches are no longer created; the filter remains as
// defense-in-depth for legacy repos with leftover staging branches.
// defense-in-depth for legacy graphs with leftover staging branches.
// A future production sweep will let this guard go.
let blocking_branches = branches
.into_iter()
@ -105,7 +105,7 @@ pub(super) async fn apply_schema_with_lock(
.collect::<Vec<_>>();
if !blocking_branches.is_empty() {
return Err(OmniError::manifest_conflict(format!(
"schema apply requires a repo with only main; found non-main branches: {}",
"schema apply requires a graph with only main; found non-main branches: {}",
blocking_branches.join(", ")
)));
}
@ -780,7 +780,7 @@ pub(super) async fn acquire_schema_apply_lock(db: &Omnigraph) -> Result<()> {
if !blocking_branches.is_empty() {
let _ = release_schema_apply_lock(db).await;
return Err(OmniError::manifest_conflict(format!(
"schema apply requires a repo with only main; found non-main branches: {}",
"schema apply requires a graph with only main; found non-main branches: {}",
blocking_branches.join(", ")
)));
}

View file

@ -93,7 +93,7 @@ pub(crate) struct RecoveryAudit {
}
impl RecoveryAudit {
/// Open the recovery-audit dataset for the repo, or return a handle
/// Open the recovery-audit dataset for the graph, or return a handle
/// with no dataset yet (created on first append). Mirrors the
/// optional-dataset pattern from `_graph_commit_actors.lance`.
pub(crate) async fn open(root_uri: &str) -> Result<Self> {
@ -205,9 +205,7 @@ fn recovery_record_to_batch(record: &RecoveryAuditRecord) -> Result<RecordBatch>
vec![
Arc::new(StringArray::from(vec![record.graph_commit_id.clone()])),
Arc::new(StringArray::from(vec![record.recovery_kind.as_str()])),
Arc::new(StringArray::from(vec![record
.recovery_for_actor
.clone()])),
Arc::new(StringArray::from(vec![record.recovery_for_actor.clone()])),
Arc::new(StringArray::from(vec![record.operation_id.clone()])),
Arc::new(StringArray::from(vec![record.sidecar_writer_kind.clone()])),
Arc::new(StringArray::from(vec![outcomes_json])),
@ -221,10 +219,14 @@ fn decode_row(batch: &RecordBatch, row: usize) -> Result<RecoveryAuditRecord> {
let str_col = |name: &str| -> Result<&StringArray> {
batch
.column_by_name(name)
.ok_or_else(|| OmniError::manifest_internal(format!("missing column '{}' in recovery audit", name)))?
.ok_or_else(|| {
OmniError::manifest_internal(format!("missing column '{}' in recovery audit", name))
})?
.as_any()
.downcast_ref::<StringArray>()
.ok_or_else(|| OmniError::manifest_internal(format!("column '{}' has wrong type", name)))
.ok_or_else(|| {
OmniError::manifest_internal(format!("column '{}' has wrong type", name))
})
};
let ts_col = batch
.column_by_name("created_at")
@ -269,9 +271,7 @@ pub(crate) fn now_micros() -> Result<i64> {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.map(|d| d.as_micros() as i64)
.map_err(|e| {
OmniError::manifest_internal(format!("system clock before unix epoch: {}", e))
})
.map_err(|e| OmniError::manifest_internal(format!("system clock before unix epoch: {}", e)))
}
#[cfg(test)]
@ -307,7 +307,7 @@ mod tests {
let root = dir.path().to_str().unwrap();
let mut audit = RecoveryAudit::open(root).await.unwrap();
// Empty repo: list returns empty.
// Empty graph: list returns empty.
assert!(audit.list().await.unwrap().is_empty());
// Append + list.

View file

@ -61,7 +61,7 @@ pub(crate) async fn load_or_bootstrap_schema_contract(
.collect::<Vec<_>>();
if !public_non_main.is_empty() {
return Err(schema_lock_conflict(format!(
"repo is missing persisted schema state and has public branches ({}); public branches block schema evolution entirely",
"graph is missing persisted schema state and has public branches ({}); public branches block schema evolution entirely",
public_non_main.join(", ")
)));
}
@ -70,7 +70,7 @@ pub(crate) async fn load_or_bootstrap_schema_contract(
Ok((current_source_ir.clone(), state))
}
SchemaContractRead::PartialMissing => Err(schema_lock_conflict(
"repo schema state is incomplete (_schema.ir.json and __schema_state.json must either both exist or both be absent)",
"graph schema state is incomplete (_schema.ir.json and __schema_state.json must either both exist or both be absent)",
)),
}
}
@ -84,7 +84,7 @@ pub(crate) async fn validate_schema_contract(
SchemaContractRead::Present { ir, state } => (ir, state),
SchemaContractRead::MissingAll | SchemaContractRead::PartialMissing => {
return Err(schema_lock_conflict(
"repo is missing persisted schema state; manual coordination is required before schema changes are allowed",
"graph is missing persisted schema state; manual coordination is required before schema changes are allowed",
));
}
};
@ -163,7 +163,7 @@ pub(crate) async fn read_accepted_schema_ir(
}
SchemaContractRead::MissingAll | SchemaContractRead::PartialMissing => {
Err(schema_lock_conflict(
"repo is missing persisted schema state; manual coordination is required before schema changes are allowed",
"graph is missing persisted schema state; manual coordination is required before schema changes are allowed",
))
}
}
@ -221,7 +221,7 @@ async fn read_schema_contract(
})?;
let state = serde_json::from_str::<SchemaState>(&state_json).map_err(|err| {
schema_lock_conflict(format!(
"repo schema state in {} is invalid: {}",
"graph schema state in {} is invalid: {}",
SCHEMA_STATE_FILENAME, err
))
})?;
@ -234,7 +234,7 @@ async fn read_schema_contract(
fn validate_persisted_schema_contract(ir: &SchemaIR, state: &SchemaState) -> Result<()> {
if state.format_version != SCHEMA_STATE_FORMAT_VERSION {
return Err(schema_lock_conflict(format!(
"repo schema state format {} is unsupported",
"graph schema state format {} is unsupported",
state.format_version
)));
}
@ -344,7 +344,7 @@ pub(crate) async fn recover_schema_state_files(
// to the new Lance HEADs; we MUST also rename the staging files
// forward so the catalog matches. Without this, the disambiguation
// logic below sees actual_keys == live_keys (manifest didn't move)
// and deletes the staging files, leaving the repo with new-schema
// and deletes the staging files, leaving the graph with new-schema
// data on disk but the old `_schema.pg` live — corruption.
if crate::db::manifest::has_schema_apply_sidecar(root_uri, storage.as_ref()).await? {
warn!(

View file

@ -1037,8 +1037,16 @@ async fn execute_node_scan(
let table_key = format!("node:{}", type_name);
let ds = snapshot.open(&table_key).await?;
// Build Lance SQL filter string from non-search IR filters
let filter_sql = build_lance_filter(filters, params);
// Lower the IR filters to a DataFusion `Expr` and apply via
// `Scanner::filter_expr` inside the configure closure. The string
// pushdown path (`build_lance_filter` → `scanner.filter(&str)`) is
// gone for node scans — structured Expr unlocks `CompOp::Contains`
// pushdown (via `array_has`) and lets DF 53's optimizer rules
// (vectorized IN-list, PhysicalExprSimplifier, CASE-NULL shortcut)
// reach our predicates. Other call sites that still take string SQL
// (hydrate_nodes for the Expand pushdown, count_rows, the mutation
// delete path) migrate in follow-up MRs.
let filter_expr = build_lance_filter_expr(filters, params);
// Blob columns must be excluded from scan when a filter is present
// (Lance bug: BlobsDescriptions + filter triggers a projection assertion).
@ -1056,10 +1064,15 @@ async fn execute_node_scan(
let batches = crate::table_store::TableStore::scan_stream_with(
&ds,
projection,
filter_sql.as_deref(),
None,
None,
false,
|scanner| {
// Apply the structured IR filter via Lance's Expr pushdown.
if let Some(ref expr) = filter_expr {
scanner.filter_expr(expr.clone());
}
// Apply FTS queries from hoisted search filters (search/fuzzy/match_text in match clause)
for filter in filters {
if is_search_filter(filter) {
@ -1288,6 +1301,125 @@ pub(super) fn literal_to_sql(lit: &Literal) -> String {
}
}
// ---------------------------------------------------------------------------
// Structured DataFusion-Expr pushdown
//
// Parallel to the `ir_*_to_sql` family above, these helpers lower the same
// IR filter shapes to `datafusion::prelude::Expr` so we can call
// `Scanner::filter_expr(Expr)` instead of `Scanner::filter(&str)`. The
// structured form unlocks two things the string path could not express:
//
// 1. `CompOp::Contains` against list-typed columns (lowered to
// `array_has(col, value)` — requires the `nested_expressions`
// feature on the `datafusion` crate, enabled in the workspace).
// 2. Optimizer rules in DataFusion 53 that act on `Expr` shapes
// (vectorized `IN`-list eq kernel, `PhysicalExprSimplifier`, the
// `CASE WHEN x THEN y ELSE NULL` shortcut, etc.).
//
// Search predicates (`is_search_filter`) are still handled separately via
// `scanner.full_text_search(...)`, not via filter_expr — they stay None
// here just like in `ir_filter_to_sql`. The `literal_to_sql` path remains
// because the mutation/update layer (`exec/mutation.rs`) still produces
// SQL strings for `Dataset::delete(&str)`; that migration is MR-A's
// territory (Lance #6658 + delete two-phase).
/// Lower every pushable IR filter to a DataFusion `Expr` and AND the
/// results together, left to right.
///
/// Filters for which `ir_filter_to_expr` yields `None` are skipped rather
/// than aborting the whole predicate. Returns `None` when no filter could
/// be lowered at all.
pub(super) fn build_lance_filter_expr(
    filters: &[IRFilter],
    params: &ParamMap,
) -> Option<datafusion::prelude::Expr> {
    filters
        .iter()
        .filter_map(|filter| ir_filter_to_expr(filter, params))
        // `Expr::and` builds the same left-nested `BinaryExpr` with
        // `Operator::And` that a hand-written accumulator would.
        .reduce(|conjunction, next| conjunction.and(next))
}
/// Lower one IR filter to a DataFusion `Expr`.
///
/// Returns `None` when the filter is a search-mode filter (those are
/// applied through `scanner.full_text_search`, not through the pushed-down
/// predicate) or when either operand cannot be lowered by
/// `ir_expr_to_expr`.
pub(super) fn ir_filter_to_expr(
    filter: &IRFilter,
    params: &ParamMap,
) -> Option<datafusion::prelude::Expr> {
    use datafusion::functions_nested::expr_fn::array_has;

    if is_search_filter(filter) {
        return None;
    }

    let lhs = ir_expr_to_expr(&filter.left, params)?;
    let rhs = ir_expr_to_expr(&filter.right, params)?;

    let lowered = match filter.op {
        // List-contains: `prop CONTAINS value` becomes
        // `array_has(prop, value)`, which the string-SQL path could not
        // express.
        CompOp::Contains => array_has(lhs, rhs),
        CompOp::Eq => lhs.eq(rhs),
        CompOp::Ne => lhs.not_eq(rhs),
        CompOp::Gt => lhs.gt(rhs),
        CompOp::Lt => lhs.lt(rhs),
        CompOp::Ge => lhs.gt_eq(rhs),
        CompOp::Le => lhs.lt_eq(rhs),
    };
    Some(lowered)
}
/// Convert an IR expression to a DataFusion `Expr`.
///
/// Supported shapes:
/// - `IRExpr::PropAccess`: an unqualified column reference named exactly
///   after the property.
/// - `IRExpr::Literal`: lowered through `literal_to_expr`.
/// - `IRExpr::Param`: looked up in `params`, then lowered like a literal.
///
/// Returns `None` for every other shape, for a parameter that is not
/// present in `params`, and for literals `literal_to_expr` cannot lower.
pub(super) fn ir_expr_to_expr(
    expr: &IRExpr,
    params: &ParamMap,
) -> Option<datafusion::prelude::Expr> {
    use datafusion::prelude::ident;
    match expr {
        // `ident` takes the name verbatim. `col` would parse it as a SQL
        // identifier instead: unquoted names are lower-cased and a `.` is
        // treated as a qualifier separator, so a property such as
        // `firstName` would silently reference a non-existent `firstname`
        // column.
        IRExpr::PropAccess { property, .. } => Some(ident(property)),
        IRExpr::Literal(l) => literal_to_expr(l),
        IRExpr::Param(name) => params.get(name).and_then(literal_to_expr),
        _ => None,
    }
}
/// Lower an IR `Literal` to a DataFusion literal `Expr`.
///
/// `Literal::List` has no pushdown form here and yields `None`; every
/// other variant maps to a scalar literal.
fn literal_to_expr(lit: &Literal) -> Option<datafusion::prelude::Expr> {
    use datafusion::prelude::lit as scalar;
    use datafusion::scalar::ScalarValue;

    let lowered = match lit {
        Literal::List(_) => return None,
        Literal::Null => scalar(ScalarValue::Null),
        Literal::Bool(flag) => scalar(*flag),
        Literal::Integer(value) => scalar(*value),
        Literal::Float(value) => scalar(*value),
        Literal::String(text) => scalar(text.clone()),
        // Date and DateTime are carried as strings and passed through as
        // string literals; comparison against typed columns is left to
        // Lance/DataFusion's implicit cast, as on the string-SQL path.
        Literal::Date(text) => scalar(text.clone()),
        Literal::DateTime(text) => scalar(text.clone()),
    };
    Some(lowered)
}
fn prefix_batch(batch: &RecordBatch, variable: &str) -> Result<RecordBatch> {
let fields: Vec<Field> = batch.schema().fields().iter().map(|f| {
Field::new(format!("{}.{}", variable, f.name()), f.data_type().clone(), f.is_nullable())

View file

@ -26,10 +26,10 @@ use arrow_schema::SchemaRef;
use lance::Dataset;
use omnigraph_compiler::catalog::EdgeType;
use crate::db::{MutationOpKind, SubTableUpdate};
use crate::db::manifest::{
new_sidecar, write_sidecar, RecoverySidecarHandle, SidecarKind, SidecarTablePin,
RecoverySidecarHandle, SidecarKind, SidecarTablePin, new_sidecar, write_sidecar,
};
use crate::db::{MutationOpKind, SubTableUpdate};
use crate::error::{OmniError, Result};
/// Whether the per-table accumulator should commit via `stage_append`
@ -119,10 +119,12 @@ impl MutationStaging {
expected_version: u64,
op_kind: MutationOpKind,
) {
self.paths.entry(table_key.to_string()).or_insert(StagedTablePath {
full_path,
table_branch,
});
self.paths
.entry(table_key.to_string())
.or_insert(StagedTablePath {
full_path,
table_branch,
});
self.expected_versions
.entry(table_key.to_string())
.or_insert(expected_version);
@ -202,7 +204,8 @@ impl MutationStaging {
/// Record a delete that already inline-committed at the Lance layer.
pub(crate) fn record_inline(&mut self, update: SubTableUpdate) {
self.inline_committed.insert(update.table_key.clone(), update);
self.inline_committed
.insert(update.table_key.clone(), update);
}
/// Read-your-writes accessor: the accumulated pending batches for
@ -308,18 +311,13 @@ impl MutationStaging {
// mode is exempt because no-key node and edge inserts use
// ULID-generated ids that are unique within a query.
let combined = match table.mode {
PendingMode::Merge => {
dedupe_merge_batches_by_id(&table.schema, table.batches)?
}
PendingMode::Merge => dedupe_merge_batches_by_id(&table.schema, table.batches)?,
PendingMode::Append => {
if table.batches.len() == 1 {
table.batches.into_iter().next().unwrap()
} else {
arrow_select::concat::concat_batches(
&table.schema,
&table.batches,
)
.map_err(|e| OmniError::Lance(e.to_string()))?
arrow_select::concat::concat_batches(&table.schema, &table.batches)
.map_err(|e| OmniError::Lance(e.to_string()))?
}
}
};
@ -327,9 +325,7 @@ impl MutationStaging {
// Stage produces uncommitted fragments + transaction. No
// Lance HEAD advance until `commit_all` runs `commit_staged`.
let staged = match table.mode {
PendingMode::Append => {
db.table_store().stage_append(&ds, combined, &[]).await?
}
PendingMode::Append => db.table_store().stage_append(&ds, combined, &[]).await?,
PendingMode::Merge => {
db.table_store()
.stage_merge_insert(
@ -420,7 +416,7 @@ impl StagedMutation {
///
/// Revalidation: between `stage_all` and `commit_all`, another
/// writer (in the same process or another process sharing the
/// repo) may have committed to one of our touched tables, advancing
/// graph) may have committed to one of our touched tables, advancing
/// the manifest pin past our `expected_version`. We revalidate
/// under the queue and fail-fast with `manifest_conflict` before
/// any `commit_staged` so the orphaned uncommitted fragments stay
@ -462,9 +458,8 @@ impl StagedMutation {
// from interleaving between our delete and our publish, which
// would otherwise leave a Lance-HEAD-ahead residual the
// delete-only sidecar (added below) would have to recover.
let mut queue_keys: Vec<(String, Option<String>)> = Vec::with_capacity(
staged.len() + inline_committed.len(),
);
let mut queue_keys: Vec<(String, Option<String>)> =
Vec::with_capacity(staged.len() + inline_committed.len());
for entry in &staged {
queue_keys.push((entry.table_key.clone(), entry.path.table_branch.clone()));
}
@ -565,9 +560,8 @@ impl StagedMutation {
// Finding 3 hazard: delete-only mutations would otherwise skip
// the sidecar, leaving any commit→publish residual unreachable
// by recovery.
let mut pins: Vec<SidecarTablePin> = Vec::with_capacity(
staged.len() + inline_committed.len(),
);
let mut pins: Vec<SidecarTablePin> =
Vec::with_capacity(staged.len() + inline_committed.len());
for entry in &staged {
pins.push(SidecarTablePin {
table_key: entry.table_key.clone(),
@ -899,10 +893,7 @@ pub(crate) async fn count_src_per_edge(
/// Count pending edges per `src` with NO dedup. Correct when caller
/// guarantees pending rows have unique primary keys (engine inserts via
/// fresh ULID; loader Append mode).
fn count_pending_src_naive(
pending_batches: &[RecordBatch],
counts: &mut HashMap<String, u32>,
) {
fn count_pending_src_naive(pending_batches: &[RecordBatch], counts: &mut HashMap<String, u32>) {
for batch in pending_batches {
let Some(col) = batch.column_by_name("src") else {
continue;
@ -947,12 +938,15 @@ fn count_pending_src_with_dedupe(
dedupe_key_column
)));
};
let key_arr = key_col.as_any().downcast_ref::<StringArray>().ok_or_else(|| {
OmniError::Lance(format!(
"count_src_per_edge: pending '{}' column is not Utf8",
dedupe_key_column
))
})?;
let key_arr = key_col
.as_any()
.downcast_ref::<StringArray>()
.ok_or_else(|| {
OmniError::Lance(format!(
"count_src_per_edge: pending '{}' column is not Utf8",
dedupe_key_column
))
})?;
let src_arr = batch
.column_by_name("src")
.and_then(|c| c.as_any().downcast_ref::<StringArray>());

View file

@ -1,3 +1,12 @@
// Lance 6's trait surface (heavier futures/streams nesting around the
// staged-write API in `storage_layer.rs`) pushes us past the default
// trait-resolution recursion limit of 128 on Linux builds. Raising to
// 256 here is the upstream-suggested fix from rustc itself
// ("consider increasing the recursion limit"). macOS happens to short-
// circuit before tripping the limit; CI on Linux does not. Revisit if
// future Lance bumps stop needing this.
#![recursion_limit = "256"]
pub mod changes;
pub mod db;
pub mod embedding;

View file

@ -66,7 +66,7 @@ impl StorageAdapter for LocalStorageAdapter {
// Ensure parent directory exists. S3 has no equivalent (PutObject
// is path-agnostic). For local fs, callers like the recovery
// sidecar protocol expect transparent directory creation under
// the repo root (the `__recovery/` directory doesn't pre-exist;
// the graph root (the `__recovery/` directory doesn't pre-exist;
// first sidecar write creates it).
if let Some(parent) = path.parent() {
if !parent.as_os_str().is_empty() {
@ -398,10 +398,13 @@ mod tests {
#[test]
fn storage_backend_selection_is_scheme_aware() {
assert_eq!(storage_kind_for_uri("/tmp/repo"), StorageKind::Local);
assert_eq!(storage_kind_for_uri("file:///tmp/repo"), StorageKind::Local);
assert_eq!(storage_kind_for_uri("/tmp/graph"), StorageKind::Local);
assert_eq!(
storage_kind_for_uri("s3://omnigraph-preview/repo"),
storage_kind_for_uri("file:///tmp/graph"),
StorageKind::Local
);
assert_eq!(
storage_kind_for_uri("s3://omnigraph-preview/graph"),
StorageKind::S3
);
}
@ -440,8 +443,8 @@ mod tests {
#[test]
fn parse_s3_uri_splits_bucket_and_key() {
let location = parse_s3_uri("s3://bucket/repo/_schema.pg").unwrap();
let location = parse_s3_uri("s3://bucket/graph/_schema.pg").unwrap();
assert_eq!(location.bucket, "bucket");
assert_eq!(location.key, "repo/_schema.pg");
assert_eq!(location.key, "graph/_schema.pg");
}
}

View file

@ -10,11 +10,15 @@
//! ## Transitional residuals on the trait
//!
//! Several inline-commit methods remain on the trait surface as
//! documented residuals: `delete_where` (Lance 4.0.0's `DeleteJob` is
//! `pub(crate)` — see [#6658](https://github.com/lance-format/lance/issues/6658)),
//! documented residuals: `delete_where`
//! ([#6658](https://github.com/lance-format/lance/issues/6658) closed
//! 2026-05-14, but the public `DeleteBuilder::execute_uncommitted` API
//! did not backport to the 6.x release line — it first ships in
//! `v7.0.0-beta.10`. Migration to staged two-phase delete is tracked as
//! MR-A and is gated on the Lance v7.x bump, not the current v6.0.1 pin),
//! `create_vector_index` (segment-commit-path requires
//! `build_index_metadata_from_segments` which is `pub(crate)` — see
//! [#6666](https://github.com/lance-format/lance/issues/6666)), and the
//! [#6666](https://github.com/lance-format/lance/issues/6666), still open), and the
//! legacy `append_batch` / `merge_insert_batches` / `overwrite_batch` /
//! `create_btree_index` / `create_inverted_index` paths kept while
//! engine call sites finish migrating off of them (Phase 1b / Phase 9

View file

@ -8,15 +8,17 @@ use lance::Dataset;
use lance::blob::BlobArrayBuilder;
use lance::dataset::scanner::{ColumnOrdering, DatasetRecordBatchStream, Scanner};
use lance::dataset::transaction::{Operation, Transaction, TransactionBuilder};
use lance::dataset::write::merge_insert::SourceDedupeBehavior;
use lance::dataset::{
CommitBuilder, InsertBuilder, MergeInsertBuilder, WhenMatched, WhenNotMatched, WriteMode,
WriteParams,
};
use lance::datatypes::BlobKind;
use lance::index::DatasetIndexExt;
use lance::index::scalar::IndexDetails;
use lance_file::version::LanceFileVersion;
use lance_index::scalar::{InvertedIndexParams, ScalarIndexParams};
use lance_index::{DatasetIndexExt, IndexType, is_system_index};
use lance_index::{IndexType, is_system_index};
use lance_linalg::distance::MetricType;
use lance_table::format::{Fragment, IndexMetadata, RowIdMeta};
use lance_table::rowids::{RowIdSequence, write_row_ids};
@ -651,15 +653,58 @@ impl TableStore {
return self.table_state(dataset_uri, &ds).await;
}
// Precondition for the FirstSeen workaround below: every caller of
// this primitive must hand in a source batch that is unique by
// `key_columns`. Without this check, `SourceDedupeBehavior::FirstSeen`
// would silently collapse genuine duplicates instead of erroring.
check_batch_unique_by_keys(&batch, &key_columns, "merge_insert_batch")?;
// TODO(lance-upstream): MergeInsertBuilder does not accept WriteParams,
// so allow_external_blob_outside_bases cannot be set here. External URI
// blobs via merge_insert (LoadMode::Merge, mutations) are unsupported
// until Lance exposes WriteParams on MergeInsertBuilder.
let ds = Arc::new(ds);
let job = MergeInsertBuilder::try_new(ds, key_columns)
.map_err(|e| OmniError::Lance(e.to_string()))?
.when_matched(when_matched)
.when_not_matched(when_not_matched)
let mut builder = MergeInsertBuilder::try_new(ds, key_columns)
.map_err(|e| OmniError::Lance(e.to_string()))?;
builder.when_matched(when_matched);
builder.when_not_matched(when_not_matched);
// Workaround for a Lance 4.0.x bug class where sequential
// merge_insert calls against rows previously rewritten by
// merge_insert produce a spurious "Ambiguous merge inserts:
// multiple source rows match the same target row on (id = ...)"
// error. Lance's `processed_row_ids: Mutex<HashSet<u64>>`
// (lance-4.0.0 `src/dataset/write/merge_insert.rs:2099`)
// double-processes the same source/target match against
// datasets previously rewritten by merge_insert, and the default
// `SourceDedupeBehavior::Fail` errors on the second insertion.
// `FirstSeen` makes Lance skip the duplicate match instead.
//
// Covers both observed surfaces:
// - PR #98 (sequential `load --mode merge` against same keys).
// - MR-920 (sequential `update T set {f} where x=y` on same row).
//
// Correctness-preserving for OmniGraph because every call path
// that reaches this primitive either pre-dedupes the source batch
// by id, or surfaces a real source dup via the
// `check_batch_unique_by_keys` precondition above (which fires
// before the FirstSeen setter has a chance to silently collapse
// anything):
// - Load path: `enforce_unique_constraints_intra_batch`
// (`loader/mod.rs:1453`) errors on intra-batch `@key` dups.
// - Mutate path: `MutationStaging::finalize` (`exec/staging.rs`)
// accumulates and dedupes by `id`.
// - Branch-merge path: `compute_source_delta` /
// `compute_three_way_delta` (`exec/merge.rs`) walk via
// `OrderedTableCursor` and `push_row` each id at most once.
// So FirstSeen only suppresses the spurious Lance behavior, never
// user data. Pinned by `loader_rejects_intra_batch_duplicate_keys`
// in `tests/consistency.rs` plus the
// `check_batch_unique_by_keys` precondition.
//
// Retire when upstream Lance fixes the bug class. Tracked at
// MR-957; upstream: lance-format/lance#6877.
builder.source_dedupe_behavior(SourceDedupeBehavior::FirstSeen);
let job = builder
.try_build()
.map_err(|e| OmniError::Lance(e.to_string()))?;
@ -870,11 +915,26 @@ impl TableStore {
"stage_merge_insert called with empty batch".to_string(),
));
}
// Precondition for FirstSeen below. See the comment on
// `merge_insert_batch` for why this check is here, not on the caller:
// every call path that reaches stage_merge_insert (load,
// MutationStaging::finalize, branch_merge::publish_rewritten_merge_table)
// must hand in a source batch that is unique by `key_columns`.
check_batch_unique_by_keys(&batch, &key_columns, "stage_merge_insert")?;
let ds = Arc::new(ds);
let job = MergeInsertBuilder::try_new(ds, key_columns)
.map_err(|e| OmniError::Lance(e.to_string()))?
.when_matched(when_matched)
.when_not_matched(when_not_matched)
let mut builder = MergeInsertBuilder::try_new(ds, key_columns)
.map_err(|e| OmniError::Lance(e.to_string()))?;
builder.when_matched(when_matched);
builder.when_not_matched(when_not_matched);
// See `merge_insert_batch` for the FirstSeen rationale. Workaround
// for the Lance 4.0.x bug class where sequential merge_insert /
// update against rows previously rewritten by merge_insert trips
// Lance's `processed_row_ids` HashSet and errors under the default
// `SourceDedupeBehavior::Fail`. Retire when upstream Lance is fixed.
builder.source_dedupe_behavior(SourceDedupeBehavior::FirstSeen);
let job = builder
.try_build()
.map_err(|e| OmniError::Lance(e.to_string()))?;
let schema = batch.schema();
@ -1651,3 +1711,107 @@ fn combine_committed_with_staged(ds: &Dataset, staged: &[StagedWrite]) -> Vec<Fr
}
combined
}
/// Fail-fast uniqueness check on the source batch handed to
/// `merge_insert_batch` / `stage_merge_insert`.
///
/// Those primitives opt into `SourceDedupeBehavior::FirstSeen` (MR-957
/// workaround), which would otherwise silently collapse genuinely
/// duplicated source keys. Running this check first turns such duplicates
/// back into an error.
///
/// Only a single key column stored as a `StringArray` is supported; any
/// other shape is reported as an internal error instead of being skipped.
/// Null key values are ignored by the uniqueness check.
///
/// # Errors
/// - `OmniError::manifest_internal` when `key_columns` does not hold
///   exactly one name, the column is missing from `batch`, or the column
///   is not a `StringArray`.
/// - `OmniError::manifest` on the first repeated non-null key value.
fn check_batch_unique_by_keys(
    batch: &RecordBatch,
    key_columns: &[String],
    context: &'static str,
) -> Result<()> {
    let [key_name] = key_columns else {
        return Err(OmniError::manifest_internal(format!(
            "{}: check_batch_unique_by_keys currently supports single-column keys only, got {:?}",
            context, key_columns
        )));
    };

    let Some(key_array) = batch.column_by_name(key_name) else {
        return Err(OmniError::manifest_internal(format!(
            "{}: source batch missing key column '{}'",
            context, key_name
        )));
    };

    let Some(keys) = key_array.as_any().downcast_ref::<StringArray>() else {
        return Err(OmniError::manifest_internal(format!(
            "{}: key column '{}' is not a StringArray (got {:?})",
            context,
            key_name,
            key_array.data_type()
        )));
    };

    let mut observed: std::collections::HashSet<&str> =
        std::collections::HashSet::with_capacity(batch.num_rows());
    // `flatten` drops null slots, so only valid key values are compared.
    for key in keys.iter().flatten() {
        if !observed.insert(key) {
            return Err(OmniError::manifest(format!(
                "{}: duplicate source row for key '{}' (column '{}'); \
                 callers must hand in a batch unique by `key_columns` \
                 see MR-957",
                context, key, key_name
            )));
        }
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_array::StringArray;
    use arrow_schema::{DataType, Field, Schema};

    /// Build a single-column batch whose non-nullable Utf8 `id` column
    /// holds `ids` in order.
    fn batch_with_ids(ids: &[&str]) -> RecordBatch {
        let id_field = Field::new("id", DataType::Utf8, false);
        let id_values = Arc::new(StringArray::from(ids.to_vec())) as ArrayRef;
        RecordBatch::try_new(Arc::new(Schema::new(vec![id_field])), vec![id_values]).unwrap()
    }

    /// Distinct ids pass the precondition.
    #[test]
    fn check_batch_unique_by_keys_passes_when_all_unique() {
        let id_key = ["id".to_string()];
        let unique = batch_with_ids(&["a", "b", "c"]);
        check_batch_unique_by_keys(&unique, &id_key, "test").unwrap();
    }

    /// A repeated id is rejected, and the message names both the
    /// offending key and the tracking ticket.
    #[test]
    fn check_batch_unique_by_keys_errors_on_duplicate_id() {
        let id_key = ["id".to_string()];
        let with_dup = batch_with_ids(&["a", "b", "a"]);
        let msg = check_batch_unique_by_keys(&with_dup, &id_key, "test")
            .unwrap_err()
            .to_string();
        assert!(
            msg.contains("duplicate source row for key 'a'"),
            "unexpected error: {msg}"
        );
        assert!(msg.contains("MR-957"), "error should reference MR-957: {msg}");
    }

    /// More than one key column is reported as unsupported.
    #[test]
    fn check_batch_unique_by_keys_rejects_multi_column_keys() {
        let composite_key = ["id".to_string(), "other".to_string()];
        let single_row = batch_with_ids(&["a"]);
        let err = check_batch_unique_by_keys(&single_row, &composite_key, "test").unwrap_err();
        assert!(err.to_string().contains("single-column keys only"));
    }
}

View file

@ -4,7 +4,8 @@ use std::fs;
use arrow_array::{Array, Int32Array, UInt64Array};
use futures::TryStreamExt;
use lance_index::{DatasetIndexExt, is_system_index};
use lance::index::DatasetIndexExt;
use lance_index::is_system_index;
use omnigraph::db::commit_graph::CommitGraph;
use omnigraph::db::{MergeOutcome, Omnigraph, ReadTarget};

View file

@ -56,7 +56,7 @@ async fn composite_flow_canonical_lifecycle() {
let uri = dir.path().to_str().unwrap();
// ─────────────────────────────────────────────────────────────────
// Step 1: init a fresh repo with the standard test schema.
// Step 1: init a fresh graph with the standard test schema.
// ─────────────────────────────────────────────────────────────────
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
let v_init = version_branch(&db, "main").await.unwrap();
@ -70,7 +70,9 @@ async fn composite_flow_canonical_lifecycle() {
// Step 2: load JSONL seed data (Person + Company nodes,
// Knows + WorksAt edges).
// ─────────────────────────────────────────────────────────────────
load_jsonl(&mut db, TEST_DATA, LoadMode::Append).await.unwrap();
load_jsonl(&mut db, TEST_DATA, LoadMode::Append)
.await
.unwrap();
let v_after_load = version_branch(&db, "main").await.unwrap();
assert!(
v_after_load > v_init,
@ -119,19 +121,13 @@ async fn composite_flow_canonical_lifecycle() {
"feature",
MUTATION_QUERIES,
"insert_person_and_friend",
&mixed_params(
&[("$name", "Frank"), ("$friend", "Eve")],
&[("$age", 33)],
),
&mixed_params(&[("$name", "Frank"), ("$friend", "Eve")], &[("$age", 33)]),
)
.await
.expect("multi-statement insert+edge on feature");
// After: feature has 4 + Eve + Frank = 6 Persons.
let snap = db
.snapshot_of(ReadTarget::branch("feature"))
.await
.unwrap();
let snap = db.snapshot_of(ReadTarget::branch("feature")).await.unwrap();
let person_ds = snap.open("node:Person").await.unwrap();
assert_eq!(
person_ds.count_rows(None).await.unwrap(),
@ -321,14 +317,10 @@ async fn composite_flow_canonical_lifecycle() {
);
// Re-run a query to verify post-optimize correctness.
let post_optimize_total = query_main(
&mut db,
TEST_QUERIES,
"total_people",
&ParamMap::default(),
)
.await
.unwrap();
let post_optimize_total =
query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default())
.await
.unwrap();
assert!(
!post_optimize_total.batches().is_empty(),
"queries must still work after optimize"
@ -385,14 +377,9 @@ async fn composite_flow_canonical_lifecycle() {
// post-cleanup. Post-cleanup mutation is omitted here pending
// resolution of the optimize-vs-manifest-pin interaction documented
// in Step 10.
let final_total = query_main(
&mut db,
TEST_QUERIES,
"total_people",
&ParamMap::default(),
)
.await
.unwrap();
let final_total = query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default())
.await
.unwrap();
assert!(!final_total.batches().is_empty());
}
@ -431,10 +418,12 @@ async fn composite_flow_schema_apply_then_branch_ops_no_deadlock_in_refresh() {
// Step 1: init + load on handle A.
let mut db_a = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
load_jsonl(&mut db_a, TEST_DATA, LoadMode::Append).await.unwrap();
load_jsonl(&mut db_a, TEST_DATA, LoadMode::Append)
.await
.unwrap();
assert_eq!(count_rows(&db_a, "node:Person").await, 4);
// Step 2: open handle B on the same repo. B's in-memory schema_source
// Step 2: open handle B on the same graph. B's in-memory schema_source
// cache is now a snapshot of `_schema.pg` at open time.
let db_b = Omnigraph::open(uri).await.unwrap();
@ -444,7 +433,7 @@ async fn composite_flow_schema_apply_then_branch_ops_no_deadlock_in_refresh() {
// to disk.
const TEST_SCHEMA_V2: &str = "node Person {\n name: String @key\n age: I32?\n nickname: String?\n}\n\nnode Company {\n name: String @key\n}\n\nedge Knows: Person -> Person {\n since: Date?\n}\n\nedge WorksAt: Person -> Company\n";
let plan = db_a.apply_schema(TEST_SCHEMA_V2).await.unwrap();
assert!(plan.applied, "apply_schema must succeed on a clean repo");
assert!(plan.applied, "apply_schema must succeed on a clean graph");
assert!(
!plan.steps.is_empty(),
"apply_schema must record the AddProperty step"
@ -561,7 +550,9 @@ async fn composite_flow_multi_branch_sequential_merges() {
// edges from test.jsonl).
// ─────────────────────────────────────────────────────────────────
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
load_jsonl(&mut db, TEST_DATA, LoadMode::Append).await.unwrap();
load_jsonl(&mut db, TEST_DATA, LoadMode::Append)
.await
.unwrap();
assert_eq!(count_rows(&db, "node:Person").await, 4);
assert_eq!(count_rows(&db, "edge:Knows").await, 3);
@ -687,10 +678,7 @@ async fn composite_flow_multi_branch_sequential_merges() {
"feat-a",
MUTATION_QUERIES,
"insert_person_and_friend",
&mixed_params(
&[("$name", "Grace"), ("$friend", "Eve")],
&[("$age", 28)],
),
&mixed_params(&[("$name", "Grace"), ("$friend", "Eve")], &[("$age", 28)]),
)
.await
.expect("insert Grace + Knows(Grace → Eve) on feat-a");
@ -821,15 +809,14 @@ async fn composite_flow_multi_branch_sequential_merges() {
// `total_people` returns count(Person) = 10. Catches regressions in
// group-by/count execution against a multi-fragment table whose
// current shape was produced by two sequential merges.
let total_post_merges = query_main(
&mut db,
TEST_QUERIES,
"total_people",
&ParamMap::default(),
)
.await
.unwrap();
assert_total(&total_post_merges, 10, "post both merges, main must total 10 Persons");
let total_post_merges = query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default())
.await
.unwrap();
assert_total(
&total_post_merges,
10,
"post both merges, main must total 10 Persons",
);
// ─────────────────────────────────────────────────────────────────
// Step 14: time-travel to pre-merge-a-version. Reads must return
@ -1021,14 +1008,9 @@ async fn composite_flow_multi_branch_sequential_merges() {
// correctly to disk but the reopened catalog can't bind them.
// ─────────────────────────────────────────────────────────────────
let mut db = db;
let post_reopen_total = query_main(
&mut db,
TEST_QUERIES,
"total_people",
&ParamMap::default(),
)
.await
.unwrap();
let post_reopen_total = query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default())
.await
.unwrap();
assert_total(
&post_reopen_total,
10,

View file

@ -119,6 +119,187 @@ async fn load_merge_upserts_existing_and_inserts_new() {
}
}
/// Regression: two sequential `LoadMode::Merge` invocations against the
/// same set of keys must both succeed. Pre-fix, the second one failed
/// with `Ambiguous merge inserts are prohibited: multiple source rows
/// match the same target row on (id = "TEST-1")` even though every
/// source batch had one row per key.
///
/// Triggered by Lance's `processed_row_ids: Mutex<HashSet<u64>>`
/// (lance-4.0.0 `src/dataset/write/merge_insert.rs:2099`) double-
/// processing the same source/target match against datasets previously
/// rewritten by merge_insert. Worked around by opting
/// `MergeInsertBuilder` into `SourceDedupeBehavior::FirstSeen` in
/// `crates/omnigraph/src/table_store.rs` — see that file for the full
/// rationale and the safety pin (`loader_rejects_intra_batch_duplicate_keys`).
/// Tracked at MR-957; upstream: lance-format/lance#6877.
// Applies the same Merge delta twice on top of an Overwrite seed and
// checks that both loads succeed and the row count is stable.
#[tokio::test]
async fn load_merge_repeated_against_overlapping_keys_succeeds() {
let dir = tempfile::tempdir().unwrap();
let uri = dir.path().to_str().unwrap();
let schema = r#"
node Thing {
key: String @key
required_val: String
optional_val: String?
}
"#;
let mut db = Omnigraph::init(uri, schema).await.unwrap();
// Seed with 50 fully-populated rows (id + required + optional).
let mut seed = String::new();
for i in 1..=50 {
seed.push_str(&format!(
r#"{{"type":"Thing","data":{{"key":"TEST-{i}","required_val":"required {i}","optional_val":"optional {i}"}}}}
"#,
));
}
load_jsonl(&mut db, &seed, LoadMode::Overwrite)
.await
.unwrap();
// Partial-schema delta — mirrors the bug report exactly: omits
// `optional_val`. 25 existing keys + 5 new keys, one row per key.
let mut delta = String::new();
for i in (1..=25).chain(51..=55) {
delta.push_str(&format!(
r#"{{"type":"Thing","data":{{"key":"TEST-{i}","required_val":"required {i} UPDATED"}}}}
"#,
));
}
// First merge: 50 seeded rows plus the 5 new keys (51..=55) gives 55.
load_jsonl(&mut db, &delta, LoadMode::Merge)
.await
.expect("first merge must succeed");
assert_eq!(count_rows(&db, "node:Thing").await, 55);
// Second merge replays the identical delta: it must not error and must
// not change the row count.
load_jsonl(&mut db, &delta, LoadMode::Merge)
.await
.expect("second merge against same keys must succeed");
assert_eq!(count_rows(&db, "node:Thing").await, 55);
}
/// Safety pin for the `SourceDedupeBehavior::FirstSeen` workaround in
/// `crates/omnigraph/src/table_store.rs`. FirstSeen tells Lance to
/// silently skip a duplicate source row instead of erroring. Our use of
/// it depends on user-provided duplicates being rejected *before* the
/// batch reaches Lance — otherwise FirstSeen could silently drop user
/// data.
///
/// Defense in depth:
/// 1. The loader's `enforce_unique_constraints_intra_batch`
/// (`loader/mod.rs:1453`), invoked unconditionally on any node type
/// with a `@key`, errors on intra-batch duplicate `@key` values at
/// intake — pinned by this test across every `LoadMode`.
/// 2. The `check_batch_unique_by_keys` precondition at the top of
/// `merge_insert_batch` and `stage_merge_insert` is the final
/// fail-fast guard: even if a future caller bypasses the loader path
/// (e.g. branch-merge's `publish_rewritten_merge_table` builds its
/// own source batch directly), a real duplicate id reaches Lance
/// only after surfacing as an `OmniError::Manifest`, never silently
/// via FirstSeen. Pinned by the unit tests in `table_store::tests`.
// Feeds a two-row batch with the same `@key` through every `LoadMode` and
// checks each attempt is rejected without persisting any row.
#[tokio::test]
async fn loader_rejects_intra_batch_duplicate_keys() {
let dir = tempfile::tempdir().unwrap();
let uri = dir.path().to_str().unwrap();
let schema = r#"
node Thing {
key: String @key
value: String
}
"#;
let mut db = Omnigraph::init(uri, schema).await.unwrap();
// Two rows sharing the key "DUP" inside a single load payload.
let dupes = r#"{"type":"Thing","data":{"key":"DUP","value":"first"}}
{"type":"Thing","data":{"key":"DUP","value":"second"}}
"#;
// The same handle is reused across modes; no load ever succeeds, so the
// table is expected to stay empty after each rejected attempt.
for mode in [LoadMode::Overwrite, LoadMode::Append, LoadMode::Merge] {
let err = load_jsonl(&mut db, dupes, mode).await.unwrap_err();
let msg = err.to_string();
assert!(
msg.contains("@unique violation") && msg.contains("DUP"),
"load mode {mode:?} must reject intra-batch duplicate @key (got: {msg})"
);
assert_eq!(
count_rows(&db, "node:Thing").await,
0,
"load mode {mode:?} must not persist any rows when the batch is rejected"
);
}
}
/// Canary for the upstream Lance gap hidden by the `FirstSeen` workaround in
/// `table_store.rs`.
///
/// The bug class is "Window 2": load → indices built explicitly → merge →
/// merge. Even with the engine fully aligned to the "indexes are derived
/// state" invariant (MR-848), the Lance internal that triggers the bug stays
/// reachable as long as an `id` index has been built between the first and
/// second merge_insert.
///
/// The test replays the Window-2 sequence with the workaround in place and is
/// expected to pass today. How to read a change in its outcome:
/// - It STARTS failing: the workaround has lost effectiveness (upstream Lance
///   changed something, or the FirstSeen setter was dropped from
///   `table_store.rs`).
/// - A future Lance upgrade fixes the bug class: the test keeps passing and
///   the FirstSeen setter can be retired.
///
/// Tracked at MR-957; upstream: lance-format/lance#6877.
#[tokio::test]
async fn load_merge_window_2_documents_upstream_lance_gap() {
    let schema = r#"
node Thing {
key: String @key
required_val: String
optional_val: String?
}
"#;
    let dir = tempfile::tempdir().unwrap();
    let mut db = Omnigraph::init(dir.path().to_str().unwrap(), schema)
        .await
        .unwrap();

    // Seed rows TEST-1 ..= TEST-50, each with both value columns populated.
    let seed: String = (1..=50)
        .map(|i| {
            format!(
                r#"{{"type":"Thing","data":{{"key":"TEST-{i}","required_val":"required {i}","optional_val":"optional {i}"}}}}
"#,
            )
        })
        .collect();
    load_jsonl(&mut db, &seed, LoadMode::Overwrite)
        .await
        .unwrap();

    // The Window-2 trigger: an explicit ensure_indices between the seed load
    // and the merges. With the eager build (MR-583) the BTREE on `id` already
    // exists at this point; the explicit call pins the invariant for the
    // post-MR-848 future in which the eager build is gone.
    db.ensure_indices().await.unwrap();

    // Delta: 25 keys that already exist (updates) plus 5 new keys (inserts).
    let delta: String = (1..=25)
        .chain(51..=55)
        .map(|i| {
            format!(
                r#"{{"type":"Thing","data":{{"key":"TEST-{i}","required_val":"required {i} UPDATED"}}}}
"#,
            )
        })
        .collect();

    // Both merges have to succeed under the FirstSeen workaround.
    // `processed_row_ids` re-processes the same target row_id under the
    // default `SourceDedupeBehavior::Fail`; FirstSeen tolerates it.
    let first_merge = load_jsonl(&mut db, &delta, LoadMode::Merge).await;
    first_merge.expect("first merge after ensure_indices must succeed");

    db.ensure_indices().await.unwrap();

    let second_merge = load_jsonl(&mut db, &delta, LoadMode::Merge).await;
    second_merge.expect(
        "second merge after ensure_indices must succeed \
         (Window 2 canary: drop the FirstSeen setter in table_store.rs \
         only when this stays green WITHOUT it)",
    );

    // 50 seeded rows + 5 inserted by the delta; the repeated merge adds none.
    let total = count_rows(&db, "node:Thing").await;
    assert_eq!(total, 55);
}
#[tokio::test]
async fn cross_type_traversal_deduplicates_duplicate_edges() {
let dir = tempfile::tempdir().unwrap();
@ -163,7 +344,7 @@ async fn explicit_target_query_sees_other_writer_commits_without_refresh() {
let uri = dir.path().to_str().unwrap();
// Two independent handles to the same repo
// Two independent handles to the same graph
let mut db1 = Omnigraph::open(uri).await.unwrap();
let mut db2 = Omnigraph::open(uri).await.unwrap();

View file

@ -1866,3 +1866,65 @@ async fn ensure_indices_does_not_error_on_repeated_call() {
let ds = snap.open("node:Person").await.unwrap();
assert_eq!(ds.count_rows(None).await.unwrap(), 4);
}
// ─── DataFusion-Expr filter pushdown (Tier-1 follow-up to the Lance v6 bump) ──
/// Regression for `CompOp::Contains` pushdown via `array_has` in
/// `ir_filter_to_expr`.
///
/// Before the Expr-pushdown refactor, the `ir_filter_to_sql` family returned
/// `None` for list-contains (its comment read *"Can't pushdown list
/// contains"*), so the predicate was applied post-scan in memory. With
/// `Scanner::filter_expr(Expr)` and DF's `array_has` builtin, the contains
/// predicate now pushes down to Lance.
///
/// What this test pins is correctness of the result set. A regression on the
/// pushdown itself would scan every row and filter afterwards, which still
/// yields the same rows; `lance_surface_guards.rs` is the structural pin for
/// the surface.
#[tokio::test]
async fn ir_filter_with_list_contains_pushes_down() {
    let schema = r#"
node Doc {
slug: String @key
tags: [String]
}
"#;
    // Two docs tagged "red", one without it, and one with an empty tag list.
    let data = r#"{"type":"Doc","data":{"slug":"alpha","tags":["red","blue"]}}
{"type":"Doc","data":{"slug":"bravo","tags":["green"]}}
{"type":"Doc","data":{"slug":"charlie","tags":["red","green"]}}
{"type":"Doc","data":{"slug":"delta","tags":[]}}"#;
    let queries = r#"
query docs_with_tag($tag: String) {
match {
$d: Doc
$d.tags contains $tag
}
return { $d.slug }
}
"#;

    let dir = tempfile::tempdir().unwrap();
    let uri = dir.path().to_str().unwrap();
    let mut db = Omnigraph::init(uri, schema).await.unwrap();
    load_jsonl(&mut db, data, LoadMode::Overwrite)
        .await
        .unwrap();

    let tag_params = params(&[("$tag", "red")]);
    let result = query_main(&mut db, queries, "docs_with_tag", &tag_params)
        .await
        .unwrap();

    let batch = result.concat_batches().unwrap();
    let slugs = batch
        .column(0)
        .as_any()
        .downcast_ref::<StringArray>()
        .unwrap();

    // Row order is not part of the contract, so compare in sorted order.
    let mut got: Vec<&str> = Vec::with_capacity(slugs.len());
    for row in 0..slugs.len() {
        got.push(slugs.value(row));
    }
    got.sort();

    let expected = vec!["alpha", "charlie"];
    assert_eq!(
        got, expected,
        "contains-pushdown should return exactly the rows whose tags list contains 'red'"
    );
}

View file

@ -66,7 +66,7 @@ async fn graph_publish_failpoint_triggers_before_commit_append() {
// Atomic schema apply: schema apply writes staging files first, then commits
// the manifest, then renames staging → final. Tests below inject crashes at
// the two boundaries and assert that reopening the repo yields a consistent
// the two boundaries and assert that reopening the graph yields a consistent
// state.
#[tokio::test]
@ -303,14 +303,10 @@ async fn inline_delete_conflict_writes_sidecar_before_rejecting() {
let person_uri = node_table_uri(&uri, "Person");
{
let _pause_delete = ScopedFailPoint::new("mutation.delete_node_pre_primary_delete", "pause");
let _pause_delete =
ScopedFailPoint::new("mutation.delete_node_pre_primary_delete", "pause");
let delete_params = helpers::params(&[("$name", "Alice")]);
let delete = db.mutate(
"main",
MUTATION_QUERIES,
"remove_person",
&delete_params,
);
let delete = db.mutate("main", MUTATION_QUERIES, "remove_person", &delete_params);
tokio::pin!(delete);
let mut concurrent_update_succeeded = false;
@ -325,15 +321,18 @@ async fn inline_delete_conflict_writes_sidecar_before_rejecting() {
"set_age",
&mixed_params(&[("$name", "Bob")], &[("$age", 26)]),
)
.await
.is_ok()
.await
.is_ok()
{
concurrent_update_succeeded = true;
break;
}
tokio::time::sleep(std::time::Duration::from_millis(20)).await;
}
assert!(concurrent_update_succeeded, "concurrent update must land while delete is paused");
assert!(
concurrent_update_succeeded,
"concurrent update must land while delete is paused"
);
fail::remove("mutation.delete_node_pre_primary_delete");
let err = delete.await.unwrap_err();
@ -464,7 +463,7 @@ async fn recovery_rolls_forward_load_on_feature_branch() {
#[tokio::test]
async fn recovery_rolls_forward_ensure_indices_on_feature_branch() {
use lance_index::DatasetIndexExt;
use lance::index::DatasetIndexExt;
use omnigraph::loader::{LoadMode, load_jsonl};
use omnigraph::table_store::TableStore;
@ -925,13 +924,13 @@ async fn ensure_indices_stage_btree_failure_leaves_existing_tables_writable() {
.expect("Person mutation must succeed after the failed schema apply — existing tables are not drifted");
}
fn assert_no_staging_files(repo: &std::path::Path) {
fn assert_no_staging_files(graph: &std::path::Path) {
for name in [
"_schema.pg.staging",
"_schema.ir.json.staging",
"__schema_state.json.staging",
] {
let path = repo.join(name);
let path = graph.join(name);
assert!(
!path.exists(),
"staging file {} still exists after recovery",
@ -1164,7 +1163,7 @@ edge WorksAt: Person -> Company
// NEW schema (city column on Person, Tag node type) — not the old.
// Without the schema-staging coordination, the schema-state
// recovery would have deleted the staging files (because manifest
// hadn't advanced when it ran), leaving a corrupt repo with new-
// hadn't advanced when it ran), leaving a corrupt graph with new-
// schema data on disk but old-schema catalog.
let live_schema = std::fs::read_to_string(dir.path().join("_schema.pg")).unwrap();
assert!(

View file

@ -44,7 +44,7 @@ query insert_person_and_friend($name: String, $age: I32, $friend: String) {
}
"#;
/// Init a repo and load the standard test data.
/// Init a graph and load the standard test data.
pub async fn init_and_load(dir: &tempfile::TempDir) -> Omnigraph {
let uri = dir.path().to_str().unwrap();
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
@ -249,7 +249,7 @@ pub fn vector_and_string_params(
map
}
pub fn s3_test_repo_uri(suite: &str) -> Option<String> {
pub fn s3_test_graph_uri(suite: &str) -> Option<String> {
let bucket = std::env::var("OMNIGRAPH_S3_TEST_BUCKET").ok()?;
let prefix = std::env::var("OMNIGRAPH_S3_TEST_PREFIX")
.ok()

View file

@ -110,8 +110,8 @@ impl FollowUpMutation {
}
}
pub fn single_sidecar_operation_id(repo_root: &Path) -> String {
let ids = sidecar_operation_ids(repo_root);
pub fn single_sidecar_operation_id(graph_root: &Path) -> String {
let ids = sidecar_operation_ids(graph_root);
assert_eq!(
ids.len(),
1,
@ -121,8 +121,8 @@ pub fn single_sidecar_operation_id(repo_root: &Path) -> String {
ids.into_iter().next().unwrap()
}
pub fn sidecar_operation_ids(repo_root: &Path) -> Vec<String> {
let dir = repo_root.join("__recovery");
pub fn sidecar_operation_ids(graph_root: &Path) -> Vec<String> {
let dir = graph_root.join("__recovery");
if !dir.exists() {
return Vec::new();
}
@ -143,10 +143,10 @@ pub fn sidecar_operation_ids(repo_root: &Path) -> Vec<String> {
ids
}
pub async fn branch_head_commit_id(repo_root: &Path, branch: &str) -> Result<String> {
pub async fn branch_head_commit_id(graph_root: &Path, branch: &str) -> Result<String> {
let graph = match branch {
"main" => CommitGraph::open(&repo_uri(repo_root)).await?,
branch => CommitGraph::open_at_branch(&repo_uri(repo_root), branch).await?,
"main" => CommitGraph::open(&graph_uri(graph_root)).await?,
branch => CommitGraph::open_at_branch(&graph_uri(graph_root), branch).await?,
};
graph.head_commit_id().await?.ok_or_else(|| {
OmniError::manifest_internal(format!("commit graph for branch {branch} has no head"))
@ -154,52 +154,52 @@ pub async fn branch_head_commit_id(repo_root: &Path, branch: &str) -> Result<Str
}
pub async fn assert_post_recovery_invariants(
repo_root: &Path,
graph_root: &Path,
operation_id: &str,
expectation: RecoveryExpectation,
) -> Result<()> {
match expectation {
RecoveryExpectation::RolledForward { tables } => {
assert_sidecar_absent(repo_root, operation_id);
let audit = read_audit_row(repo_root, operation_id).await?;
assert_sidecar_absent(graph_root, operation_id);
let audit = read_audit_row(graph_root, operation_id).await?;
assert_eq!(
audit.recovery_kind, "RolledForward",
"audit row for {operation_id} recorded the wrong recovery_kind",
);
assert_manifest_pins_match_lance_heads(repo_root, &tables).await?;
assert_audit_to_versions_match_lance_heads(repo_root, &audit, &tables).await?;
assert_recovery_commit_shape(repo_root, &audit, &tables).await?;
assert_non_main_did_not_move_main(repo_root, &tables).await?;
assert_idempotent_reopen(repo_root, operation_id).await?;
run_follow_up_mutations(repo_root, tables).await?;
assert_manifest_pins_match_lance_heads(graph_root, &tables).await?;
assert_audit_to_versions_match_lance_heads(graph_root, &audit, &tables).await?;
assert_recovery_commit_shape(graph_root, &audit, &tables).await?;
assert_non_main_did_not_move_main(graph_root, &tables).await?;
assert_idempotent_reopen(graph_root, operation_id).await?;
run_follow_up_mutations(graph_root, tables).await?;
}
RecoveryExpectation::RolledBack { tables } => {
assert_sidecar_absent(repo_root, operation_id);
let audit = read_audit_row(repo_root, operation_id).await?;
assert_sidecar_absent(graph_root, operation_id);
let audit = read_audit_row(graph_root, operation_id).await?;
assert_eq!(
audit.recovery_kind, "RolledBack",
"audit row for {operation_id} recorded the wrong recovery_kind",
);
assert_rollback_outcomes_record_drift(&audit);
assert_recovery_commit_shape(repo_root, &audit, &tables).await?;
assert_non_main_did_not_move_main(repo_root, &tables).await?;
assert_idempotent_reopen(repo_root, operation_id).await?;
run_follow_up_mutations(repo_root, tables).await?;
assert_recovery_commit_shape(graph_root, &audit, &tables).await?;
assert_non_main_did_not_move_main(graph_root, &tables).await?;
assert_idempotent_reopen(graph_root, operation_id).await?;
run_follow_up_mutations(graph_root, tables).await?;
}
RecoveryExpectation::Deferred => {
assert!(
sidecar_path(repo_root, operation_id).exists(),
sidecar_path(graph_root, operation_id).exists(),
"deferred recovery must leave sidecar {operation_id} on disk",
);
assert!(
read_audit_row(repo_root, operation_id).await.is_err(),
read_audit_row(graph_root, operation_id).await.is_err(),
"deferred recovery must not record an audit row for {operation_id}",
);
}
RecoveryExpectation::NoOp => {
assert_sidecar_absent(repo_root, operation_id);
assert_sidecar_absent(graph_root, operation_id);
assert!(
read_audit_row(repo_root, operation_id).await.is_err(),
read_audit_row(graph_root, operation_id).await.is_err(),
"no-op recovery must not record an audit row for {operation_id}",
);
}
@ -216,24 +216,24 @@ fn branch_context(tables: &[TableExpectation]) -> Option<String> {
.map(str::to_string)
}
fn sidecar_path(repo_root: &Path, operation_id: &str) -> PathBuf {
repo_root
fn sidecar_path(graph_root: &Path, operation_id: &str) -> PathBuf {
graph_root
.join("__recovery")
.join(format!("{operation_id}.json"))
}
fn assert_sidecar_absent(repo_root: &Path, operation_id: &str) {
fn assert_sidecar_absent(graph_root: &Path, operation_id: &str) {
assert!(
!sidecar_path(repo_root, operation_id).exists(),
!sidecar_path(graph_root, operation_id).exists(),
"recovery sidecar {operation_id} must be deleted after successful recovery",
);
}
async fn assert_manifest_pins_match_lance_heads(
repo_root: &Path,
graph_root: &Path,
tables: &[TableExpectation],
) -> Result<()> {
let uri = repo_uri(repo_root);
let uri = graph_uri(graph_root);
let db = Omnigraph::open(&uri).await?;
for table in tables {
let (entry, lance_head) = entry_and_lance_head(&db, &uri, table).await?;
@ -254,11 +254,11 @@ async fn assert_manifest_pins_match_lance_heads(
}
async fn assert_audit_to_versions_match_lance_heads(
repo_root: &Path,
graph_root: &Path,
audit: &RecoveryAuditRow,
tables: &[TableExpectation],
) -> Result<()> {
let uri = repo_uri(repo_root);
let uri = graph_uri(graph_root);
let db = Omnigraph::open(&uri).await?;
for table in tables {
let (_, lance_head) = entry_and_lance_head(&db, &uri, table).await?;
@ -301,10 +301,10 @@ fn assert_rollback_outcomes_record_drift(audit: &RecoveryAuditRow) {
}
async fn assert_non_main_did_not_move_main(
repo_root: &Path,
graph_root: &Path,
tables: &[TableExpectation],
) -> Result<()> {
let uri = repo_uri(repo_root);
let uri = graph_uri(graph_root);
let db = Omnigraph::open(&uri).await?;
let main = db.snapshot_of(ReadTarget::branch("main")).await?;
for table in tables {
@ -327,14 +327,14 @@ async fn assert_non_main_did_not_move_main(
}
async fn assert_recovery_commit_shape(
repo_root: &Path,
graph_root: &Path,
audit: &RecoveryAuditRow,
tables: &[TableExpectation],
) -> Result<()> {
let branch = branch_context(tables);
let expected_parent = expected_recovery_parent(tables)?;
let branch = branch.as_deref();
let commit = read_recovery_commit(repo_root, audit, branch).await?;
let commit = read_recovery_commit(graph_root, audit, branch).await?;
assert_eq!(
commit.actor_id.as_deref(),
@ -362,7 +362,7 @@ async fn assert_recovery_commit_shape(
);
if let Some(branch) = branch {
let graph = CommitGraph::open_at_branch(&repo_uri(repo_root), branch).await?;
let graph = CommitGraph::open_at_branch(&graph_uri(graph_root), branch).await?;
let commits = graph.load_commits().await?;
let parent = commit.parent_commit_id.as_deref().ok_or_else(|| {
OmniError::manifest_internal(format!(
@ -403,12 +403,12 @@ fn expected_recovery_parent(tables: &[TableExpectation]) -> Result<Option<String
Ok(expected)
}
async fn assert_idempotent_reopen(repo_root: &Path, operation_id: &str) -> Result<()> {
let before = matching_audit_rows(repo_root, operation_id).await?;
let uri = repo_uri(repo_root);
async fn assert_idempotent_reopen(graph_root: &Path, operation_id: &str) -> Result<()> {
let before = matching_audit_rows(graph_root, operation_id).await?;
let uri = graph_uri(graph_root);
let _db = Omnigraph::open(&uri).await?;
assert_sidecar_absent(repo_root, operation_id);
let after = matching_audit_rows(repo_root, operation_id).await?;
assert_sidecar_absent(graph_root, operation_id);
let after = matching_audit_rows(graph_root, operation_id).await?;
assert_eq!(
after.len(),
before.len(),
@ -417,14 +417,14 @@ async fn assert_idempotent_reopen(repo_root: &Path, operation_id: &str) -> Resul
Ok(())
}
async fn run_follow_up_mutations(repo_root: &Path, tables: Vec<TableExpectation>) -> Result<()> {
async fn run_follow_up_mutations(graph_root: &Path, tables: Vec<TableExpectation>) -> Result<()> {
let mut db: Option<Omnigraph> = None;
for table in tables {
let Some(mutation) = table.follow_up_mutation else {
continue;
};
if db.is_none() {
db = Some(Omnigraph::open(&repo_uri(repo_root)).await?);
db = Some(Omnigraph::open(&graph_uri(graph_root)).await?);
}
let db = db.as_mut().unwrap();
db.mutate(
@ -480,11 +480,11 @@ async fn lance_head_for_entry(root_uri: &str, entry: &SubTableEntry) -> Result<u
}
async fn read_recovery_commit(
repo_root: &Path,
graph_root: &Path,
audit: &RecoveryAuditRow,
branch: Option<&str>,
) -> Result<GraphCommit> {
let uri = repo_uri(repo_root);
let uri = graph_uri(graph_root);
let graph = match branch {
Some(branch) => CommitGraph::open_at_branch(&uri, branch).await?,
None => CommitGraph::open(&uri).await?,
@ -502,8 +502,8 @@ async fn read_recovery_commit(
})
}
async fn read_audit_row(repo_root: &Path, operation_id: &str) -> Result<RecoveryAuditRow> {
let mut rows = matching_audit_rows(repo_root, operation_id).await?;
async fn read_audit_row(graph_root: &Path, operation_id: &str) -> Result<RecoveryAuditRow> {
let mut rows = matching_audit_rows(graph_root, operation_id).await?;
if rows.len() != 1 {
return Err(OmniError::manifest_internal(format!(
"expected exactly one recovery audit row for {operation_id}, got {}",
@ -514,10 +514,10 @@ async fn read_audit_row(repo_root: &Path, operation_id: &str) -> Result<Recovery
}
async fn matching_audit_rows(
repo_root: &Path,
graph_root: &Path,
operation_id: &str,
) -> Result<Vec<RecoveryAuditRow>> {
let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance");
let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance");
if !recoveries_dir.exists() {
return Ok(Vec::new());
}
@ -575,6 +575,6 @@ fn string_column<'a>(batch: &'a RecordBatch, name: &str) -> Result<&'a StringArr
})
}
fn repo_uri(repo_root: &Path) -> String {
repo_root.to_str().unwrap().to_string()
fn graph_uri(graph_root: &Path) -> String {
graph_root.to_str().unwrap().to_string()
}

View file

@ -0,0 +1,244 @@
//! Lance API surface guards.
//!
//! Each guard pins a Lance API surface that OmniGraph relies on. If a future
//! Lance bump silently renames a variant, restructures a public struct, or
//! flips a method to async, the corresponding guard either fails to compile
//! (compile-time guards) or fails at runtime (runtime guards). The purpose
//! is to turn silent-break risks into red CI bars on the *next* Lance bump,
//! rather than into wrong-state recovery in production.
//!
//! Pair this file with `docs/dev/lance.md`'s alignment audit stanza: any
//! Lance bump runs `cargo test -p omnigraph-engine --test lance_surface_guards`
//! first as the smoke check.
//!
//! ## Compile-only guards
//!
//! Functions prefixed with `_compile_` are gated with a broad `#[allow(...)]`
//! and never called. They exist to make `cargo build -p omnigraph-engine --tests`
//! enforce the API shape. Using `unimplemented!()` as a placeholder lets type
//! inference proceed without running anything.
//!
//! ## Runtime guards
//!
//! Functions decorated `#[tokio::test]` or `#[test]` actually run; they
//! construct real values and assert field shapes / types.
use std::sync::Arc;
use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, StringArray};
use arrow_schema::{DataType, Field, Schema};
use lance::Dataset;
use lance::dataset::builder::DatasetBuilder;
use lance::dataset::optimize::{CompactionOptions, compact_files};
use lance::dataset::write::delete::DeleteResult;
use lance::dataset::{MergeInsertBuilder, WhenMatched, WhenNotMatched, WriteMode, WriteParams};
use lance_file::version::LanceFileVersion;
use lance_namespace::LanceNamespace;
use lance_table::io::commit::ManifestNamingScheme;
/// Helper: write a new two-row dataset (`id: Utf8`, `value: Int32`) at `uri`
/// and return the opened handle.
///
/// The file version is pinned at V2_2 to match production write paths (blob
/// v2 requires V2_2; see `docs/dev/lance.md`). Panics if the batch cannot be
/// built or the write fails.
async fn fresh_dataset(uri: &str) -> Dataset {
    let fields = vec![
        Field::new("id", DataType::Utf8, false),
        Field::new("value", DataType::Int32, false),
    ];
    let schema = Arc::new(Schema::new(fields));

    let ids = StringArray::from(vec!["alice", "bob"]);
    let values = Int32Array::from(vec![1, 2]);
    let batch = RecordBatch::try_new(
        Arc::clone(&schema),
        vec![Arc::new(ids), Arc::new(values)],
    )
    .unwrap();

    let write_params = WriteParams {
        mode: WriteMode::Create,
        enable_stable_row_ids: true,
        data_storage_version: Some(LanceFileVersion::V2_2),
        ..Default::default()
    };

    // Lance consumes the data as a record-batch reader rather than a batch.
    let source = RecordBatchIterator::new(vec![Ok(batch)], schema);
    Dataset::write(source, uri, Some(write_params))
        .await
        .unwrap()
}
// --- Guard 1: LanceError::TooMuchWriteContention variant exists ------------
//
// `db/manifest/publisher.rs::map_lance_publish_error` pattern-matches on this
// variant to surface typed `OmniError::ManifestRowLevelCasContention`. If
// Lance renames the variant or removes the builder, this guard fails.
#[tokio::test]
async fn lance_error_too_much_write_contention_variant_exists() {
let err = lance::Error::too_much_write_contention("guard");
assert!(
matches!(err, lance::Error::TooMuchWriteContention { .. }),
"Lance::Error::TooMuchWriteContention variant missing or renamed; \
update db/manifest/publisher.rs::map_lance_publish_error and \
this guard, then re-pin docs/dev/lance.md."
);
}
// --- Guard 2: ManifestLocation field shape ---------------------------------
//
// `db/manifest/metadata.rs:84-88` reads `.path`, `.size`, `.e_tag`,
// `.naming_scheme` off `dataset.manifest_location()`. If any field renames
// or changes type, this guard fails to compile.
//
// This guard has two layers: the typed `let` bindings are checked at compile
// time, and the final `assert!` runs at test time.
#[tokio::test]
async fn manifest_location_field_shape() {
// Write a throwaway dataset under a tempdir so there is a real manifest
// location to inspect.
let dir = tempfile::tempdir().unwrap();
let uri = dir.path().join("guard.lance");
let ds = fresh_dataset(uri.to_str().unwrap()).await;
let loc = ds.manifest_location();
// Explicit type bindings — these are the load-bearing assertions. If a
// type drifts (e.g. .size: Option<u64> → .size: u64), this fails to
// compile.
let _path: &object_store::path::Path = &loc.path;
let _size: Option<u64> = loc.size;
let _e_tag: Option<String> = loc.e_tag.clone();
let _scheme: ManifestNamingScheme = loc.naming_scheme;
// Runtime sanity — naming_scheme should produce a Debug string we use
// verbatim in `TableVersionMetadata::naming_scheme`.
assert!(!format!("{:?}", loc.naming_scheme).is_empty());
}
// --- Guard 3: checkout_version + restore async chain -----------------------
//
// `db/manifest/recovery.rs:505-522` chains `Dataset::open(...).await?
// .checkout_version(N).await?.restore().await?` as the recovery rollback
// hammer. Compile-only — never runs.
//
// The broad `#[allow]` is deliberate: the function is never called and its
// body diverges at `unimplemented!()`, so every statement after that is
// unreachable and every binding is unused by design.
#[allow(
dead_code,
unreachable_code,
unused_variables,
unused_mut,
clippy::diverging_sub_expression
)]
async fn _compile_checkout_version_then_restore_signature() -> lance::Result<()> {
// Placeholder value: only its declared type matters to the type checker.
let ds: Dataset = unimplemented!();
// Pins that `checkout_version` accepts a `u64` version, is awaitable, and
// resolves to a `Dataset`.
let mut ds: Dataset = ds.checkout_version(1u64).await?;
// `restore()` takes `&mut self` and returns `Result<()>`; the dataset
// mutates in place. If Lance flips this to return a fresh `Dataset`
// (consuming `self`), this guard fails to compile.
let _: () = ds.restore().await?;
Ok(())
}
// --- Guard 4: DatasetBuilder::from_namespace fluent chain ------------------
//
// `db/manifest/namespace.rs:162-174` chains
// `DatasetBuilder::from_namespace(ns, vec![id]).await?.with_branch(...).with_version(...).load().await?`.
// Compile-only.
//
// Never called, so the lints below are silenced on purpose. Each step is
// rebound with an explicit type so that a change in any return type along the
// chain becomes a compile error here.
#[allow(
dead_code,
unreachable_code,
unused_variables,
unused_mut,
clippy::diverging_sub_expression
)]
async fn _compile_dataset_builder_from_namespace_signature(
ns: Arc<dyn LanceNamespace>,
) -> lance::Result<()> {
// `from_namespace` is async and fallible; it yields the builder itself.
let builder: DatasetBuilder =
DatasetBuilder::from_namespace(ns, vec!["table".to_string()]).await?;
// The two `with_*` steps are synchronous and hand the builder back by value.
let builder: DatasetBuilder = builder.with_branch("b", None);
let builder: DatasetBuilder = builder.with_version(1u64);
// `load` is the async, fallible terminal step that produces the `Dataset`.
let _ds: Dataset = builder.load().await?;
Ok(())
}
// --- Guard 5: MergeInsertBuilder fluent chain ------------------------------
//
// `db/manifest/publisher.rs:370-391` is the manifest CAS. If any method on
// the builder renames or changes signature, the publisher silently breaks.
// Compile-only.
//
// Never called: the body diverges at the first `unimplemented!()`, which is
// why unreachable-code and unused-binding lints are silenced below.
#[allow(
dead_code,
unreachable_code,
unused_variables,
unused_mut,
clippy::diverging_sub_expression
)]
async fn _compile_merge_insert_builder_method_chain() -> lance::Result<()> {
use lance::dataset::MergeStats;
// Placeholder dataset handle: only the `Arc<Dataset>` type matters.
let ds: Arc<Dataset> = unimplemented!();
// NOTE(review): presumably the same builder calls the publisher makes —
// keep in sync with `db/manifest/publisher.rs` if that chain changes.
let job = MergeInsertBuilder::try_new(ds, vec!["object_id".to_string()])?
.when_matched(WhenMatched::UpdateAll)
.when_not_matched(WhenNotMatched::InsertAll)
.conflict_retries(0)
.use_index(false)
.try_build()?;
// execute_reader takes `impl StreamingWriteSource` (lance trait), which
// RecordBatchIterator implements. Pin the return shape
// `(Arc<Dataset>, MergeStats)` — the publisher's CAS loop depends on
// both: the new Dataset to advance HEAD, the stats for the audit row.
let source: RecordBatchIterator<Vec<Result<RecordBatch, arrow_schema::ArrowError>>> =
unimplemented!();
let result: (Arc<Dataset>, MergeStats) = job.execute_reader(source).await?;
// Destructure with explicit types so each tuple element is pinned on its own.
let _ds: Arc<Dataset> = result.0;
let _stats: MergeStats = result.1;
Ok(())
}
// --- Guard 6: WriteParams::default() leaves data_storage_version = None ----
//
// The explicit V2_2 pin is load-bearing for blob v2: writing with V2_1 made
// 13 blob tests fail with "Blob v2 requires file version >= 2.2". Should
// Lance start pinning a version in its own default, audit every
// `data_storage_version: Some(LanceFileVersion::V2_2)` site.
#[test]
fn write_params_default_does_not_set_storage_version() {
    // Read the field straight off a default-constructed value; nothing else
    // about `WriteParams` is relevant to this guard.
    let default_version = WriteParams::default().data_storage_version;
    assert_eq!(
        default_version, None,
        "WriteParams::default().data_storage_version is no longer None; \
         audit every explicit V2_2 pin (see rg 'LanceFileVersion::V2_2')."
    );
}
// --- Guard 7: compact_files signature --------------------------------------
//
// `db/omnigraph/optimize.rs:107` calls `compact_files(&mut ds, options, None)`.
// Compile-only.
//
// Never called: the body diverges at `unimplemented!()`, hence the broad
// `#[allow]` for unreachable code and unused bindings.
#[allow(
dead_code,
unreachable_code,
unused_variables,
unused_mut,
clippy::diverging_sub_expression
)]
async fn _compile_compact_files_signature() -> lance::Result<()> {
// Placeholder dataset: only the `&mut Dataset` borrow below is being checked.
let mut ds: Dataset = unimplemented!();
let options: CompactionOptions = CompactionOptions::default();
// Pins the argument list (`&mut Dataset`, options by value, an optional
// third argument) and that the call is awaitable and fallible. The metrics
// type is deliberately left inferred.
let _metrics = compact_files(&mut ds, options, None).await?;
Ok(())
}
// --- Guard 8: Dataset::delete returns DeleteResult { new_dataset, num_deleted_rows } ---
//
// `table_store.rs::delete_where` consumes both fields. When MR-A migrates
// `delete_where` to two-phase via `DeleteBuilder::execute_uncommitted`, this
// guard updates to pin the staged path. Compile-only.
//
// Never called: the body diverges at `unimplemented!()`, hence the broad
// `#[allow]` for unreachable code and unused bindings.
#[allow(
dead_code,
unreachable_code,
unused_variables,
unused_mut,
clippy::diverging_sub_expression
)]
async fn _compile_delete_result_field_shape() -> lance::Result<()> {
// Placeholder dataset: only its declared type matters to the type checker.
let mut ds: Dataset = unimplemented!();
// Pins that `delete` takes a string predicate, is awaitable and fallible,
// and resolves to `DeleteResult`.
let result: DeleteResult = ds.delete("x = 1").await?;
// Typed bindings pin both the field names and their types.
let _new_dataset: Arc<Dataset> = result.new_dataset;
let _num_deleted: u64 = result.num_deleted_rows;
Ok(())
}

View file

@ -3,13 +3,13 @@ mod helpers;
use std::fs;
use omnigraph::db::{Omnigraph, ReadTarget};
use omnigraph_compiler::{build_schema_ir, schema_ir_pretty_json};
use omnigraph_compiler::schema::parser::parse_schema;
use omnigraph_compiler::{build_schema_ir, schema_ir_pretty_json};
use helpers::*;
#[tokio::test]
async fn init_creates_repo() {
async fn init_creates_graph() {
let dir = tempfile::tempdir().unwrap();
let uri = dir.path().to_str().unwrap();
@ -34,7 +34,7 @@ async fn init_creates_repo() {
}
#[tokio::test]
async fn open_reads_existing_repo() {
async fn open_reads_existing_graph() {
let dir = tempfile::tempdir().unwrap();
let uri = dir.path().to_str().unwrap();
@ -49,7 +49,7 @@ async fn open_reads_existing_repo() {
}
#[tokio::test]
async fn open_bootstraps_legacy_schema_state_for_main_only_repo() {
async fn open_bootstraps_legacy_schema_state_for_main_only_graph() {
let dir = tempfile::tempdir().unwrap();
let uri = dir.path().to_str().unwrap();
Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
@ -64,7 +64,7 @@ async fn open_bootstraps_legacy_schema_state_for_main_only_repo() {
}
#[tokio::test]
async fn open_rejects_legacy_repo_with_public_branch() {
async fn open_rejects_legacy_graph_with_public_branch() {
let dir = tempfile::tempdir().unwrap();
let uri = dir.path().to_str().unwrap();
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
@ -74,7 +74,7 @@ async fn open_rejects_legacy_repo_with_public_branch() {
fs::remove_file(dir.path().join("__schema_state.json")).unwrap();
let err = match Omnigraph::open(uri).await {
Ok(_) => panic!("expected legacy repo with public branch to fail schema bootstrap"),
Ok(_) => panic!("expected legacy graph with public branch to fail schema bootstrap"),
Err(err) => err,
};
assert!(

View file

@ -1,6 +1,6 @@
// Maintenance tests: `optimize` (Lance compact_files) and `cleanup`
// (Lance cleanup_old_versions) at the graph level. Covers no-op edges
// (empty repo, already-optimized repo), the policy-validation contract on
// (empty graph, already-optimized graph), the policy-validation contract on
// `cleanup`, and the keep-versions cap that protects head.
mod helpers;
@ -13,7 +13,7 @@ use omnigraph::loader::{LoadMode, load_jsonl};
use helpers::{TEST_DATA, TEST_SCHEMA, count_rows, init_and_load};
#[tokio::test]
async fn optimize_on_empty_repo_returns_stats_per_table_with_no_changes() {
async fn optimize_on_empty_graph_returns_stats_per_table_with_no_changes() {
let dir = tempfile::tempdir().unwrap();
let uri = dir.path().to_str().unwrap();
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
@ -37,7 +37,7 @@ async fn optimize_after_load_then_again_is_idempotent() {
// First pass may compact (load wrote real fragments).
let _first = db.optimize().await.unwrap();
// Second pass should be a no-op: already-compacted repo produces no
// Second pass should be a no-op: already-compacted graph produces no
// fragments_removed / fragments_added.
let second = db.optimize().await.unwrap();
for s in &second {
@ -119,7 +119,9 @@ async fn cleanup_older_than_zero_preserves_head() {
// Smoke test: after aggressive cleanup, we can still read and write the
// graph — head wasn't pruned.
load_jsonl(&mut db, TEST_DATA, LoadMode::Merge).await.unwrap();
load_jsonl(&mut db, TEST_DATA, LoadMode::Merge)
.await
.unwrap();
}
#[tokio::test]
@ -151,6 +153,8 @@ async fn cleanup_then_optimize_preserves_rows_and_table_remains_writable() {
assert_eq!(count_rows(&db, "node:Company").await, companies_before);
// Table is still writable after the cleanup+optimize sequence.
load_jsonl(&mut db, TEST_DATA, LoadMode::Merge).await.unwrap();
load_jsonl(&mut db, TEST_DATA, LoadMode::Merge)
.await
.unwrap();
assert_eq!(count_rows(&db, "node:Person").await, people_before);
}

View file

@ -22,16 +22,16 @@ use helpers::recovery::{RecoveryExpectation, TableExpectation, assert_post_recov
const TEST_SCHEMA: &str = include_str!("fixtures/test.pg");
fn write_sidecar_file(repo_root: &Path, operation_id: &str, json: &str) {
let dir = repo_root.join("__recovery");
/// Writes `json` to `<graph_root>/__recovery/<operation_id>.json`, creating
/// the `__recovery` directory first when it does not exist yet.
///
/// Test helper: any filesystem failure panics, naming the path that could
/// not be created or written so the failing test points at the real cause.
fn write_sidecar_file(graph_root: &Path, operation_id: &str, json: &str) {
    let dir = graph_root.join("__recovery");
    // `create_dir_all` succeeds when the directory already exists, so no
    // separate `exists()` probe (and no check-then-create window) is needed.
    std::fs::create_dir_all(&dir)
        .unwrap_or_else(|err| panic!("failed to create {}: {}", dir.display(), err));
    let sidecar = dir.join(format!("{}.json", operation_id));
    std::fs::write(&sidecar, json)
        .unwrap_or_else(|err| panic!("failed to write {}: {}", sidecar.display(), err));
}
fn list_recovery_dir(repo_root: &Path) -> Vec<String> {
let dir = repo_root.join("__recovery");
fn list_recovery_dir(graph_root: &Path) -> Vec<String> {
let dir = graph_root.join("__recovery");
if !dir.exists() {
return Vec::new();
}
@ -41,7 +41,7 @@ fn list_recovery_dir(repo_root: &Path) -> Vec<String> {
.collect()
}
/// Full URI of a node-type Lance dataset under a fresh Omnigraph repo.
/// Full URI of a node-type Lance dataset under a fresh Omnigraph graph.
/// Mirrors the `nodes/{fnv1a64-hex(type_name)}` layout in `db/manifest/layout.rs`.
fn node_table_uri(root: &str, type_name: &str) -> String {
let h: u64 = fnv1a64(type_name.as_bytes());
@ -283,8 +283,8 @@ async fn recovery_rolls_back_synthetic_drift_on_open() {
// =====================================================================
/// Helper: count rows in `_graph_commit_recoveries.lance` at the given root.
async fn count_recovery_audit_rows(repo_root: &Path) -> usize {
let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance");
async fn count_recovery_audit_rows(graph_root: &Path) -> usize {
let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance");
if !recoveries_dir.exists() {
return 0;
}
@ -306,9 +306,9 @@ async fn count_recovery_audit_rows(repo_root: &Path) -> usize {
/// Helper: read the most recent recovery audit row's `recovery_kind`,
/// `recovery_for_actor`, and `operation_id`. Returns `None` if no rows.
async fn read_latest_recovery_audit(
repo_root: &Path,
graph_root: &Path,
) -> Option<(String, Option<String>, String, String)> {
let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance");
let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance");
if !recoveries_dir.exists() {
return None;
}
@ -357,8 +357,8 @@ async fn read_latest_recovery_audit(
/// storage order (multiple batches concatenated). Used by the
/// multi-sidecar fresh-snapshot test as a diagnostic alongside the
/// post-recovery Lance HEAD assertion.
async fn list_recovery_audit_kinds(repo_root: &Path) -> Vec<String> {
let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance");
async fn list_recovery_audit_kinds(graph_root: &Path) -> Vec<String> {
let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance");
if !recoveries_dir.exists() {
return Vec::new();
}
@ -391,8 +391,8 @@ async fn list_recovery_audit_kinds(repo_root: &Path) -> Vec<String> {
}
/// Helper: count `_graph_commits.lance` rows tagged with the recovery actor.
async fn count_recovery_actor_commits(repo_root: &Path) -> usize {
let actors_dir = repo_root.join("_graph_commit_actors.lance");
async fn count_recovery_actor_commits(graph_root: &Path) -> usize {
let actors_dir = graph_root.join("_graph_commit_actors.lance");
if !actors_dir.exists() {
return 0;
}
@ -908,7 +908,7 @@ async fn recovery_ensure_indices_steady_state_no_sidecar() {
/// ran) and rolls back any sibling table's legitimate index work.
///
/// Integration verification: after a real init + ensure_indices on a
/// repo where every table is empty, the recovery sweep must complete
/// graph where every table is empty, the recovery sweep must complete
/// cleanly (no leftover sidecar) AND the next ensure_indices must also
/// leave no sidecar — proving the empty-table-scoping behavior lets
/// steady-state runs incur zero sidecar I/O. The
@ -930,7 +930,7 @@ async fn recovery_ensure_indices_handles_empty_tables() {
db.ensure_indices().await.unwrap();
assert!(
list_recovery_dir(dir.path()).is_empty(),
"ensure_indices on an all-empty repo must not leave a sidecar"
"ensure_indices on an all-empty graph must not leave a sidecar"
);
// Reopen + ensure_indices — still steady state, still no sidecar.
drop(db);
@ -938,7 +938,7 @@ async fn recovery_ensure_indices_handles_empty_tables() {
db.ensure_indices().await.unwrap();
assert!(
list_recovery_dir(dir.path()).is_empty(),
"second ensure_indices on an all-empty repo must also not leave a sidecar"
"second ensure_indices on an all-empty graph must also not leave a sidecar"
);
}

View file

@ -521,6 +521,10 @@ query delete_two_persons($first: String, $second: String) {
delete Person where name = $first
delete Person where name = $second
}
query update_age_by_name($name: String, $age: I32) {
update Person set { age: $age } where name = $name
}
"#;
/// D₂: a query mixing inserts/updates with deletes is rejected at parse
@ -1362,3 +1366,85 @@ query insert_then_update_note(
.unwrap();
assert_eq!(qr.num_rows(), 0, "letter must not be visible after early error");
}
/// MR-920 regression: two sequential `update T set {f:v} where x=y`
/// invocations against the same row must both succeed. Pre-fix, the
/// second one failed with `Ambiguous merge inserts are prohibited:
/// multiple source rows match the same target row on (id = "Alice")`
/// even though the scan returned exactly one row.
///
/// Root cause hypothesis (per MR-920): Lance's
/// `processed_row_ids: Mutex<HashSet<u64>>`
/// (`src/dataset/write/merge_insert.rs:2099`) double-processes the
/// same target row_id against datasets previously rewritten by
/// merge_insert. `SourceDedupeBehavior::FirstSeen` makes Lance skip
/// rather than error.
///
/// After each update the test collects the `age` of every `Person` row
/// named "Alice" and requires that list to be exactly one entry holding
/// the value just written, so both a lost update and a duplicated row
/// fail the test.
///
/// Companion to `consistency.rs::load_merge_repeated_against_overlapping_keys_succeeds`
/// (PR #98 / Window 1 of the bug class via the load surface).
#[tokio::test]
async fn second_sequential_update_on_same_row_succeeds() {
    let dir = tempfile::tempdir().unwrap();
    let mut db = init_and_load(&dir).await;

    db.mutate(
        "main",
        STAGED_QUERIES,
        "update_age_by_name",
        &mixed_params(&[("$name", "Alice")], &[("$age", 99)]),
    )
    .await
    .expect("first sequential update on Alice must succeed");

    // Ages of every row named "Alice" after the first update.
    let batches = read_table(&db, "node:Person").await;
    let mut alice_ages: Vec<Option<i32>> = Vec::new();
    for batch in &batches {
        let names = batch
            .column_by_name("name")
            .unwrap()
            .as_any()
            .downcast_ref::<arrow_array::StringArray>()
            .unwrap();
        let ages = batch
            .column_by_name("age")
            .unwrap()
            .as_any()
            .downcast_ref::<arrow_array::Int32Array>()
            .unwrap();
        for i in 0..batch.num_rows() {
            if names.is_valid(i) && names.value(i) == "Alice" {
                alice_ages.push(ages.is_valid(i).then(|| ages.value(i)));
            }
        }
    }
    assert_eq!(
        alice_ages,
        vec![Some(99)],
        "after first update, exactly one Alice row should be visible and carry the new age"
    );

    db.mutate(
        "main",
        STAGED_QUERIES,
        "update_age_by_name",
        &mixed_params(&[("$name", "Alice")], &[("$age", 42)]),
    )
    .await
    .expect("second sequential update on Alice must succeed");

    // Same scan after the second update: a duplicated Alice row (the
    // double-processing symptom) would show up as a second entry here.
    let batches = read_table(&db, "node:Person").await;
    let mut alice_ages: Vec<Option<i32>> = Vec::new();
    for batch in &batches {
        let names = batch
            .column_by_name("name")
            .unwrap()
            .as_any()
            .downcast_ref::<arrow_array::StringArray>()
            .unwrap();
        let ages = batch
            .column_by_name("age")
            .unwrap()
            .as_any()
            .downcast_ref::<arrow_array::Int32Array>()
            .unwrap();
        for i in 0..batch.num_rows() {
            if names.is_valid(i) && names.value(i) == "Alice" {
                alice_ages.push(ages.is_valid(i).then(|| ages.value(i)));
            }
        }
    }
    assert_eq!(
        alice_ages,
        vec![Some(42)],
        "Alice's age must reflect the second update on exactly one row"
    );
}

View file

@ -7,8 +7,8 @@ use omnigraph::loader::{LoadMode, load_jsonl};
use helpers::*;
#[tokio::test(flavor = "multi_thread")]
async fn s3_compatible_repo_lifecycle_works() {
let Some(uri) = s3_test_repo_uri("omnigraph-runtime") else {
async fn s3_compatible_graph_lifecycle_works() {
let Some(uri) = s3_test_graph_uri("omnigraph-runtime") else {
eprintln!("skipping s3 runtime test: OMNIGRAPH_S3_TEST_BUCKET is not set");
return;
};
@ -81,7 +81,7 @@ async fn s3_compatible_repo_lifecycle_works() {
#[tokio::test(flavor = "multi_thread")]
async fn s3_branch_change_merge_flow_works() {
let Some(uri) = s3_test_repo_uri("omnigraph-branching") else {
let Some(uri) = s3_test_graph_uri("omnigraph-branching") else {
eprintln!("skipping s3 branch test: OMNIGRAPH_S3_TEST_BUCKET is not set");
return;
};
@ -135,7 +135,7 @@ async fn s3_branch_change_merge_flow_works() {
#[tokio::test(flavor = "multi_thread")]
async fn s3_public_load_uses_hidden_run_and_publishes() {
let Some(uri) = s3_test_repo_uri("omnigraph-public-load") else {
let Some(uri) = s3_test_graph_uri("omnigraph-public-load") else {
eprintln!("skipping s3 public load test: OMNIGRAPH_S3_TEST_BUCKET is not set");
return;
};

View file

@ -74,7 +74,7 @@ async fn apply_schema_rejects_when_non_main_branch_exists() {
let err = db.apply_schema(&desired).await.unwrap_err();
assert!(
err.to_string()
.contains("schema apply requires a repo with only main")
.contains("schema apply requires a graph with only main")
);
}
@ -402,10 +402,7 @@ async fn apply_schema_rejects_adding_a_required_property_without_backfill() {
// Add `email: String` (required, non-nullable, no @rename_from). Existing
// rows have no value to fill in, so this is unsupported in v1.
let desired = TEST_SCHEMA.replace(
" age: I32?\n}",
" age: I32?\n email: String\n}",
);
let desired = TEST_SCHEMA.replace(" age: I32?\n}", " age: I32?\n email: String\n}");
let err = db.apply_schema(&desired).await.unwrap_err();
let msg = err.to_string();
assert!(
@ -437,7 +434,10 @@ async fn plan_schema_for_property_type_narrowing_is_not_supported() {
.unwrap();
let plan = db.plan_schema(TEST_SCHEMA).await.unwrap();
assert!(!plan.supported, "narrowing I64 -> I32 must not be supported");
assert!(
!plan.supported,
"narrowing I64 -> I32 must not be supported"
);
assert!(plan.steps.iter().any(|step| matches!(
step,
SchemaMigrationStep::UnsupportedChange { code, .. }

View file

@ -3,7 +3,8 @@ mod helpers;
use std::env;
use arrow_array::{Array, StringArray};
use lance_index::{DatasetIndexExt, is_system_index};
use lance::index::DatasetIndexExt;
use lance_index::is_system_index;
use serial_test::serial;
use omnigraph::db::Omnigraph;