Merge origin/main (v0.6.2, cluster Stage 2B) into ragnorc/scrutinize-rfc-002

Conflict resolutions: - cli/main.rs: keep the omnigraph_api_types import (branch extraction) and add main's omnigraph_cluster import; test-import list takes the branch's ResolvedCliGraph/is_remote_uri additions - Cargo manifests: union of deps — branch's config/queries/api-types crates plus main's cluster crate; all versions unified at 0.6.2 (new crates bumped from 0.6.1) - cli/Cargo.toml: omnigraph-server dep stays dropped (branch decision; CLI references it only in comments and the test harness binary spawn) - AGENTS.md: 0.6.2 + union crate list - cli-reference.md: branch's --graph/layered-config sections + main's cluster/repair rows - Cargo.lock regenerated cargo test --workspace --locked passes; scripts/check-agents-md.sh passes. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-27 02:39:38 +02:00 · 2026-06-09 22:02:50 +03:00 · 2026-06-09 22:02:50 +03:00 · 48912167d0
commit 48912167d0
parent d5a091336a 737a0f6e45
91 changed files with 10889 additions and 736 deletions
--- a/crates/omnigraph-api-types/Cargo.toml
+++ b/crates/omnigraph-api-types/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "omnigraph-api-types"
-version = "0.6.1"
+version = "0.6.2"
 edition = "2024"
 description = "HTTP request/response types (OpenAPI schemas) for the Omnigraph graph database."
 license = "MIT"
@ -9,9 +9,9 @@ homepage = "https://github.com/ModernRelay/omnigraph"
 documentation = "https://docs.rs/omnigraph-api-types"

 [dependencies]
-omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.1" }
-omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.1" }
-omnigraph-queries = { path = "../omnigraph-queries", version = "0.6.1" }
+omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.2" }
+omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.2" }
+omnigraph-queries = { path = "../omnigraph-queries", version = "0.6.2" }
 serde = { workspace = true }
 serde_json = { workspace = true }
 utoipa = { workspace = true }
--- a/crates/omnigraph-cli/Cargo.toml
+++ b/crates/omnigraph-cli/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "omnigraph-cli"
-version = "0.6.1"
+version = "0.6.2"
 edition = "2024"
 description = "CLI for the Omnigraph graph database."
 license = "MIT"
@ -13,12 +13,13 @@ name = "omnigraph"
 path = "src/main.rs"

 [dependencies]
-omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.1" }
-omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.1" }
-omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.1" }
-omnigraph-config = { path = "../omnigraph-config", version = "0.6.1" }
-omnigraph-queries = { path = "../omnigraph-queries", version = "0.6.1" }
-omnigraph-api-types = { path = "../omnigraph-api-types", version = "0.6.1" }
+omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.2" }
+omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.2" }
+omnigraph-cluster = { path = "../omnigraph-cluster", version = "0.6.2" }
+omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.2" }
+omnigraph-config = { path = "../omnigraph-config", version = "0.6.2" }
+omnigraph-queries = { path = "../omnigraph-queries", version = "0.6.2" }
+omnigraph-api-types = { path = "../omnigraph-api-types", version = "0.6.2" }
 clap = { workspace = true }
 color-eyre = { workspace = true }
 serde = { workspace = true }
--- a/crates/omnigraph-cli/src/main.rs
+++ b/crates/omnigraph-cli/src/main.rs
@ -18,6 +18,10 @@ use omnigraph_api_types::{
    SnapshotTableOutput, commit_output, ingest_output, read_output, schema_apply_output,
    snapshot_payload,
 };
+use omnigraph_cluster::{
+    DiagnosticSeverity, PlanOutput, StateSyncOutput, StatusOutput, ValidateOutput,
+    import_config_dir, plan_config_dir, refresh_config_dir, status_config_dir, validate_config_dir,
+};
 use omnigraph_compiler::query::parser::parse_query;
 use omnigraph_compiler::schema::parser::parse_schema;
 use omnigraph_compiler::{
@ -286,6 +290,25 @@ enum Command {
        #[arg(long)]
        json: bool,
    },
+    /// Classify and explicitly repair manifest/head drift
+    Repair {
+        /// Graph URI
+        uri: Option<String>,
+        #[arg(long)]
+        target: Option<String>,
+        #[arg(long)]
+        config: Option<PathBuf>,
+        /// Publish verified maintenance drift. Without this flag, repair only
+        /// previews what it would do.
+        #[arg(long)]
+        confirm: bool,
+        /// Also publish suspicious or unverifiable drift. Requires
+        /// `--confirm`; use only after operator review.
+        #[arg(long, requires = "confirm")]
+        force: bool,
+        #[arg(long)]
+        json: bool,
+    },
    /// Remove old Lance versions from every table of the graph (destructive)
    Cleanup {
        /// Graph URI
@ -308,6 +331,11 @@ enum Command {
        #[arg(long)]
        json: bool,
    },
+    /// Validate and plan read-only cluster configuration.
+    Cluster {
+        #[command(subcommand)]
+        command: ClusterCommand,
+    },
    /// Manage graphs on a multi-graph server (MR-668)
    Graphs {
        #[command(subcommand)]
@ -328,6 +356,55 @@ enum Command {
    },
 }

+#[derive(Debug, Subcommand)]
+enum ClusterCommand {
+    /// Validate cluster.yaml and referenced schemas, queries, and policy files.
+    Validate {
+        /// Cluster config directory containing cluster.yaml.
+        #[arg(long, default_value = ".")]
+        config: PathBuf,
+        /// Emit JSON instead of human text.
+        #[arg(long)]
+        json: bool,
+    },
+    /// Produce a read-only plan by diffing cluster.yaml against __cluster/state.json.
+    Plan {
+        /// Cluster config directory containing cluster.yaml.
+        #[arg(long, default_value = ".")]
+        config: PathBuf,
+        /// Emit JSON instead of human text.
+        #[arg(long)]
+        json: bool,
+    },
+    /// Read the local JSON state ledger without scanning live graph resources.
+    Status {
+        /// Cluster config directory containing cluster.yaml.
+        #[arg(long, default_value = ".")]
+        config: PathBuf,
+        /// Emit JSON instead of human text.
+        #[arg(long)]
+        json: bool,
+    },
+    /// Refresh existing local JSON state from declared graph observations.
+    Refresh {
+        /// Cluster config directory containing cluster.yaml.
+        #[arg(long, default_value = ".")]
+        config: PathBuf,
+        /// Emit JSON instead of human text.
+        #[arg(long)]
+        json: bool,
+    },
+    /// Import initial local JSON state from declared graph observations.
+    Import {
+        /// Cluster config directory containing cluster.yaml.
+        #[arg(long, default_value = ".")]
+        config: PathBuf,
+        /// Emit JSON instead of human text.
+        #[arg(long)]
+        json: bool,
+    },
+}
+
 /// Operations on the graph registry of a multi-graph server (MR-668).
 ///
 /// All operations target a remote multi-graph server URL (http:// or
@ -720,6 +797,159 @@ fn print_json<T: Serialize>(value: &T) -> Result<()> {
    Ok(())
 }

+fn print_cluster_validate_human(output: &ValidateOutput) {
+    if output.ok {
+        println!(
+            "cluster config valid: {} resource(s), {} dependency edge(s)",
+            output.resources.len(),
+            output.dependencies.len()
+        );
+    } else {
+        println!("cluster config invalid");
+    }
+    print_cluster_diagnostics(&output.diagnostics);
+}
+
+fn print_cluster_plan_human(output: &PlanOutput) {
+    if output.ok {
+        println!(
+            "cluster plan: {} change(s), {} approval gate(s)",
+            output.changes.len(),
+            output.approvals_required.len()
+        );
+        for change in &output.changes {
+            println!("  {:?} {}", change.operation, change.resource);
+        }
+        if output.changes.is_empty() {
+            println!("  no changes");
+        }
+    } else {
+        println!("cluster plan failed");
+    }
+    print_cluster_diagnostics(&output.diagnostics);
+}
+
+fn print_cluster_status_human(output: &StatusOutput) {
+    if output.ok {
+        let state = &output.state_observations;
+        if state.state_found {
+            println!(
+                "cluster state: revision {}, {} resource(s)",
+                state.state_revision, state.resource_count
+            );
+            if let Some(digest) = state.applied_config_digest.as_deref() {
+                println!("  applied config: {digest}");
+            }
+            if state.locked {
+                match state.lock_id.as_deref() {
+                    Some(lock_id) => println!("  lock: held ({lock_id})"),
+                    None => println!("  lock: held"),
+                }
+            } else {
+                println!("  lock: not held");
+            }
+        } else {
+            println!("cluster state missing");
+        }
+    } else {
+        println!("cluster status failed");
+    }
+    print_cluster_diagnostics(&output.diagnostics);
+}
+
+fn print_cluster_state_sync_human(output: &StateSyncOutput) {
+    let operation = match output.operation {
+        omnigraph_cluster::StateSyncOperation::Refresh => "refresh",
+        omnigraph_cluster::StateSyncOperation::Import => "import",
+    };
+    if output.ok {
+        let state = &output.state_observations;
+        println!(
+            "cluster {operation}: revision {}, {} resource(s)",
+            state.state_revision, state.resource_count
+        );
+        if let Some(cas) = state.state_cas.as_deref() {
+            println!("  state_cas: {cas}");
+        }
+        if state.locked {
+            match state.lock_id.as_deref() {
+                Some(lock_id) => println!("  lock: acquired ({lock_id})"),
+                None => println!("  lock: acquired"),
+            }
+        } else {
+            println!("  lock: not acquired");
+        }
+    } else {
+        println!("cluster {operation} failed");
+    }
+    print_cluster_diagnostics(&output.diagnostics);
+}
+
+fn print_cluster_diagnostics(diagnostics: &[omnigraph_cluster::Diagnostic]) {
+    for diagnostic in diagnostics {
+        let label = match diagnostic.severity {
+            DiagnosticSeverity::Error => "ERROR",
+            DiagnosticSeverity::Warning => "WARN ",
+        };
+        println!(
+            "{label} {} {}: {}",
+            diagnostic.code, diagnostic.path, diagnostic.message
+        );
+    }
+}
+
+fn finish_cluster_validate(output: &ValidateOutput, json: bool) -> Result<()> {
+    if json {
+        print_json(output)?;
+    } else {
+        print_cluster_validate_human(output);
+    }
+    if !output.ok {
+        io::stdout().flush()?;
+        std::process::exit(1);
+    }
+    Ok(())
+}
+
+fn finish_cluster_plan(output: &PlanOutput, json: bool) -> Result<()> {
+    if json {
+        print_json(output)?;
+    } else {
+        print_cluster_plan_human(output);
+    }
+    if !output.ok {
+        io::stdout().flush()?;
+        std::process::exit(1);
+    }
+    Ok(())
+}
+
+fn finish_cluster_status(output: &StatusOutput, json: bool) -> Result<()> {
+    if json {
+        print_json(output)?;
+    } else {
+        print_cluster_status_human(output);
+    }
+    if !output.ok {
+        io::stdout().flush()?;
+        std::process::exit(1);
+    }
+    Ok(())
+}
+
+fn finish_cluster_state_sync(output: &StateSyncOutput, json: bool) -> Result<()> {
+    if json {
+        print_json(output)?;
+    } else {
+        print_cluster_state_sync_human(output);
+    }
+    if !output.ok {
+        io::stdout().flush()?;
+        std::process::exit(1);
+    }
+    Ok(())
+}
+
 fn is_remote_uri(uri: &str) -> bool {
    uri.starts_with("http://") || uri.starts_with("https://")
 }
@ -3188,6 +3418,8 @@ async fn main() -> Result<()> {
                        "fragments_added": s.fragments_added,
                        "committed": s.committed,
                        "skipped": s.skipped.map(|r| r.as_str()),
+                        "manifest_version": s.manifest_version,
+                        "lance_head_version": s.lance_head_version,
                    })).collect::<Vec<_>>(),
                });
                print_json(&value)?;
@ -3207,6 +3439,89 @@ async fn main() -> Result<()> {
                }
            }
        }
+        Command::Repair {
+            uri,
+            target,
+            config,
+            confirm,
+            force,
+            json,
+        } => {
+            let config = load_cli_config(config.as_ref())?;
+            let uri = resolve_uri(&config, uri, target.as_deref())?;
+            let db = Omnigraph::open(&uri).await?;
+            let stats = db
+                .repair(omnigraph::db::RepairOptions { confirm, force })
+                .await?;
+            let refused_count = stats
+                .tables
+                .iter()
+                .filter(|s| matches!(s.action, omnigraph::db::RepairAction::Refused))
+                .count();
+            if json {
+                let value = serde_json::json!({
+                    "uri": uri,
+                    "confirm": confirm,
+                    "force": force,
+                    "manifest_version": stats.manifest_version,
+                    "tables": stats.tables.iter().map(|s| serde_json::json!({
+                        "table_key": s.table_key,
+                        "manifest_version": s.manifest_version,
+                        "lance_head_version": s.lance_head_version,
+                        "classification": s.classification.as_str(),
+                        "action": s.action.as_str(),
+                        "operations": s.operations,
+                        "error": s.error,
+                    })).collect::<Vec<_>>(),
+                });
+                print_json(&value)?;
+            } else {
+                let mode = if confirm { "confirm" } else { "preview" };
+                println!(
+                    "repair {} — {} mode, {} tables",
+                    uri,
+                    mode,
+                    stats.tables.len()
+                );
+                for s in &stats.tables {
+                    let drift = if s.manifest_version == s.lance_head_version {
+                        format!("{}", s.manifest_version)
+                    } else {
+                        format!("{} → {}", s.manifest_version, s.lance_head_version)
+                    };
+                    let ops = if s.operations.is_empty() {
+                        String::new()
+                    } else {
+                        format!(" [{}]", s.operations.join(", "))
+                    };
+                    let err = s
+                        .error
+                        .as_ref()
+                        .map(|err| format!(" ({err})"))
+                        .unwrap_or_default();
+                    println!(
+                        "  {:<40} {:<12} {:<22} {}{}{}",
+                        s.table_key,
+                        s.action.as_str(),
+                        s.classification.as_str(),
+                        drift,
+                        ops,
+                        err
+                    );
+                }
+                if !confirm {
+                    println!("rerun with --confirm to publish verified maintenance drift");
+                }
+            }
+            if refused_count > 0 {
+                bail!(
+                    "repair refused {} suspicious or unverifiable table(s); review the preview \
+                     output and rerun with --force --confirm only if publishing that drift is \
+                     intentional",
+                    refused_count
+                );
+            }
+        }
        Command::Cleanup {
            uri,
            target,
@ -3287,6 +3602,28 @@ async fn main() -> Result<()> {
                }
            }
        }
+        Command::Cluster { command } => match command {
+            ClusterCommand::Validate { config, json } => {
+                let output = validate_config_dir(config);
+                finish_cluster_validate(&output, json)?;
+            }
+            ClusterCommand::Plan { config, json } => {
+                let output = plan_config_dir(config);
+                finish_cluster_plan(&output, json)?;
+            }
+            ClusterCommand::Status { config, json } => {
+                let output = status_config_dir(config);
+                finish_cluster_status(&output, json)?;
+            }
+            ClusterCommand::Refresh { config, json } => {
+                let output = refresh_config_dir(config).await;
+                finish_cluster_state_sync(&output, json)?;
+            }
+            ClusterCommand::Import { config, json } => {
+                let output = import_config_dir(config).await;
+                finish_cluster_state_sync(&output, json)?;
+            }
+        },
        Command::Graphs { command } => match command {
            GraphsCommand::List {
                uri,
--- a/crates/omnigraph-cli/tests/cli.rs
+++ b/crates/omnigraph-cli/tests/cli.rs
@ -1,5 +1,6 @@
 use std::fs;

+use lance::Dataset;
 use lance::index::DatasetIndexExt;
 use omnigraph::db::{Omnigraph, ReadTarget};
 use serde_json::Value;
@ -60,6 +61,25 @@ fn manifest_dataset_version(graph: &std::path::Path) -> u64 {
    })
 }

+fn forge_person_delete_drift(graph: &std::path::Path) -> (u64, u64) {
+    tokio::runtime::Runtime::new().unwrap().block_on(async {
+        let uri = graph.to_string_lossy();
+        let db = Omnigraph::open(uri.as_ref()).await.unwrap();
+        let snap = db
+            .snapshot_of(ReadTarget::branch("main"))
+            .await
+            .unwrap();
+        let entry = snap.entry("node:Person").unwrap();
+        let full_path = format!("{}/{}", uri.trim_end_matches('/'), entry.table_path);
+        let mut ds = Dataset::open(&full_path).await.unwrap();
+        let deleted = ds.delete("name = 'Alice'").await.unwrap();
+        assert_eq!(deleted.num_deleted_rows, 1);
+        let head = deleted.new_dataset.version().version;
+        assert!(head > entry.table_version);
+        (entry.table_version, head)
+    })
+}
+
 fn write_policy_config_fixture(root: &std::path::Path) -> (std::path::PathBuf, std::path::PathBuf) {
    let config = root.join("omnigraph.yaml");
    let policy = root.join("policy.yaml");
@ -78,6 +98,64 @@ policy:
    (config, policy)
 }

+fn write_cluster_config_fixture(root: &std::path::Path) {
+    fs::write(
+        root.join("people.pg"),
+        r#"
+node Person {
+  name: String @key
+  age: I32?
+}
+"#,
+    )
+    .unwrap();
+    fs::write(
+        root.join("people.gq"),
+        r#"
+query find_person($name: String) {
+  match { $p: Person { name: $name } }
+  return { $p.name, $p.age }
+}
+"#,
+    )
+    .unwrap();
+    fs::write(root.join("base.policy.yaml"), "rules: []\n").unwrap();
+    fs::write(
+        root.join("cluster.yaml"),
+        r#"
+version: 1
+metadata:
+  name: company-brain
+state:
+  backend: cluster
+  lock: true
+graphs:
+  knowledge:
+    schema: ./people.pg
+    queries:
+      find_person:
+        file: ./people.gq
+policies:
+  base:
+    file: ./base.policy.yaml
+    applies_to: [knowledge]
+"#,
+    )
+    .unwrap();
+}
+
+fn init_cluster_derived_graph(root: &std::path::Path) {
+    let graph_dir = root.join("graphs");
+    fs::create_dir_all(&graph_dir).unwrap();
+    output_success(
+        cli()
+            .arg("init")
+            .arg("--schema")
+            .arg(root.join("people.pg"))
+            .arg(graph_dir.join("knowledge.omni")),
+    );
+}
+
 #[test]
 fn version_command_prints_current_cli_version() {
    let output = output_success(cli().arg("version"));
@ -89,6 +167,470 @@ fn version_command_prints_current_cli_version() {
    );
 }

+#[test]
+fn cluster_validate_config_success() {
+    let temp = tempdir().unwrap();
+    write_cluster_config_fixture(temp.path());
+
+    let output = output_success(
+        cli()
+            .arg("cluster")
+            .arg("validate")
+            .arg("--config")
+            .arg(temp.path()),
+    );
+    let stdout = stdout_string(&output);
+    assert!(stdout.contains("cluster config valid"), "{stdout}");
+}
+
+#[test]
+fn cluster_validate_json_is_stable() {
+    let temp = tempdir().unwrap();
+    write_cluster_config_fixture(temp.path());
+
+    let json = parse_stdout_json(&output_success(
+        cli()
+            .arg("cluster")
+            .arg("validate")
+            .arg("--config")
+            .arg(temp.path())
+            .arg("--json"),
+    ));
+    assert_eq!(json["ok"], true);
+    assert!(json["resource_digests"]["graph.knowledge"].is_string());
+    assert!(json["resource_digests"]["query.knowledge.find_person"].is_string());
+    assert_eq!(json["dependencies"][0]["from"], "policy.base");
+    assert_eq!(json["dependencies"][0]["to"], "graph.knowledge");
+}
+
+#[test]
+fn cluster_plan_json_reads_inferred_local_state() {
+    let temp = tempdir().unwrap();
+    write_cluster_config_fixture(temp.path());
+    let state_dir = temp.path().join("__cluster");
+    fs::create_dir_all(&state_dir).unwrap();
+    fs::write(
+        state_dir.join("state.json"),
+        r#"
+{
+  "version": 1,
+  "applied_revision": {
+    "config_digest": "old",
+    "resources": {
+      "graph.knowledge": { "digest": "old-graph" },
+      "policy.old": { "digest": "old-policy" }
+    }
+  }
+}
+"#,
+    )
+    .unwrap();
+
+    let json = parse_stdout_json(&output_success(
+        cli()
+            .arg("cluster")
+            .arg("plan")
+            .arg("--config")
+            .arg(temp.path())
+            .arg("--json"),
+    ));
+    assert_eq!(json["ok"], true);
+    assert_eq!(json["state_observations"]["state_found"], true);
+    assert!(
+        json["changes"]
+            .as_array()
+            .unwrap()
+            .iter()
+            .any(|change| change["resource"] == "policy.old" && change["operation"] == "delete"),
+        "plan should read state and delete stale resources: {json}"
+    );
+}
+
+#[test]
+fn cluster_status_json_reports_missing_state() {
+    let temp = tempdir().unwrap();
+    write_cluster_config_fixture(temp.path());
+
+    let json = parse_stdout_json(&output_success(
+        cli()
+            .arg("cluster")
+            .arg("status")
+            .arg("--config")
+            .arg(temp.path())
+            .arg("--json"),
+    ));
+    assert_eq!(json["ok"], true);
+    assert_eq!(json["state_observations"]["state_found"], false);
+    assert!(
+        json["diagnostics"]
+            .as_array()
+            .unwrap()
+            .iter()
+            .any(|diagnostic| diagnostic["code"] == "state_missing"),
+        "missing state should be a warning diagnostic: {json}"
+    );
+}
+
+#[test]
+fn cluster_status_json_reports_extended_state() {
+    let temp = tempdir().unwrap();
+    write_cluster_config_fixture(temp.path());
+    let state_dir = temp.path().join("__cluster");
+    fs::create_dir_all(&state_dir).unwrap();
+    fs::write(
+        state_dir.join("state.json"),
+        r#"
+{
+  "version": 1,
+  "state_revision": 5,
+  "applied_revision": {
+    "config_digest": "applied",
+    "resources": {
+      "graph.knowledge": { "digest": "graph-digest" }
+    }
+  },
+  "resource_statuses": {
+    "graph.knowledge": { "status": "applied", "conditions": ["healthy"] }
+  },
+  "approval_records": {},
+  "recovery_records": {},
+  "observations": {}
+}
+"#,
+    )
+    .unwrap();
+
+    let json = parse_stdout_json(&output_success(
+        cli()
+            .arg("cluster")
+            .arg("status")
+            .arg("--config")
+            .arg(temp.path())
+            .arg("--json"),
+    ));
+    assert_eq!(json["ok"], true);
+    assert_eq!(json["state_observations"]["state_revision"], 5);
+    assert!(
+        json["state_observations"]["state_cas"]
+            .as_str()
+            .unwrap()
+            .starts_with("sha256:")
+    );
+    assert_eq!(json["resource_digests"]["graph.knowledge"], "graph-digest");
+    assert_eq!(
+        json["resource_statuses"]["graph.knowledge"]["status"],
+        "applied"
+    );
+}
+
+#[test]
+fn cluster_plan_json_includes_state_cas_revision_and_lock_observation() {
+    let temp = tempdir().unwrap();
+    write_cluster_config_fixture(temp.path());
+    let state_dir = temp.path().join("__cluster");
+    fs::create_dir_all(&state_dir).unwrap();
+    fs::write(
+        state_dir.join("state.json"),
+        r#"
+{
+  "version": 1,
+  "state_revision": 9,
+  "applied_revision": {
+    "config_digest": "old",
+    "resources": {
+      "graph.knowledge": { "digest": "old-graph" }
+    }
+  }
+}
+"#,
+    )
+    .unwrap();
+
+    let json = parse_stdout_json(&output_success(
+        cli()
+            .arg("cluster")
+            .arg("plan")
+            .arg("--config")
+            .arg(temp.path())
+            .arg("--json"),
+    ));
+    assert_eq!(json["ok"], true);
+    assert_eq!(json["state_observations"]["state_revision"], 9);
+    assert!(
+        json["state_observations"]["state_cas"]
+            .as_str()
+            .unwrap()
+            .starts_with("sha256:")
+    );
+    assert_eq!(json["state_observations"]["locked"], false);
+    assert_eq!(json["state_observations"]["lock_acquired"], true);
+    assert!(json["state_observations"]["acquired_lock_id"].is_string());
+    assert!(!state_dir.join("lock.json").exists());
+}
+
+#[test]
+fn cluster_plan_locked_state_exits_nonzero() {
+    let temp = tempdir().unwrap();
+    write_cluster_config_fixture(temp.path());
+    let state_dir = temp.path().join("__cluster");
+    fs::create_dir_all(&state_dir).unwrap();
+    fs::write(
+        state_dir.join("lock.json"),
+        r#"
+{
+  "version": 1,
+  "lock_id": "held-lock",
+  "operation": "plan",
+  "created_at": "2026-06-08T00:00:00Z",
+  "pid": 123
+}
+"#,
+    )
+    .unwrap();
+
+    let output = output_failure(
+        cli()
+            .arg("cluster")
+            .arg("plan")
+            .arg("--config")
+            .arg(temp.path())
+            .arg("--json"),
+    );
+    let json = parse_stdout_json(&output);
+    assert_eq!(json["ok"], false);
+    assert_eq!(json["state_observations"]["locked"], true);
+    assert_eq!(json["state_observations"]["lock_acquired"], false);
+    assert_eq!(json["state_observations"]["lock_id"], "held-lock");
+    assert!(
+        json["diagnostics"]
+            .as_array()
+            .unwrap()
+            .iter()
+            .any(|diagnostic| diagnostic["code"] == "state_lock_held"),
+        "locked state should produce a useful diagnostic: {json}"
+    );
+}
+
+#[test]
+fn cluster_import_json_bootstraps_missing_state() {
+    let temp = tempdir().unwrap();
+    write_cluster_config_fixture(temp.path());
+    init_cluster_derived_graph(temp.path());
+
+    let json = parse_stdout_json(&output_success(
+        cli()
+            .arg("cluster")
+            .arg("import")
+            .arg("--config")
+            .arg(temp.path())
+            .arg("--json"),
+    ));
+    assert_eq!(json["ok"], true);
+    assert_eq!(json["operation"], "import");
+    assert_eq!(json["state_observations"]["state_revision"], 1);
+    assert!(
+        json["state_observations"]["state_cas"]
+            .as_str()
+            .unwrap()
+            .starts_with("sha256:")
+    );
+    assert_eq!(json["state_observations"]["locked"], false);
+    assert_eq!(json["state_observations"]["lock_acquired"], true);
+    assert!(json["state_observations"]["acquired_lock_id"].is_string());
+    assert!(json["observations"]["graph.knowledge"]["manifest_version"].is_number());
+    assert_eq!(
+        json["resource_statuses"]["graph.knowledge"]["status"],
+        "applied"
+    );
+    assert!(temp.path().join("__cluster/state.json").exists());
+    assert!(!temp.path().join("__cluster/lock.json").exists());
+}
+
+#[test]
+fn cluster_refresh_json_updates_revision_cas_and_removes_lock() {
+    let temp = tempdir().unwrap();
+    write_cluster_config_fixture(temp.path());
+    init_cluster_derived_graph(temp.path());
+    let state_dir = temp.path().join("__cluster");
+    fs::create_dir_all(&state_dir).unwrap();
+    fs::write(
+        state_dir.join("state.json"),
+        r#"
+{
+  "version": 1,
+  "state_revision": 2,
+  "applied_revision": { "resources": {} }
+}
+"#,
+    )
+    .unwrap();
+
+    let json = parse_stdout_json(&output_success(
+        cli()
+            .arg("cluster")
+            .arg("refresh")
+            .arg("--config")
+            .arg(temp.path())
+            .arg("--json"),
+    ));
+    assert_eq!(json["ok"], true);
+    assert_eq!(json["operation"], "refresh");
+    assert_eq!(json["state_observations"]["state_revision"], 3);
+    assert!(
+        json["state_observations"]["state_cas"]
+            .as_str()
+            .unwrap()
+            .starts_with("sha256:")
+    );
+    assert_eq!(json["state_observations"]["locked"], false);
+    assert_eq!(json["state_observations"]["lock_acquired"], true);
+    assert!(json["state_observations"]["acquired_lock_id"].is_string());
+    assert!(!state_dir.join("lock.json").exists());
+}
+
+#[test]
+fn cluster_refresh_missing_state_exits_nonzero() {
+    let temp = tempdir().unwrap();
+    write_cluster_config_fixture(temp.path());
+
+    let output = output_failure(
+        cli()
+            .arg("cluster")
+            .arg("refresh")
+            .arg("--config")
+            .arg(temp.path())
+            .arg("--json"),
+    );
+    let json = parse_stdout_json(&output);
+    assert_eq!(json["ok"], false);
+    assert!(
+        json["diagnostics"]
+            .as_array()
+            .unwrap()
+            .iter()
+            .any(|diagnostic| diagnostic["code"] == "state_missing"),
+        "missing state should produce a useful diagnostic: {json}"
+    );
+}
+
+#[test]
+fn cluster_import_existing_state_exits_nonzero() {
+    let temp = tempdir().unwrap();
+    write_cluster_config_fixture(temp.path());
+    let state_dir = temp.path().join("__cluster");
+    fs::create_dir_all(&state_dir).unwrap();
+    fs::write(
+        state_dir.join("state.json"),
+        r#"{"version":1,"applied_revision":{"resources":{}}}"#,
+    )
+    .unwrap();
+
+    let output = output_failure(
+        cli()
+            .arg("cluster")
+            .arg("import")
+            .arg("--config")
+            .arg(temp.path())
+            .arg("--json"),
+    );
+    let json = parse_stdout_json(&output);
+    assert_eq!(json["ok"], false);
+    assert!(
+        json["diagnostics"]
+            .as_array()
+            .unwrap()
+            .iter()
+            .any(|diagnostic| diagnostic["code"] == "state_already_exists"),
+        "existing state should produce a useful diagnostic: {json}"
+    );
+}
+
+#[test]
+fn cluster_refresh_and_import_locked_state_exit_nonzero() {
+    let temp = tempdir().unwrap();
+    write_cluster_config_fixture(temp.path());
+    let state_dir = temp.path().join("__cluster");
+    fs::create_dir_all(&state_dir).unwrap();
+    fs::write(
+        state_dir.join("state.json"),
+        r#"{"version":1,"applied_revision":{"resources":{}}}"#,
+    )
+    .unwrap();
+    fs::write(
+        state_dir.join("lock.json"),
+        r#"{"version":1,"lock_id":"held-lock","operation":"refresh","created_at":"2026-06-08T00:00:00Z","pid":123}"#,
+    )
+    .unwrap();
+
+    let refresh = parse_stdout_json(&output_failure(
+        cli()
+            .arg("cluster")
+            .arg("refresh")
+            .arg("--config")
+            .arg(temp.path())
+            .arg("--json"),
+    ));
+    assert_eq!(refresh["state_observations"]["locked"], true);
+    assert_eq!(refresh["state_observations"]["lock_id"], "held-lock");
+    assert_eq!(refresh["state_observations"]["lock_acquired"], false);
+    assert!(
+        refresh["diagnostics"]
+            .as_array()
+            .unwrap()
+            .iter()
+            .any(|diagnostic| diagnostic["code"] == "state_lock_held")
+    );
+
+    let temp = tempdir().unwrap();
+    write_cluster_config_fixture(temp.path());
+    let state_dir = temp.path().join("__cluster");
+    fs::create_dir_all(&state_dir).unwrap();
+    fs::write(
+        state_dir.join("lock.json"),
+        r#"{"version":1,"lock_id":"held-lock","operation":"import","created_at":"2026-06-08T00:00:00Z","pid":123}"#,
+    )
+    .unwrap();
+
+    let imported = parse_stdout_json(&output_failure(
+        cli()
+            .arg("cluster")
+            .arg("import")
+            .arg("--config")
+            .arg(temp.path())
+            .arg("--json"),
+    ));
+    assert_eq!(imported["state_observations"]["locked"], true);
+    assert_eq!(imported["state_observations"]["lock_id"], "held-lock");
+    assert_eq!(imported["state_observations"]["lock_acquired"], false);
+    assert!(
+        imported["diagnostics"]
+            .as_array()
+            .unwrap()
+            .iter()
+            .any(|diagnostic| diagnostic["code"] == "state_lock_held")
+    );
+}
+
+#[test]
+fn cluster_validate_invalid_config_exits_nonzero() {
+    let temp = tempdir().unwrap();
+    fs::write(
+        temp.path().join("cluster.yaml"),
+        "version: 1\ngraphs: {}\npipelines: {}\n",
+    )
+    .unwrap();
+
+    let output = output_failure(
+        cli()
+            .arg("cluster")
+            .arg("validate")
+            .arg("--config")
+            .arg(temp.path()),
+    );
+    let stdout = stdout_string(&output);
+    assert!(stdout.contains("future_phase_field"), "{stdout}");
+}
+
 #[test]
 fn short_version_flag_prints_current_cli_version() {
    let output = output_success(cli().arg("-v"));
@ -450,6 +992,83 @@ fn explicit_omnigraph_config_pointing_at_missing_file_errors() {
    );
 }

+#[test]
+fn repair_json_reports_noop_on_clean_graph() {
+    let temp = tempdir().unwrap();
+    let graph = graph_path(temp.path());
+    init_graph(&graph);
+    load_fixture(&graph);
+
+    let output = output_success(cli().arg("repair").arg("--json").arg(&graph));
+    let payload: Value = serde_json::from_slice(&output.stdout).unwrap();
+
+    assert_eq!(payload["confirm"], false);
+    assert_eq!(payload["force"], false);
+    assert_eq!(payload["manifest_version"], Value::Null);
+    let tables = payload["tables"].as_array().unwrap();
+    assert_eq!(tables.len(), 4);
+    assert!(tables.iter().all(|table| {
+        table["classification"] == "no_drift" && table["action"] == "no_op"
+    }));
+}
+
+#[test]
+fn repair_confirm_json_refuses_suspicious_drift_with_nonzero_exit_then_force_succeeds() {
+    let temp = tempdir().unwrap();
+    let graph = graph_path(temp.path());
+    init_graph(&graph);
+    load_fixture(&graph);
+    let graph_manifest_before = manifest_dataset_version(&graph);
+    let (table_manifest_before, table_head_before) = forge_person_delete_drift(&graph);
+
+    let refused = output_failure(
+        cli()
+            .arg("repair")
+            .arg("--confirm")
+            .arg("--json")
+            .arg(&graph),
+    );
+    let refused_payload: Value = serde_json::from_slice(&refused.stdout).unwrap();
+    assert_eq!(refused_payload["manifest_version"], Value::Null);
+    let person = refused_payload["tables"]
+        .as_array()
+        .unwrap()
+        .iter()
+        .find(|table| table["table_key"] == "node:Person")
+        .unwrap();
+    assert_eq!(person["classification"], "suspicious");
+    assert_eq!(person["action"], "refused");
+    assert!(
+        String::from_utf8_lossy(&refused.stderr).contains("repair refused"),
+        "stderr should explain the non-zero exit; got: {}",
+        String::from_utf8_lossy(&refused.stderr)
+    );
+    assert_eq!(manifest_dataset_version(&graph), graph_manifest_before);
+
+    let forced = output_success(
+        cli()
+            .arg("repair")
+            .arg("--force")
+            .arg("--confirm")
+            .arg("--json")
+            .arg(&graph),
+    );
+    let forced_payload: Value = serde_json::from_slice(&forced.stdout).unwrap();
+    let forced_manifest = forced_payload["manifest_version"].as_u64().unwrap();
+    assert!(forced_manifest > graph_manifest_before);
+    let person = forced_payload["tables"]
+        .as_array()
+        .unwrap()
+        .iter()
+        .find(|table| table["table_key"] == "node:Person")
+        .unwrap();
+    assert_eq!(person["classification"], "suspicious");
+    assert_eq!(person["action"], "forced");
+    assert_eq!(person["manifest_version"], table_manifest_before);
+    assert_eq!(person["lance_head_version"], table_head_before);
+    assert_eq!(manifest_dataset_version(&graph), forced_manifest);
+}
+
 #[test]
 fn schema_plan_json_reports_supported_additive_change() {
    let temp = tempdir().unwrap();
--- a/crates/omnigraph-cluster/Cargo.toml
+++ b/crates/omnigraph-cluster/Cargo.toml
@ -0,0 +1,24 @@
+[package]
+name = "omnigraph-cluster"
+version = "0.6.2"
+edition = "2024"
+description = "Read-only cluster configuration validation and planning for Omnigraph."
+license = "MIT"
+repository = "https://github.com/ModernRelay/omnigraph"
+homepage = "https://github.com/ModernRelay/omnigraph"
+documentation = "https://docs.rs/omnigraph-cluster"
+
+[dependencies]
+omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.2" }
+omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.2" }
+serde = { workspace = true }
+serde_json = { workspace = true }
+serde_yaml = { workspace = true }
+sha2 = { workspace = true }
+thiserror = { workspace = true }
+time = { workspace = true }
+ulid = { workspace = true }
+
+[dev-dependencies]
+tempfile = { workspace = true }
+tokio = { workspace = true }
--- a/crates/omnigraph-cluster/src/lib.rs
+++ b/crates/omnigraph-cluster/src/lib.rs
--- a/crates/omnigraph-compiler/Cargo.toml
+++ b/crates/omnigraph-compiler/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "omnigraph-compiler"
-version = "0.6.1"
+version = "0.6.2"
 edition = "2024"
 description = "Schema/query compiler for Omnigraph. Zero Lance dependency."
 license = "MIT"
--- a/crates/omnigraph-config/Cargo.toml
+++ b/crates/omnigraph-config/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "omnigraph-config"
-version = "0.6.1"
+version = "0.6.2"
 edition = "2024"
 description = "Configuration schema and loader for the Omnigraph graph database."
 license = "MIT"
--- a/crates/omnigraph-policy/Cargo.toml
+++ b/crates/omnigraph-policy/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "omnigraph-policy"
-version = "0.6.1"
+version = "0.6.2"
 edition = "2024"
 description = "Policy / authorization layer for Omnigraph — Cedar-backed PolicyEngine, PolicyChecker trait, ResourceScope enum."
 license = "MIT"
--- a/crates/omnigraph-queries/Cargo.toml
+++ b/crates/omnigraph-queries/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "omnigraph-queries"
-version = "0.6.1"
+version = "0.6.2"
 edition = "2024"
 description = "Stored-query registry and validation for the Omnigraph graph database."
 license = "MIT"
@ -9,8 +9,8 @@ homepage = "https://github.com/ModernRelay/omnigraph"
 documentation = "https://docs.rs/omnigraph-queries"

 [dependencies]
-omnigraph-config = { path = "../omnigraph-config", version = "0.6.1" }
-omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.1" }
+omnigraph-config = { path = "../omnigraph-config", version = "0.6.2" }
+omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.2" }

 [dev-dependencies]
 tempfile = { workspace = true }
--- a/crates/omnigraph-server/Cargo.toml
+++ b/crates/omnigraph-server/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "omnigraph-server"
-version = "0.6.1"
+version = "0.6.2"
 edition = "2024"
 description = "HTTP server for the Omnigraph graph database."
 license = "MIT"
@ -19,12 +19,12 @@ default = []
 aws = ["dep:aws-config", "dep:aws-sdk-secretsmanager"]

 [dependencies]
-omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.1" }
-omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.1" }
-omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.1" }
-omnigraph-config = { path = "../omnigraph-config", version = "0.6.1" }
-omnigraph-queries = { path = "../omnigraph-queries", version = "0.6.1" }
-omnigraph-api-types = { path = "../omnigraph-api-types", version = "0.6.1" }
+omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.2" }
+omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.2" }
+omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.2" }
+omnigraph-config = { path = "../omnigraph-config", version = "0.6.2" }
+omnigraph-queries = { path = "../omnigraph-queries", version = "0.6.2" }
+omnigraph-api-types = { path = "../omnigraph-api-types", version = "0.6.2" }
 axum = { workspace = true }
 clap = { workspace = true }
 color-eyre = { workspace = true }
--- a/crates/omnigraph/Cargo.toml
+++ b/crates/omnigraph/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "omnigraph-engine"
-version = "0.6.1"
+version = "0.6.2"
 edition = "2024"
 description = "Runtime engine for the Omnigraph graph database."
 license = "MIT"
@ -16,8 +16,8 @@ default = []
 failpoints = ["dep:fail", "fail/failpoints"]

 [dependencies]
-omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.1" }
-omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.1" }
+omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.2" }
+omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.2" }
 lance = { workspace = true }
 lance-datafusion = { workspace = true }
 datafusion = { workspace = true }
@ -51,7 +51,8 @@ chrono = { workspace = true }
 arc-swap = { workspace = true }

 [dev-dependencies]
-omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.1" }
+omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.2" }
 tokio = { workspace = true }
 lance-namespace-impls = { workspace = true }
 serial_test = "3"
+proptest = "1"
--- a/crates/omnigraph/examples/bench_expand.rs
+++ b/crates/omnigraph/examples/bench_expand.rs
@ -221,6 +221,65 @@ fn microbench_dedup() {
    );
 }

+/// Selective single-source traversal, timed cold in CSR vs indexed mode across
+/// growing |E|. The win of the indexed path: a small fixed frontier should be
+/// ~flat in |E| (one BTREE scan per hop), whereas CSR pays an O(|E|) adjacency
+/// build on the first (cold) query. Also asserts both modes return the same
+/// rows — a guard against the scalar-index `physical_rows` silent fallback
+/// dropping unindexed-fragment rows.
+async fn bench_selective_modes() {
+    println!("\n── Selective traversal: indexed vs CSR (cold, single-source knows{{1,2}}) ──");
+    let sel = r#"
+query sel($name: String) {
+    match {
+        $a: Person { name: $name }
+        $a knows{1,2} $b
+    }
+    return { $b.name }
+}
+"#;
+    for &(n, avg_deg) in &[(1_000usize, 8usize), (10_000, 8), (30_000, 8)] {
+        let jsonl = generate_jsonl(n, avg_deg, 42);
+        let mut params = ParamMap::new();
+        params.insert(
+            "name".to_string(),
+            omnigraph_compiler::query::ast::Literal::String("p0".to_string()),
+        );
+
+        let mut rows_by_mode: Vec<(&str, usize)> = Vec::new();
+        for mode in ["csr", "indexed"] {
+            // Fresh db per measurement so the query is cold (CSR pays its build).
+            let dir = tempfile::tempdir().unwrap();
+            let uri = dir.path().to_str().unwrap();
+            let mut db = Omnigraph::init(uri, SCHEMA).await.unwrap();
+            load_jsonl(&mut db, &jsonl, LoadMode::Overwrite).await.unwrap();
+            // SAFE: example main drives queries sequentially; no concurrent env reader.
+            unsafe { std::env::set_var("OMNIGRAPH_TRAVERSAL_MODE", mode) };
+
+            let t = Instant::now();
+            let r = db
+                .query(ReadTarget::branch("main"), sel, "sel", &params)
+                .await
+                .expect("sel query");
+            let elapsed = t.elapsed();
+            let rows = r.num_rows();
+            rows_by_mode.push((mode, rows));
+            println!(
+                "  |E|≈{:>7}  {:<8} cold={:>9.2?}  rows={}",
+                n * avg_deg,
+                mode,
+                elapsed,
+                rows
+            );
+        }
+        unsafe { std::env::remove_var("OMNIGRAPH_TRAVERSAL_MODE") };
+        assert_eq!(
+            rows_by_mode[0].1, rows_by_mode[1].1,
+            "indexed and CSR must return identical rows (no silent drop under partial index coverage)"
+        );
+    }
+}
+
 #[tokio::main(flavor = "multi_thread")]
 async fn main() {
    println!("── End-to-end query latency ──");
@ -262,5 +321,7 @@ async fn main() {
        }
    }

+    bench_selective_modes().await;
+
    microbench_dedup();
 }
--- a/crates/omnigraph/src/db/manifest.rs
+++ b/crates/omnigraph/src/db/manifest.rs
@ -36,7 +36,7 @@ use publisher::{GraphNamespacePublisher, ManifestBatchPublisher};
 pub(crate) use recovery::{
    RecoveryMode, RecoverySidecar, RecoverySidecarHandle, SidecarKind, SidecarTablePin,
    SidecarTableRegistration, SidecarTombstone, delete_sidecar, has_schema_apply_sidecar,
-    new_sidecar, recover_manifest_drift, write_sidecar,
+    list_sidecars, new_sidecar, recover_manifest_drift, write_sidecar,
 };
 pub use state::SubTableEntry;
 #[cfg(test)]
@ -48,6 +48,22 @@ const OBJECT_TYPE_TABLE_VERSION: &str = "table_version";
 const OBJECT_TYPE_TABLE_TOMBSTONE: &str = "table_tombstone";
 const TABLE_VERSION_MANAGEMENT_KEY: &str = "table_version_management";

+/// Apply pending internal-schema migrations against `__manifest` on the
+/// open-for-write path, independent of a publish.
+///
+/// `Omnigraph::open(ReadWrite)` calls this before the coordinator reads branch
+/// state, so branch-observing code (`branch_list`, the schema-apply
+/// blocking-branch checks) sees the post-migration graph. In particular the
+/// v2→v3 step sweeps legacy `__run__*` staging branches off `__manifest`
+/// (MR-770); running it here closes the window where those branches would
+/// otherwise block schema apply before the first publish runs the migration.
+///
+/// Idempotent: a no-op stamp read when the on-disk version already matches.
+pub(crate) async fn migrate_on_open(root_uri: &str) -> Result<()> {
+    let mut dataset = open_manifest_dataset(root_uri, None).await?;
+    migrations::migrate_internal_schema(&mut dataset).await
+}
+
 /// Immutable point-in-time view of the database.
 ///
 /// Cheap to create (no storage I/O). All reads within a query go through one
--- a/crates/omnigraph/src/db/manifest/migrations.rs
+++ b/crates/omnigraph/src/db/manifest/migrations.rs
@ -46,7 +46,11 @@ use crate::error::{OmniError, Result};
 /// - v2 — `__manifest.object_id` carries the unenforced-PK annotation,
 ///   engaging Lance's bloom-filter conflict resolver at commit time. Added
 ///   alongside `expected_table_versions` OCC on `ManifestBatchPublisher::publish`.
-pub(super) const INTERNAL_MANIFEST_SCHEMA_VERSION: u32 = 2;
+/// - v3 — one-time sweep of legacy `__run__<id>` staging branches left on the
+///   `__manifest` dataset by the pre-v0.4.0 Run state machine (removed in
+///   MR-771). Once swept, the `is_internal_run_branch` defense-in-depth guard
+///   is no longer needed (MR-770).
+pub(super) const INTERNAL_MANIFEST_SCHEMA_VERSION: u32 = 3;

 const INTERNAL_SCHEMA_VERSION_KEY: &str = "omnigraph:internal_schema_version";
 const OBJECT_ID_PK_KEY: &str = "lance-schema:unenforced-primary-key";
@ -89,6 +93,10 @@ pub(super) async fn migrate_internal_schema(dataset: &mut Dataset) -> Result<()>
                migrate_v1_to_v2(dataset).await?;
                current = 2;
            }
+            2 => {
+                migrate_v2_to_v3(dataset).await?;
+                current = 3;
+            }
            other => {
                return Err(OmniError::manifest_internal(format!(
                    "no internal-schema migration registered for v{} → v{}",
@ -122,6 +130,51 @@ async fn migrate_v1_to_v2(dataset: &mut Dataset) -> Result<()> {
    set_stamp(dataset, 2).await
 }

+/// v2 → v3: sweep legacy `__run__<id>` staging branches off the `__manifest`
+/// dataset, then bump the stamp.
+///
+/// The pre-v0.4.0 Run state machine (removed in MR-771) created graph-level
+/// staging branches named `__run__<ulid>` on `__manifest`. MR-771 stopped
+/// creating them but left any pre-existing ones in place; Lance's
+/// `list_branches` still enumerates them, so they leak into `branch_list()`
+/// and count as blocking branches at schema-apply time. This one-time sweep
+/// removes them so the `is_internal_run_branch` guard can retire (MR-770).
+///
+/// The `"__run__"` prefix is inlined here on purpose: this migration must keep
+/// working after the `run_registry` module (the guard) is deleted, so it does
+/// not depend on it.
+///
+/// Idempotent under both sequential retry and concurrent runners: each run
+/// re-enumerates `list_branches` fresh, and `force_delete_branch` tolerates a
+/// branch that is already gone — so a crash before the stamp bump, or a second
+/// process opening the same legacy graph at the same time, never errors out.
+async fn migrate_v2_to_v3(dataset: &mut Dataset) -> Result<()> {
+    const LEGACY_RUN_BRANCH_PREFIX: &str = "__run__";
+    let branches = dataset
+        .list_branches()
+        .await
+        .map_err(|e| OmniError::Lance(e.to_string()))?;
+    let run_branches: Vec<String> = branches
+        .into_keys()
+        .filter(|name| {
+            name.trim_start_matches('/')
+                .starts_with(LEGACY_RUN_BRANCH_PREFIX)
+        })
+        .collect();
+    for name in run_branches {
+        // `force_delete_branch` deletes even when the `BranchContents` is
+        // already gone. Plain `delete_branch` errors "BranchContents not
+        // found", which would fail a second concurrent open (or a retry that
+        // raced another runner) after the first one swept the branch. Force is
+        // exactly Lance's documented path for cleaning up zombie branches.
+        dataset
+            .force_delete_branch(&name)
+            .await
+            .map_err(|e| OmniError::Lance(e.to_string()))?;
+    }
+    set_stamp(dataset, 3).await
+}
+
 async fn set_stamp(dataset: &mut Dataset, version: u32) -> Result<()> {
    dataset
        .update_schema_metadata([(INTERNAL_SCHEMA_VERSION_KEY.to_string(), version.to_string())])
--- a/crates/omnigraph/src/db/manifest/recovery.rs
+++ b/crates/omnigraph/src/db/manifest/recovery.rs
@ -106,6 +106,12 @@ pub(crate) enum SidecarKind {
    BranchMerge,
    /// `ensure_indices_for_branch` — index lifecycle commits.
    EnsureIndices,
+    /// `optimize_all_tables` — Lance `compact_files` (reserve-fragments +
+    /// rewrite commits) followed by a manifest publish of the compacted
+    /// version. Loose-match like the other multi-commit writers; roll-forward
+    /// is always safe because compaction is content-preserving (Lance
+    /// `Operation::Rewrite` "reorganizes data without semantic modification").
+    Optimize,
 }

 /// One table's contribution to a sidecar's intended commit. The classifier
@ -412,11 +418,13 @@ pub(crate) fn parse_sidecar(sidecar_uri: &str, body: &str) -> Result<RecoverySid
 /// - **Strict** (`Mutation`, `Load`): exactly one `commit_staged` per
 ///   table, so `lance_head == manifest_pinned + 1` AND
 ///   `post_commit_pin == lance_head` is required.
-/// - **Loose** (`SchemaApply`, `EnsureIndices`, `BranchMerge`): the
-///   writer may run N ≥ 1 `commit_staged` calls per table (one per
-///   index built + one for the overwrite, etc.; merge tables run
-///   merge_insert + delete_where + index rebuilds) and the exact N
-///   is hard to compute at sidecar-write time. The loose match accepts
+/// - **Loose** (`SchemaApply`, `EnsureIndices`, `BranchMerge`,
+///   `Optimize`): the writer advances the Lance HEAD by N ≥ 1 commits
+///   per table (one per index built + one for the overwrite, etc.;
+///   merge tables run merge_insert + delete_where + index rebuilds;
+///   `Optimize` runs `compact_files`, which commits reserve-fragments +
+///   rewrite) and the exact N is hard to compute at sidecar-write time.
+///   The loose match accepts
 ///   any `lance_head > manifest_pinned` as `RolledPastExpected` when
 ///   `pin.expected_version == manifest_pinned` (the writer's CAS
 ///   target matches what the manifest currently shows). The risk this
@ -494,9 +502,12 @@ pub(crate) fn decide(classifications: &[TableClassification]) -> SidecarDecision
 /// Skipping the restore in those cases would leave Lance HEAD ahead of
 /// the manifest with no recovery artifact left.
 ///
-/// Cost: under repeated mid-rollback crashes (rare), Lance HEAD
-/// accumulates extra restore commits that `omnigraph cleanup` reclaims.
-/// Bounded by the number of recovery iterations — typically 1.
+/// Cost: a successful roll-back appends one restore commit and then publishes
+/// the manifest to match (`roll_back_sidecar`), so the table converges
+/// (`manifest == HEAD`) in one pass. Only repeated crashes *between* the restore
+/// and that publish (rare) accumulate extra restore commits; each re-classified
+/// roll-back restores again and `omnigraph cleanup` reclaims the surplus.
+/// Bounded by the number of interrupted recovery iterations — typically 0.
 pub(crate) async fn restore_table_to_version(
    table_path: &str,
    branch: Option<&str>,
@ -801,13 +812,24 @@ async fn roll_back_sidecar(
    sidecar: &RecoverySidecar,
    states: &[ClassifiedTable],
 ) -> Result<()> {
-    // Restore every table whose Lance HEAD has drifted from the
-    // manifest pin (RolledPastExpected, UnexpectedAtP1,
-    // UnexpectedMultistep). NoMovement tables are already at the
-    // manifest pin — no action. Restore is unconditional; repeated
-    // mid-rollback crashes accumulate a few extra Lance commits that
-    // `omnigraph cleanup` reclaims.
+    // Restore every drifted table (RolledPastExpected / UnexpectedAtP1 /
+    // UnexpectedMultistep) to its manifest-pinned content, then PUBLISH so
+    // `manifest == Lance HEAD` for each — symmetric with roll-forward. The
+    // restore commit's content equals the manifest-pinned version, so re-pinning
+    // the manifest to the new (restored) HEAD is content-correct and closes the
+    // orphaned-drift class (`HEAD > manifest` with no covering sidecar). This is
+    // what makes a failed-then-retried schema_apply converge: after one
+    // roll-back `manifest == HEAD`, so the retry's precondition passes instead of
+    // failing one version higher each iteration.
+    //
+    // NoMovement tables are already at the pin — excluded from both the restore
+    // and the publish. The audit `to_version` stays the *logical* rolled-back-to
+    // version (`manifest_pinned`), while the manifest is published at
+    // `manifest_pinned + 1` (the restore commit, same content) — keep that
+    // asymmetry so the audit records the drift (`from_version > to_version`).
    let mut outcomes = Vec::with_capacity(sidecar.tables.len());
+    let mut updates: Vec<ManifestChange> = Vec::with_capacity(sidecar.tables.len());
+    let mut expected: HashMap<String, u64> = HashMap::with_capacity(sidecar.tables.len());
    for (pin, state) in sidecar.tables.iter().zip(states.iter()) {
        if matches!(
            state.classification,
@ -821,10 +843,20 @@ async fn roll_back_sidecar(
                state.manifest_pinned,
            )
            .await?;
-            // `from_version` records the Lance HEAD observed BEFORE the
-            // restore (the actual drift), not the manifest pin. Operators
-            // reading `_graph_commit_recoveries.lance` see "rolled back
-            // from v7 to v5" rather than "v5 → v5".
+            // Publish the post-restore HEAD, CAS against the current (unmoved)
+            // manifest pin — the same helper roll-forward uses.
+            push_table_update_at_head(
+                root_uri,
+                &pin.table_key,
+                &pin.table_path,
+                pin.table_branch.as_deref(),
+                state.manifest_pinned,
+                &mut updates,
+                &mut expected,
+            )
+            .await?;
+            // `from_version` records the Lance HEAD observed BEFORE the restore
+            // (the actual drift); `to_version` the logical pin we rolled back to.
            outcomes.push(TableOutcome {
                table_key: pin.table_key.clone(),
                from_version: state.lance_head,
@ -832,13 +864,23 @@ async fn roll_back_sidecar(
            });
        }
    }
-    // Manifest pin doesn't move on rollback; record an audit-only
-    // commit at the existing version so operators can correlate via
-    // `omnigraph commit list --filter actor=omnigraph:recovery`.
+    // Publish the restored HEADs so manifest == HEAD. A degenerate all-NoMovement
+    // roll-back restores nothing — there's nothing to publish, and the audit
+    // records the unchanged snapshot version.
+    let manifest_version = if updates.is_empty() {
+        snapshot.version()
+    } else {
+        let publisher = GraphNamespacePublisher::new(root_uri, sidecar.branch.as_deref());
+        publisher
+            .publish(&updates, &expected)
+            .await?
+            .version()
+            .version
+    };
    record_audit(
        root_uri,
        sidecar,
-        snapshot.version(),
+        manifest_version,
        RecoveryKind::RolledBack,
        outcomes,
    )
@ -919,44 +961,20 @@ async fn roll_forward_all(
        HashMap::with_capacity(sidecar.tables.len() + sidecar.additional_registrations.len());

    for pin in &sidecar.tables {
-        // Open the dataset at its CURRENT Lance HEAD on the pin's branch
-        // (not at the sidecar's post_commit_pin). For strict-match writers
-        // (Mutation/Load) HEAD == post_commit_pin by construction. For
-        // loose-match writers (SchemaApply/EnsureIndices/BranchMerge) HEAD
-        // may be higher than post_commit_pin (multiple commit_staged
-        // calls per table); we want to publish to the actual current HEAD.
-        let head_ds = Dataset::open(&pin.table_path)
-            .await
-            .map_err(|e| OmniError::Lance(e.to_string()))?;
-        let head_ds = match pin.table_branch.as_deref() {
-            Some(b) if b != "main" => head_ds
-                .checkout_branch(b)
-                .await
-                .map_err(|e| OmniError::Lance(e.to_string()))?,
-            _ => head_ds,
-        };
-        let head_version = head_ds.version().version;
-
-        let row_count = head_ds
-            .count_rows(None)
-            .await
-            .map_err(|e| OmniError::Lance(e.to_string()))? as u64;
-
-        let table_relative_path = super::table_path_for_table_key(&pin.table_key)?;
-        let version_metadata = super::metadata::TableVersionMetadata::from_dataset(
+        // Publish to the table's CURRENT Lance HEAD on the pin's branch (not the
+        // sidecar's `post_commit_pin`, a lower bound for loose-match writers that
+        // run multiple commit_staged calls per table). CAS against the pin's
+        // pre-write `expected_version`.
+        let head_version = push_table_update_at_head(
            root_uri,
-            &table_relative_path,
-            &head_ds,
-        )?;
-
-        updates.push(ManifestChange::Update(SubTableUpdate {
-            table_key: pin.table_key.clone(),
-            table_version: head_version,
-            table_branch: pin.table_branch.clone(),
-            row_count,
-            version_metadata,
-        }));
-        expected.insert(pin.table_key.clone(), pin.expected_version);
+            &pin.table_key,
+            &pin.table_path,
+            pin.table_branch.as_deref(),
+            pin.expected_version,
+            &mut updates,
+            &mut expected,
+        )
+        .await?;
        published_versions.insert(pin.table_key.clone(), head_version);
    }

@ -1047,6 +1065,57 @@ async fn roll_forward_all(
    Ok((new_dataset.version().version, published_versions))
 }

+/// Open `table_path` at its branch HEAD, read the current Lance HEAD version,
+/// row count, and version metadata, and push a `ManifestChange::Update` (plus
+/// its CAS `expected` entry) that re-pins the manifest to that HEAD. Returns the
+/// published HEAD version.
+///
+/// Shared by `roll_forward_all` (where `expected_version` is the sidecar's
+/// pre-write pin) and `roll_back_sidecar` (where it is the manifest-pinned
+/// version the table was just restored to). The HEAD is read AFTER any restore
+/// in the same single-threaded sweep, so no concurrent writer can have advanced
+/// it.
+async fn push_table_update_at_head(
+    root_uri: &str,
+    table_key: &str,
+    table_path: &str,
+    branch: Option<&str>,
+    expected_version: u64,
+    updates: &mut Vec<ManifestChange>,
+    expected: &mut HashMap<String, u64>,
+) -> Result<u64> {
+    let head_ds = Dataset::open(table_path)
+        .await
+        .map_err(|e| OmniError::Lance(e.to_string()))?;
+    let head_ds = match branch {
+        Some(b) if b != "main" => head_ds
+            .checkout_branch(b)
+            .await
+            .map_err(|e| OmniError::Lance(e.to_string()))?,
+        _ => head_ds,
+    };
+    let head_version = head_ds.version().version;
+    let row_count = head_ds
+        .count_rows(None)
+        .await
+        .map_err(|e| OmniError::Lance(e.to_string()))? as u64;
+    let table_relative_path = super::table_path_for_table_key(table_key)?;
+    let version_metadata = super::metadata::TableVersionMetadata::from_dataset(
+        root_uri,
+        &table_relative_path,
+        &head_ds,
+    )?;
+    updates.push(ManifestChange::Update(SubTableUpdate {
+        table_key: table_key.to_string(),
+        table_version: head_version,
+        table_branch: branch.map(str::to_string),
+        row_count,
+        version_metadata,
+    }));
+    expected.insert(table_key.to_string(), expected_version);
+    Ok(head_version)
+}
+
 /// Append the audit row describing this recovery action.
 ///
 /// Two-part write: (a) `_graph_commits.lance` row anchored on the recovery
--- a/crates/omnigraph/src/db/manifest/tests.rs
+++ b/crates/omnigraph/src/db/manifest/tests.rs
@ -1461,6 +1461,80 @@ async fn test_publish_migrates_pre_stamp_manifest_to_current_version() {
    assert!(reopened.snapshot().entry("node:Person").is_some());
 }

+#[tokio::test]
+async fn test_v2_to_v3_sweeps_legacy_run_branches_on_write_open() {
+    let dir = tempfile::tempdir().unwrap();
+    let uri = dir.path().to_str().unwrap();
+    let catalog = build_test_catalog();
+    let mut mc = ManifestCoordinator::init(uri, &catalog).await.unwrap();
+
+    // Synthesize a pre-MR-770 graph: several stale `__run__` staging branches
+    // left on `__manifest` (a real legacy graph accumulates one per run), plus
+    // a real user branch that must survive the sweep. Multiple run branches
+    // exercise the migration's delete loop on a single reused dataset handle.
+    mc.create_branch("__run__01J9LEGACY").await.unwrap();
+    mc.create_branch("__run__01J9SECOND").await.unwrap();
+    mc.create_branch("__run__01J9THIRD").await.unwrap();
+    mc.create_branch("feature").await.unwrap();
+    let before = mc.list_branches().await.unwrap();
+    assert_eq!(
+        before.iter().filter(|b| b.starts_with("__run__")).count(),
+        3,
+        "precondition: three legacy run branches exist on __manifest; got {before:?}",
+    );
+
+    // Rewind the internal-schema stamp to v2 so the next write-open runs the
+    // v2 → v3 sweep arm (init stamps at the current version, which is past it).
+    {
+        let mut ds = open_manifest_dataset(uri, None).await.unwrap();
+        ds.update_schema_metadata([(
+            "omnigraph:internal_schema_version".to_string(),
+            Some("2".to_string()),
+        )])
+        .await
+        .unwrap();
+        let post = open_manifest_dataset(uri, None).await.unwrap();
+        assert_eq!(super::migrations::read_stamp(&post), 2, "stamp rewound to v2");
+    }
+
+    // A no-op publish forces the open-for-write path, which runs the migration.
+    let mut expected = HashMap::new();
+    expected.insert("node:Person".to_string(), 1);
+    GraphNamespacePublisher::new(uri, None)
+        .publish(&[], &expected)
+        .await
+        .unwrap();
+
+    // Stamp advanced to current; the legacy run branch is physically gone from
+    // `__manifest` (checked via the raw, unfiltered manifest list — not the
+    // guard-filtered `branch_list`), and the real branch + `main` survive.
+    let post = open_manifest_dataset(uri, None).await.unwrap();
+    assert_eq!(
+        super::migrations::read_stamp(&post),
+        super::migrations::INTERNAL_MANIFEST_SCHEMA_VERSION,
+    );
+    let reopened = ManifestCoordinator::open(uri).await.unwrap();
+    let after = reopened.list_branches().await.unwrap();
+    assert!(
+        !after.iter().any(|b| b.starts_with("__run__")),
+        "legacy run branch must be swept; got {after:?}",
+    );
+    assert!(after.iter().any(|b| b == "feature"), "user branch must survive");
+    assert!(after.iter().any(|b| b == "main"), "main must survive");
+
+    // Idempotent: a second write-open finds the stamp at current and does not
+    // re-run the sweep or error.
+    GraphNamespacePublisher::new(uri, None)
+        .publish(&[], &expected)
+        .await
+        .unwrap();
+    let final_ds = open_manifest_dataset(uri, None).await.unwrap();
+    assert_eq!(
+        super::migrations::read_stamp(&final_ds),
+        super::migrations::INTERNAL_MANIFEST_SCHEMA_VERSION,
+    );
+}
+
 #[tokio::test]
 async fn test_publish_rejects_manifest_stamped_at_future_version() {
    let dir = tempfile::tempdir().unwrap();
--- a/crates/omnigraph/src/db/mod.rs
+++ b/crates/omnigraph/src/db/mod.rs
@ -3,7 +3,6 @@ pub mod graph_coordinator;
 pub mod manifest;
 mod omnigraph;
 mod recovery_audit;
-mod run_registry;
 mod schema_state;
 pub(crate) mod write_queue;

@ -12,10 +11,10 @@ pub use graph_coordinator::{GraphCoordinator, ReadTarget, ResolvedTarget, Snapsh
 pub use manifest::{Snapshot, SubTableEntry, SubTableUpdate};
 pub(crate) use omnigraph::ensure_public_branch_ref;
 pub use omnigraph::{
-    CleanupPolicyOptions, InitOptions, MergeOutcome, Omnigraph, OpenMode, SchemaApplyOptions,
-    SchemaApplyResult, SkipReason, TableCleanupStats, TableOptimizeStats,
+    CleanupPolicyOptions, InitOptions, MergeOutcome, Omnigraph, OpenMode, RepairAction,
+    RepairClassification, RepairOptions, RepairStats, SchemaApplyOptions, SchemaApplyResult,
+    SkipReason, TableCleanupStats, TableOptimizeStats, TableRepairStats,
 };
-pub(crate) use run_registry::is_internal_run_branch;

 pub(crate) const SCHEMA_APPLY_LOCK_BRANCH: &str = "__schema_apply_lock__";

@ -69,5 +68,8 @@ pub(crate) fn is_schema_apply_lock_branch(name: &str) -> bool {
 }

 pub(crate) fn is_internal_system_branch(name: &str) -> bool {
-    is_internal_run_branch(name) || is_schema_apply_lock_branch(name)
+    // Legacy `__run__*` staging branches (Run state machine, removed MR-771)
+    // are swept off `__manifest` by the v2→v3 internal-schema migration, so the
+    // only internal branch the engine still creates is the schema-apply lock.
+    is_schema_apply_lock_branch(name)
 }
--- a/crates/omnigraph/src/db/omnigraph.rs
+++ b/crates/omnigraph/src/db/omnigraph.rs
@ -30,10 +30,14 @@ use crate::table_store::TableStore;

 mod export;
 mod optimize;
+mod repair;
 mod schema_apply;
 mod table_ops;

 pub use optimize::{CleanupPolicyOptions, SkipReason, TableCleanupStats, TableOptimizeStats};
+pub use repair::{
+    RepairAction, RepairClassification, RepairOptions, RepairStats, TableRepairStats,
+};
 pub use schema_apply::SchemaApplyOptions;

 use super::commit_graph::GraphCommit;
@ -346,6 +350,16 @@ impl Omnigraph {
        mode: OpenMode,
    ) -> Result<Self> {
        let root = normalize_root_uri(uri)?;
+        // Apply pending internal-schema migrations before the coordinator reads
+        // branch state, so `branch_list` and the schema-apply blocking-branch
+        // checks observe the post-migration graph — notably the v2→v3 sweep of
+        // legacy `__run__*` staging branches (MR-770). ReadWrite only: a
+        // read-only open must not trigger object-store writes, so a read-only
+        // open of an unmigrated legacy graph still lists `__run__*` until its
+        // first read-write open (an accepted, documented limitation).
+        if matches!(mode, OpenMode::ReadWrite) {
+            crate::db::manifest::migrate_on_open(&root).await?;
+        }
        // Open the coordinator first so the schema-staging recovery sweep can
        // compare its snapshot against any leftover staging files.
        let mut coordinator = GraphCoordinator::open(&root, Arc::clone(&storage)).await?;
@ -672,6 +686,16 @@ impl Omnigraph {
            .map(|resolved| resolved.snapshot)
    }

+    pub(crate) async fn fresh_snapshot_for_branch(&self, branch: Option<&str>) -> Result<Snapshot> {
+        self.ensure_schema_state_valid().await?;
+        let requested = ReadTarget::Branch(branch.unwrap_or("main").to_string());
+        let coord = self.coordinator.read().await;
+        coord
+            .resolve_target(&requested)
+            .await
+            .map(|resolved| resolved.snapshot)
+    }
+
    pub(crate) async fn version(&self) -> u64 {
        self.coordinator.read().await.version()
    }
@ -989,6 +1013,13 @@ impl Omnigraph {
        optimize::optimize_all_tables(self).await
    }

+    /// Classify and explicitly repair uncovered manifest/head drift. See
+    /// [`repair`] for the distinction between safe maintenance drift and
+    /// suspicious/unverifiable drift.
+    pub async fn repair(&self, options: repair::RepairOptions) -> Result<repair::RepairStats> {
+        repair::repair_all_tables(self, options).await
+    }
+
    /// Remove Lance manifests (and the fragments they uniquely own) per the
    /// given [`optimize::CleanupPolicyOptions`]. Destructive to version
    /// history. See [`optimize`] for details.
@ -1495,12 +1526,6 @@ pub(crate) fn normalize_branch_name(branch: &str) -> Result<Option<String>> {
 }

 pub(crate) fn ensure_public_branch_ref(branch: &str, operation: &str) -> Result<()> {
-    if super::is_internal_run_branch(branch) {
-        return Err(OmniError::manifest(format!(
-            "{} does not allow internal run ref '{}'",
-            operation, branch
-        )));
-    }
    if is_internal_system_branch(branch) {
        return Err(OmniError::manifest(format!(
            "{} does not allow internal system ref '{}'",
@ -1904,7 +1929,6 @@ fn json_value_from_array(array: &dyn Array, row: usize) -> Result<serde_json::Va
 #[cfg(test)]
 mod tests {
    use super::*;
-    use crate::db::is_internal_run_branch;
    use crate::db::manifest::ManifestCoordinator;
    use async_trait::async_trait;
    use serde_json::Value;
@ -2242,11 +2266,11 @@ edge WorksAt: Person -> Company
    #[tokio::test]
    async fn test_apply_schema_succeeds_after_load() {
        // Historical: schema apply used to be blocked by leftover
-        // `__run__` branches. A defense-in-depth filter now skips
-        // internal system branches, and run branches were made
-        // ephemeral on every terminal state — so in practice no
-        // `__run__` branch survives publish. The filter still guards
-        // the invariant.
+        // `__run__` branches. The Run state machine was removed in
+        // MR-771, so a fresh graph never creates a `__run__` branch;
+        // legacy ones are swept by the v2→v3 manifest migration. This
+        // asserts the invariant a current graph upholds: publish leaves
+        // no `__run__` branch behind, so schema apply proceeds.
        let dir = tempfile::tempdir().unwrap();
        let uri = dir.path().to_str().unwrap();
        let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
@ -2261,8 +2285,8 @@ edge WorksAt: Person -> Company

        let all_branches = db.coordinator.read().await.all_branches().await.unwrap();
        assert!(
-            !all_branches.iter().any(|b| is_internal_run_branch(b)),
-            "run branch should be deleted after publish, got: {:?}",
+            !all_branches.iter().any(|b| b.starts_with("__run__")),
+            "no __run__ branch should exist after publish, got: {:?}",
            all_branches
        );

@ -2274,6 +2298,56 @@ edge WorksAt: Person -> Company
        assert!(result.applied, "schema apply should have applied");
    }

+    /// Regression (MR-770): a pre-v0.4.0 graph that still carries a stale
+    /// `__run__*` branch on `__manifest` must not block schema apply. The
+    /// v2→v3 sweep runs in `Omnigraph::open(ReadWrite)` — before the
+    /// schema-apply blocking-branch check — so apply succeeds with no
+    /// intervening publish.
+    ///
+    /// Confirmed to fail before the open-time migration landed: the reopened
+    /// graph still listed `__run__legacy`, and `apply_schema` returned
+    /// "found non-main branches: __run__legacy".
+    #[tokio::test]
+    async fn legacy_run_branch_is_swept_on_open_and_does_not_block_schema_apply() {
+        let dir = tempfile::tempdir().unwrap();
+        let uri = dir.path().to_str().unwrap();
+        let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
+
+        // Synthesize a legacy graph: a stale `__run__` branch on `__manifest`
+        // plus the manifest stamp rewound to v2 (pre-sweep).
+        db.branch_create("__run__legacy").await.unwrap();
+        drop(db);
+        {
+            let mut ds = lance::Dataset::open(&format!("{}/__manifest", uri))
+                .await
+                .unwrap();
+            ds.update_schema_metadata([(
+                "omnigraph:internal_schema_version".to_string(),
+                Some("2".to_string()),
+            )])
+            .await
+            .unwrap();
+        }
+
+        // Reopen (ReadWrite): the open-time migration must sweep `__run__legacy`
+        // before any branch-observing code runs.
+        let db = Omnigraph::open(uri).await.unwrap();
+        let branches = db.branch_list().await.unwrap();
+        assert!(
+            !branches.iter().any(|b| b.starts_with("__run__")),
+            "open-time migration must sweep legacy __run__ branches; got {branches:?}",
+        );
+
+        // Schema apply must proceed with no intervening publish — the
+        // blocking-branch check no longer sees `__run__legacy`.
+        let desired = TEST_SCHEMA.replace(
+            "    age: I32?\n}",
+            "    age: I32?\n    nickname: String?\n}",
+        );
+        let result = db.apply_schema(&desired).await.unwrap();
+        assert!(result.applied, "schema apply should have applied");
+    }
+
    #[tokio::test]
    async fn test_apply_schema_adds_index_for_existing_property() {
        let dir = tempfile::tempdir().unwrap();
--- a/crates/omnigraph/src/db/omnigraph/optimize.rs
+++ b/crates/omnigraph/src/db/omnigraph/optimize.rs
@ -8,8 +8,14 @@
 //! Two dials:
 //!
 //! * `optimize_all_tables` — Lance `compact_files` on every table. Rewrites
-//!   small fragments into fewer large ones. Non-destructive (creates a new
-//!   version; old fragments remain reachable via older manifest versions).
+//!   small fragments into fewer large ones, then **publishes the compacted
+//!   version to the `__manifest`** so the manifest's `table_version` tracks the
+//!   compacted Lance HEAD (reads pin the manifest version, so without the
+//!   publish compaction would be invisible to readers and would break the
+//!   HEAD-vs-manifest precondition of schema apply / strict writes). Compaction
+//!   is content-preserving (Lance `Operation::Rewrite` "reorganizes data
+//!   without semantic modification"), so old fragments remain reachable via
+//!   older manifest versions until `cleanup` runs.
 //! * `cleanup_all_tables` — Lance `cleanup_old_versions` on every table.
 //!   Removes manifests (and their unique fragments) older than the configured
 //!   retention. Destructive to version history — callers should gate this
@ -23,7 +29,9 @@ use std::time::Duration;
 use chrono::Utc;
 use futures::stream::StreamExt;
 use lance::dataset::cleanup::{CleanupPolicy, RemovalStats};
-use lance::dataset::optimize::{CompactionMetrics, CompactionOptions, compact_files};
+use lance::dataset::optimize::{
+    CompactionMetrics, CompactionOptions, compact_files, plan_compaction,
+};

 use super::*;

@ -67,8 +75,7 @@ pub struct CleanupPolicyOptions {
 }

 /// Why `optimize` did not compact a table. Typed so callers branch on the
-/// reason rather than sniffing a string. One variant today, gated by
-/// [`LANCE_SUPPORTS_BLOB_COMPACTION`].
+/// reason rather than sniffing a string.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 #[non_exhaustive]
 pub enum SkipReason {
@ -76,6 +83,12 @@ pub enum SkipReason {
    /// `BlobHandling::AllBinary`, which mis-decodes blob-v2 columns; see
    /// [`LANCE_SUPPORTS_BLOB_COMPACTION`] and `docs/dev/lance.md`.
    BlobColumnsUnsupportedByLance,
+    /// The Lance dataset HEAD is ahead of the version recorded in
+    /// `__manifest`, and no recovery sidecar covers that movement. `optimize`
+    /// cannot infer whether the drift is benign maintenance or an external
+    /// semantic write, so it leaves the table untouched and points operators at
+    /// explicit `repair`.
+    DriftNeedsRepair,
 }

 impl SkipReason {
@ -84,6 +97,7 @@ impl SkipReason {
    pub fn as_str(&self) -> &'static str {
        match self {
            SkipReason::BlobColumnsUnsupportedByLance => "blob_columns_unsupported_by_lance",
+            SkipReason::DriftNeedsRepair => "drift_needs_repair",
        }
    }
 }
@ -95,6 +109,7 @@ impl std::fmt::Display for SkipReason {
            SkipReason::BlobColumnsUnsupportedByLance => {
                "blob columns — Lance compaction unsupported"
            }
+            SkipReason::DriftNeedsRepair => "manifest/head drift — run omnigraph repair",
        };
        f.write_str(msg)
    }
@ -111,11 +126,18 @@ pub struct TableOptimizeStats {
    pub fragments_removed: usize,
    /// Number of new, larger fragments Lance produced.
    pub fragments_added: usize,
-    /// Did this table get a new Lance manifest version from the compaction?
+    /// Did this table get a new manifest version from the compaction? True when
+    /// compaction ran and its compacted version was published to `__manifest`.
    pub committed: bool,
    /// `Some(reason)` if this table was deliberately not compacted. When set,
    /// `fragments_removed == 0`, `fragments_added == 0`, and `!committed`.
    pub skipped: Option<SkipReason>,
+    /// Manifest table version observed by optimize for drift skips. `None` for
+    /// normal compaction/no-op/blob skips.
+    pub manifest_version: Option<u64>,
+    /// Lance HEAD version observed by optimize for drift skips. `None` for
+    /// normal compaction/no-op/blob skips.
+    pub lance_head_version: Option<u64>,
 }

 impl TableOptimizeStats {
@ -127,6 +149,8 @@ impl TableOptimizeStats {
            fragments_added: metrics.fragments_added,
            committed,
            skipped: None,
+            manifest_version: None,
+            lance_head_version: None,
        }
    }

@ -138,6 +162,25 @@ impl TableOptimizeStats {
            fragments_added: 0,
            committed: false,
            skipped: Some(reason),
+            manifest_version: None,
+            lance_head_version: None,
+        }
+    }
+
+    /// Stat for a table skipped because the manifest and Lance HEAD disagree.
+    fn skipped_for_drift(
+        table_key: String,
+        manifest_version: u64,
+        lance_head_version: u64,
+    ) -> Self {
+        Self {
+            table_key,
+            fragments_removed: 0,
+            fragments_added: 0,
+            committed: false,
+            skipped: Some(SkipReason::DriftNeedsRepair),
+            manifest_version: Some(manifest_version),
+            lance_head_version: Some(lance_head_version),
        }
    }
 }
@ -153,14 +196,30 @@ pub struct TableCleanupStats {
    pub error: Option<String>,
 }

-/// Run Lance `compact_files` on every node + edge table on `main`.
-/// Tables run in parallel (bounded concurrency).
+/// Run Lance `compact_files` on every node + edge table on `main`, publishing
+/// each compacted table's new version to the `__manifest`. Tables run in
+/// parallel (bounded concurrency); each is fault-isolated only at the Lance
+/// level — a publish error is propagated (the recovery sidecar covers it).
 pub async fn optimize_all_tables(db: &Omnigraph) -> Result<Vec<TableOptimizeStats>> {
    db.ensure_schema_state_valid().await?;
    db.ensure_schema_apply_idle("optimize").await?;

-    let resolved = db.resolved_branch_target(None).await?;
-    let snapshot = resolved.snapshot;
+    // Refuse on an unrecovered graph. A pending recovery sidecar means a failed
+    // write left partial state that the open-time sweep must resolve (roll
+    // forward/back) first; compacting + publishing a table covered by such a
+    // sidecar could commit a partial write the sweep would roll back. Reopen the
+    // graph to run recovery, then re-run optimize.
+    if !crate::db::manifest::list_sidecars(db.root_uri(), db.storage_adapter())
+        .await?
+        .is_empty()
+    {
+        return Err(OmniError::manifest_conflict(
+            "optimize requires a clean recovery state; reopen the graph to run the \
+             recovery sweep before optimizing",
+        ));
+    }
+
+    let snapshot = db.fresh_snapshot_for_branch(None).await?;

    // Compute per-table state (path + whether it has blob columns) up front, in
    // a scope that drops the catalog handle before the async stream starts.
@ -183,49 +242,201 @@ pub async fn optimize_all_tables(db: &Omnigraph) -> Result<Vec<TableOptimizeStat
    }

    let concurrency = maint_concurrency().min(table_tasks.len()).max(1);
-    let table_store = &db.table_store;

    let stats: Vec<Result<TableOptimizeStats>> = futures::stream::iter(table_tasks.into_iter())
-        .map(|(table_key, full_path, has_blob)| async move {
-            // Lance `compact_files` mis-decodes blob-v2 columns under the forced
-            // `BlobHandling::AllBinary` read (see LANCE_SUPPORTS_BLOB_COMPACTION).
-            // Skip blob-bearing tables and report it rather than aborting the
-            // whole sweep — the other tables still compact.
-            if has_blob && !LANCE_SUPPORTS_BLOB_COMPACTION {
-                tracing::warn!(
-                    target: "omnigraph::optimize",
-                    table = %table_key,
-                    "skipping compaction: table has blob columns the current Lance \
-                     cannot rewrite (blob-v2 AllBinary decode bug); other tables \
-                     unaffected — rerun after the Lance fix",
-                );
-                return Ok(TableOptimizeStats::skipped(
-                    table_key,
-                    SkipReason::BlobColumnsUnsupportedByLance,
-                ));
-            }
-            let mut ds = table_store
-                .open_dataset_head_for_write(&table_key, &full_path, None)
-                .await?;
-            let version_before = ds.version().version;
-            let metrics: CompactionMetrics =
-                compact_files(&mut ds, CompactionOptions::default(), None)
-                    .await
-                    .map_err(|e| OmniError::Lance(e.to_string()))?;
-            let version_after = ds.version().version;
-            Ok(TableOptimizeStats::compacted(
-                table_key,
-                &metrics,
-                version_after != version_before,
-            ))
+        .map(move |(table_key, full_path, has_blob)| async move {
+            optimize_one_table(db, table_key, full_path, has_blob).await
        })
        .buffer_unordered(concurrency)
        .collect()
        .await;

+    // Invalidate caches for any table that published a compaction — done BEFORE
+    // propagating a sibling table's error, since the published versions are
+    // durable and reads must observe the new fragment layout (Lance invalidates
+    // the original row addresses on rewrite). The CSR/CSC graph topology index
+    // is rebuilt only when an edge table moved. Mirrors schema_apply's
+    // post-publish invalidation.
+    let any_committed = stats
+        .iter()
+        .any(|s| matches!(s, Ok(st) if st.committed));
+    let edge_committed = stats
+        .iter()
+        .any(|s| matches!(s, Ok(st) if st.committed && st.table_key.starts_with("edge:")));
+    if any_committed {
+        db.runtime_cache.invalidate_all().await;
+        if edge_committed {
+            db.invalidate_graph_index().await;
+        }
+    }
+
    stats.into_iter().collect()
 }

+/// Compact one table and publish the compacted version to the `__manifest`.
+///
+/// Compaction (`compact_files`) advances the *dataset's* Lance HEAD via a
+/// reserve-fragments + rewrite commit, but Lance knows nothing about the
+/// `__manifest`. To keep the manifest the single authority for each table's
+/// visible version (invariant 2), optimize must publish the compacted version.
+/// The Lance-HEAD-before-manifest-publish gap is unavoidable (Lance has no
+/// staged/uncommitted compaction), so it is covered by a recovery sidecar like
+/// the other multi-commit writers; roll-forward is always safe because
+/// compaction is content-preserving.
+async fn optimize_one_table(
+    db: &Omnigraph,
+    table_key: String,
+    full_path: String,
+    has_blob: bool,
+) -> Result<TableOptimizeStats> {
+    // Lance `compact_files` mis-decodes blob-v2 columns under the forced
+    // `BlobHandling::AllBinary` read (see LANCE_SUPPORTS_BLOB_COMPACTION). Skip
+    // blob-bearing tables before acquiring the write queue; `repair` is the
+    // operator tool for full manifest/head drift classification.
+    if has_blob && !LANCE_SUPPORTS_BLOB_COMPACTION {
+        tracing::warn!(
+            target: "omnigraph::optimize",
+            table = %table_key,
+            "skipping compaction: table has blob columns the current Lance \
+             cannot rewrite (blob-v2 AllBinary decode bug); other tables \
+             unaffected — rerun after the Lance fix",
+        );
+        return Ok(TableOptimizeStats::skipped(
+            table_key,
+            SkipReason::BlobColumnsUnsupportedByLance,
+        ));
+    }
+
+    // Serialize the whole compact→publish against concurrent mutations on this
+    // (table, main): compaction is a Rewrite op that retryable-conflicts with a
+    // concurrent Merge/Update/Delete on overlapping fragments, and an
+    // interleaved write would also move the manifest version out from under the
+    // CAS below. Holding the queue makes the CAS baseline read under it exact.
+    let _guard = db
+        .write_queue()
+        .acquire_many(&[(table_key.clone(), None)])
+        .await;
+
+    let mut ds = db
+        .table_store
+        .open_dataset_head_for_write(&table_key, &full_path, None)
+        .await?;
+
+    // CAS baseline: the table's current manifest version, read under the queue
+    // (in-memory coordinator snapshot, no storage I/O — stable for this section).
+    let expected_version = db
+        .fresh_snapshot_for_branch(None)
+        .await?
+        .entry(&table_key)
+        .map(|e| e.table_version)
+        .ok_or_else(|| OmniError::manifest(format!("no manifest entry for {}", table_key)))?;
+
+    let lance_head_version = ds.version().version;
+    if lance_head_version < expected_version {
+        return Err(OmniError::manifest_internal(format!(
+            "table '{}' Lance HEAD version {} is behind manifest version {}",
+            table_key, lance_head_version, expected_version
+        )));
+    }
+    if lance_head_version > expected_version {
+        tracing::warn!(
+            target: "omnigraph::optimize",
+            table = %table_key,
+            manifest_version = expected_version,
+            lance_head_version,
+            "skipping compaction: Lance HEAD is ahead of the manifest; run `omnigraph repair` \
+             to classify and publish covered maintenance drift explicitly",
+        );
+        return Ok(TableOptimizeStats::skipped_for_drift(
+            table_key,
+            expected_version,
+            lance_head_version,
+        ));
+    }
+
+    // Precise "will it compact?" check — `plan_compaction` also accounts for
+    // deletion materialization (which can rewrite even a single fragment). A
+    // steady-state already-compacted table yields an empty plan and is never
+    // pinned in a sidecar (a zero-commit pin would classify NoMovement on
+    // recovery and force an all-or-nothing rollback). Uncovered pre-existing
+    // drift is skipped above and must go through explicit repair.
+    let options = CompactionOptions::default();
+    let plan = plan_compaction(&ds, &options)
+        .await
+        .map_err(|e| OmniError::Lance(e.to_string()))?;
+    if plan.num_tasks() == 0 {
+        return Ok(TableOptimizeStats::compacted(
+            table_key,
+            &CompactionMetrics::default(),
+            false,
+        ));
+    }
+
+    // Phase A: recovery sidecar BEFORE compaction advances the Lance HEAD, so a
+    // crash before the manifest publish rolls forward on next open.
+    let sidecar = crate::db::manifest::new_sidecar(
+        crate::db::manifest::SidecarKind::Optimize,
+        None,
+        // optimize is system-attributed (no `optimize_as` actor API today).
+        None,
+        vec![crate::db::manifest::SidecarTablePin {
+            table_key: table_key.clone(),
+            table_path: full_path.clone(),
+            expected_version,
+            // Lower bound — compaction commits N≥1 versions (reserve + rewrite);
+            // the classifier loose-matches SidecarKind::Optimize.
+            post_commit_pin: expected_version + 1,
+            table_branch: None,
+        }],
+    );
+    let handle =
+        crate::db::manifest::write_sidecar(db.root_uri(), db.storage_adapter(), &sidecar).await?;
+
+    // Phase B: compaction (reserve-fragments + rewrite commits advance HEAD).
+    let version_before = ds.version().version;
+    let metrics: CompactionMetrics = compact_files(&mut ds, options, None)
+        .await
+        .map_err(|e| OmniError::Lance(e.to_string()))?;
+    let version_after = ds.version().version;
+    let committed = version_after != version_before;
+
+    // Pin the per-writer Phase B → Phase C residual for optimize: Lance HEAD has
+    // advanced but the manifest publish below hasn't run.
+    crate::failpoints::maybe_fail("optimize.post_phase_b_pre_manifest_commit")?;
+
+    // Phase C: publish the compacted version to the manifest (one CAS commit,
+    // expected = the version observed under the queue). On failure the sidecar
+    // is intentionally left for the open-time recovery sweep to roll forward.
+    if committed {
+        let state = db.table_store.table_state(&full_path, &ds).await?;
+        let update = crate::db::SubTableUpdate {
+            table_key: table_key.clone(),
+            table_version: state.version,
+            table_branch: None,
+            row_count: state.row_count,
+            version_metadata: state.version_metadata,
+        };
+        let mut expected = std::collections::HashMap::new();
+        expected.insert(table_key.clone(), expected_version);
+        db.coordinator
+            .write()
+            .await
+            .commit_updates_with_actor_with_expected(&[update], &expected, None)
+            .await?;
+    }
+
+    // Phase D: delete the sidecar (best-effort; recovery resolves a leftover).
+    if let Err(err) = crate::db::manifest::delete_sidecar(&handle, db.storage_adapter()).await {
+        tracing::warn!(
+            error = %err,
+            operation_id = handle.operation_id.as_str(),
+            "optimize recovery sidecar cleanup failed; next open's recovery sweep will resolve it"
+        );
+    }
+
+    Ok(TableOptimizeStats::compacted(table_key, &metrics, committed))
+}
+
 /// Run Lance `cleanup_old_versions` on every node + edge table on `main`,
 /// using [`CleanupPolicyOptions`]. The latest manifest is always preserved
 /// regardless (Lance invariant).
@ -493,7 +704,7 @@ fn orphan_branches(present: Vec<String>, keep: &std::collections::HashSet<String
    orphans
 }

-fn all_table_keys(catalog: &omnigraph_compiler::catalog::Catalog) -> Vec<String> {
+pub(super) fn all_table_keys(catalog: &omnigraph_compiler::catalog::Catalog) -> Vec<String> {
    let mut keys: Vec<String> = catalog
        .node_types
        .keys()
--- a/crates/omnigraph/src/db/omnigraph/repair.rs
+++ b/crates/omnigraph/src/db/omnigraph/repair.rs
@ -0,0 +1,332 @@
+//! Explicit repair for uncovered manifest/head drift.
+//!
+//! Recovery sidecars handle deterministic crash residuals automatically. This
+//! module is for the different case: a table's Lance HEAD is ahead of the
+//! version recorded in `__manifest` and there is no sidecar encoding writer
+//! intent. `repair` classifies that uncovered drift from Lance transactions and
+//! only auto-publishes maintenance-only drift when the operator confirms.
+
+use std::collections::HashMap;
+
+use lance::Dataset;
+use lance::dataset::transaction::Operation;
+
+use super::*;
+
+/// Options for [`Omnigraph::repair`].
+#[derive(Debug, Clone, Copy, Default)]
+pub struct RepairOptions {
+    /// Preview by default. With `confirm`, verified maintenance drift is
+    /// published to `__manifest`.
+    pub confirm: bool,
+    /// Also publish suspicious/unverifiable drift. Requires `confirm`.
+    pub force: bool,
+}
+
+/// Classification of a table's manifest/head state.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[non_exhaustive]
+pub enum RepairClassification {
+    /// Lance HEAD equals the manifest pin.
+    NoDrift,
+    /// Every uncovered Lance transaction is maintenance-only (`Rewrite` or
+    /// `ReserveFragments`), so publishing the HEAD is content-preserving.
+    VerifiedMaintenance,
+    /// At least one uncovered transaction is semantic (`Append`, `Delete`,
+    /// `Update`, etc.).
+    Suspicious,
+    /// A needed transaction could not be read, so the drift cannot be judged.
+    Unverifiable,
+}
+
+impl RepairClassification {
+    /// Stable machine-readable token for serialized output.
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            Self::NoDrift => "no_drift",
+            Self::VerifiedMaintenance => "verified_maintenance",
+            Self::Suspicious => "suspicious",
+            Self::Unverifiable => "unverifiable",
+        }
+    }
+}
+
+impl std::fmt::Display for RepairClassification {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(self.as_str())
+    }
+}
+
+/// What repair did for a table.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[non_exhaustive]
+pub enum RepairAction {
+    /// Nothing to do.
+    NoOp,
+    /// Drift was reported but not published because this was a preview.
+    Preview,
+    /// Verified maintenance drift was published to `__manifest`.
+    Healed,
+    /// Suspicious/unverifiable drift was published because `force` was set.
+    Forced,
+    /// Drift was left untouched because it was not safe to publish without
+    /// `force`.
+    Refused,
+}
+
+impl RepairAction {
+    /// Stable machine-readable token for serialized output.
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            Self::NoOp => "no_op",
+            Self::Preview => "preview",
+            Self::Healed => "healed",
+            Self::Forced => "forced",
+            Self::Refused => "refused",
+        }
+    }
+}
+
+impl std::fmt::Display for RepairAction {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(self.as_str())
+    }
+}
+
+/// Per-table repair outcome.
+#[derive(Debug, Clone)]
+#[non_exhaustive]
+pub struct TableRepairStats {
+    pub table_key: String,
+    pub manifest_version: u64,
+    pub lance_head_version: u64,
+    pub classification: RepairClassification,
+    pub action: RepairAction,
+    pub operations: Vec<String>,
+    pub error: Option<String>,
+}
+
+/// Whole-graph repair outcome.
+#[derive(Debug, Clone)]
+#[non_exhaustive]
+pub struct RepairStats {
+    pub tables: Vec<TableRepairStats>,
+    /// New graph manifest version if repair published any table pins.
+    pub manifest_version: Option<u64>,
+}
+
+struct ClassificationResult {
+    classification: RepairClassification,
+    operations: Vec<String>,
+    error: Option<String>,
+}
+
+pub async fn repair_all_tables(db: &Omnigraph, options: RepairOptions) -> Result<RepairStats> {
+    if options.force && !options.confirm {
+        return Err(OmniError::manifest("repair --force requires --confirm"));
+    }
+
+    db.ensure_schema_state_valid().await?;
+    db.ensure_schema_apply_idle("repair").await?;
+    ensure_no_pending_recovery_sidecars(db, "repair").await?;
+
+    let snapshot = db.fresh_snapshot_for_branch(None).await?;
+    let table_tasks: Vec<(String, String)> = {
+        let catalog = db.catalog();
+        let mut tasks = Vec::new();
+        for table_key in optimize::all_table_keys(&catalog) {
+            let Some(entry) = snapshot.entry(&table_key) else {
+                continue;
+            };
+            let full_path = format!("{}/{}", db.root_uri, entry.table_path);
+            tasks.push((table_key, full_path));
+        }
+        tasks
+    };
+
+    if table_tasks.is_empty() {
+        return Ok(RepairStats {
+            tables: Vec::new(),
+            manifest_version: None,
+        });
+    }
+
+    let queue_keys: Vec<(String, Option<String>)> = table_tasks
+        .iter()
+        .map(|(table_key, _)| (table_key.clone(), None))
+        .collect();
+    let _guards = db.write_queue().acquire_many(&queue_keys).await;
+    ensure_no_pending_recovery_sidecars(db, "repair").await?;
+
+    let snapshot = db.fresh_snapshot_for_branch(None).await?;
+    let mut tables = Vec::with_capacity(table_tasks.len());
+    let mut updates = Vec::new();
+    let mut expected = HashMap::new();
+    let mut any_forced = false;
+
+    for (table_key, full_path) in table_tasks {
+        let ds = db
+            .table_store
+            .open_dataset_head_for_write(&table_key, &full_path, None)
+            .await?;
+        let manifest_version = snapshot
+            .entry(&table_key)
+            .map(|e| e.table_version)
+            .ok_or_else(|| OmniError::manifest(format!("no manifest entry for {}", table_key)))?;
+        let lance_head_version = ds.version().version;
+
+        if lance_head_version < manifest_version {
+            return Err(OmniError::manifest_internal(format!(
+                "table '{}' Lance HEAD version {} is behind manifest version {}",
+                table_key, lance_head_version, manifest_version
+            )));
+        }
+
+        if lance_head_version == manifest_version {
+            tables.push(TableRepairStats {
+                table_key,
+                manifest_version,
+                lance_head_version,
+                classification: RepairClassification::NoDrift,
+                action: RepairAction::NoOp,
+                operations: Vec::new(),
+                error: None,
+            });
+            continue;
+        }
+
+        let classification = classify_drift(&ds, manifest_version, lance_head_version).await;
+        let action = match (
+            options.confirm,
+            options.force,
+            classification.classification,
+        ) {
+            (false, _, _) => RepairAction::Preview,
+            (true, _, RepairClassification::VerifiedMaintenance) => RepairAction::Healed,
+            (true, true, RepairClassification::Suspicious | RepairClassification::Unverifiable) => {
+                any_forced = true;
+                RepairAction::Forced
+            }
+            (true, _, RepairClassification::Suspicious | RepairClassification::Unverifiable) => {
+                RepairAction::Refused
+            }
+            (true, _, RepairClassification::NoDrift) => RepairAction::NoOp,
+        };
+
+        if matches!(action, RepairAction::Healed | RepairAction::Forced) {
+            let state = db.table_store.table_state(&full_path, &ds).await?;
+            updates.push(crate::db::SubTableUpdate {
+                table_key: table_key.clone(),
+                table_version: state.version,
+                table_branch: None,
+                row_count: state.row_count,
+                version_metadata: state.version_metadata,
+            });
+            expected.insert(table_key.clone(), manifest_version);
+        }
+
+        tables.push(TableRepairStats {
+            table_key,
+            manifest_version,
+            lance_head_version,
+            classification: classification.classification,
+            action,
+            operations: classification.operations,
+            error: classification.error,
+        });
+    }
+
+    let manifest_version = if updates.is_empty() {
+        None
+    } else {
+        let actor = if any_forced {
+            Some("omnigraph:repair:force")
+        } else {
+            Some("omnigraph:repair")
+        };
+        let PublishedSnapshot {
+            manifest_version,
+            _snapshot_id: _,
+        } = db
+            .coordinator
+            .write()
+            .await
+            .commit_updates_with_actor_with_expected(&updates, &expected, actor)
+            .await?;
+        db.runtime_cache.invalidate_all().await;
+        if updates
+            .iter()
+            .any(|update| update.table_key.starts_with("edge:"))
+        {
+            db.invalidate_graph_index().await;
+        }
+        Some(manifest_version)
+    };
+
+    Ok(RepairStats {
+        tables,
+        manifest_version,
+    })
+}
+
+async fn ensure_no_pending_recovery_sidecars(db: &Omnigraph, operation: &str) -> Result<()> {
+    if !crate::db::manifest::list_sidecars(db.root_uri(), db.storage_adapter())
+        .await?
+        .is_empty()
+    {
+        return Err(OmniError::manifest_conflict(format!(
+            "{operation} requires a clean recovery state; reopen the graph to run the \
+             recovery sweep before repairing"
+        )));
+    }
+    Ok(())
+}
+
+async fn classify_drift(
+    ds: &Dataset,
+    manifest_version: u64,
+    lance_head_version: u64,
+) -> ClassificationResult {
+    let mut operations = Vec::new();
+    let mut saw_suspicious = false;
+    let mut error = None;
+
+    for version in manifest_version.saturating_add(1)..=lance_head_version {
+        match ds.read_transaction_by_version(version).await {
+            Ok(Some(transaction)) => {
+                let operation = transaction.operation;
+                operations.push(operation.name().to_string());
+                if !matches!(
+                    operation,
+                    Operation::Rewrite { .. } | Operation::ReserveFragments { .. }
+                ) {
+                    saw_suspicious = true;
+                }
+            }
+            Ok(None) => {
+                error = Some(format!("missing Lance transaction for version {version}"));
+                break;
+            }
+            Err(err) => {
+                error = Some(format!(
+                    "failed to read Lance transaction for version {version}: {err}"
+                ));
+                break;
+            }
+        }
+    }
+
+    let classification = if error.is_some() {
+        RepairClassification::Unverifiable
+    } else if saw_suspicious {
+        RepairClassification::Suspicious
+    } else {
+        RepairClassification::VerifiedMaintenance
+    };
+
+    ClassificationResult {
+        classification,
+        operations,
+        error,
+    }
+}
--- a/crates/omnigraph/src/db/omnigraph/schema_apply.rs
+++ b/crates/omnigraph/src/db/omnigraph/schema_apply.rs
@ -61,11 +61,11 @@ async fn plan_schema_for_apply(
 ) -> Result<PlannedSchemaApply> {
    db.ensure_schema_state_valid().await?;
    let branches = db.coordinator.read().await.all_branches().await?;
-    // Skip `main` and internal system branches. The schema-apply lock branch
-    // is excluded because it is the cluster-wide schema-apply serializer.
-    // `__run__*` branches are no longer created; the filter remains as
-    // defense-in-depth for legacy graphs with leftover staging branches.
-    // A future production sweep will let this guard go.
+    // Skip `main` and internal system branches (the schema-apply lock branch,
+    // the cluster-wide schema-apply serializer). Legacy `__run__*` staging
+    // branches were swept off `__manifest` by the v2→v3 migration that runs in
+    // `Omnigraph::open(ReadWrite)` before this check (MR-770), so they no
+    // longer appear here.
    let blocking_branches = branches
        .into_iter()
        .filter(|branch| branch != "main" && !is_internal_system_branch(branch))
--- a/crates/omnigraph/src/db/run_registry.rs
+++ b/crates/omnigraph/src/db/run_registry.rs
@ -1,16 +0,0 @@
-// The Run state machine has been removed. Mutations now write directly
-// to target tables and use the publisher's `expected_table_versions`
-// CAS for cross-table OCC; `__run__<id>` staging branches and the
-// `_graph_runs.lance` state machine no longer exist.
-//
-// What remains is the branch-name predicate, kept as a defense-in-depth
-// guard against users naming a public branch `__run__*`. A future
-// production sweep of legacy `_graph_runs.lance` rows and stale
-// `__run__*` branches will let this predicate (and this file) go too.
-
-pub(crate) const INTERNAL_RUN_BRANCH_PREFIX: &str = "__run__";
-
-pub(crate) fn is_internal_run_branch(name: &str) -> bool {
-    name.trim_start_matches('/')
-        .starts_with(INTERNAL_RUN_BRANCH_PREFIX)
-}
--- a/crates/omnigraph/src/exec/merge.rs
+++ b/crates/omnigraph/src/exec/merge.rs
@ -670,36 +670,34 @@ fn update_unique_constraints(
    table_key: &str,
    batch: &RecordBatch,
    constraints: &[Vec<String>],
-    seen: &mut [HashMap<String, String>],
+    seen: &mut [HashMap<Vec<String>, String>],
    conflicts: &mut Vec<MergeConflict>,
 ) -> Result<()> {
    for (constraint_idx, columns) in constraints.iter().enumerate() {
        let seen = &mut seen[constraint_idx];
-        for row in 0..batch.num_rows() {
-            let mut parts = Vec::with_capacity(columns.len());
-            let mut any_null = false;
-            for column_name in columns {
-                let column = batch.column_by_name(column_name).ok_or_else(|| {
+        // Resolve the group's columns once. The candidate dataset always
+        // carries the full table schema, so a missing column is an internal
+        // error rather than a skip.
+        let group_columns = columns
+            .iter()
+            .map(|column_name| {
+                batch.column_by_name(column_name).cloned().ok_or_else(|| {
                    OmniError::manifest(format!(
                        "table {} missing unique column '{}'",
                        table_key, column_name
                    ))
-                })?;
-                if column.is_null(row) {
-                    any_null = true;
-                    break;
-                }
-                parts.push(
-                    array_value_to_string(column.as_ref(), row)
-                        .map_err(|e| OmniError::Lance(e.to_string()))?,
-                );
-            }
-            if any_null {
+                })
+            })
+            .collect::<Result<Vec<_>>>()?;
+        for row in 0..batch.num_rows() {
+            // Same tuple key as the intake path — one shared derivation in
+            // `crate::loader::composite_unique_key`, so the two cannot drift on
+            // separator or scalar conversion. Null rows are exempt.
+            let Some(key) = crate::loader::composite_unique_key(&group_columns, row)? else {
                continue;
-            }
-            let value = parts.join("|");
+            };
            let row_id = row_id_at(batch, row)?;
-            if let Some(first_row_id) = seen.insert(value.clone(), row_id.clone()) {
+            if let Some(first_row_id) = seen.insert(key, row_id.clone()) {
                conflicts.push(MergeConflict {
                    table_key: table_key.to_string(),
                    row_id: Some(row_id.clone()),
@ -1087,9 +1085,9 @@ impl Omnigraph {
        target: &str,
        actor_id: Option<&str>,
    ) -> Result<MergeOutcome> {
-        if is_internal_run_branch(source) || is_internal_run_branch(target) {
+        if is_internal_system_branch(source) || is_internal_system_branch(target) {
            return Err(OmniError::manifest(format!(
-                "branch_merge does not allow internal run refs ('{}' -> '{}')",
+                "branch_merge does not allow internal system refs ('{}' -> '{}')",
                source, target
            )));
        }
--- a/crates/omnigraph/src/exec/mod.rs
+++ b/crates/omnigraph/src/exec/mod.rs
@ -35,7 +35,7 @@ use time::format_description::well_known::Rfc3339;

 use crate::db::commit_graph::CommitGraph;
 use crate::db::manifest::ManifestCoordinator;
-use crate::db::{MergeOutcome, Omnigraph, is_internal_run_branch};
+use crate::db::{MergeOutcome, Omnigraph, is_internal_system_branch};
 use crate::db::{ReadTarget, Snapshot};
 use crate::embedding::EmbeddingClient;
 use crate::error::{MergeConflict, MergeConflictKind, OmniError, Result};
--- a/crates/omnigraph/src/exec/mutation.rs
+++ b/crates/omnigraph/src/exec/mutation.rs
@ -569,7 +569,8 @@ use super::staging::{MutationStaging, PendingMode};
 /// via `open_for_mutation_on_branch`, which compares Lance HEAD against
 /// the manifest's pinned version — that fence is the engine's
 /// publisher-style OCC catching cross-writer drift before we make any
-/// changes.
+/// changes. For delete-only queries, this strict open is also the uncovered
+/// drift guard that runs before `delete_where` can inline-commit.
 ///
 /// On subsequent touches *within the same query*, behavior depends on
 /// whether the table has already been inline-committed by a delete op:
@ -904,12 +905,12 @@ impl Omnigraph {
            let batch = build_insert_batch(&schema, &id, &resolved, &blob_props)?;
            crate::loader::validate_value_constraints(&batch, node_type)?;
            crate::loader::validate_enum_constraints(&batch, &node_type.properties, type_name)?;
-            let unique_props = crate::loader::unique_property_names_for_node(node_type);
-            if !unique_props.is_empty() {
+            let unique_groups = crate::loader::unique_constraint_groups_for_node(node_type);
+            if !unique_groups.is_empty() {
                crate::loader::enforce_unique_constraints_intra_batch(
                    &batch,
                    type_name,
-                    &unique_props,
+                    &unique_groups,
                )?;
            }
            let has_key = node_type.key_property().is_some();
@ -945,12 +946,12 @@ impl Omnigraph {
            let batch = build_insert_batch(&schema, &id, &resolved, &blob_props)?;
            validate_edge_insert_endpoints(self, staging, branch, type_name, &resolved).await?;
            crate::loader::validate_enum_constraints(&batch, &edge_type.properties, type_name)?;
-            let unique_props = crate::loader::unique_property_names_for_edge(edge_type);
-            if !unique_props.is_empty() {
+            let unique_groups = crate::loader::unique_constraint_groups_for_edge(edge_type);
+            if !unique_groups.is_empty() {
                crate::loader::enforce_unique_constraints_intra_batch(
                    &batch,
                    type_name,
-                    &unique_props,
+                    &unique_groups,
                )?;
            }
            let table_key = format!("edge:{}", type_name);
@ -1093,12 +1094,12 @@ impl Omnigraph {
        let node_type = &self.catalog().node_types[type_name];
        crate::loader::validate_value_constraints(&updated, node_type)?;
        crate::loader::validate_enum_constraints(&updated, &node_type.properties, type_name)?;
-        let unique_props = crate::loader::unique_property_names_for_node(node_type);
-        if !unique_props.is_empty() {
+        let unique_groups = crate::loader::unique_constraint_groups_for_node(node_type);
+        if !unique_groups.is_empty() {
            crate::loader::enforce_unique_constraints_intra_batch(
                &updated,
                type_name,
-                &unique_props,
+                &unique_groups,
            )?;
        }

--- a/crates/omnigraph/src/exec/projection.rs
+++ b/crates/omnigraph/src/exec/projection.rs
@ -422,6 +422,35 @@ pub(super) fn apply_ordering(
        });
    }

+    // Deterministic tie-break for a TOTAL order. `lexsort_to_indices` is unstable
+    // and the input row order is not guaranteed (scan parallelism, upstream
+    // hashing), so equal user-sort keys would otherwise come out run-dependent —
+    // making `ORDER ... LIMIT` non-deterministic. Append the bound entities' key
+    // columns (`<var>.id`, unique per row) in canonical (name-sorted) order as
+    // ascending tie-breaks. The combination of all bound keys uniquely identifies
+    // a result row, so the order is total and reproducible. (Aggregate results
+    // have no `.id` columns; their group rows are already distinct on the
+    // projected group keys.)
+    let mut tiebreak_cols: Vec<String> = source
+        .schema()
+        .fields()
+        .iter()
+        .map(|f| f.name().to_string())
+        .filter(|name| name.ends_with(".id"))
+        .collect();
+    tiebreak_cols.sort();
+    for name in &tiebreak_cols {
+        if let Some(col) = source.column_by_name(name) {
+            sort_columns.push(SortColumn {
+                values: col.clone(),
+                options: Some(arrow_schema::SortOptions {
+                    descending: false,
+                    nulls_first: true,
+                }),
+            });
+        }
+    }
+
    let indices =
        lexsort_to_indices(&sort_columns, None).map_err(|e| OmniError::Lance(e.to_string()))?;

--- a/crates/omnigraph/src/exec/query.rs
+++ b/crates/omnigraph/src/exec/query.rs
--- a/crates/omnigraph/src/exec/staging.rs
+++ b/crates/omnigraph/src/exec/staging.rs
@ -495,25 +495,21 @@ impl StagedMutation {
        // until `ensure_path` learns how to bump expected_version on
        // op-kind upgrade.
        //
-        // Why per-branch (and not the bound-branch `db.snapshot()`):
-        // when the caller mutates a branch other than the engine's
-        // bound branch (e.g., feature-branch ingest from a server
-        // handle bound to main), `db.snapshot()` returns the bound
-        // branch's view of each table — which is the wrong pin for
-        // the publisher's CAS on a different branch. Using
-        // `snapshot_for_branch(branch)` resolves the per-branch
-        // entries correctly. The cost is one fresh manifest read per
-        // mutation; PR 1b's regression came from this same read, but
-        // that read is now strictly necessary for cross-branch
-        // correctness. Single-table same-branch mutations could still
-        // skip this read (queue exclusivity makes the publisher CAS a
-        // no-op), but the conditional adds complexity for marginal
-        // gain — left as a follow-up perf optimization.
+        // Why a fresh per-branch snapshot (and not the bound-branch
+        // `db.snapshot()` / `snapshot_for_branch()` fast path): a stale
+        // engine handle may be bound to the same branch it is writing. For
+        // non-strict Insert/Merge, that stale local view is allowed to rebase
+        // to the live manifest pin under the queue; only uncovered Lance
+        // HEAD>manifest drift is refused. For writes targeting a branch other
+        // than the engine's bound branch (e.g., feature-branch ingest from a
+        // server handle bound to main), the same helper also resolves the
+        // correct branch pin. The cost is one fresh manifest read per mutation
+        // plus one Lance HEAD open per staged table for the drift guard below.
        //
        // Multi-coordinator deployments (§VI.27 aspirational) get
        // genuine cross-process drift detection from this read for
        // free.
-        let snapshot = db.snapshot_for_branch(branch).await?;
+        let snapshot = db.fresh_snapshot_for_branch(branch).await?;
        for entry in staged.iter_mut() {
            let current = snapshot
                .entry(&entry.table_key)
@ -541,6 +537,35 @@ impl StagedMutation {
                ));
            }

+            // Separate manifest-visible concurrency from uncovered Lance drift.
+            // Non-strict inserts/merges are allowed to rebase from their staged
+            // read version to the fresh manifest pin above, but only if the
+            // live Lance HEAD still equals that manifest pin. If an external
+            // raw Lance write or a pre-fix maintenance path moved HEAD without
+            // publishing `__manifest`, this write must not silently fold it.
+            let head = db
+                .table_store()
+                .open_dataset_head_for_write(
+                    &entry.table_key,
+                    &entry.path.full_path,
+                    entry.path.table_branch.as_deref(),
+                )
+                .await?
+                .version()
+                .version;
+            if head < current {
+                return Err(OmniError::manifest_internal(format!(
+                    "table '{}' Lance HEAD version {} is behind manifest version {}",
+                    entry.table_key, head, current
+                )));
+            }
+            if head > current {
+                return Err(OmniError::manifest_conflict(format!(
+                    "table '{}' has Lance HEAD version {} ahead of manifest version {}; run `omnigraph repair` before writing",
+                    entry.table_key, head, current
+                )));
+            }
+
            entry.expected_version = current;
            expected_versions.insert(entry.table_key.clone(), current);
        }
--- a/crates/omnigraph/src/loader/mod.rs
+++ b/crates/omnigraph/src/loader/mod.rs
@ -288,21 +288,24 @@ async fn load_jsonl_reader<R: BufRead>(
    let mut node_rows: HashMap<String, Vec<JsonValue>> = HashMap::new();
    let mut edge_rows: HashMap<String, Vec<(String, String, JsonValue)>> = HashMap::new();

-    for (line_num, line) in reader.lines().enumerate() {
-        let line = line?;
-        let line = line.trim();
-        if line.is_empty() {
-            continue;
-        }
-        let value: JsonValue = serde_json::from_str(line).map_err(|e| {
-            OmniError::manifest(format!("invalid JSON on line {}: {}", line_num + 1, e))
+    // Parse a stream of JSON values. Accepts both compact JSONL (one object
+    // per line) and pretty-printed JSON where a single object spans multiple
+    // lines — serde's streaming deserializer treats any whitespace (including
+    // newlines) between top-level values as a separator.
+    for (idx, parsed) in serde_json::Deserializer::from_reader(reader)
+        .into_iter::<JsonValue>()
+        .enumerate()
+    {
+        let record_num = idx + 1;
+        let value: JsonValue = parsed.map_err(|e| {
+            OmniError::manifest(format!("invalid JSON at record {}: {}", record_num, e))
        })?;

        if let Some(type_name) = value.get("type").and_then(|v| v.as_str()) {
            if !catalog.node_types.contains_key(type_name) {
                return Err(OmniError::manifest(format!(
-                    "line {}: unknown node type '{}'",
-                    line_num + 1,
+                    "record {}: unknown node type '{}'",
+                    record_num,
                    type_name
                )));
            }
@ -317,8 +320,8 @@ async fn load_jsonl_reader<R: BufRead>(
        } else if let Some(edge_name) = value.get("edge").and_then(|v| v.as_str()) {
            if catalog.lookup_edge_by_name(edge_name).is_none() {
                return Err(OmniError::manifest(format!(
-                    "line {}: unknown edge type '{}'",
-                    line_num + 1,
+                    "record {}: unknown edge type '{}'",
+                    record_num,
                    edge_name
                )));
            }
@ -326,14 +329,14 @@ async fn load_jsonl_reader<R: BufRead>(
                .get("from")
                .and_then(|v| v.as_str())
                .ok_or_else(|| {
-                    OmniError::manifest(format!("line {}: edge missing 'from'", line_num + 1))
+                    OmniError::manifest(format!("record {}: edge missing 'from'", record_num))
                })?
                .to_string();
            let to = value
                .get("to")
                .and_then(|v| v.as_str())
                .ok_or_else(|| {
-                    OmniError::manifest(format!("line {}: edge missing 'to'", line_num + 1))
+                    OmniError::manifest(format!("record {}: edge missing 'to'", record_num))
                })?
                .to_string();
            let data = value
@ -347,8 +350,8 @@ async fn load_jsonl_reader<R: BufRead>(
                .push((from, to, data));
        } else {
            return Err(OmniError::manifest(format!(
-                "line {}: expected 'type' or 'edge' field",
-                line_num + 1
+                "record {}: expected 'type' or 'edge' field",
+                record_num
            )));
        }
    }
@ -396,9 +399,9 @@ async fn load_jsonl_reader<R: BufRead>(
        let batch = build_node_batch(node_type, rows)?;
        validate_value_constraints(&batch, node_type)?;
        validate_enum_constraints(&batch, &node_type.properties, type_name)?;
-        let unique_props = unique_property_names_for_node(node_type);
-        if !unique_props.is_empty() {
-            enforce_unique_constraints_intra_batch(&batch, type_name, &unique_props)?;
+        let unique_groups = unique_constraint_groups_for_node(node_type);
+        if !unique_groups.is_empty() {
+            enforce_unique_constraints_intra_batch(&batch, type_name, &unique_groups)?;
        }
        let loaded_count = batch.num_rows();
        let table_key = format!("node:{}", type_name);
@ -507,9 +510,9 @@ async fn load_jsonl_reader<R: BufRead>(
        let edge_type = &catalog.edge_types[edge_name];
        let batch = build_edge_batch(edge_type, rows)?;
        validate_enum_constraints(&batch, &edge_type.properties, edge_name)?;
-        let unique_props = unique_property_names_for_edge(edge_type);
-        if !unique_props.is_empty() {
-            enforce_unique_constraints_intra_batch(&batch, edge_name, &unique_props)?;
+        let unique_groups = unique_constraint_groups_for_edge(edge_type);
+        if !unique_groups.is_empty() {
+            enforce_unique_constraints_intra_batch(&batch, edge_name, &unique_groups)?;
        }
        let loaded_count = batch.num_rows();
        let table_key = format!("edge:{}", edge_name);
@ -1422,8 +1425,16 @@ pub(crate) fn validate_enum_constraints(
    Ok(())
 }

-/// Detect duplicate values within a single `RecordBatch` for any of the named
-/// `unique_properties`. Returns an error on the first duplicate found.
+/// Detect duplicate values within a single `RecordBatch` for any of the
+/// `unique_constraints` groups. Each group is a list of one or more columns
+/// that together form a uniqueness key: a violation occurs when two rows share
+/// the same tuple of values across *all* columns in a group, so a composite
+/// `@unique(a, b)` only conflicts when both `a` and `b` match. Returns an
+/// error on the first duplicate found.
+///
+/// Rows where any column in a group is null are exempt (standard SQL semantics
+/// for uniqueness over nullable columns), as is any group whose columns are
+/// not all present in the batch (e.g. a partial-schema load).
 ///
 /// Note: this only catches duplicates *within* the batch. Cross-batch
 /// uniqueness against already-committed rows is not enforced here — that
@ -1431,22 +1442,37 @@ pub(crate) fn validate_enum_constraints(
 pub(crate) fn enforce_unique_constraints_intra_batch(
    batch: &RecordBatch,
    type_name: &str,
-    unique_properties: &[String],
+    unique_constraints: &[Vec<String>],
 ) -> Result<()> {
-    for property in unique_properties {
-        let Some(col_idx) = batch.schema().index_of(property).ok() else {
+    for columns in unique_constraints {
+        // Resolve the group's columns once. A group whose columns aren't all
+        // present in this batch is skipped (e.g. a partial-schema load).
+        let Some(group_columns) = columns
+            .iter()
+            .map(|name| {
+                batch
+                    .schema()
+                    .index_of(name)
+                    .ok()
+                    .map(|i| batch.column(i).clone())
+            })
+            .collect::<Option<Vec<ArrayRef>>>()
+        else {
            continue;
        };
-        let arr = batch.column(col_idx);
-        let mut seen: HashMap<String, usize> = HashMap::new();
+        let mut seen: HashMap<Vec<String>, usize> = HashMap::new();
        for row in 0..batch.num_rows() {
-            let Some(value) = scalar_to_string(arr, row) else {
+            let Some(key) = composite_unique_key(&group_columns, row)? else {
                continue;
            };
-            if let Some(prev_row) = seen.insert(value.clone(), row) {
+            if let Some(prev_row) = seen.insert(key.clone(), row) {
                return Err(OmniError::manifest(format!(
                    "@unique violation on {}.{}: value '{}' appears in rows {} and {}",
-                    type_name, property, value, prev_row, row
+                    type_name,
+                    format_tuple(columns),
+                    format_tuple(&key),
+                    prev_row,
+                    row
                )));
            }
        }
@ -1454,80 +1480,131 @@ pub(crate) fn enforce_unique_constraints_intra_batch(
    Ok(())
 }

-/// Reduce a single Arrow scalar at (`array`, `row`) to a `String` for
-/// uniqueness comparison. Returns `None` for null values (nulls are exempt
-/// from uniqueness in standard SQL semantics).
-fn scalar_to_string(array: &ArrayRef, row: usize) -> Option<String> {
-    use arrow_array::Array;
+/// Build the composite uniqueness key for `row` over a constraint group's
+/// already-resolved columns (in declaration order).
+///
+/// The key is the *tuple* of per-column scalar strings (`Vec<String>`), keyed
+/// directly in the dedup map — there is no separator, so no data value can
+/// forge a collision (an earlier version joined on `U+001F`, which a value
+/// containing that control char could still defeat).
+///
+/// - `Ok(None)` if any column is null: the row is exempt (a partial tuple
+///   can't violate uniqueness under SQL null semantics).
+/// - `Ok(Some(tuple))` otherwise.
+/// - `Err(..)` propagated from [`unique_key_scalar`] on an un-keyable value.
+///
+/// Shared by the intake path (`enforce_unique_constraints_intra_batch`) and the
+/// branch-merge path (`exec/merge.rs::update_unique_constraints`) so the two
+/// derive identical keys and cannot drift on separator or scalar conversion.
+pub(crate) fn composite_unique_key(
+    group_columns: &[ArrayRef],
+    row: usize,
+) -> Result<Option<Vec<String>>> {
+    let mut parts = Vec::with_capacity(group_columns.len());
+    for column in group_columns {
+        match unique_key_scalar(column, row)? {
+            Some(value) => parts.push(value),
+            None => return Ok(None),
+        }
+    }
+    Ok(Some(parts))
+}
+
+/// Render a constraint's column tuple for error messages: a single item as
+/// `col`, a composite as `(a, b)`. Used for both the column list and the
+/// offending value tuple, which share the same shape.
+fn format_tuple(items: &[String]) -> String {
+    match items {
+        [single] => single.clone(),
+        _ => format!("({})", items.join(", ")),
+    }
+}
+
+/// Reduce a single Arrow scalar at (`array`, `row`) to its uniqueness-key
+/// string.
+///
+/// - `Ok(None)` for a null value: nulls are exempt from uniqueness (standard
+///   SQL semantics over nullable columns).
+/// - `Ok(Some(s))` for every scalar type a `@unique` / `@key` column can hold.
+///   Strings are covered in all three physical Arrow encodings (`Utf8`,
+///   `LargeUtf8`, `Utf8View`), so a legal string column is always keyable
+///   regardless of how Lance materializes it on read-back.
+/// - `Err(..)` for a non-null value whose Arrow type can't be reduced to a key
+///   (a list, blob, or vector column). This fails loudly rather than silently
+///   exempting the row, and because every legal scalar encoding is handled
+///   above, the error fires only for a genuinely un-keyable column type — never
+///   for a legal value that merely arrived in an unenumerated encoding.
+fn unique_key_scalar(array: &ArrayRef, row: usize) -> Result<Option<String>> {
+    use arrow_array::{Array, LargeStringArray, StringViewArray};
    if array.is_null(row) {
-        return None;
+        return Ok(None);
    }
    if let Some(a) = array.as_any().downcast_ref::<StringArray>() {
-        return Some(a.value(row).to_string());
+        return Ok(Some(a.value(row).to_string()));
+    }
+    if let Some(a) = array.as_any().downcast_ref::<LargeStringArray>() {
+        return Ok(Some(a.value(row).to_string()));
+    }
+    if let Some(a) = array.as_any().downcast_ref::<StringViewArray>() {
+        return Ok(Some(a.value(row).to_string()));
    }
    if let Some(a) = array.as_any().downcast_ref::<Int32Array>() {
-        return Some(a.value(row).to_string());
+        return Ok(Some(a.value(row).to_string()));
    }
    if let Some(a) = array.as_any().downcast_ref::<Int64Array>() {
-        return Some(a.value(row).to_string());
+        return Ok(Some(a.value(row).to_string()));
    }
    if let Some(a) = array.as_any().downcast_ref::<UInt32Array>() {
-        return Some(a.value(row).to_string());
+        return Ok(Some(a.value(row).to_string()));
    }
    if let Some(a) = array.as_any().downcast_ref::<UInt64Array>() {
-        return Some(a.value(row).to_string());
+        return Ok(Some(a.value(row).to_string()));
    }
    if let Some(a) = array.as_any().downcast_ref::<Float32Array>() {
-        return Some(a.value(row).to_string());
+        return Ok(Some(a.value(row).to_string()));
    }
    if let Some(a) = array.as_any().downcast_ref::<Float64Array>() {
-        return Some(a.value(row).to_string());
+        return Ok(Some(a.value(row).to_string()));
    }
    if let Some(a) = array.as_any().downcast_ref::<BooleanArray>() {
-        return Some(a.value(row).to_string());
+        return Ok(Some(a.value(row).to_string()));
    }
    if let Some(a) = array.as_any().downcast_ref::<Date32Array>() {
-        return Some(a.value(row).to_string());
+        return Ok(Some(a.value(row).to_string()));
    }
    if let Some(a) = array.as_any().downcast_ref::<Date64Array>() {
-        return Some(a.value(row).to_string());
+        return Ok(Some(a.value(row).to_string()));
    }
-    None
+    Err(OmniError::manifest(format!(
+        "uniqueness key: unsupported column type {:?} for @unique/@key enforcement",
+        array.data_type()
+    )))
 }

-/// Build the flat list of property names that must be checked for uniqueness
-/// on a node type. Includes both `@unique` properties (from
-/// `NodeType.unique_constraints`) and the `@key` (which implies uniqueness).
-pub(crate) fn unique_property_names_for_node(
+/// Build the list of uniqueness constraint groups to enforce on a node type.
+/// Each group is the column tuple of one constraint. Includes every
+/// `@unique(...)` constraint (from `NodeType.unique_constraints`) and the
+/// `@key` (which implies uniqueness over its column tuple). Grouping is
+/// preserved so a composite `@unique(a, b)` is enforced as a composite key
+/// rather than degraded into independent single-field checks.
+pub(crate) fn unique_constraint_groups_for_node(
    node_type: &omnigraph_compiler::catalog::NodeType,
-) -> Vec<String> {
-    let mut props: Vec<String> = node_type
-        .unique_constraints
-        .iter()
-        .flatten()
-        .cloned()
-        .collect();
-    if let Some(key) = &node_type.key {
-        props.extend(key.iter().cloned());
+) -> Vec<Vec<String>> {
+    let mut groups: Vec<Vec<String>> = node_type.unique_constraints.clone();
+    if let Some(key) = &node_type.key
+        && !groups.contains(key)
+    {
+        groups.push(key.clone());
    }
-    props.sort();
-    props.dedup();
-    props
+    groups
 }

-/// Same as [`unique_property_names_for_node`] but for an edge type.
-pub(crate) fn unique_property_names_for_edge(
+/// Same as [`unique_constraint_groups_for_node`] but for an edge type (edges
+/// have no `@key`).
+pub(crate) fn unique_constraint_groups_for_edge(
    edge_type: &omnigraph_compiler::catalog::EdgeType,
-) -> Vec<String> {
-    let mut props: Vec<String> = edge_type
-        .unique_constraints
-        .iter()
-        .flatten()
-        .cloned()
-        .collect();
-    props.sort();
-    props.dedup();
-    props
+) -> Vec<Vec<String>> {
+    edge_type.unique_constraints.clone()
 }

 fn extract_numeric_value(col: &ArrayRef, row: usize) -> Option<f64> {
@ -2169,4 +2246,66 @@ edge WorksAt: Person -> Company
        let err = result.unwrap_err().to_string();
        assert!(err.contains("NaN"), "error should mention NaN: {}", err);
    }
+
+    #[test]
+    fn composite_unique_key_builds_tuple_and_exempts_null() {
+        let a: ArrayRef = Arc::new(StringArray::from(vec![Some("x|y"), Some("x"), None]));
+        let b: ArrayRef = Arc::new(StringArray::from(vec![Some("z"), Some("y|z"), Some("q")]));
+        let cols = [a, b];
+
+        // Tuple key, so `("x|y", "z")` and `("x", "y|z")` stay distinct —
+        // a separator-joined key (the old `|` join) would collapse both to
+        // `x|y|z`.
+        assert_eq!(
+            composite_unique_key(&cols, 0).unwrap(),
+            Some(vec!["x|y".to_string(), "z".to_string()])
+        );
+        assert_eq!(
+            composite_unique_key(&cols, 1).unwrap(),
+            Some(vec!["x".to_string(), "y|z".to_string()])
+        );
+        assert_ne!(
+            composite_unique_key(&cols, 0).unwrap(),
+            composite_unique_key(&cols, 1).unwrap()
+        );
+
+        // Any null column → the whole row is exempt (SQL null semantics).
+        assert_eq!(composite_unique_key(&cols, 2).unwrap(), None);
+    }
+
+    #[test]
+    fn unique_key_scalar_errors_loudly_on_unkeyable_type() {
+        use arrow_array::LargeBinaryArray;
+        // A binary/blob column can't be reduced to a uniqueness key. Before the
+        // hardening this returned `None`, so a `@unique` on such a column was
+        // silently un-enforced; now it errors instead of weakening the
+        // constraint in silence.
+        let blob: ArrayRef = Arc::new(LargeBinaryArray::from(vec![Some(&b"abc"[..])]));
+        let err = unique_key_scalar(&blob, 0).unwrap_err();
+        assert!(
+            err.to_string().contains("unsupported column type"),
+            "un-keyable type must fail loudly (got: {err})"
+        );
+    }
+
+    #[test]
+    fn unique_key_scalar_handles_all_string_encodings() {
+        use arrow_array::{LargeStringArray, StringViewArray};
+        // A legal string column is keyable in every physical Arrow encoding
+        // Lance might hand back (Utf8 / LargeUtf8 / Utf8View). None of these may
+        // fall through to the loud `Err` path — that branch is reserved for
+        // genuinely un-keyable column types, not a legal value in an
+        // unenumerated encoding.
+        let utf8: ArrayRef = Arc::new(StringArray::from(vec![Some("v")]));
+        let large: ArrayRef = Arc::new(LargeStringArray::from(vec![Some("v")]));
+        let view: ArrayRef = Arc::new(StringViewArray::from(vec![Some("v")]));
+        for array in [&utf8, &large, &view] {
+            assert_eq!(
+                unique_key_scalar(array, 0).unwrap(),
+                Some("v".to_string()),
+                "string array {:?} must render, not error",
+                array.data_type()
+            );
+        }
+    }
 }
--- a/crates/omnigraph/src/table_store.rs
+++ b/crates/omnigraph/src/table_store.rs
@ -43,6 +43,19 @@ pub struct DeleteState {
    pub(crate) version_metadata: TableVersionMetadata,
 }

+/// Whether a `key_col IN (...)` scan on a dataset will be served by the
+/// persisted scalar (BTREE) index, or silently fall back to a full filtered
+/// scan. Detection-only (metadata, no IO); the scan returns the correct rows
+/// either way. Surfaced by the indexed traversal path so the silent perf
+/// fallback is observable, and available to a future cost-based planner.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum IndexCoverage {
+    /// The column has a usable BTREE and every fragment records `physical_rows`.
+    Indexed,
+    /// Lance will not use the scalar index for this scan (correct, full scan).
+    Degraded { reason: String },
+}
+
 /// A Lance write that has produced fragment files on object storage but is
 /// not yet committed to the dataset's manifest. The staged-write primitives
 /// are consumed by `MutationStaging` (`exec/staging.rs`,
@ -582,6 +595,117 @@ impl TableStore {
            .map_err(|e| OmniError::Lance(e.to_string()))
    }

+    /// Indexed neighbor lookup for graph traversal. Given an edge dataset and a
+    /// set of endpoint keys on `key_col` (`"src"` for out-traversal, `"dst"` for
+    /// in-traversal), return the matching edge rows projected to
+    /// `[key_col, opposite_col]`.
+    ///
+    /// The `key_col IN (keys)` predicate is built as a structured DataFusion
+    /// `Expr` and applied via `Scanner::filter_expr`, so Lance routes it through
+    /// the persisted BTREE on `key_col` (index-search → take). Cost scales with
+    /// the frontier size, not |E| — the basis for serving selective traversals
+    /// without building the whole in-memory CSR. Empty `keys` returns empty
+    /// without scanning.
+    ///
+    /// Note: like any indexed scan, this observes only fragments the BTREE
+    /// covers plus an unindexed-fragment scan fallback; it reads the committed
+    /// snapshot `ds` was opened at.
+    pub async fn scan_edges_by_endpoint(
+        ds: &Dataset,
+        key_col: &str,
+        opposite_col: &str,
+        keys: &[String],
+    ) -> Result<Vec<RecordBatch>> {
+        use datafusion::prelude::{col, lit};
+
+        if keys.is_empty() {
+            return Ok(Vec::new());
+        }
+        let key_list: Vec<datafusion::prelude::Expr> =
+            keys.iter().map(|k| lit(k.clone())).collect();
+        let filter_expr = col(key_col).in_list(key_list, false);
+        Self::scan_stream_with(
+            ds,
+            Some(&[key_col, opposite_col]),
+            None,
+            None,
+            false,
+            |scanner| {
+                scanner.filter_expr(filter_expr);
+                Ok(())
+            },
+        )
+        .await?
+        .try_collect()
+        .await
+        .map_err(|e| OmniError::Lance(e.to_string()))
+    }
+
+    /// Metadata-only check (no IO) of whether `scan_edges_by_endpoint` — a
+    /// `key_col IN (...)` filter — on `ds` will be served by the persisted BTREE
+    /// on `column`, or silently fall back to a full filtered scan. Mirrors
+    /// Lance's own decision: scalar indices are disabled for the whole scan if
+    /// ANY fragment lacks `physical_rows` (lance `dataset/scanner.rs`
+    /// `create_filter_plan`), and are obviously unused if no BTREE on the
+    /// column exists. The scan is correct (returns all rows) either way — this
+    /// only surfaces the perf cliff so the indexed traversal can warn on it.
+    pub async fn key_column_index_coverage(ds: &Dataset, column: &str) -> Result<IndexCoverage> {
+        let Some(field_id) = ds.schema().field(column).map(|field| field.id) else {
+            return Ok(IndexCoverage::Degraded {
+                reason: format!("column '{}' not in schema", column),
+            });
+        };
+        let indices = ds
+            .load_indices()
+            .await
+            .map_err(|e| OmniError::Lance(e.to_string()))?;
+        let btree = indices
+            .iter()
+            .filter(|index| !is_system_index(index))
+            .filter(|index| index.fields.len() == 1 && index.fields[0] == field_id)
+            .find(|index| {
+                index
+                    .index_details
+                    .as_ref()
+                    .map(|details| details.type_url.ends_with("BTreeIndexDetails"))
+                    .unwrap_or(false)
+            });
+        let Some(btree) = btree else {
+            return Ok(IndexCoverage::Degraded {
+                reason: format!("no BTREE index on '{}'", column),
+            });
+        };
+        // Same check Lance runs: a fragment missing physical_rows disables
+        // scalar indices for the entire scan (all-or-nothing).
+        if ds.fragments().iter().any(|f| f.physical_rows.is_none()) {
+            return Ok(IndexCoverage::Degraded {
+                reason: "a fragment is missing physical_rows".to_string(),
+            });
+        }
+        // An index only covers the fragments it was built over; fragments
+        // appended afterward (edge-index creation is skipped once a BTREE exists)
+        // are scanned unindexed. If any CURRENT fragment is absent from the
+        // index's `fragment_bitmap`, the scan is partly a full scan — so the
+        // chooser must not price it as fully indexed. A `None` bitmap means Lance
+        // can't report coverage; don't over-degrade in that case.
+        if let Some(bitmap) = btree.fragment_bitmap.as_ref() {
+            let uncovered = ds
+                .fragments()
+                .iter()
+                .filter(|f| !bitmap.contains(f.id as u32))
+                .count();
+            if uncovered > 0 {
+                return Ok(IndexCoverage::Degraded {
+                    reason: format!(
+                        "{} fragment(s) not covered by the index on '{}'",
+                        uncovered, column
+                    ),
+                });
+            }
+        }
+        Ok(IndexCoverage::Indexed)
+    }
+
    pub async fn count_rows(&self, ds: &Dataset, filter: Option<String>) -> Result<usize> {
        ds.count_rows(filter)
            .await
@ -732,7 +856,7 @@ impl TableStore {
        // before the FirstSeen setter has a chance to silently collapse
        // anything):
        // - Load path: `enforce_unique_constraints_intra_batch`
-        //   (`loader/mod.rs:1453`) errors on intra-batch `@key` dups.
+        //   (`loader/mod.rs:1442`) errors on intra-batch `@key` dups.
        // - Mutate path: `MutationStaging::finalize` (`exec/staging.rs`)
        //   accumulates and dedupes by `id`.
        // - Branch-merge path: `compute_source_delta` /
--- a/crates/omnigraph/tests/branching.rs
+++ b/crates/omnigraph/tests/branching.rs
@ -39,6 +39,26 @@ query insert_user($name: String, $email: String) {
 }
 "#;

+const EDGE_UNIQUE_SCHEMA: &str = r#"
+node Person {
+    name: String @key
+}
+
+edge Knows: Person -> Person {
+    @unique(src, dst)
+}
+"#;
+
+const EDGE_UNIQUE_DATA: &str = r#"{"type":"Person","data":{"name":"Alice"}}
+{"type":"Person","data":{"name":"Bob"}}
+{"type":"Person","data":{"name":"Carol"}}"#;
+
+const EDGE_UNIQUE_MUTATIONS: &str = r#"
+query add_knows($from: String, $to: String) {
+    insert Knows { from: $from, to: $to }
+}
+"#;
+
 const CARDINALITY_SCHEMA: &str = r#"
 node Person {
    name: String @key
@ -1119,6 +1139,87 @@ async fn branch_merge_reports_unique_violation_conflict() {
    }
 }

+/// Regression for the MR-983 follow-up: the branch-merge path must enforce an
+/// edge composite `@unique(src, dst)` as a true composite key, consistent with
+/// the intake path. Two branches inserting the *same* (src, dst) pair must
+/// conflict on merge.
+#[tokio::test]
+async fn branch_merge_reports_composite_unique_violation_conflict() {
+    let dir = tempfile::tempdir().unwrap();
+    let uri = dir.path().to_str().unwrap();
+    let mut main = init_db_from_schema_and_data(&dir, EDGE_UNIQUE_SCHEMA, EDGE_UNIQUE_DATA).await;
+    main.branch_create("feature").await.unwrap();
+
+    let mut feature = Omnigraph::open(uri).await.unwrap();
+
+    mutate_main(
+        &mut main,
+        EDGE_UNIQUE_MUTATIONS,
+        "add_knows",
+        &params(&[("$from", "Alice"), ("$to", "Bob")]),
+    )
+    .await
+    .unwrap();
+
+    mutate_branch(
+        &mut feature,
+        "feature",
+        EDGE_UNIQUE_MUTATIONS,
+        "add_knows",
+        &params(&[("$from", "Alice"), ("$to", "Bob")]),
+    )
+    .await
+    .unwrap();
+
+    let err = main.branch_merge("feature", "main").await.unwrap_err();
+    match err {
+        OmniError::MergeConflicts(conflicts) => {
+            assert!(conflicts.iter().any(|conflict| {
+                conflict.table_key == "edge:Knows"
+                    && conflict.kind == MergeConflictKind::UniqueViolation
+            }));
+        }
+        other => panic!("expected merge conflicts, got {other:?}"),
+    }
+}
+
+/// Sibling to the above: pairs sharing `src` but differing on `dst` are unique
+/// on the (src, dst) tuple and must merge cleanly. Guards against the composite
+/// degrading back into a single-field `@unique(src)` on the merge path.
+#[tokio::test]
+async fn branch_merge_allows_distinct_composite_unique_pairs() {
+    let dir = tempfile::tempdir().unwrap();
+    let uri = dir.path().to_str().unwrap();
+    let mut main = init_db_from_schema_and_data(&dir, EDGE_UNIQUE_SCHEMA, EDGE_UNIQUE_DATA).await;
+    main.branch_create("feature").await.unwrap();
+
+    let mut feature = Omnigraph::open(uri).await.unwrap();
+
+    mutate_main(
+        &mut main,
+        EDGE_UNIQUE_MUTATIONS,
+        "add_knows",
+        &params(&[("$from", "Alice"), ("$to", "Bob")]),
+    )
+    .await
+    .unwrap();
+
+    mutate_branch(
+        &mut feature,
+        "feature",
+        EDGE_UNIQUE_MUTATIONS,
+        "add_knows",
+        &params(&[("$from", "Alice"), ("$to", "Carol")]),
+    )
+    .await
+    .unwrap();
+
+    main.branch_merge("feature", "main")
+        .await
+        .expect("distinct (src, dst) pairs are unique on the composite and must merge cleanly");
+    assert_eq!(count_rows(&main, "edge:Knows").await, 2);
+}
+
 #[tokio::test]
 async fn branch_merge_reports_cardinality_violation_conflict() {
    let dir = tempfile::tempdir().unwrap();
--- a/crates/omnigraph/tests/composite_flow.rs
+++ b/crates/omnigraph/tests/composite_flow.rs
@ -294,21 +294,19 @@ async fn composite_flow_canonical_lifecycle() {
    );

    // ─────────────────────────────────────────────────────────────────
-    // Step 10: optimize the post-merge graph — verify indices stay
-    // valid and queryable.
+    // Step 10: optimize the post-merge graph — verify compaction is
+    // published to the manifest (so the manifest pin tracks the compacted
+    // Lance HEAD), indices stay valid and queryable, and a post-optimize
+    // strict write commits.
    //
-    // **Known limitation**: `optimize_all_tables` calls Lance
-    // `compact_files` directly — it advances per-table Lance HEAD
-    // without updating the omnigraph `__manifest` pin. After optimize,
-    // the next writer's expected_table_versions captures the
-    // pre-optimize manifest pin, but the publisher's pre-check reads
-    // a higher version from the manifest dataset (because some other
-    // path — possibly schema-state recovery on reopen — wrote a newer
-    // __manifest row). The `ExpectedVersionMismatch` is benign
-    // (re-issuing the mutation after a snapshot refresh succeeds), but
-    // a composite test cannot reliably exercise post-optimize mutations
-    // until that path is investigated. Coverage of post-optimize
-    // mutations is left to a focused optimize+cleanup integration test.
+    // This step used to carry a "Known limitation": `optimize_all_tables`
+    // ran Lance `compact_files` without publishing the new version to
+    // `__manifest`, so the manifest pin lagged the Lance HEAD and the next
+    // strict write / schema apply failed with `ExpectedVersionMismatch`
+    // ("stale view … refresh and retry") — so post-optimize mutations were
+    // deliberately omitted here. optimize now publishes the compacted
+    // version, and this flow exercises exactly that previously-failing
+    // write below.
    // ─────────────────────────────────────────────────────────────────
    let optimize_stats = db.optimize().await.unwrap();
    assert!(
@ -331,6 +329,28 @@ async fn composite_flow_canonical_lifecycle() {
        "row counts unchanged by optimize"
    );

+    // A strict update on a compacted table is exactly the write that
+    // failed with "stale view" before optimize published its compaction.
+    // It must now commit (Alice is one of the seed Persons; an update
+    // leaves the row count at 6).
+    let post_optimize_update = mutate_main(
+        &mut db,
+        MUTATION_QUERIES,
+        "set_age",
+        &mixed_params(&[("$name", "Alice")], &[("$age", 41)]),
+    )
+    .await
+    .expect("post-optimize strict update must commit — optimize published the manifest");
+    assert_eq!(
+        post_optimize_update.affected_nodes, 1,
+        "post-optimize update must affect exactly Alice"
+    );
+    assert_eq!(
+        count_rows(&db, "node:Person").await,
+        6,
+        "an update must not change the Person row count"
+    );
+
    // ─────────────────────────────────────────────────────────────────
    // Step 11: cleanup — keep last 10 versions, only purge versions
    // older than 1 hour. With this small test, we have well under 10
@ -373,14 +393,27 @@ async fn composite_flow_canonical_lifecycle() {
        branches,
    );

-    // Final query exercise — full read path works post-reopen,
-    // post-cleanup. Post-cleanup mutation is omitted here pending
-    // resolution of the optimize-vs-manifest-pin interaction documented
-    // in Step 10.
+    // Final exercise — full read AND write path works post-reopen,
+    // post-cleanup. (The post-cleanup mutation was previously omitted
+    // pending resolution of the optimize-vs-manifest-pin interaction in
+    // Step 10; that is now fixed, so a strict write here must commit.)
    let final_total = query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default())
        .await
        .unwrap();
    assert!(!final_total.batches().is_empty());
+
+    let post_reopen_update = mutate_main(
+        &mut db,
+        MUTATION_QUERIES,
+        "set_age",
+        &mixed_params(&[("$name", "Alice")], &[("$age", 42)]),
+    )
+    .await
+    .expect("post-reopen, post-cleanup strict update must commit");
+    assert_eq!(
+        post_reopen_update.affected_nodes, 1,
+        "post-reopen update must affect exactly Alice"
+    );
 }

 /// Cross-handle sequence that exercises operations after a schema_apply
--- a/crates/omnigraph/tests/consistency.rs
+++ b/crates/omnigraph/tests/consistency.rs
@ -188,7 +188,7 @@ node Thing {
 ///
 /// Defense in depth:
 /// 1. The loader's `enforce_unique_constraints_intra_batch`
-///    (`loader/mod.rs:1453`), invoked unconditionally on any node type
+///    (`loader/mod.rs:1442`), invoked unconditionally on any node type
 ///    with a `@key`, errors on intra-batch duplicate `@key` values at
 ///    intake — pinned by this test across every `LoadMode`.
 /// 2. The `check_batch_unique_by_keys` precondition at the top of
@ -229,6 +229,122 @@ node Thing {
    }
 }

+/// Regression for MR-983: a node-level composite `@unique(a, b)` must be
+/// enforced as a true composite key, not degraded into independent
+/// single-field checks. Pre-fix, `unique_property_names_for_node` flattened
+/// every constraint group into one property list, so `@unique(source,
+/// external_id)` was enforced as `@unique(source)` *and* `@unique(external_id)`
+/// — rejecting rows that were unique on the composite key and naming only the
+/// first field in the error.
+#[tokio::test]
+async fn loader_enforces_composite_unique_as_composite_key() {
+    let dir = tempfile::tempdir().unwrap();
+    let uri = dir.path().to_str().unwrap();
+    let schema = r#"
+node ExternalID {
+    slug: String @key
+    source: String @index
+    external_id: String @index
+    @unique(source, external_id)
+}
+"#;
+    let mut db = Omnigraph::init(uri, schema).await.unwrap();
+
+    // Same `source`, different `external_id` → unique on the composite key.
+    // This is the exact repro from MR-983 and must be accepted.
+    let composite_ok = r#"{"type":"ExternalID","data":{"slug":"a","source":"whatsapp","external_id":"+E.164"}}
+{"type":"ExternalID","data":{"slug":"b","source":"whatsapp","external_id":"pn:12345"}}
+"#;
+    load_jsonl(&mut db, composite_ok, LoadMode::Overwrite)
+        .await
+        .expect("rows unique on the composite (source, external_id) must be accepted");
+    assert_eq!(count_rows(&db, "node:ExternalID").await, 2);
+
+    // Both composite columns equal → genuine violation. The error must name
+    // the whole composite, not just the first field.
+    let composite_dupe = r#"{"type":"ExternalID","data":{"slug":"c","source":"whatsapp","external_id":"dup"}}
+{"type":"ExternalID","data":{"slug":"d","source":"whatsapp","external_id":"dup"}}
+"#;
+    let err = load_jsonl(&mut db, composite_dupe, LoadMode::Overwrite)
+        .await
+        .unwrap_err();
+    let msg = err.to_string();
+    // Columns are canonicalized to sorted order in the catalog, so the
+    // message reads `(external_id, source)`; assert order-agnostically that
+    // both composite columns are named (not just the first, as pre-fix).
+    assert!(
+        msg.contains("@unique violation")
+            && msg.contains("source")
+            && msg.contains("external_id"),
+        "composite violation must name both columns (got: {msg})"
+    );
+}
+
+/// Guard: the intake path (load/insert/update) and the branch-merge path must
+/// derive the same composite `@unique(a, b)` key, so a pair of rows unique on
+/// the tuple is accepted by BOTH. Both paths now key on the tuple itself (no
+/// separator), so a value containing any byte — including the `|` that an
+/// earlier merge-path join used as its separator — can't forge a collision.
+/// `("x|y", "z")` and `("x", "y|z")` are distinct tuples and must survive a
+/// load-on-branch then merge without a phantom `UniqueViolation`. This pins the
+/// cross-path consistency against any future drift in the shared keying.
+#[tokio::test]
+async fn composite_unique_key_is_consistent_across_intake_and_merge() {
+    let dir = tempfile::tempdir().unwrap();
+    let uri = dir.path().to_str().unwrap();
+    let schema = r#"
+node Item {
+    slug: String @key
+    a: String @index
+    b: String @index
+    @unique(a, b)
+}
+"#;
+    let insert_item = r#"
+query insert_item($slug: String, $a: String, $b: String) {
+    insert Item { slug: $slug, a: $a, b: $b }
+}
+"#;
+    let main = Omnigraph::init(uri, schema).await.unwrap();
+    main.branch_create("feature").await.unwrap();
+
+    // Two rows unique on the composite (a, b), where `a`/`b` carry a literal
+    // `|`. Distinct under a tuple key; identical (`x|y|z`) under a `|`-join.
+    let feature = Omnigraph::open(uri).await.unwrap();
+    feature
+        .mutate(
+            "feature",
+            insert_item,
+            "insert_item",
+            &params(&[("$slug", "r1"), ("$a", "x|y"), ("$b", "z")]),
+        )
+        .await
+        .expect("intake must accept the first composite-unique row");
+    feature
+        .mutate(
+            "feature",
+            insert_item,
+            "insert_item",
+            &params(&[("$slug", "r2"), ("$a", "x"), ("$b", "y|z")]),
+        )
+        .await
+        .expect("intake must accept the second composite-unique row (distinct on the tuple)");
+
+    // The merge re-validates uniqueness over the adopted source rows. Both
+    // rows are unique on (a, b), so this must merge cleanly with no phantom
+    // conflict — intake and merge must key the tuple identically.
+    let merge_result = feature.branch_merge("feature", "main").await;
+    assert!(
+        merge_result.is_ok(),
+        "rows unique on the composite (a, b) must merge cleanly; \
+         intake and merge must key the tuple the same way (got: {:?})",
+        merge_result.err()
+    );
+
+    let reopened = Omnigraph::open(uri).await.unwrap();
+    assert_eq!(count_rows(&reopened, "node:Item").await, 2);
+}
+
 /// Canary for the upstream Lance gap that the `FirstSeen` workaround
 /// in `table_store.rs` masks. The bug class is "Window 2": load →
 /// indices built explicitly → merge → merge. Even with the engine
--- a/crates/omnigraph/tests/end_to_end.rs
+++ b/crates/omnigraph/tests/end_to_end.rs
@ -1933,3 +1933,87 @@ query docs_with_tag($tag: String) {
        "contains-pushdown should return exactly the rows whose tags list contains 'red'"
    );
 }
+
+// ─── Maintenance in the full lifecycle: optimize (compaction) ────────────────
+
+/// `optimize` (Lance compaction) is part of a realistic graph lifecycle: it
+/// advances the Lance HEAD and publishes the compacted version to the manifest.
+/// The rest of the flow must keep working across that boundary — reads observe
+/// the compacted data, strict updates (which check Lance HEAD == manifest
+/// version) still commit, inserts still commit, and the state survives a reopen
+/// (the open-time recovery sweep finds no leftover drift). Before optimize
+/// published its compaction, the manifest lagged the Lance HEAD here and the
+/// post-optimize update below failed with "stale view ... refresh and retry".
+#[tokio::test]
+async fn full_flow_optimize_then_query_update_and_reopen() {
+    let dir = tempfile::tempdir().unwrap();
+    let uri = dir.path().to_str().unwrap().to_string();
+    let mut db = init_and_load(&dir).await;
+
+    // Build several Person fragments so compaction has something to merge.
+    for (name, age) in [("Eve", 40), ("Frank", 41), ("Grace", 42)] {
+        mutate_main(
+            &mut db,
+            MUTATION_QUERIES,
+            "insert_person",
+            &mixed_params(&[("$name", name)], &[("$age", age)]),
+        )
+        .await
+        .unwrap();
+    }
+
+    let stats = db.optimize().await.unwrap();
+    assert!(
+        stats.iter().any(|s| s.committed),
+        "a multi-fragment table should have compacted in this flow"
+    );
+
+    // Reads observe the compacted data.
+    let qr = query_main(
+        &mut db,
+        TEST_QUERIES,
+        "get_person",
+        &params(&[("$name", "Alice")]),
+    )
+    .await
+    .unwrap();
+    assert_eq!(qr.num_rows(), 1);
+
+    // Strict update after optimize commits (previously failed with "stale view"
+    // because the manifest lagged the compacted Lance HEAD).
+    let upd = mutate_main(
+        &mut db,
+        MUTATION_QUERIES,
+        "set_age",
+        &mixed_params(&[("$name", "Alice")], &[("$age", 31)]),
+    )
+    .await
+    .unwrap();
+    assert_eq!(upd.affected_nodes, 1);
+
+    // Insert after optimize also commits.
+    mutate_main(
+        &mut db,
+        MUTATION_QUERIES,
+        "insert_person",
+        &mixed_params(&[("$name", "Ivan")], &[("$age", 50)]),
+    )
+    .await
+    .unwrap();
+    assert_eq!(count_rows(&db, "node:Person").await, 8); // 4 seed + Eve/Frank/Grace + Ivan
+
+    // State survives a reopen — the recovery sweep runs and finds no drift.
+    drop(db);
+    let reopened = Omnigraph::open(&uri).await.unwrap();
+    assert_eq!(count_rows(&reopened, "node:Person").await, 8);
+    let alice = reopened
+        .entity_at_target(ReadTarget::branch("main"), "node:Person", "Alice")
+        .await
+        .unwrap()
+        .unwrap();
+    assert_eq!(
+        alice["age"],
+        serde_json::json!(31),
+        "Alice's post-optimize age update must persist across reopen"
+    );
+}
--- a/crates/omnigraph/tests/failpoints.rs
+++ b/crates/omnigraph/tests/failpoints.rs
@ -1245,7 +1245,7 @@ async fn refresh_defers_rollback_eligible_sidecar_to_next_open() {
    // the rollback (will use Dataset::restore safely; no concurrent
    // writers at open time).
    drop(db);
-    let _db = Omnigraph::open(&uri).await.unwrap();
+    let db = Omnigraph::open(&uri).await.unwrap();
    // After full-sweep recovery, the sidecar should be processed
    // (deleted). Sidecar's tables are eligible for rollback (UnexpectedAtP1):
    // restore happens on Person (HEAD advances by 1).
@ -1268,6 +1268,19 @@ async fn refresh_defers_rollback_eligible_sidecar_to_next_open() {
        "full sweep must run Dataset::restore (head advances); \
         post_head={post_head}, final_head={final_head}",
    );
+    // Convergence: roll-back published the restored HEAD, so the manifest pin
+    // tracks Lance HEAD afterward (no residual drift).
+    let entry_version = db
+        .snapshot_of(omnigraph::db::ReadTarget::branch("main"))
+        .await
+        .unwrap()
+        .entry("node:Person")
+        .unwrap()
+        .table_version;
+    assert_eq!(
+        entry_version, final_head,
+        "full-sweep roll-back must publish so manifest pin ({entry_version}) == Lance HEAD ({final_head})",
+    );
 }

 /// Companion to the above — confirms that a finalize→publisher failure
@ -1461,10 +1474,15 @@ edge WorksAt: Person -> Company
    }

    let db = Omnigraph::open(&uri).await.unwrap();
-    assert_eq!(
-        version_main(&db).await.unwrap(),
-        pre_failure_version,
-        "manifest must remain on the old schema when no schema staging files existed"
+    // Roll-back now publishes the restored version, so the manifest version
+    // advances — but to the OLD-schema content: the migration never applied
+    // (asserted by count_rows + the `_schema.pg` checks below), and the sweep
+    // converges (`manifest == Lance HEAD`, asserted by
+    // assert_post_recovery_invariants's RolledBack arm).
+    assert!(
+        version_main(&db).await.unwrap() > pre_failure_version,
+        "roll-back publishes the restored (old-schema) version, advancing the manifest; \
+         pre={pre_failure_version}",
    );
    assert_eq!(
        helpers::count_rows(&db, "node:Person").await,
@ -1637,6 +1655,100 @@ edge WorksAt: Person -> Company
    );
 }

+/// `optimize` Phase B → Phase C residual: `compact_files` advanced the Lance
+/// HEAD but the manifest publish hasn't run. The `Optimize` recovery sidecar
+/// (loose-match, like SchemaApply/EnsureIndices) must roll the compacted version
+/// forward on next open so the manifest tracks the Lance HEAD — and the healed
+/// table must then accept a schema apply (the original bug's victim).
+#[tokio::test]
+async fn optimize_phase_b_failure_recovered_on_next_open() {
+    let _scenario = FailScenario::setup();
+    let dir = tempfile::tempdir().unwrap();
+    let uri = dir.path().to_str().unwrap().to_string();
+    let operation_id;
+
+    // Seed: several separate Person inserts → multiple fragments, so compaction
+    // has real work and advances the Lance HEAD.
+    {
+        let db = Omnigraph::init(&uri, helpers::TEST_SCHEMA).await.unwrap();
+        for (name, age) in [("alice", 30), ("bob", 31), ("carol", 32), ("dave", 33)] {
+            db.mutate(
+                "main",
+                MUTATION_QUERIES,
+                "insert_person",
+                &mixed_params(&[("$name", name)], &[("$age", age)]),
+            )
+            .await
+            .unwrap();
+        }
+    }
+
+    let pre_failure_version = {
+        let db = Omnigraph::open(&uri).await.unwrap();
+        version_main(&db).await.unwrap()
+    };
+
+    // Failpoint fires AFTER compact_files advanced the Lance HEAD but BEFORE the
+    // manifest publish. The Optimize sidecar persists (only node:Person has
+    // compactable fragments, so exactly one sidecar is written).
+    {
+        let db = Omnigraph::open(&uri).await.unwrap();
+        let _failpoint =
+            ScopedFailPoint::new("optimize.post_phase_b_pre_manifest_commit", "return");
+        let err = db.optimize().await.unwrap_err();
+        assert!(
+            err.to_string()
+                .contains("injected failpoint triggered: optimize.post_phase_b_pre_manifest_commit"),
+            "unexpected error: {err}"
+        );
+
+        let recovery_dir = dir.path().join("__recovery");
+        let sidecars: Vec<_> = std::fs::read_dir(&recovery_dir)
+            .unwrap()
+            .filter_map(|e| e.ok())
+            .collect();
+        assert_eq!(
+            sidecars.len(),
+            1,
+            "exactly one Optimize sidecar must persist after optimize failure"
+        );
+        operation_id = single_sidecar_operation_id(dir.path());
+    }
+
+    // Recovery: reopen runs the sweep. The Optimize sidecar classifies
+    // RolledPastExpected (loose-match) → RollForward → manifest extends to the
+    // compacted Lance HEAD.
+    let db = Omnigraph::open(&uri).await.unwrap();
+    let post_recovery_version = version_main(&db).await.unwrap();
+    assert!(
+        post_recovery_version > pre_failure_version,
+        "manifest version must advance post-recovery (compaction rolled forward); \
+         pre={pre_failure_version}, post={post_recovery_version}",
+    );
+    drop(db);
+
+    assert_post_recovery_invariants(
+        dir.path(),
+        &operation_id,
+        RecoveryExpectation::RolledForward {
+            tables: vec![TableExpectation::main("node:Person")],
+        },
+    )
+    .await
+    .unwrap();
+
+    // The healed table accepts an additive schema apply — its HEAD-vs-manifest
+    // precondition is satisfied because recovery published the compacted version.
+    let db = Omnigraph::open(&uri).await.unwrap();
+    let desired = helpers::TEST_SCHEMA.replace(
+        "    age: I32?\n}",
+        "    age: I32?\n    nickname: String?\n}",
+    );
+    db.apply_schema(&desired)
+        .await
+        .expect("schema apply after optimize recovery must succeed");
+}
+
 #[tokio::test]
 async fn branch_merge_phase_b_failure_recovered_on_next_open() {
    use omnigraph::loader::{LoadMode, load_jsonl};
--- a/crates/omnigraph/tests/fixtures/search.gq
+++ b/crates/omnigraph/tests/fixtures/search.gq
@ -42,3 +42,17 @@ query hybrid_search($vq: Vector(4), $tq: String) {
    order { rrf(nearest($d.embedding, $vq), bm25($d.title, $tq)) }
    limit 3
 }
+
+query rrf_two_fts($q: String) {
+    match { $d: Doc }
+    return { $d.slug, $d.title }
+    order { rrf(bm25($d.title, $q), bm25($d.body, $q)) }
+    limit 3
+}
+
+query rrf_two_vectors($q1: Vector(4), $q2: Vector(4)) {
+    match { $d: Doc }
+    return { $d.slug, $d.title }
+    order { rrf(nearest($d.embedding, $q1), nearest($d.embedding, $q2)) }
+    limit 3
+}
--- a/crates/omnigraph/tests/helpers/mod.rs
+++ b/crates/omnigraph/tests/helpers/mod.rs
@ -236,6 +236,15 @@ pub fn vector_param(name: &str, values: &[f32]) -> ParamMap {
    map
 }

+/// Build a ParamMap with two vector params.
+pub fn two_vector_params(name1: &str, vals1: &[f32], name2: &str, vals2: &[f32]) -> ParamMap {
+    let mut map = vector_param(name1, vals1);
+    let key = name2.strip_prefix('$').unwrap_or(name2).to_string();
+    let lit = Literal::List(vals2.iter().map(|v| Literal::Float(*v as f64)).collect());
+    map.insert(key, lit);
+    map
+}
+
 /// Build a ParamMap with a vector param and a string param.
 pub fn vector_and_string_params(
    vec_name: &str,
--- a/crates/omnigraph/tests/helpers/recovery.rs
+++ b/crates/omnigraph/tests/helpers/recovery.rs
@ -181,6 +181,9 @@ pub async fn assert_post_recovery_invariants(
                "audit row for {operation_id} recorded the wrong recovery_kind",
            );
            assert_rollback_outcomes_record_drift(&audit);
+            // Roll-back now publishes the restored HEAD, so manifest == Lance
+            // HEAD afterward (symmetric with roll-forward) — no residual drift.
+            assert_manifest_pins_match_lance_heads(graph_root, &tables).await?;
            assert_recovery_commit_shape(graph_root, &audit, &tables).await?;
            assert_non_main_did_not_move_main(graph_root, &tables).await?;
            assert_idempotent_reopen(graph_root, operation_id).await?;
--- a/crates/omnigraph/tests/lance_surface_guards.rs
+++ b/crates/omnigraph/tests/lance_surface_guards.rs
@ -30,9 +30,13 @@ use arrow_schema::{DataType, Field, Schema};
 use lance::Dataset;
 use lance::dataset::builder::DatasetBuilder;
 use lance::dataset::optimize::{CompactionOptions, compact_files};
+use lance::dataset::transaction::Operation;
 use lance::dataset::write::delete::DeleteResult;
 use lance::dataset::{MergeInsertBuilder, WhenMatched, WhenNotMatched, WriteMode, WriteParams};
+use lance::index::DatasetIndexExt;
 use lance_file::version::LanceFileVersion;
+use lance_index::IndexType;
+use lance_index::scalar::ScalarIndexParams;
 use lance_namespace::LanceNamespace;
 use lance_table::io::commit::ManifestNamingScheme;

@ -222,6 +226,33 @@ async fn _compile_compact_files_signature() -> lance::Result<()> {
    Ok(())
 }

+// --- Guard 7b: transaction history exposes repair's classification surface -
+//
+// `db/omnigraph/repair.rs` reads Lance transactions between manifest and HEAD
+// and treats only `ReserveFragments` + `Rewrite` as safe maintenance drift.
+// Compile-only.
+
+#[allow(
+    dead_code,
+    unreachable_code,
+    unused_variables,
+    unused_mut,
+    clippy::diverging_sub_expression
+)]
+async fn _compile_transaction_history_for_repair_signature() -> lance::Result<()> {
+    let ds: Dataset = unimplemented!();
+    let tx = ds.read_transaction_by_version(1u64).await?;
+    if let Some(tx) = tx {
+        let operation = tx.operation;
+        let _name: &str = operation.name();
+        match operation {
+            Operation::Rewrite { .. } | Operation::ReserveFragments { .. } => {}
+            _ => {}
+        }
+    }
+    Ok(())
+}
+
 // --- Guard 8: Dataset::delete returns DeleteResult { new_dataset, num_deleted_rows } ---
 //
 // `table_store.rs::delete_where` consumes both fields. When MR-A migrates
@ -378,3 +409,135 @@ async fn compact_files_still_fails_on_blob_columns() {
         shifted): {err}"
    );
 }
+
+// --- Guard 11: scalar-index coverage surface (physical_rows + index details) ---
+//
+// `table_store.rs::key_column_index_coverage` mirrors Lance's `create_filter_plan`
+// C6 fallback: it reads `fragment.physical_rows` (the field whose absence on ANY
+// fragment disables the scalar index for the whole scan) and sniffs the BTREE via
+// `load_indices()` → `index.fields` / `index.index_details.type_url`. This is the
+// one real Lance-internal coupling on the indexed-traversal read path. If any of
+// these surfaces renames or changes type, the coverage check (and the cost-based
+// traversal chooser that consumes it) silently misclassifies. Compile-only.
+
+#[allow(
+    dead_code,
+    unreachable_code,
+    unused_variables,
+    unused_mut,
+    clippy::diverging_sub_expression
+)]
+async fn _compile_scalar_index_coverage_surface() -> lance::Result<()> {
+    let ds: Dataset = unimplemented!();
+    // The create_filter_plan coupling: a fragment lacking `physical_rows`
+    // disables the scalar index for the entire scan.
+    for frag in ds.fragments().iter() {
+        let _physical_rows: Option<usize> = frag.physical_rows;
+        // `key_column_index_coverage` checks each current fragment id against the
+        // index `fragment_bitmap`.
+        let _id: u64 = frag.id;
+    }
+    // The index sniff: BTREE presence is detected by single-field index whose
+    // details type_url ends with "BTreeIndexDetails". The fragment coverage check
+    // reads `fragment_bitmap` (Option<RoaringBitmap>) and calls `.contains(u32)`.
+    let indices = ds.load_indices().await?;
+    for index in indices.iter() {
+        let _fields: &Vec<i32> = &index.fields;
+        if let Some(details) = index.index_details.as_ref() {
+            let _type_url: &str = details.type_url.as_str();
+        }
+        let _covered: Option<bool> = index.fragment_bitmap.as_ref().map(|b| b.contains(0u32));
+    }
+    Ok(())
+}
+
+// --- Guard 12: can a scalar BTREE be built on a system version column? --------
+//
+// The deferred persisted-adjacency artifact plan assumed a cheap delta read of
+// `_row_last_updated_at_version > V` could be a BTREE range lookup. Lance resolves
+// index columns from the dataset schema, and the version columns are system
+// metadata — so this probe documents whether the assumption holds. The outcome is
+// the load-bearing fact, not a pass/fail of intent: if this starts SUCCEEDING when
+// it currently errors (or vice versa), the artifact's delta-cost story changes.
+
+#[tokio::test]
+async fn scalar_index_on_system_version_column_probe() {
+    let dir = tempfile::tempdir().unwrap();
+    let uri = dir.path().join("guard12.lance");
+    let mut ds = fresh_dataset(uri.to_str().unwrap()).await;
+
+    // Sanity: the system version column is present (stable row ids + V2_2).
+    assert!(
+        ds.schema().field("_row_last_updated_at_version").is_none(),
+        "PROBE NOTE: `_row_last_updated_at_version` is NOT in the user schema \
+         (it is system metadata); indexing it resolves through a different path."
+    );
+
+    let result = ds
+        .create_index_builder(
+            &["_row_last_updated_at_version"],
+            IndexType::BTree,
+            &ScalarIndexParams::default(),
+        )
+        .replace(true)
+        .await;
+
+    // Pin the observed behavior: a scalar index on the system version column is
+    // NOT buildable via the normal create-index path in this Lance. If this turns
+    // green (Ok), the artifact delta CAN use a version-column BTREE — revisit the
+    // deferred plan's Phase-2 delta-cost note in docs/dev/traversal handoff.
+    assert!(
+        result.is_err(),
+        "create_index on `_row_last_updated_at_version` unexpectedly SUCCEEDED — \
+         a system-column scalar index is now buildable; the persisted-artifact \
+         delta read could use it. Update the deferred-design notes."
+    );
+}
+
+// --- Guard 13: per-fragment deletion metadata is exposed without a scan -------
+//
+// The deferred artifact's delete-correctness coverage model needs to detect,
+// cheaply (O(fragments), no row scan), that a covered fragment acquired new
+// deletions. That hinges on Lance tracking deletions at fragment-metadata level.
+// This pins that a delete populates `fragment.deletion_file`, and probes whether
+// the deleted-row COUNT is available as metadata (`num_deleted_rows`) — the
+// difference between an O(fragments) coverage check and an O(|E|) scan.
+
+#[tokio::test]
+async fn fragment_deletion_metadata_is_available() {
+    let dir = tempfile::tempdir().unwrap();
+    let uri = dir.path().join("guard13.lance");
+    let ds = fresh_dataset(uri.to_str().unwrap()).await; // 2 rows: alice, bob
+
+    let deleted: DeleteResult = {
+        let mut ds = ds;
+        ds.delete("id = 'alice'").await.unwrap()
+    };
+    assert_eq!(deleted.num_deleted_rows, 1, "one row deleted");
+    let ds = deleted.new_dataset;
+
+    // A delete must be tracked at fragment-metadata level (not only in data).
+    let with_deletion = ds
+        .fragments()
+        .iter()
+        .find(|f| f.deletion_file.is_some())
+        .expect(
+            "after a delete, some fragment must carry a deletion_file — if not, \
+             Lance changed deletion tracking; the artifact coverage model's \
+             cheap delete-detection assumption is invalid.",
+        );
+
+    // Probe: is the deleted-row count available as metadata (cheap), or must the
+    // deletion vector be read? Pin whichever holds so the artifact plan knows.
+    let count: Option<usize> = with_deletion
+        .deletion_file
+        .as_ref()
+        .and_then(|df| df.num_deleted_rows);
+    assert_eq!(
+        count,
+        Some(1),
+        "PROBE: deletion_file.num_deleted_rows is not a populated metadata count \
+         (got {count:?}); the artifact coverage model cannot cheaply detect \
+         per-fragment deletions and would need to read the deletion vector.",
+    );
+}
--- a/crates/omnigraph/tests/literal_filters.rs
+++ b/crates/omnigraph/tests/literal_filters.rs
@ -0,0 +1,96 @@
+//! Execution goldens for filtering by non-string/non-integer scalar LITERALS
+//! (F64, F32, Bool, Date, DateTime), across both the in-memory comparison arm
+//! (standalone `$m.prop op lit`) and the Lance-pushdown arm (inline binding
+//! `Metric { prop: lit }`). Param-bound scalar filters and list-column
+//! `contains` are already covered elsewhere; this closes the literal-RHS gap.
+
+mod helpers;
+
+use arrow_array::{Array, StringArray};
+
+use omnigraph::db::Omnigraph;
+use omnigraph::loader::{LoadMode, load_jsonl};
+use omnigraph_compiler::ir::ParamMap;
+
+use helpers::*;
+
+const SCHEMA: &str = r#"
+node Metric {
+    name: String @key
+    score: F64?
+    ratio: F32?
+    active: Bool?
+    born: Date?
+    seen: DateTime?
+}
+"#;
+
+// Seeds partition every predicate, so a dropped filter returns all 4 rows.
+const DATA: &str = r#"{"type":"Metric","data":{"name":"m1","score":2.5,"ratio":0.5,"active":true,"born":"2024-06-01","seen":"2024-06-01T12:00:00Z"}}
+{"type":"Metric","data":{"name":"m2","score":1.0,"ratio":0.25,"active":false,"born":"2023-01-01","seen":"2023-01-01T00:00:00Z"}}
+{"type":"Metric","data":{"name":"m3","score":3.0,"ratio":0.75,"active":true,"born":"2025-01-01","seen":"2025-01-01T00:00:00Z"}}
+{"type":"Metric","data":{"name":"m4","score":0.5,"ratio":0.1,"active":false,"born":"2022-12-31","seen":"2022-01-01T00:00:00Z"}}"#;
+
+async fn metric_db(dir: &tempfile::TempDir) -> Omnigraph {
+    let uri = dir.path().to_str().unwrap();
+    let mut db = Omnigraph::init(uri, SCHEMA).await.unwrap();
+    load_jsonl(&mut db, DATA, LoadMode::Overwrite).await.unwrap();
+    db
+}
+
+async fn sorted_metric_names(db: &mut Omnigraph, queries: &str, name: &str) -> Vec<String> {
+    let r = query_main(db, queries, name, &ParamMap::new()).await.unwrap();
+    if r.num_rows() == 0 {
+        return Vec::new();
+    }
+    let b = r.concat_batches().unwrap();
+    let col = b.column(0).as_any().downcast_ref::<StringArray>().unwrap();
+    let mut v: Vec<String> = (0..col.len()).map(|i| col.value(i).to_string()).collect();
+    v.sort();
+    v
+}
+
+#[tokio::test]
+async fn float_literal_filters_execute() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = metric_db(&dir).await;
+    let q = r#"
+query gt() { match { $m: Metric  $m.score > 1.5 } return { $m.name } }
+query le() { match { $m: Metric  $m.ratio <= 0.25 } return { $m.name } }
+query inline() { match { $m: Metric { score: 3.0 } } return { $m.name } }
+"#;
+    // F64 standalone: scores 2.5, 3.0 > 1.5
+    assert_eq!(sorted_metric_names(&mut db, q, "gt").await, vec!["m1", "m3"]);
+    // F32 standalone: ratios 0.25, 0.1 <= 0.25
+    assert_eq!(sorted_metric_names(&mut db, q, "le").await, vec!["m2", "m4"]);
+    // F64 inline-binding pushdown: score == 3.0
+    assert_eq!(sorted_metric_names(&mut db, q, "inline").await, vec!["m3"]);
+}
+
+#[tokio::test]
+async fn bool_literal_filters_execute() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = metric_db(&dir).await;
+    let q = r#"
+query standalone() { match { $m: Metric  $m.active = true } return { $m.name } }
+query inline() { match { $m: Metric { active: true } } return { $m.name } }
+query negated() { match { $m: Metric  $m.active != true } return { $m.name } }
+"#;
+    assert_eq!(sorted_metric_names(&mut db, q, "standalone").await, vec!["m1", "m3"]);
+    assert_eq!(sorted_metric_names(&mut db, q, "inline").await, vec!["m1", "m3"]);
+    assert_eq!(sorted_metric_names(&mut db, q, "negated").await, vec!["m2", "m4"]);
+}
+
+#[tokio::test]
+async fn date_and_datetime_literal_filters_execute() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = metric_db(&dir).await;
+    let q = r#"
+query born_ge() { match { $m: Metric  $m.born >= date("2024-01-01") } return { $m.name } }
+query seen_lt() { match { $m: Metric  $m.seen < datetime("2024-01-01T00:00:00Z") } return { $m.name } }
+"#;
+    // born: m1 2024-06, m3 2025 >= 2024-01-01
+    assert_eq!(sorted_metric_names(&mut db, q, "born_ge").await, vec!["m1", "m3"]);
+    // seen: m2 2023, m4 2022 < 2024-01-01
+    assert_eq!(sorted_metric_names(&mut db, q, "seen_lt").await, vec!["m2", "m4"]);
+}
--- a/crates/omnigraph/tests/maintenance.rs
+++ b/crates/omnigraph/tests/maintenance.rs
@ -8,10 +8,16 @@ mod helpers;
 use std::time::Duration;

 use lance::Dataset;
-use omnigraph::db::{CleanupPolicyOptions, Omnigraph, SkipReason};
+use lance::dataset::optimize::{CompactionOptions, compact_files};
+use omnigraph::db::{
+    CleanupPolicyOptions, Omnigraph, ReadTarget, RepairAction, RepairClassification, RepairOptions,
+    SkipReason,
+};
 use omnigraph::loader::{LoadMode, load_jsonl};

-use helpers::{TEST_DATA, TEST_SCHEMA, count_rows, init_and_load};
+use helpers::{
+    MUTATION_QUERIES, TEST_DATA, TEST_SCHEMA, count_rows, init_and_load, mixed_params, mutate_main,
+};

 /// Filesystem URI of a node sub-table, mirroring the engine's layout
 /// (FNV-1a of the type name under `nodes/`). Matches the helper in
@ -25,11 +31,64 @@ fn node_table_uri(root: &str, type_name: &str) -> String {
    format!("{}/nodes/{hash:016x}", root.trim_end_matches('/'))
 }

+async fn person_manifest_and_head(db: &Omnigraph, root: &str) -> (u64, u64, String) {
+    let snap = db.snapshot_of(ReadTarget::branch("main")).await.unwrap();
+    let entry = snap.entry("node:Person").unwrap();
+    let full = format!("{}/{}", root.trim_end_matches('/'), entry.table_path);
+    let head = Dataset::open(&full).await.unwrap().version().version;
+    (entry.table_version, head, full)
+}
+
+async fn add_person_fragments(db: &mut Omnigraph) {
+    for (name, age) in [("Eve", 40), ("Frank", 41), ("Grace", 42), ("Heidi", 43)] {
+        mutate_main(
+            db,
+            MUTATION_QUERIES,
+            "insert_person",
+            &mixed_params(&[("$name", name)], &[("$age", age as i64)]),
+        )
+        .await
+        .expect("insert");
+    }
+}
+
+async fn forge_person_compaction_drift(db: &mut Omnigraph, root: &str) -> (u64, u64, String) {
+    add_person_fragments(db).await;
+    let (manifest_version, _, full) = person_manifest_and_head(db, root).await;
+    let mut ds = Dataset::open(&full).await.unwrap();
+    let metrics = compact_files(&mut ds, CompactionOptions::default(), None)
+        .await
+        .expect("raw Lance compaction");
+    let lance_head_version = ds.version().version;
+    assert!(
+        lance_head_version > manifest_version,
+        "raw Lance compaction should advance HEAD beyond manifest"
+    );
+    assert!(
+        metrics.fragments_removed > 0 || metrics.fragments_added > 0,
+        "test precondition: raw compaction should rewrite fragments"
+    );
+    (manifest_version, lance_head_version, full)
+}
+
+async fn forge_person_delete_drift(db: &Omnigraph, root: &str) -> (u64, u64, String) {
+    let (manifest_version, _, full) = person_manifest_and_head(db, root).await;
+    let mut ds = Dataset::open(&full).await.unwrap();
+    let deleted = ds.delete("name = 'Alice'").await.expect("raw Lance delete");
+    assert_eq!(deleted.num_deleted_rows, 1, "fixture should delete Alice");
+    let lance_head_version = deleted.new_dataset.version().version;
+    assert!(
+        lance_head_version > manifest_version,
+        "raw Lance delete should advance HEAD beyond manifest"
+    );
+    (manifest_version, lance_head_version, full)
+}
+
 #[tokio::test]
 async fn optimize_on_empty_graph_returns_stats_per_table_with_no_changes() {
    let dir = tempfile::tempdir().unwrap();
    let uri = dir.path().to_str().unwrap();
-    let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
+    let db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();

    let stats = db.optimize().await.unwrap();

@ -45,7 +104,7 @@ async fn optimize_on_empty_graph_returns_stats_per_table_with_no_changes() {
 #[tokio::test]
 async fn optimize_after_load_then_again_is_idempotent() {
    let dir = tempfile::tempdir().unwrap();
-    let mut db = init_and_load(&dir).await;
+    let db = init_and_load(&dir).await;

    // First pass may compact (load wrote real fragments).
    let _first = db.optimize().await.unwrap();
@ -163,6 +222,404 @@ node Tag {\n    slug: String @key\n}\n";
    assert_eq!(tag.skipped, None, "non-blob table must not be skipped");
 }

+// Regression: `optimize` must publish its compaction to the `__manifest` so the
+// manifest's recorded `table_version` tracks the compacted Lance HEAD.
+//
+// Lance `compact_files` advances the *dataset's* version (reserve-fragments +
+// rewrite commits) but knows nothing about OmniGraph's `__manifest`. If optimize
+// does not publish a manifest update, the manifest's `table_version` lags the
+// Lance HEAD: reads stay pinned to the pre-compaction version (compaction is
+// invisible to them) and any subsequent schema apply / strict update/delete
+// fails its HEAD-vs-manifest precondition with
+// "stale view of '<table>': expected manifest table version X but current is Y".
+// This pins the fix — optimize publishes the compacted version, so manifest ==
+// HEAD and migrations after a compaction succeed.
+#[tokio::test]
+async fn optimize_publishes_compaction_to_manifest_so_schema_apply_succeeds() {
+    let dir = tempfile::tempdir().unwrap();
+    let root = dir
+        .path()
+        .to_str()
+        .unwrap()
+        .trim_end_matches('/')
+        .to_string();
+    let mut db = init_and_load(&dir).await;
+
+    // Several separate inserts → multiple Person fragments, so `compact_files`
+    // actually merges and moves the Lance HEAD (a single fragment is a no-op).
+    for (name, age) in [("Eve", 40), ("Frank", 41), ("Grace", 42), ("Heidi", 43)] {
+        mutate_main(
+            &mut db,
+            MUTATION_QUERIES,
+            "insert_person",
+            &mixed_params(&[("$name", name)], &[("$age", age as i64)]),
+        )
+        .await
+        .expect("insert");
+    }
+
+    let stats = db.optimize().await.unwrap();
+    let person = stats
+        .iter()
+        .find(|s| s.table_key == "node:Person")
+        .expect("Person stat present");
+    assert!(
+        person.committed,
+        "Person is multi-fragment, so optimize must have compacted it"
+    );
+
+    // After optimize, the manifest's recorded table_version must equal the actual
+    // Lance HEAD — optimize published its compaction, so there is no drift.
+    let snap = db.snapshot_of(ReadTarget::branch("main")).await.unwrap();
+    let entry = snap.entry("node:Person").unwrap();
+    let manifest_version = entry.table_version;
+    let full = format!("{}/{}", root, entry.table_path);
+    let lance_head = Dataset::open(&full).await.unwrap().version().version;
+    assert_eq!(
+        manifest_version, lance_head,
+        "after optimize, manifest table_version ({manifest_version}) must equal Lance HEAD ({lance_head})",
+    );
+
+    // Reads observe the compacted version with rows preserved (4 seed + 4 inserts).
+    assert_eq!(count_rows(&db, "node:Person").await, 8);
+
+    // The headline: an additive (nullable property) migration touching the
+    // just-compacted table succeeds, where it previously failed with "stale view".
+    let desired = TEST_SCHEMA.replace(
+        "    age: I32?\n}",
+        "    age: I32?\n    nickname: String?\n}",
+    );
+    let result = db
+        .apply_schema(&desired)
+        .await
+        .expect("additive schema apply after optimize must succeed");
+    assert!(result.applied, "schema apply should report applied=true");
+}
+
+#[tokio::test]
+async fn optimize_skips_preexisting_manifest_head_drift() {
+    let dir = tempfile::tempdir().unwrap();
+    let root = dir
+        .path()
+        .to_str()
+        .unwrap()
+        .trim_end_matches('/')
+        .to_string();
+    let mut db = init_and_load(&dir).await;
+    let (manifest_before, head_before, _) = forge_person_compaction_drift(&mut db, &root).await;
+
+    let stats = db.optimize().await.unwrap();
+    let person = stats
+        .iter()
+        .find(|s| s.table_key == "node:Person")
+        .expect("Person stat present");
+    assert_eq!(person.skipped, Some(SkipReason::DriftNeedsRepair));
+    assert!(!person.committed);
+    assert_eq!(person.manifest_version, Some(manifest_before));
+    assert_eq!(person.lance_head_version, Some(head_before));
+
+    let (manifest_after, head_after, _) = person_manifest_and_head(&db, &root).await;
+    assert_eq!(
+        manifest_after, manifest_before,
+        "optimize must not publish uncovered drift"
+    );
+    assert_eq!(
+        head_after, head_before,
+        "optimize must not move drifted HEAD"
+    );
+}
+
+#[tokio::test]
+async fn repair_preview_reports_verified_maintenance_drift_without_healing() {
+    let dir = tempfile::tempdir().unwrap();
+    let root = dir
+        .path()
+        .to_str()
+        .unwrap()
+        .trim_end_matches('/')
+        .to_string();
+    let mut db = init_and_load(&dir).await;
+    let (manifest_before, head_before, _) = forge_person_compaction_drift(&mut db, &root).await;
+
+    let stats = db
+        .repair(RepairOptions {
+            confirm: false,
+            force: false,
+        })
+        .await
+        .unwrap();
+    assert_eq!(stats.manifest_version, None);
+    let person = stats
+        .tables
+        .iter()
+        .find(|s| s.table_key == "node:Person")
+        .expect("Person repair stat present");
+    assert_eq!(
+        person.classification,
+        RepairClassification::VerifiedMaintenance
+    );
+    assert_eq!(person.action, RepairAction::Preview);
+    assert_eq!(person.manifest_version, manifest_before);
+    assert_eq!(person.lance_head_version, head_before);
+    assert!(
+        person
+            .operations
+            .iter()
+            .all(|op| op == "ReserveFragments" || op == "Rewrite"),
+        "maintenance drift should only include Lance maintenance operations: {:?}",
+        person.operations
+    );
+
+    let (manifest_after, head_after, _) = person_manifest_and_head(&db, &root).await;
+    assert_eq!(manifest_after, manifest_before);
+    assert_eq!(head_after, head_before);
+}
+
+#[tokio::test]
+async fn repair_confirm_heals_verified_maintenance_drift() {
+    let dir = tempfile::tempdir().unwrap();
+    let root = dir
+        .path()
+        .to_str()
+        .unwrap()
+        .trim_end_matches('/')
+        .to_string();
+    let mut db = init_and_load(&dir).await;
+    let (_, head_before, _) = forge_person_compaction_drift(&mut db, &root).await;
+
+    let stats = db
+        .repair(RepairOptions {
+            confirm: true,
+            force: false,
+        })
+        .await
+        .unwrap();
+    assert!(
+        stats.manifest_version.is_some(),
+        "confirmed repair should publish one manifest commit"
+    );
+    let person = stats
+        .tables
+        .iter()
+        .find(|s| s.table_key == "node:Person")
+        .expect("Person repair stat present");
+    assert_eq!(
+        person.classification,
+        RepairClassification::VerifiedMaintenance
+    );
+    assert_eq!(person.action, RepairAction::Healed);
+
+    let (manifest_after, head_after, _) = person_manifest_and_head(&db, &root).await;
+    assert_eq!(manifest_after, head_before);
+    assert_eq!(head_after, head_before);
+
+    let desired = TEST_SCHEMA.replace(
+        "    age: I32?\n}",
+        "    age: I32?\n    nickname: String?\n}",
+    );
+    let result = db
+        .apply_schema(&desired)
+        .await
+        .expect("strict schema apply should succeed after repair");
+    assert!(result.applied);
+}
+
+#[tokio::test]
+async fn repair_refuses_raw_delete_without_force() {
+    let dir = tempfile::tempdir().unwrap();
+    let root = dir
+        .path()
+        .to_str()
+        .unwrap()
+        .trim_end_matches('/')
+        .to_string();
+    let db = init_and_load(&dir).await;
+    let (manifest_before, head_before, _) = forge_person_delete_drift(&db, &root).await;
+
+    let stats = db
+        .repair(RepairOptions {
+            confirm: true,
+            force: false,
+        })
+        .await
+        .unwrap();
+    assert_eq!(stats.manifest_version, None);
+    let person = stats
+        .tables
+        .iter()
+        .find(|s| s.table_key == "node:Person")
+        .expect("Person repair stat present");
+    assert_eq!(person.classification, RepairClassification::Suspicious);
+    assert_eq!(person.action, RepairAction::Refused);
+    assert!(
+        person.operations.iter().any(|op| op == "Delete"),
+        "raw Lance delete should be reported as a suspicious operation: {:?}",
+        person.operations
+    );
+
+    let (manifest_after, head_after, _) = person_manifest_and_head(&db, &root).await;
+    assert_eq!(manifest_after, manifest_before);
+    assert_eq!(head_after, head_before);
+    assert_eq!(
+        count_rows(&db, "node:Person").await,
+        4,
+        "manifest-pinned reads should still see the pre-delete version"
+    );
+}
+
+#[tokio::test]
+async fn repair_force_heals_suspicious_drift() {
+    let dir = tempfile::tempdir().unwrap();
+    let root = dir
+        .path()
+        .to_str()
+        .unwrap()
+        .trim_end_matches('/')
+        .to_string();
+    let db = init_and_load(&dir).await;
+    let (_, head_before, _) = forge_person_delete_drift(&db, &root).await;
+
+    let stats = db
+        .repair(RepairOptions {
+            confirm: true,
+            force: true,
+        })
+        .await
+        .unwrap();
+    let person = stats
+        .tables
+        .iter()
+        .find(|s| s.table_key == "node:Person")
+        .expect("Person repair stat present");
+    assert_eq!(person.classification, RepairClassification::Suspicious);
+    assert_eq!(person.action, RepairAction::Forced);
+
+    let (manifest_after, head_after, _) = person_manifest_and_head(&db, &root).await;
+    assert_eq!(manifest_after, head_before);
+    assert_eq!(head_after, head_before);
+    assert_eq!(
+        count_rows(&db, "node:Person").await,
+        3,
+        "forced repair publishes the raw delete's HEAD"
+    );
+}
+
+#[tokio::test]
+async fn non_strict_load_refuses_uncovered_drift_before_folding_it() {
+    let dir = tempfile::tempdir().unwrap();
+    let root = dir
+        .path()
+        .to_str()
+        .unwrap()
+        .trim_end_matches('/')
+        .to_string();
+    let mut db = init_and_load(&dir).await;
+    let (manifest_before, head_before, _) = forge_person_compaction_drift(&mut db, &root).await;
+
+    let err = load_jsonl(
+        &mut db,
+        "{\"type\":\"Person\",\"data\":{\"name\":\"Ivan\",\"age\":44}}",
+        LoadMode::Merge,
+    )
+    .await
+    .expect_err("merge load must not silently fold uncovered drift");
+    assert!(
+        err.to_string().contains("omnigraph repair"),
+        "error should point at explicit repair; got: {err}"
+    );
+
+    let (manifest_after, head_after, _) = person_manifest_and_head(&db, &root).await;
+    assert_eq!(manifest_after, manifest_before);
+    assert_eq!(head_after, head_before);
+}
+
+#[tokio::test]
+async fn delete_only_mutation_refuses_uncovered_drift_before_inline_commit() {
+    let dir = tempfile::tempdir().unwrap();
+    let root = dir
+        .path()
+        .to_str()
+        .unwrap()
+        .trim_end_matches('/')
+        .to_string();
+    let mut db = init_and_load(&dir).await;
+    let (manifest_before, head_before, _) = forge_person_compaction_drift(&mut db, &root).await;
+
+    let err = mutate_main(
+        &mut db,
+        MUTATION_QUERIES,
+        "remove_person",
+        &mixed_params(&[("$name", "Alice")], &[]),
+    )
+    .await
+    .expect_err("strict delete must reject uncovered drift before delete_where");
+    assert!(
+        err.to_string().contains("expected"),
+        "delete should fail as a strict stale-version write; got: {err}"
+    );
+
+    let (manifest_after, head_after, _) = person_manifest_and_head(&db, &root).await;
+    assert_eq!(manifest_after, manifest_before);
+    assert_eq!(
+        head_after, head_before,
+        "delete_where must not run after the strict drift guard fails"
+    );
+    assert_eq!(
+        count_rows(&db, "node:Person").await,
+        8,
+        "manifest-pinned reads should still see all rows present before the failed delete"
+    );
+}
+
+// Regression: `optimize` must REFUSE when an unresolved recovery sidecar is
+// pending. Operating on an unrecovered graph could publish a partial write that
+// the all-or-nothing recovery sweep would roll back; the operator must reopen
+// (run the recovery sweep) first.
+#[tokio::test]
+async fn optimize_defers_when_recovery_sidecar_is_pending() {
+    let dir = tempfile::tempdir().unwrap();
+    let uri = dir.path().to_str().unwrap();
+    let db = init_and_load(&dir).await;
+
+    // Simulate an in-process failed write that left a recovery sidecar on disk.
+    let recovery_dir = dir.path().join("__recovery");
+    std::fs::create_dir_all(&recovery_dir).unwrap();
+    let person_path = node_table_uri(uri, "Person");
+    let sidecar_json = format!(
+        r#"{{
+            "schema_version": 1,
+            "operation_id": "01H000000000000000000DEFR",
+            "started_at": "0",
+            "branch": null,
+            "actor_id": "act-test",
+            "writer_kind": "Mutation",
+            "tables": [
+                {{
+                    "table_key": "node:Person",
+                    "table_path": "{}",
+                    "expected_version": 1,
+                    "post_commit_pin": 2
+                }}
+            ]
+        }}"#,
+        person_path
+    );
+    std::fs::write(
+        recovery_dir.join("01H000000000000000000DEFR.json"),
+        sidecar_json,
+    )
+    .unwrap();
+
+    let err = db
+        .optimize()
+        .await
+        .expect_err("optimize must defer (error) while a recovery sidecar is pending");
+    assert!(
+        err.to_string().to_lowercase().contains("recovery"),
+        "optimize defer error should mention recovery; got: {err}",
+    );
+}
+
 #[tokio::test]
 async fn cleanup_without_any_policy_option_errors() {
    let dir = tempfile::tempdir().unwrap();
--- a/crates/omnigraph/tests/merge_truth_table.rs
+++ b/crates/omnigraph/tests/merge_truth_table.rs
@ -941,8 +941,8 @@ async fn merge_pair_truth_table() {
        unsupported_cells, 45,
        "expected 45 cells involving dropProperty/addLabel/removeLabel"
    );
-    assert!(
-        elapsed.as_secs() < 30,
-        "merge truth table exceeded 30s budget: {elapsed:?}"
-    );
+    // No wall-clock assertion here: `elapsed` is logged above for visibility, but
+    // a fixed time budget in a correctness test flakes under parallel test load
+    // (it tripped at ~31s in the full `--test-threads=4` gate while passing at
+    // ~20s in isolation). Merge-perf regressions belong in a bench, not here.
 }
--- a/crates/omnigraph/tests/ordering.rs
+++ b/crates/omnigraph/tests/ordering.rs
@ -0,0 +1,134 @@
+//! ORDER BY golden coverage: descending, multi-key precedence, deterministic
+//! tie-break (total order), and NULL placement.
+//!
+//! These pin the observable output-ordering contract (deny-list: "output
+//! ordering … become dependencies once shipped"). `apply_ordering` appends the
+//! bound entities' key columns as an ascending tie-break, so equal user-sort
+//! keys yield a TOTAL, deterministic order (and `ORDER … LIMIT` is
+//! deterministic). NULL placement is `nulls_first = !descending` (NULLs first
+//! under ASC, last under DESC). Both are documented in
+//! `docs/user/query-language.md`.
+
+mod helpers;
+
+use arrow_array::{Array, StringArray};
+
+use omnigraph::db::Omnigraph;
+use omnigraph::loader::{LoadMode, load_jsonl};
+use omnigraph_compiler::ir::ParamMap;
+use omnigraph_compiler::result::QueryResult;
+
+use helpers::*;
+
+/// Names in result ROW order (not sorted) — these tests assert positional order.
+fn names_in_order(result: &QueryResult) -> Vec<String> {
+    let batch = result.concat_batches().unwrap();
+    if batch.num_rows() == 0 {
+        return Vec::new();
+    }
+    let col = batch
+        .column(0)
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+    (0..col.len()).map(|i| col.value(i).to_string()).collect()
+}
+
+/// Init the standard schema and load a custom Person-only dataset.
+async fn init_people(dir: &tempfile::TempDir, jsonl: &str) -> Omnigraph {
+    let uri = dir.path().to_str().unwrap();
+    let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
+    load_jsonl(&mut db, jsonl, LoadMode::Overwrite).await.unwrap();
+    db
+}
+
+#[tokio::test]
+async fn ordering_descending() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_and_load(&dir).await;
+    let q = r#"
+query q() {
+    match { $p: Person }
+    return { $p.name }
+    order { $p.age desc }
+}
+"#;
+    let got = names_in_order(&query_main(&mut db, q, "q", &ParamMap::new()).await.unwrap());
+    // Charlie(35), Alice(30), Diana(28), Bob(25)
+    assert_eq!(got, vec!["Charlie", "Alice", "Diana", "Bob"]);
+}
+
+#[tokio::test]
+async fn ordering_multi_key_age_desc_name_asc() {
+    let dir = tempfile::tempdir().unwrap();
+    // Alice & Bob tie at age 30; loaded Bob-first so the expected output order
+    // cannot be the load order.
+    let data = r#"{"type":"Person","data":{"name":"Bob","age":30}}
+{"type":"Person","data":{"name":"Alice","age":30}}
+{"type":"Person","data":{"name":"Charlie","age":25}}"#;
+    let mut db = init_people(&dir, data).await;
+    let q = r#"
+query q() {
+    match { $p: Person }
+    return { $p.name }
+    order { $p.age desc, $p.name asc }
+}
+"#;
+    let got = names_in_order(&query_main(&mut db, q, "q", &ParamMap::new()).await.unwrap());
+    // age desc -> [30,30,25]; the 30-tie broken by name asc -> Alice before Bob.
+    assert_eq!(got, vec!["Alice", "Bob", "Charlie"]);
+}
+
+#[tokio::test]
+async fn ordering_tiebreak_by_key_is_deterministic() {
+    let dir = tempfile::tempdir().unwrap();
+    // Same tie at age 30, NO secondary sort key. Loaded Bob-first; the tie must
+    // break by the entity key (name) ascending -> Alice before Bob, regardless
+    // of load order. This locks the total-order tie-break in apply_ordering.
+    let data = r#"{"type":"Person","data":{"name":"Bob","age":30}}
+{"type":"Person","data":{"name":"Alice","age":30}}
+{"type":"Person","data":{"name":"Charlie","age":25}}"#;
+    let mut db = init_people(&dir, data).await;
+    let q = r#"
+query q() {
+    match { $p: Person }
+    return { $p.name }
+    order { $p.age asc }
+}
+"#;
+    let got = names_in_order(&query_main(&mut db, q, "q", &ParamMap::new()).await.unwrap());
+    // age asc -> Charlie(25), then the 30-tie broken by key asc -> Alice, Bob.
+    assert_eq!(got, vec!["Charlie", "Alice", "Bob"]);
+}
+
+#[tokio::test]
+async fn ordering_nulls_placement_asc_and_desc() {
+    let dir = tempfile::tempdir().unwrap();
+    // Bob has a NULL age.
+    let data = r#"{"type":"Person","data":{"name":"Alice","age":30}}
+{"type":"Person","data":{"name":"Bob","age":null}}
+{"type":"Person","data":{"name":"Charlie","age":25}}"#;
+    let mut db = init_people(&dir, data).await;
+
+    let asc = r#"
+query q() {
+    match { $p: Person }
+    return { $p.name }
+    order { $p.age asc }
+}
+"#;
+    let got_asc = names_in_order(&query_main(&mut db, asc, "q", &ParamMap::new()).await.unwrap());
+    // ASC: nulls_first -> Bob(null), then 25, 30.
+    assert_eq!(got_asc, vec!["Bob", "Charlie", "Alice"]);
+
+    let desc = r#"
+query q() {
+    match { $p: Person }
+    return { $p.name }
+    order { $p.age desc }
+}
+"#;
+    let got_desc = names_in_order(&query_main(&mut db, desc, "q", &ParamMap::new()).await.unwrap());
+    // DESC: nulls last -> 30, 25, then Bob(null).
+    assert_eq!(got_desc, vec!["Alice", "Charlie", "Bob"]);
+}
--- a/crates/omnigraph/tests/proptest_equivalence.rs
+++ b/crates/omnigraph/tests/proptest_equivalence.rs
@ -0,0 +1,311 @@
+//! Property-based query-correctness invariants over generated graphs.
+//!
+//! The cross-type id-collision bug (fixed in f6a0e53) was a silent wrong-result
+//! divergence between the two Expand modes, caught only because someone
+//! hand-built the one colliding fixture. This turns that single example into a
+//! search over the whole class: node keys for BOTH types are drawn from a small
+//! SHARED alphabet, so cross-type collisions — plus cycles and self-loops —
+//! arise frequently. The invariants make any future fork divergence (the planned
+//! third ExpandMode, the anti-join fast/slow fork) fail loudly instead of
+//! silently.
+//!
+//! Each test is a sync `#[test]` + `#[serial]`: it builds its own runtime and
+//! `block_on`s per generated case (proptest closures are sync), and the
+//! mode-equivalence test writes `OMNIGRAPH_TRAVERSAL_MODE`, so serial execution
+//! keeps env writes from racing other tests in this binary.
+
+mod helpers;
+
+use std::collections::HashSet;
+
+use arrow_array::{Array, StringArray};
+use proptest::prelude::*;
+use proptest::test_runner::{Config, TestRunner};
+use serial_test::serial;
+
+use omnigraph::db::{Omnigraph, ReadTarget};
+use omnigraph::loader::{LoadMode, load_jsonl};
+use omnigraph_compiler::ir::ParamMap;
+use omnigraph_compiler::query::ast::Literal;
+
+use helpers::*;
+
+/// Small SHARED key alphabet — Person and Company keys are both drawn from this,
+/// so cross-type id collisions are common.
+const KEYS: &[&str] = &["a", "b", "c", "d", "e"];
+
+const QUERIES: &str = r#"
+query friends($name: String) {
+    match {
+        $p: Person { name: $name }
+        $p knows{1,3} $f
+    }
+    return { $f.name }
+}
+query employers($name: String) {
+    match {
+        $p: Person { name: $name }
+        $p worksAt{1,2} $c
+    }
+    return { $c.name }
+}
+query all_persons() {
+    match { $p: Person }
+    return { $p.name }
+}
+query employed() {
+    match {
+        $p: Person
+        $p worksAt $c
+    }
+    return { $p.name }
+}
+query unemployed() {
+    match {
+        $p: Person
+        not { $p worksAt $_ }
+    }
+    return { $p.name }
+}
+"#;
+
+#[derive(Debug, Clone)]
+struct GenGraph {
+    persons: Vec<String>,
+    companies: Vec<String>,
+    knows: Vec<(usize, usize)>,    // indices into persons (self-loops & cycles allowed)
+    works_at: Vec<(usize, usize)>, // (person idx, company idx)
+}
+
+impl GenGraph {
+    fn to_jsonl(&self) -> String {
+        let mut s = String::new();
+        for p in &self.persons {
+            s.push_str(&format!("{{\"type\":\"Person\",\"data\":{{\"name\":\"{p}\"}}}}\n"));
+        }
+        for c in &self.companies {
+            s.push_str(&format!("{{\"type\":\"Company\",\"data\":{{\"name\":\"{c}\"}}}}\n"));
+        }
+        // Dedup exact-duplicate edge rows (the loader rejects intra-batch
+        // duplicate keys); collisions/cycles/self-loops are unaffected.
+        let mut seen = HashSet::new();
+        for &(a, b) in &self.knows {
+            if seen.insert(("k", a, b)) {
+                s.push_str(&format!(
+                    "{{\"edge\":\"Knows\",\"from\":\"{}\",\"to\":\"{}\"}}\n",
+                    self.persons[a], self.persons[b]
+                ));
+            }
+        }
+        for &(a, b) in &self.works_at {
+            if seen.insert(("w", a, b)) {
+                s.push_str(&format!(
+                    "{{\"edge\":\"WorksAt\",\"from\":\"{}\",\"to\":\"{}\"}}\n",
+                    self.persons[a], self.companies[b]
+                ));
+            }
+        }
+        s
+    }
+}
+
+fn arb_keys() -> impl Strategy<Value = Vec<String>> {
+    proptest::sample::subsequence(KEYS.to_vec(), 1..=KEYS.len())
+        .prop_map(|v| v.into_iter().map(String::from).collect())
+}
+
+fn arb_graph() -> impl Strategy<Value = GenGraph> {
+    (arb_keys(), arb_keys()).prop_flat_map(|(persons, companies)| {
+        let np = persons.len();
+        let nc = companies.len();
+        let knows = prop::collection::vec((0..np, 0..np), 0..=10);
+        let works = prop::collection::vec((0..np, 0..nc), 0..=10);
+        (Just(persons), Just(companies), knows, works).prop_map(
+            |(persons, companies, knows, works_at)| GenGraph {
+                persons,
+                companies,
+                knows,
+                works_at,
+            },
+        )
+    })
+}
+
+fn config() -> Config {
+    Config {
+        cases: 48,
+        ..Config::default()
+    }
+}
+
+fn clear_mode() {
+    unsafe { std::env::remove_var("OMNIGRAPH_TRAVERSAL_MODE") };
+}
+
+/// RAII guard that sets `OMNIGRAPH_TRAVERSAL_MODE` and clears it on drop — so a
+/// panic mid-case (e.g. a query `unwrap`) cannot leak the forced mode into
+/// proptest's subsequent shrink/cases and mask the divergence under test. SAFE:
+/// every test in this binary is `#[serial]`, so no thread reads the env during
+/// the write.
+struct ModeGuard;
+impl ModeGuard {
+    fn set(mode: &str) -> Self {
+        unsafe { std::env::set_var("OMNIGRAPH_TRAVERSAL_MODE", mode) };
+        ModeGuard
+    }
+}
+impl Drop for ModeGuard {
+    fn drop(&mut self) {
+        unsafe { std::env::remove_var("OMNIGRAPH_TRAVERSAL_MODE") };
+    }
+}
+
+async fn load_graph(graph: &GenGraph) -> (tempfile::TempDir, Omnigraph) {
+    let dir = tempfile::tempdir().unwrap();
+    let uri = dir.path().to_str().unwrap();
+    let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
+    load_jsonl(&mut db, &graph.to_jsonl(), LoadMode::Overwrite)
+        .await
+        .unwrap();
+    (dir, db)
+}
+
+fn one_param(val: &str) -> ParamMap {
+    let mut m = ParamMap::new();
+    m.insert("name".to_string(), Literal::String(val.to_string()));
+    m
+}
+
+/// First-column strings, sorted (MULTISET — preserves duplicate-row count so
+/// mode comparisons catch dedup divergence, not just set divergence).
+async fn col0_sorted(db: &mut Omnigraph, name: &str, params: &ParamMap) -> Vec<String> {
+    let r = db
+        .query(ReadTarget::branch("main"), QUERIES, name, params)
+        .await
+        .unwrap();
+    if r.num_rows() == 0 {
+        return Vec::new();
+    }
+    let b = r.concat_batches().unwrap();
+    let col = b.column(0).as_any().downcast_ref::<StringArray>().unwrap();
+    let mut v: Vec<String> = (0..col.len()).map(|i| col.value(i).to_string()).collect();
+    v.sort();
+    v
+}
+
+async fn col0_set(db: &mut Omnigraph, name: &str, params: &ParamMap) -> HashSet<String> {
+    col0_sorted(db, name, params).await.into_iter().collect()
+}
+
+// INVARIANT 1: mode equivalence. For any generated graph and start key, the
+// CSR, indexed, and auto paths return identical result multisets — over both a
+// same-type traversal (knows{1,3}, exercises cycles/self-loops) and a cross-type
+// one (worksAt{1,2}, collision-prone). This is the search-over-the-class version
+// of the hand-built cross-type-collision fixture.
+#[test]
+#[serial]
+fn prop_expand_indexed_eq_csr() {
+    let rt = tokio::runtime::Runtime::new().unwrap();
+    let mut runner = TestRunner::new(config());
+    runner
+        .run(&arb_graph(), |graph| {
+            let mismatch = rt.block_on(async {
+                let (_dir, mut db) = load_graph(&graph).await;
+                for start in graph.persons.clone() {
+                    let p = one_param(&start);
+                    for q in ["friends", "employers"] {
+                        // Each guard clears the mode on drop (end of the block,
+                        // or on panic), so a forced mode never leaks across runs.
+                        let csr = {
+                            let _g = ModeGuard::set("csr");
+                            col0_sorted(&mut db, q, &p).await
+                        };
+                        let indexed = {
+                            let _g = ModeGuard::set("indexed");
+                            col0_sorted(&mut db, q, &p).await
+                        };
+                        // No guard → env unset → auto (cost-based) path.
+                        let auto = col0_sorted(&mut db, q, &p).await;
+                        if csr != indexed || csr != auto {
+                            return Some((start, q, csr, indexed, auto));
+                        }
+                    }
+                }
+                None
+            });
+            prop_assert!(
+                mismatch.is_none(),
+                "Expand mode divergence: {:?}",
+                mismatch
+            );
+            Ok(())
+        })
+        .unwrap();
+}
+
+// INVARIANT 2: no phantom rows. Every key a traversal returns must belong to the
+// destination type's loaded key set — independent of the two-mode comparison, so
+// it catches over-emission even if both modes are wrong identically.
+#[test]
+#[serial]
+fn prop_results_subset_of_existing_nodes() {
+    clear_mode();
+    let rt = tokio::runtime::Runtime::new().unwrap();
+    let mut runner = TestRunner::new(config());
+    runner
+        .run(&arb_graph(), |graph| {
+            let bad = rt.block_on(async {
+                let (_dir, mut db) = load_graph(&graph).await;
+                let persons: HashSet<String> = graph.persons.iter().cloned().collect();
+                let companies: HashSet<String> = graph.companies.iter().cloned().collect();
+                for start in graph.persons.clone() {
+                    let p = one_param(&start);
+                    for f in col0_set(&mut db, "friends", &p).await {
+                        if !persons.contains(&f) {
+                            return Some(("friends", start, f));
+                        }
+                    }
+                    for c in col0_set(&mut db, "employers", &p).await {
+                        if !companies.contains(&c) {
+                            return Some(("employers", start, c));
+                        }
+                    }
+                }
+                None
+            });
+            prop_assert!(bad.is_none(), "phantom row: {:?}", bad);
+            Ok(())
+        })
+        .unwrap();
+}
+
+// INVARIANT 3: anti-join complement. `not { $p worksAt $_ }` and its complement
+// (persons WITH a worksAt) must be disjoint and together cover all persons.
+#[test]
+#[serial]
+fn prop_antijoin_partitions_persons() {
+    clear_mode();
+    let rt = tokio::runtime::Runtime::new().unwrap();
+    let mut runner = TestRunner::new(config());
+    runner
+        .run(&arb_graph(), |graph| {
+            let err = rt.block_on(async {
+                let (_dir, mut db) = load_graph(&graph).await;
+                let all = col0_set(&mut db, "all_persons", &ParamMap::new()).await;
+                let unemployed = col0_set(&mut db, "unemployed", &ParamMap::new()).await;
+                let employed = col0_set(&mut db, "employed", &ParamMap::new()).await;
+                let overlap: Vec<_> = unemployed.intersection(&employed).cloned().collect();
+                let union: HashSet<_> = unemployed.union(&employed).cloned().collect();
+                if !overlap.is_empty() {
+                    return Some(format!("overlap {overlap:?}"));
+                }
+                if union != all {
+                    return Some(format!("union {union:?} != all {all:?}"));
+                }
+                None
+            });
+            prop_assert!(err.is_none(), "anti-join partition broken: {:?}", err);
+            Ok(())
+        })
+        .unwrap();
+}
--- a/crates/omnigraph/tests/recovery.rs
+++ b/crates/omnigraph/tests/recovery.rs
@ -278,6 +278,97 @@ async fn recovery_rolls_back_synthetic_drift_on_open() {
    );
 }

+/// Regression: recovery roll-back must PUBLISH the restored version so
+/// `manifest == Lance HEAD` afterward (no residual "orphaned drift"). Before the
+/// fix, roll-back restored via `Dataset::restore` but left the manifest pin
+/// behind HEAD, so a subsequent strict write / schema apply failed its
+/// HEAD-vs-manifest precondition ("stale view … refresh and retry") — and a
+/// failed schema apply's own roll-back leaked +1 each retry (the original bug's
+/// loop). With convergence, one roll-back leaves `manifest == HEAD` and the
+/// follow-up succeeds.
+#[tokio::test]
+async fn recovery_rollback_converges_manifest_so_schema_apply_succeeds() {
+    use omnigraph::db::ReadTarget;
+    use omnigraph::loader::{LoadMode, load_jsonl};
+    use omnigraph::table_store::TableStore;
+
+    let dir = tempfile::tempdir().unwrap();
+    let uri = dir.path().to_str().unwrap();
+
+    let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
+    load_jsonl(
+        &mut db,
+        r#"{"type":"Person","data":{"name":"alice","age":30}}
+{"type":"Person","data":{"name":"bob","age":25}}
+"#,
+        LoadMode::Append,
+    )
+    .await
+    .unwrap();
+    drop(db);
+
+    // Forge a Phase-B residual: advance Person's Lance HEAD without publishing to
+    // the manifest (the manifest pin stays at the load's committed version).
+    let person_uri = node_table_uri(uri, "Person");
+    let store = TableStore::new(uri);
+    let mut ds = Dataset::open(&person_uri).await.unwrap();
+    let manifest_pin = ds.version().version;
+    let _ = store
+        .delete_where(&person_uri, &mut ds, "1 = 2")
+        .await
+        .unwrap();
+    drop(ds);
+
+    // Roll-back-classified sidecar (post_commit_pin != observed head ⇒
+    // UnexpectedAtP1 ⇒ RollBack).
+    let sidecar_json = format!(
+        r#"{{
+            "schema_version": 1,
+            "operation_id": "01H0000000000000000000CVG",
+            "started_at": "0",
+            "branch": null,
+            "actor_id": "act-test",
+            "writer_kind": "Mutation",
+            "tables": [
+                {{
+                    "table_key": "node:Person",
+                    "table_path": "{}",
+                    "expected_version": {},
+                    "post_commit_pin": {}
+                }}
+            ]
+        }}"#,
+        person_uri, manifest_pin, manifest_pin
+    );
+    write_sidecar_file(dir.path(), "01H0000000000000000000CVG", &sidecar_json);
+
+    // Reopen runs the sweep: restore Person to manifest_pin, then PUBLISH so the
+    // manifest tracks the restored Lance HEAD.
+    let db = Omnigraph::open(uri).await.unwrap();
+
+    // Convergence: manifest pin == Lance HEAD. Fails before the fix — the
+    // manifest stays at manifest_pin while HEAD advanced past it.
+    let snap = db.snapshot_of(ReadTarget::branch("main")).await.unwrap();
+    let entry = snap.entry("node:Person").unwrap();
+    let lance_head = Dataset::open(&person_uri).await.unwrap().version().version;
+    assert_eq!(
+        entry.table_version, lance_head,
+        "roll-back must publish so manifest pin ({}) == Lance HEAD ({})",
+        entry.table_version, lance_head,
+    );
+
+    // The +1-loop victim: an additive schema apply must now succeed (its
+    // HEAD-vs-manifest precondition is satisfied). Before the fix this failed
+    // with "stale view … refresh and retry".
+    let desired = TEST_SCHEMA.replace(
+        "    age: I32?\n}",
+        "    age: I32?\n    nickname: String?\n}",
+    );
+    db.apply_schema(&desired)
+        .await
+        .expect("schema apply after a converging roll-back must succeed");
+}
+
 // =====================================================================
 // Phase 4 — roll-forward path + audit row recording
 // =====================================================================
--- a/crates/omnigraph/tests/search.rs
+++ b/crates/omnigraph/tests/search.rs
@ -556,6 +556,111 @@ async fn bm25_returns_ranked_results() {
    assert!(result.num_rows() <= 3, "bm25 should respect limit 3");
 }

+// Full rank-ORDER golden (not just top-1 / non-empty): pins ranks 2..k so a
+// regression corrupting the tail or reversing the sort direction fails loudly.
+// nearest skips apply_ordering (is_search_ordered) and returns Lance native
+// order, so result_slugs row order == rank order.
+#[tokio::test]
+#[serial]
+async fn nearest_full_rank_order() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_search_db(&dir).await;
+    let result = query_main(
+        &mut db,
+        SEARCH_QUERIES,
+        "vector_search",
+        &vector_param("$q", &[0.1, 0.2, 0.3, 0.4]),
+    )
+    .await
+    .unwrap();
+    // [0.1,0.2,0.3,0.4] == ml-intro's embedding (dist 0); the rest by ascending L2.
+    assert_eq!(result_slugs(&result), vec!["ml-intro", "nlp-guide", "rl-intro"]);
+}
+
+#[tokio::test]
+#[serial]
+async fn bm25_full_rank_order() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_search_db(&dir).await;
+    let result = query_main(
+        &mut db,
+        SEARCH_QUERIES,
+        "bm25_search",
+        &params(&[("$q", "Learning")]),
+    )
+    .await
+    .unwrap();
+    // Descending BM25 score order.
+    assert_eq!(result_slugs(&result), vec!["rl-intro", "ml-intro", "dl-basics"]);
+}
+
+// Characterization: fuzzy() does NOT match under the default tokenizer/index in
+// this setup — a one-edit typo ("Introductio" for "Introduction") returns no
+// rows. (`search`/`match_text` DO work, so FTS itself is fine; fuzzy term
+// queries specifically are inert here.) This pins that documented limitation
+// instead of leaving fuzzy silently unasserted: if a Lance/tokenizer change
+// makes fuzzy match, this turns red and should be promoted to a real
+// matched-set + exclusion golden.
+#[tokio::test]
+#[serial]
+async fn fuzzy_does_not_match_under_default_tokenizer() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_search_db(&dir).await;
+    let r = query_main(&mut db, SEARCH_QUERIES, "fuzzy_search", &params(&[("$q", "Introductio")]))
+        .await
+        .unwrap();
+    assert!(
+        result_slugs(&r).is_empty(),
+        "fuzzy now matches — promote this to a real matched-set/exclusion golden"
+    );
+}
+
+// match_text is a FILTER on the body: assert the exact matched set, not contains.
+#[tokio::test]
+#[serial]
+async fn match_text_matches_exact_set_excludes_unrelated() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_search_db(&dir).await;
+    // "neural" appears only in dl-basics's body ("neural networks").
+    let r = query_main(&mut db, SEARCH_QUERIES, "phrase_search", &params(&[("$q", "neural")]))
+        .await
+        .unwrap();
+    let mut got = result_slugs(&r);
+    got.sort();
+    assert_eq!(got, vec!["dl-basics"]);
+}
+
+// RRF fuses arms OTHER than the default nearest+bm25: two FTS arms (title+body).
+// Proves primary_var resolves when neither arm is `nearest`, and fusion runs.
+#[tokio::test]
+#[serial]
+async fn rrf_fuses_two_fts_fields() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_search_db(&dir).await;
+    let r = query_main(&mut db, SEARCH_QUERIES, "rrf_two_fts", &params(&[("$q", "learning")]))
+        .await
+        .unwrap();
+    assert_eq!(result_slugs(&r), vec!["dl-basics", "ml-intro", "rl-intro"]);
+}
+
+// RRF fuses two vector arms (no embedding creds — explicit vectors). A doc near
+// BOTH query vectors out-ranks one near only one.
+#[tokio::test]
+#[serial]
+async fn rrf_fuses_two_vector_queries() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_search_db(&dir).await;
+    let r = query_main(
+        &mut db,
+        SEARCH_QUERIES,
+        "rrf_two_vectors",
+        &two_vector_params("$q1", &[0.1, 0.2, 0.3, 0.4], "$q2", &[0.5, 0.6, 0.7, 0.8]),
+    )
+    .await
+    .unwrap();
+    assert_eq!(result_slugs(&r), vec!["rl-intro", "ml-intro", "dl-basics"]);
+}
+
 #[tokio::test]
 #[serial]
 async fn mutation_commit_refreshes_search_indices_without_manual_ensure() {
--- a/crates/omnigraph/tests/traversal.rs
+++ b/crates/omnigraph/tests/traversal.rs
@ -46,6 +46,194 @@ query not_at_acme() {
    assert_eq!(names_vec, vec!["Bob", "Charlie", "Diana"]);
 }

+// Nested anti-join (double negation): proves `not { … not { … } }` recurses
+// through execute_pipeline. "People who do NOT work at any NON-Acme company":
+// inner `not { $c.name = "Acme" }` keeps the non-Acme employers, the outer `not`
+// removes anyone who has one. Alice (Acme only), Charlie & Diana (no employer)
+// remain — distinct from plain unemployed {Charlie, Diana}.
+#[tokio::test]
+async fn nested_anti_join_double_negation() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_and_load(&dir).await;
+
+    let queries = r#"
+query no_nonacme_employer() {
+    match {
+        $p: Person
+        not {
+            $p worksAt $c
+            not {
+                $c.name = "Acme"
+            }
+        }
+    }
+    return { $p.name }
+}
+"#;
+    let result = query_main(&mut db, queries, "no_nonacme_employer", &ParamMap::new())
+        .await
+        .unwrap();
+
+    let batch = result.concat_batches().unwrap();
+    let names = batch
+        .column(0)
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+    let mut names_vec: Vec<&str> = (0..names.len()).map(|i| names.value(i)).collect();
+    names_vec.sort();
+    assert_eq!(names_vec, vec!["Alice", "Charlie", "Diana"]);
+}
+
+// The anti-join has two execution forks: the CSR `has_neighbors` fast path
+// (bare single-op Expand inner) and the set-oriented inner-pipeline replay (when
+// dst_filters force a multi-op inner). They must agree. `not { $p worksAt $_ }`
+// takes the fast path; the same negation with an always-true dst filter
+// (`$c.name != ""`) is semantically identical but forces the slow path.
+#[tokio::test]
+async fn anti_join_fast_and_slow_paths_agree() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_and_load(&dir).await;
+
+    let queries = r#"
+query fast() {
+    match {
+        $p: Person
+        not { $p worksAt $_ }
+    }
+    return { $p.name }
+}
+query slow() {
+    match {
+        $p: Person
+        not {
+            $p worksAt $c
+            $c.name != ""
+        }
+    }
+    return { $p.name }
+}
+"#;
+    let names = |result: omnigraph_compiler::result::QueryResult| {
+        let batch = result.concat_batches().unwrap();
+        let col = batch
+            .column(0)
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+        let mut v: Vec<String> = (0..col.len()).map(|i| col.value(i).to_string()).collect();
+        v.sort();
+        v
+    };
+
+    let fast = names(query_main(&mut db, queries, "fast", &ParamMap::new()).await.unwrap());
+    let slow = names(query_main(&mut db, queries, "slow", &ParamMap::new()).await.unwrap());
+
+    assert_eq!(fast, slow, "anti-join fast and slow paths must agree");
+    // Alice->Acme, Bob->Globex employed; Charlie & Diana have no employer.
+    assert_eq!(fast, vec!["Charlie", "Diana"]);
+}
+
+// Regression: nested slow-path anti-joins must not collide on the synthetic
+// correlation tag. The outer anti-join tags rows with a correlation column that
+// rides through its inner pipeline; when the inner pipeline contains ANOTHER
+// slow-path anti-join, a fixed tag name would duplicate, and reading it by name
+// returns the OUTER tag — mis-correlating the inner negation. Fan-out (p1 works
+// at two companies) makes the inner row indices diverge from the outer tags, so
+// the bug produces a different person set than the correct one.
+#[tokio::test]
+async fn nested_anti_join_with_fanout_correlates_correctly() {
+    let dir = tempfile::tempdir().unwrap();
+    let uri = dir.path().to_str().unwrap();
+    // p1 -> {Acme, Globex} (fan-out), p2 -> Globex, p3 -> Acme, p4 -> (none).
+    let data = r#"{"type":"Person","data":{"name":"p1"}}
+{"type":"Person","data":{"name":"p2"}}
+{"type":"Person","data":{"name":"p3"}}
+{"type":"Person","data":{"name":"p4"}}
+{"type":"Company","data":{"name":"Acme"}}
+{"type":"Company","data":{"name":"Globex"}}
+{"edge":"WorksAt","from":"p1","to":"Acme"}
+{"edge":"WorksAt","from":"p1","to":"Globex"}
+{"edge":"WorksAt","from":"p2","to":"Globex"}
+{"edge":"WorksAt","from":"p3","to":"Acme"}"#;
+    let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
+    load_jsonl(&mut db, data, LoadMode::Overwrite).await.unwrap();
+
+    let queries = r#"
+query no_nonacme_employer() {
+    match {
+        $p: Person
+        not {
+            $p worksAt $c
+            not {
+                $c.name = "Acme"
+            }
+        }
+    }
+    return { $p.name }
+}
+"#;
+    let result = query_main(&mut db, queries, "no_nonacme_employer", &ParamMap::new())
+        .await
+        .unwrap();
+    let batch = result.concat_batches().unwrap();
+    let names = batch
+        .column(0)
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+    let mut names_vec: Vec<&str> = (0..names.len()).map(|i| names.value(i)).collect();
+    names_vec.sort();
+    // p1 & p2 have a non-Acme employer (Globex) -> excluded; p3 (Acme only) and
+    // p4 (no employer) remain.
+    assert_eq!(names_vec, vec!["p3", "p4"]);
+}
+
+// Regression: a multi-hop anti-join must not take the bulk fast path. The fast
+// path answers via `has_neighbors` (ONE-hop existence), so `not { $p knows{2,2}
+// $x }` would wrongly drop a node that has a 1-hop neighbor but no 2-hop path.
+// Graph: a->b (b is a sink, so a has no 2-hop path), c->d->e (c has a 2-hop
+// path). Only c has a 2-hop knows path, so only c is removed.
+#[tokio::test]
+async fn anti_join_respects_multi_hop_bounds() {
+    let dir = tempfile::tempdir().unwrap();
+    let uri = dir.path().to_str().unwrap();
+    let data = r#"{"type":"Person","data":{"name":"a"}}
+{"type":"Person","data":{"name":"b"}}
+{"type":"Person","data":{"name":"c"}}
+{"type":"Person","data":{"name":"d"}}
+{"type":"Person","data":{"name":"e"}}
+{"edge":"Knows","from":"a","to":"b"}
+{"edge":"Knows","from":"c","to":"d"}
+{"edge":"Knows","from":"d","to":"e"}"#;
+    let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
+    load_jsonl(&mut db, data, LoadMode::Overwrite).await.unwrap();
+
+    let queries = r#"
+query no_two_hop() {
+    match {
+        $p: Person
+        not { $p knows{2,2} $x }
+    }
+    return { $p.name }
+}
+"#;
+    let result = query_main(&mut db, queries, "no_two_hop", &ParamMap::new())
+        .await
+        .unwrap();
+    let batch = result.concat_batches().unwrap();
+    let names = batch
+        .column(0)
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+    let mut names_vec: Vec<&str> = (0..names.len()).map(|i| names.value(i)).collect();
+    names_vec.sort();
+    // Only c has a 2-hop knows path → removed; everyone else (incl. a, which has
+    // a 1-hop neighbor but no 2-hop path) is kept.
+    assert_eq!(names_vec, vec!["a", "b", "d", "e"]);
+}
+
 // ─── Variable-length hops ───────────────────────────────────────────────────

 const CHAIN_SCHEMA: &str = r#"
--- a/crates/omnigraph/tests/traversal_indexed.rs
+++ b/crates/omnigraph/tests/traversal_indexed.rs
@ -0,0 +1,327 @@
+//! BTREE-indexed Expand path (`execute_expand_indexed`) coverage.
+//!
+//! These tests force the Expand execution mode via `OMNIGRAPH_TRAVERSAL_MODE`
+//! and assert the indexed path matches the CSR path (both are semantically
+//! identical — the indexed path just serves neighbor lookups from the persisted
+//! src/dst BTREE instead of an in-memory CSR). They live in their own test
+//! binary and are all `#[serial]`, so the env writes never race a concurrent
+//! reader: within this process serial execution serializes every env read, and
+//! other test binaries (e.g. `traversal.rs`) are separate processes whose env
+//! stays unset (→ CSR), validating the shared hydrate/align tail on the CSR path.
+
+mod helpers;
+
+use arrow_array::{Array, StringArray};
+
+use omnigraph::db::Omnigraph;
+use omnigraph::loader::{LoadMode, load_jsonl};
+use omnigraph::table_store::{IndexCoverage, TableStore};
+use omnigraph_compiler::ir::ParamMap;
+use serial_test::serial;
+
+use helpers::*;
+
+fn set_mode(mode: &str) {
+    // SAFE: every test here is #[serial] and this binary has no non-serial
+    // env reader, so no thread reads the environment during this write.
+    unsafe { std::env::set_var("OMNIGRAPH_TRAVERSAL_MODE", mode) };
+}
+
+fn clear_mode() {
+    unsafe { std::env::remove_var("OMNIGRAPH_TRAVERSAL_MODE") };
+}
+
+/// Run a name-returning query and return its first column, sorted.
+async fn sorted_names(db: &mut Omnigraph, queries: &str, name: &str, params: &ParamMap) -> Vec<String> {
+    let result = query_main(db, queries, name, params).await.unwrap();
+    if result.num_rows() == 0 {
+        return Vec::new();
+    }
+    let batch = result.concat_batches().unwrap();
+    let col = batch
+        .column(0)
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+    let mut v: Vec<String> = (0..col.len()).map(|i| col.value(i).to_string()).collect();
+    v.sort();
+    v
+}
+
+/// Run the same query under CSR, indexed, and auto (cost-chooser) modes; assert
+/// all three produce identical results and return them. The auto pass exercises
+/// `choose_expand_mode` end to end: whichever path it selects, the rows must
+/// match the forced paths (the chooser changes which path runs, never the result).
+async fn both_modes(db: &mut Omnigraph, queries: &str, name: &str, params: &ParamMap) -> Vec<String> {
+    set_mode("csr");
+    let csr = sorted_names(db, queries, name, params).await;
+    set_mode("indexed");
+    let indexed = sorted_names(db, queries, name, params).await;
+    clear_mode();
+    let auto = sorted_names(db, queries, name, params).await;
+    assert_eq!(
+        indexed, csr,
+        "indexed Expand must produce identical results to CSR for query '{name}'"
+    );
+    assert_eq!(
+        auto, csr,
+        "auto (cost-chooser) Expand must produce identical results to the forced paths for query '{name}'"
+    );
+    indexed
+}
+
+// The C6 index-coverage guard: `key_column_index_coverage` must report whether
+// a `key_col IN (...)` scan will use the persisted BTREE or silently full-scan.
+// Not #[serial] — it calls the helper directly and reads no env.
+#[tokio::test]
+async fn key_column_index_coverage_detects_btree_presence() {
+    let dir = tempfile::tempdir().unwrap();
+    let db = init_and_load(&dir).await;
+    let snap = snapshot_main(&db).await.unwrap();
+
+    // Edge `src` gets a BTREE from ensure_indices on load → Indexed.
+    let edge_ds = snap.open("edge:Knows").await.unwrap();
+    let src_cov = TableStore::key_column_index_coverage(&edge_ds, "src")
+        .await
+        .unwrap();
+    assert_eq!(src_cov, IndexCoverage::Indexed, "edge src is BTREE-indexed");
+
+    // A node property column with no scalar index → Degraded (the warn path).
+    let node_ds = snap.open("node:Person").await.unwrap();
+    let age_cov = TableStore::key_column_index_coverage(&node_ds, "age")
+        .await
+        .unwrap();
+    assert!(
+        matches!(age_cov, IndexCoverage::Degraded { .. }),
+        "non-indexed column should be Degraded, got {age_cov:?}"
+    );
+}
+
+// An edge appended after the BTREE was built lands in a new fragment that the
+// index does not cover (edge-index creation is skipped once a BTREE exists). The
+// scan is then partly a full scan, so coverage must report `Degraded` — otherwise
+// the cost chooser would price an unindexed-in-part scan as fully indexed.
+// (Results stay correct regardless — `indexed_finds_unindexed_appended_edge`.)
+#[tokio::test]
+async fn coverage_degrades_for_appended_unindexed_fragment() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_and_load(&dir).await;
+
+    // Fresh load: the Knows BTREE covers every fragment → Indexed.
+    let snap = snapshot_main(&db).await.unwrap();
+    let edge_ds = snap.open("edge:Knows").await.unwrap();
+    assert_eq!(
+        TableStore::key_column_index_coverage(&edge_ds, "src").await.unwrap(),
+        IndexCoverage::Indexed,
+        "freshly-loaded edge BTREE covers all fragments"
+    );
+
+    // Append an edge → a new, unindexed fragment outside the index fragment_bitmap.
+    mutate_main(
+        &mut db,
+        MUTATION_QUERIES,
+        "add_friend",
+        &params(&[("$from", "Alice"), ("$to", "Diana")]),
+    )
+    .await
+    .unwrap();
+
+    let snap2 = snapshot_main(&db).await.unwrap();
+    let edge_ds2 = snap2.open("edge:Knows").await.unwrap();
+    let cov = TableStore::key_column_index_coverage(&edge_ds2, "src").await.unwrap();
+    assert!(
+        matches!(cov, IndexCoverage::Degraded { .. }),
+        "appended unindexed fragment must degrade coverage, got {cov:?}"
+    );
+}
+
+#[tokio::test]
+#[serial]
+async fn indexed_matches_csr_one_hop_same_type() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_and_load(&dir).await;
+    // friends_of: `$p knows $f` (Person -> Person, single hop).
+    let got = both_modes(&mut db, TEST_QUERIES, "friends_of", &params(&[("$name", "Alice")])).await;
+    assert_eq!(got, vec!["Bob", "Charlie"], "Alice knows Bob and Charlie");
+}
+
+#[tokio::test]
+#[serial]
+async fn indexed_matches_csr_multi_hop_same_type() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_and_load(&dir).await;
+    let queries = r#"
+query reach($name: String) {
+    match {
+        $p: Person { name: $name }
+        $p knows{1,2} $f
+    }
+    return { $f.name }
+}
+"#;
+    // Alice -> Bob, Charlie (1 hop); Bob -> Diana (2 hops).
+    let got = both_modes(&mut db, queries, "reach", &params(&[("$name", "Alice")])).await;
+    assert_eq!(got, vec!["Bob", "Charlie", "Diana"]);
+}
+
+#[tokio::test]
+#[serial]
+async fn indexed_matches_csr_cross_type() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_and_load(&dir).await;
+    let queries = r#"
+query employer($name: String) {
+    match {
+        $p: Person { name: $name }
+        $p worksAt $c
+    }
+    return { $c.name }
+}
+"#;
+    let got = both_modes(&mut db, queries, "employer", &params(&[("$name", "Alice")])).await;
+    assert_eq!(got, vec!["Acme"], "Alice works at Acme");
+}
+
+#[tokio::test]
+#[serial]
+async fn indexed_matches_csr_no_match() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_and_load(&dir).await;
+    // Diana has no outgoing Knows edges → empty in both modes.
+    let got = both_modes(&mut db, TEST_QUERIES, "friends_of", &params(&[("$name", "Diana")])).await;
+    assert!(got.is_empty(), "Diana knows no one");
+}
+
+#[tokio::test]
+#[serial]
+async fn indexed_finds_unindexed_appended_edge() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_and_load(&dir).await;
+
+    // Append Alice -> Diana AFTER the initial load. `ensure_indices`' existence
+    // guard means the src/dst BTREE built on the first load does NOT cover this
+    // new fragment. The indexed path must still find it via Lance's
+    // unindexed-fragment scan (fast_search=false default), so partial index
+    // coverage never silently drops rows.
+    mutate_main(
+        &mut db,
+        MUTATION_QUERIES,
+        "add_friend",
+        &params(&[("$from", "Alice"), ("$to", "Diana")]),
+    )
+    .await
+    .unwrap();
+
+    set_mode("indexed");
+    let got = sorted_names(&mut db, TEST_QUERIES, "friends_of", &params(&[("$name", "Alice")])).await;
+    clear_mode();
+
+    assert_eq!(
+        got,
+        vec!["Bob", "Charlie", "Diana"],
+        "indexed traversal must see the freshly-appended, unindexed edge"
+    );
+}
+
+// Regression: a node `id` is unique only WITHIN a type, so a `Person` and a
+// `Company` can share an id string. A variable-length traversal over a
+// cross-type edge (`worksAt`, Person -> Company) must structurally stop after
+// one hop — a Company is not a `worksAt` source — so `worksAt{1,2}` returns
+// exactly the one-hop companies. Before the structural hop-cap, the indexed
+// path's single string interner de-interned the hop-1 Company id back to the
+// colliding Person id and ran a hop-2 `worksAt src IN (...)` scan that matched
+// that same-string Person's edges, emitting a spurious second-hop company the
+// CSR path never produces. `both_modes` (csr == indexed == auto) plus the
+// golden assert catch both the divergence and an over-emitting shared bug.
+#[tokio::test]
+#[serial]
+async fn cross_type_id_collision_does_not_bleed_into_second_hop() {
+    const SCHEMA: &str = r#"
+node Person { name: String @key }
+node Company { name: String @key }
+edge WorksAt: Person -> Company
+"#;
+    // `shared` is BOTH a Person id and a Company id. alice worksAt the Company
+    // `shared`; the Person `shared` worksAt the Company `other`.
+    const DATA: &str = r#"{"type":"Person","data":{"name":"alice"}}
+{"type":"Person","data":{"name":"shared"}}
+{"type":"Company","data":{"name":"shared"}}
+{"type":"Company","data":{"name":"other"}}
+{"edge":"WorksAt","from":"alice","to":"shared"}
+{"edge":"WorksAt","from":"shared","to":"other"}"#;
+    const QUERY: &str = r#"
+query reach($name: String) {
+    match {
+        $p: Person { name: $name }
+        $p worksAt{1,2} $c
+    }
+    return { $c.name }
+}
+"#;
+    let dir = tempfile::tempdir().unwrap();
+    let uri = dir.path().to_str().unwrap();
+    let mut db = Omnigraph::init(uri, SCHEMA).await.unwrap();
+    load_jsonl(&mut db, DATA, LoadMode::Overwrite).await.unwrap();
+
+    let got = both_modes(&mut db, QUERY, "reach", &params(&[("$name", "alice")])).await;
+    assert_eq!(
+        got,
+        vec!["shared"],
+        "cross-type worksAt{{1,2}} must return only the one-hop company; a hop-2 \
+         result means the id-string collision bled across types"
+    );
+}
+
+const REACH_5: &str = r#"
+query reach($name: String) {
+    match {
+        $p: Person { name: $name }
+        $p knows{1,5} $f
+    }
+    return { $f.name }
+}
+"#;
+
+// A directed 3-cycle a->b->c->a, traversed with a hop ceiling (5) ABOVE the cycle
+// length. Variable-length traversal must terminate and dedup (the source is
+// seeded into `visited`, so the c->a back-edge does not re-emit a). Uses a
+// bounded range deliberately: an unbounded `{1,}` is a typecheck error, not a
+// runtime path. `both_modes` also confirms indexed == csr on the cycle.
+#[tokio::test]
+#[serial]
+async fn variable_hops_terminate_and_dedup_on_cycle() {
+    let dir = tempfile::tempdir().unwrap();
+    let uri = dir.path().to_str().unwrap();
+    let data = r#"{"type":"Person","data":{"name":"a"}}
+{"type":"Person","data":{"name":"b"}}
+{"type":"Person","data":{"name":"c"}}
+{"edge":"Knows","from":"a","to":"b"}
+{"edge":"Knows","from":"b","to":"c"}
+{"edge":"Knows","from":"c","to":"a"}"#;
+    let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
+    load_jsonl(&mut db, data, LoadMode::Overwrite).await.unwrap();
+
+    let got = both_modes(&mut db, REACH_5, "reach", &params(&[("$name", "a")])).await;
+    // From a: b (1 hop), c (2 hops); the c->a back-edge hits the seeded source
+    // and is not re-emitted. No infinite loop, each node at most once.
+    assert_eq!(got, vec!["b", "c"]);
+}
+
+// A self-loop a->a plus a->b. Variable-length traversal must not loop forever and
+// must not re-emit the seeded source.
+#[tokio::test]
+#[serial]
+async fn variable_hops_handle_self_loop() {
+    let dir = tempfile::tempdir().unwrap();
+    let uri = dir.path().to_str().unwrap();
+    let data = r#"{"type":"Person","data":{"name":"a"}}
+{"type":"Person","data":{"name":"b"}}
+{"edge":"Knows","from":"a","to":"a"}
+{"edge":"Knows","from":"a","to":"b"}"#;
+    let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
+    load_jsonl(&mut db, data, LoadMode::Overwrite).await.unwrap();
+
+    let got = both_modes(&mut db, REACH_5, "reach", &params(&[("$name", "a")])).await;
+    // a->a hits the seeded source (pruned); only b is reached.
+    assert_eq!(got, vec!["b"]);
+}
--- a/crates/omnigraph/tests/writes.rs
+++ b/crates/omnigraph/tests/writes.rs
@ -6,8 +6,8 @@
 //! What this file covers:
 //! - No `__run__*` branches are created by load or mutate.
 //! - Cancellation of a mutation future leaves no graph-level state.
-//! - Concurrent writers to the same table land exactly one publish; the
-//!   loser surfaces `ManifestConflictDetails::ExpectedVersionMismatch`.
+//! - Concurrent non-strict inserts/merges rebase under the per-table queue;
+//!   strict updates/deletes surface `ExpectedVersionMismatch` on stale state.
 //! - Failed mutations and loads leave the target unchanged.
 //! - Multi-statement mutations are atomic (one commit per query).
 //! - actor_id propagates through to the commit graph.
@ -17,7 +17,7 @@ mod helpers;
 use arrow_array::Array;
 use omnigraph::db::commit_graph::CommitGraph;
 use omnigraph::db::{Omnigraph, ReadTarget};
-use omnigraph::error::{ManifestConflictDetails, ManifestErrorKind, OmniError};
+use omnigraph::error::OmniError;
 use omnigraph::loader::{LoadMode, load_jsonl};

 use helpers::*;
@ -241,18 +241,11 @@ async fn partial_failure_leaves_target_queryable_and_unblocks_next_mutation() {
    assert_eq!(frank.num_rows(), 1, "Frank must be visible after publish");
 }

-/// Concurrent writers to the same `(table, branch)` produce exactly one
-/// success and one `ExpectedVersionMismatch`. The replacement for the old
-/// `concurrent_conflicting_run_publish_fails_cleanly` test — the OCC fence
-/// has moved from a graph-level run-publish merge into the publisher's
-/// per-table CAS.
-///
-/// Drives the race by interleaving two handles that captured the same
-/// pre-write manifest snapshot: A commits first; B's commit then sees
-/// `expected_versions[node:Person] = pre` while the manifest is at
-/// `pre + 1`, and the publisher rejects.
+/// Stale non-strict writers rebase to the live manifest pin under the
+/// per-table queue instead of folding raw drift or returning a false 409.
+/// Strict update/delete semantics are covered by the consistency/server tests.
 #[tokio::test]
-async fn concurrent_writers_one_succeeds_one_gets_expected_version_mismatch() {
+async fn stale_non_strict_insert_rebases_to_live_manifest_pin() {
    let dir = tempfile::tempdir().unwrap();
    let uri = dir.path().to_string_lossy().into_owned();

@ -281,40 +274,30 @@ async fn concurrent_writers_one_succeeds_one_gets_expected_version_mismatch() {
        .unwrap();
    }

-    // Writer B's coordinator is still at the pre-A snapshot. Its mutation
-    // captures expected_versions[node:Person] = pre (stale), then publishes
-    // — the publisher's CAS pre-check sees the manifest is now at post and
-    // rejects with ExpectedVersionMismatch.
-    let result_b = db_b
-        .mutate(
-            "main",
-            MUTATION_QUERIES,
-            "insert_person",
-            &mixed_params(&[("$name", "WriterB")], &[("$age", 42)]),
-        )
-        .await;
+    // Writer B's coordinator is still at the pre-A snapshot, but Insert is
+    // non-strict: commit_all re-reads the live manifest pin under the queue,
+    // verifies Lance HEAD equals that pin, and then lets Lance rebase the
+    // staged append.
+    db_b.mutate(
+        "main",
+        MUTATION_QUERIES,
+        "insert_person",
+        &mixed_params(&[("$name", "WriterB")], &[("$age", 42)]),
+    )
+    .await
+    .unwrap();

-    let err = result_b.expect_err("stale writer must hit ExpectedVersionMismatch");
-    let OmniError::Manifest(manifest_err) = err else {
-        panic!("expected Manifest error, got {err:?}");
-    };
-    assert_eq!(manifest_err.kind, ManifestErrorKind::Conflict);
-    let Some(ManifestConflictDetails::ExpectedVersionMismatch {
-        ref table_key,
-        expected,
-        actual,
-    }) = manifest_err.details
-    else {
-        panic!(
-            "expected ExpectedVersionMismatch, got {:?}",
-            manifest_err.details,
-        );
-    };
-    assert_eq!(table_key, "node:Person");
-    assert!(
-        actual > expected,
-        "actual ({actual}) should be ahead of expected ({expected})",
-    );
+    for name in ["WriterA", "WriterB"] {
+        let person = query_main(
+            &mut db_b,
+            TEST_QUERIES,
+            "get_person",
+            &params(&[("$name", name)]),
+        )
+        .await
+        .unwrap();
+        assert_eq!(person.num_rows(), 1, "{name} should be visible");
+    }
 }

 /// The cancellation hole that motivated removing the Run state machine: dropping a mutation future
@ -371,11 +354,10 @@ async fn cancelled_mutation_future_leaves_no_state() {

    // Cancel-safety property: no graph-level run/staging state remains.
    //
-    // Note: `branch_list()` already filters `__run__*` via
-    // `is_internal_system_branch`, so a runtime "no `__run__` branches" check
-    // would be vacuous. The structural property that no `__run__` branches
-    // can ever be created is enforced by deletion of `begin_run` etc. in
-    // (verified by the build itself — those symbols no longer exist).
+    // No `__run__` branches can ever be created: the Run state machine
+    // (`begin_run` etc.) was deleted in MR-771 — verified by the build itself,
+    // those symbols no longer exist. Any legacy `__run__*` branch on an
+    // upgraded graph is swept by the v2→v3 manifest migration.
    //
    // (1) The branch list is unchanged: cancellation/completion cannot
    //     synthesize new public branches.
@ -442,34 +424,40 @@ async fn repeated_loads_do_not_accumulate_branches() {
    assert_eq!(db.branch_list().await.unwrap(), vec!["main".to_string()]);
 }

-/// User code must not be able to write to internal `__run__*` names.
-/// The branch-name guard predicate is kept as defense-in-depth; it
-/// will be removed once a future production sweep retires the legacy
-/// branches.
+/// After MR-770, `__run__*` is an ordinary branch name — the Run state machine
+/// and its `is_internal_run_branch` guard are gone. The surviving internal-ref
+/// guard still rejects the active `__schema_apply_lock__` branch on the public
+/// create/merge APIs.
 #[tokio::test]
-async fn public_branch_apis_reject_internal_run_refs() {
+async fn public_branch_apis_reject_internal_system_refs() {
    let dir = tempfile::tempdir().unwrap();
    let mut db = init_and_load(&dir).await;

-    let create_err = db.branch_create("__run__synthetic").await.unwrap_err();
+    // `__run__*` is no longer reserved — creating it now succeeds.
+    db.branch_create("__run__formerly_reserved")
+        .await
+        .expect("__run__ prefix is a normal branch name post-MR-770");
+
+    // The schema-apply lock branch is still rejected on public branch APIs.
+    let create_err = db.branch_create("__schema_apply_lock__").await.unwrap_err();
    let OmniError::Manifest(err) = create_err else {
        panic!("expected Manifest error");
    };
    assert!(
-        err.message.contains("internal run ref"),
+        err.message.contains("internal system ref"),
        "unexpected error: {}",
        err.message
    );

    let merge_err = db
-        .branch_merge("__run__synthetic", "main")
+        .branch_merge("__schema_apply_lock__", "main")
        .await
        .unwrap_err();
    let OmniError::Manifest(err) = merge_err else {
        panic!("expected Manifest error");
    };
    assert!(
-        err.message.contains("internal run refs"),
+        err.message.contains("internal system refs"),
        "unexpected error: {}",
        err.message
    );