mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-24 02:38:06 +02:00
fix(engine): guard absent _graph_commits + always compact internal tables
Addresses PR #291 review findings: - Greptile (P1): optimize unconditionally opened `_graph_commits` for compaction, but a graph can validly have none (the coordinator opens it as `Option`, gated on `storage.exists`, for graphs predating the commit graph). `Dataset::open` on the absent table errored and failed the whole optimize. Guard the `_graph_commits` compaction with the same `storage_adapter().exists()` check the coordinator uses; `__manifest` always exists so it stays unguarded. Regression test `optimize_tolerates_absent_graph_commits_table` (empty graph so no publish recreates the table before the guard). - Cursor (low): the `table_tasks.is_empty()` early return skipped internal-table compaction for a schema with no node/edge types. Removed it so the internal tables are compacted regardless of the data-table set. - Codex (auto-cleanup, P1): documented — `compact_files` commits with a default `CommitConfig` (no skip_auto_cleanup) and `CompactionOptions` exposes no override, so on a graph storing an *on* auto_cleanup config the commit would fire version GC. Both internal tables are created with `auto_cleanup: None`, so new graphs are safe; the only exposure is pre-fix upgraded graphs, identical to the existing data-table optimize path, with step 2b's watermark as the comprehensive guard. Added a comment in `compact_internal_table` recording this.
This commit is contained in:
parent
8db8937a6a
commit
b2f65062c8
2 changed files with 53 additions and 12 deletions
|
|
@ -248,10 +248,8 @@ pub async fn optimize_all_tables(db: &Omnigraph) -> Result<Vec<TableOptimizeStat
|
|||
tasks
|
||||
};
|
||||
|
||||
if table_tasks.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
// NB: do NOT early-return when `table_tasks` is empty (a schema with no
|
||||
// node/edge types) — the internal system tables below must still be compacted.
|
||||
let concurrency = maint_concurrency().min(table_tasks.len()).max(1);
|
||||
|
||||
let stats: Vec<Result<TableOptimizeStats>> = futures::stream::iter(table_tasks.into_iter())
|
||||
|
|
@ -286,18 +284,18 @@ pub async fn optimize_all_tables(db: &Omnigraph) -> Result<Vec<TableOptimizeStat
|
|||
// data-table stats only; each internal compaction does its own coordinator
|
||||
// refresh for cache coherence.
|
||||
let mut all = stats;
|
||||
// `__manifest` always exists (created at init). `_graph_commits` may be absent
|
||||
// (the coordinator opens it as `Option`, gated on existence — graphs predating
|
||||
// the commit graph have none), so guard it with the same existence check rather
|
||||
// than letting `Dataset::open` error and fail the whole optimize.
|
||||
all.push(
|
||||
compact_internal_table(db, "__manifest", crate::db::manifest::manifest_uri(db.root_uri()))
|
||||
.await,
|
||||
);
|
||||
all.push(
|
||||
compact_internal_table(
|
||||
db,
|
||||
"_graph_commits",
|
||||
crate::db::commit_graph::graph_commits_uri(db.root_uri()),
|
||||
)
|
||||
.await,
|
||||
);
|
||||
let graph_commits_uri = crate::db::commit_graph::graph_commits_uri(db.root_uri());
|
||||
if db.storage_adapter().exists(&graph_commits_uri).await? {
|
||||
all.push(compact_internal_table(db, "_graph_commits", graph_commits_uri).await);
|
||||
}
|
||||
|
||||
all.into_iter().collect()
|
||||
}
|
||||
|
|
@ -575,6 +573,17 @@ async fn compact_internal_table(
|
|||
));
|
||||
}
|
||||
|
||||
// `compact_files` commits with a default `CommitConfig` (skip_auto_cleanup =
|
||||
// false) and `CompactionOptions` exposes no override, so on a graph whose
|
||||
// dataset stores an *on* auto_cleanup config the commit would fire Lance's
|
||||
// auto-cleanup (version GC). For the internal tables that GC would be version
|
||||
// deletion on `__manifest`/`_graph_commits` — the cleanup-resurrection surface
|
||||
// this compaction-only step deliberately avoids (RFC-013 step 2b / Q8). Both
|
||||
// internal tables are created with `auto_cleanup: None` (see
|
||||
// `manifest/graph.rs`, `commit_graph.rs`), so a NEW graph stores no config and
|
||||
// nothing fires here. Pre-auto_cleanup-fix upgraded graphs are the only
|
||||
// exposure — identical to the existing data-table `optimize_one_table` path —
|
||||
// and the comprehensive guard is step 2b's watermark.
|
||||
let metrics = compact_files(&mut ds, options, None)
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
|
|
|
|||
|
|
@ -205,6 +205,38 @@ async fn optimize_compacts_internal_tables() {
|
|||
.unwrap();
|
||||
}
|
||||
|
||||
/// `optimize` must not fail on a graph that has no `_graph_commits.lance` — a valid
|
||||
/// state the coordinator opens as `commit_graph = None` (graphs predating the commit
|
||||
/// graph). Without the existence guard, `Dataset::open` on the absent table errors
|
||||
/// and fails the whole optimize. Regression for the missing-existence-guard.
|
||||
///
|
||||
/// Uses an EMPTY graph deliberately: a graph with data would publish during
|
||||
/// optimize, and a publish records a graph commit that recreates `_graph_commits`
|
||||
/// before the guard runs — masking the bug. With no data, nothing recreates it, so
|
||||
/// the table stays absent through the guard.
|
||||
#[tokio::test]
|
||||
async fn optimize_tolerates_absent_graph_commits_table() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
||||
|
||||
// Simulate a graph with no commit-graph dataset.
|
||||
std::fs::remove_dir_all(dir.path().join("_graph_commits.lance")).unwrap();
|
||||
|
||||
// Coordinator tolerates the absence; optimize must succeed (the guard skips the
|
||||
// absent table rather than letting `Dataset::open` error) and omit its stat.
|
||||
let db = Omnigraph::open(uri).await.unwrap();
|
||||
let stats = db.optimize().await.unwrap();
|
||||
assert!(
|
||||
stats.iter().any(|s| s.table_key == "__manifest"),
|
||||
"__manifest must still be compacted"
|
||||
);
|
||||
assert!(
|
||||
!stats.iter().any(|s| s.table_key == "_graph_commits"),
|
||||
"absent _graph_commits must be skipped, not opened (would error)"
|
||||
);
|
||||
}
|
||||
|
||||
// PR3 (Workstream B): an existing scalar index does not cover fragments
|
||||
// appended after it was built (build_indices is existence-gated), so those
|
||||
// rows are scanned unindexed. `optimize` must fold them back in via Lance's
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue