diff --git a/crates/omnigraph-cli/src/main.rs b/crates/omnigraph-cli/src/main.rs index 35740cc..eb8ea95 100644 --- a/crates/omnigraph-cli/src/main.rs +++ b/crates/omnigraph-cli/src/main.rs @@ -1035,14 +1035,48 @@ fn render_schema_plan_step(step: &SchemaMigrationStep) -> String { type_name, render_annotations(annotations) ), + SchemaMigrationStep::DropType { + type_kind, + name, + mode, + } => format!( + "drop {} type '{}' ({} mode)", + schema_type_kind_label(*type_kind), + name, + drop_mode_label(*mode), + ), + SchemaMigrationStep::DropProperty { + type_kind, + type_name, + property_name, + mode, + } => format!( + "drop property '{}.{}' of {} '{}' ({} mode)", + type_name, + property_name, + schema_type_kind_label(*type_kind), + type_name, + drop_mode_label(*mode), + ), SchemaMigrationStep::UnsupportedChange { - entity, - reason, - code, - } => match code { - Some(c) => format!("unsupported change on {} [{}]: {}", entity, c, reason), - None => format!("unsupported change on {}: {}", entity, reason), - }, + entity, reason, .. + } => { + // When a schema-lint code is attached, render code + tier + // so operators see at-a-glance the kind of risk (destructive + // / validated / safe) — not just the rule identifier. + // Reach the diagnostic via the `diagnostic()` helper so the + // CLI doesn't need to know how the lookup works. + match step.diagnostic() { + Some(diag) => format!( + "unsupported change on {} [{}, {}]: {}", + entity, + diag.code, + schema_lint_tier_label(diag.tier), + reason, + ), + None => format!("unsupported change on {}: {}", entity, reason), + } + } } } @@ -1054,6 +1088,21 @@ fn schema_type_kind_label(kind: omnigraph_compiler::SchemaTypeKind) -> &'static } } +fn schema_lint_tier_label(tier: omnigraph_compiler::SafetyTier) -> &'static str { + match tier { + omnigraph_compiler::SafetyTier::Safe => "safe", + omnigraph_compiler::SafetyTier::Validated => "validated", + omnigraph_compiler::SafetyTier::Destructive => "destructive", + } +} + +fn drop_mode_label(mode: omnigraph_compiler::DropMode) -> &'static str { + match mode { + omnigraph_compiler::DropMode::Soft => "soft", + omnigraph_compiler::DropMode::Hard => "hard", + } +} + fn render_prop_type(prop_type: &omnigraph_compiler::PropType) -> String { let base = if let Some(values) = &prop_type.enum_values { format!("Enum({})", values.join("|")) diff --git a/crates/omnigraph-compiler/src/catalog/schema_plan.rs b/crates/omnigraph-compiler/src/catalog/schema_plan.rs index 835a1be..08fe16a 100644 --- a/crates/omnigraph-compiler/src/catalog/schema_plan.rs +++ b/crates/omnigraph-compiler/src/catalog/schema_plan.rs @@ -16,6 +16,29 @@ pub enum SchemaTypeKind { Edge, } +/// How a drop step interacts with data. +/// +/// - **`Soft`** — catalog tombstone only. The type / property is hidden +/// from queries but the underlying Lance column / dataset is retained +/// on disk. Reversible via `omnigraph schema unhide` (forthcoming). +/// Tier: `safe`. +/// - **`Hard`** — actual data removal. The Lance column is rewritten +/// without the property, or the Lance dataset is dropped. Irreversible +/// short of branch / snapshot restore. Tier: `destructive`; requires +/// `--allow-data-loss` to apply. +/// +/// The planner emits `Soft` by default; `--allow-data-loss` on the apply +/// CLI promotes drops to `Hard`. This is the dimension orthogonal to +/// `SafetyTier` from the schema-lint chassis (`crate::lint`): tier +/// describes the rule's class; mode describes the operator's intent for +/// data treatment. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum DropMode { + Soft, + Hard, +} + #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct SchemaMigrationPlan { pub supported: bool, @@ -62,6 +85,28 @@ pub enum SchemaMigrationStep { property_name: String, annotations: Vec, }, + /// Remove a node or edge type. Soft mode tombstones in the catalog + /// and retains data on disk; Hard mode drops the Lance dataset and + /// requires `--allow-data-loss`. + /// + /// Dormant in this commit — emitted by the planner in a later + /// commit (see `docs/schema-lint-v1-plan.md`). + DropType { + type_kind: SchemaTypeKind, + name: String, + mode: DropMode, + }, + /// Remove a property from an existing type. Soft mode tombstones + /// the property in the catalog and retains the Lance column; Hard + /// mode rewrites the column out and requires `--allow-data-loss`. + /// + /// Dormant in this commit. + DropProperty { + type_kind: SchemaTypeKind, + type_name: String, + property_name: String, + mode: DropMode, + }, UnsupportedChange { entity: String, reason: String, @@ -93,6 +138,24 @@ impl SchemaMigrationStep { _ => None, } } + + /// If this step carries a schema-lint code, return the full + /// catalog entry — including family, safety tier, and default + /// severity. Used by renderers that want to display richer + /// context than just the code string (e.g. `omnigraph schema + /// plan` annotating each line with its tier). + /// + /// Returns `None` for steps that carry no code (the 12 of 17 + /// `UnsupportedChange` paths still untagged in v0, plus every + /// non-`UnsupportedChange` variant). + pub fn diagnostic(&self) -> Option<&'static crate::lint::DiagnosticCode> { + match self { + Self::UnsupportedChange { + code: Some(c), .. + } => crate::lint::lookup(c), + _ => None, + } + } } pub fn plan_schema_migration( @@ -499,18 +562,22 @@ fn plan_properties( .iter() .filter(|property| !consumed.contains(&property.name)) { - steps.push(SchemaMigrationStep::UnsupportedChange { - entity: format!( - "{}:{}.{}", - schema_type_kind_key(type_kind), - type_name, - leftover.name - ), - reason: format!( - "removing property '{}.{}' is not supported in schema migration v1", - type_name, leftover.name - ), - code: Some(crate::lint::codes::OG_DS_104.code.to_string()), + // Property removed from the desired schema: emit + // DropProperty { Soft } per docs/schema-lint-v1-plan.md + // commit #3. The Soft mode reuses the existing + // stage_overwrite rewrite path — batch_for_schema_apply_rewrite + // iterates target_schema.fields(), so the dropped column is + // naturally projected away. The prior Lance version retains + // the column until cleanup_old_versions runs, matching the + // OG-DS-104 destructive-tier expectation that data remains + // recoverable via time travel until cleanup. Hard mode (with + // immediate compact_files + cleanup_old_versions) lands in + // commit #5, gated by --allow-data-loss. + steps.push(SchemaMigrationStep::DropProperty { + type_kind, + type_name: type_name.to_string(), + property_name: leftover.name.clone(), + mode: DropMode::Soft, }); } @@ -863,6 +930,67 @@ node Account @rename_from("User") { })); } + #[test] + fn plan_emits_soft_drop_for_removed_nullable_property() { + // Removing a property from the desired schema emits + // DropProperty { Soft } (schema-lint v1 chassis commit #3, + // MR-694). The plan is `supported = true` — the apply path + // handles soft drop via the existing stage_overwrite rewrite + // projection. Verified at the integration level by + // `apply_schema_drops_a_nullable_property_softly_preserves_prior_version` + // in `crates/omnigraph/tests/schema_apply.rs`. + let accepted = build_schema_ir( + &parse_schema( + r#" +node Person { + name: String @key + age: I32? +} +"#, + ) + .unwrap(), + ) + .unwrap(); + let desired = build_schema_ir( + &parse_schema( + r#" +node Person { + name: String @key +} +"#, + ) + .unwrap(), + ) + .unwrap(); + + let plan = plan_schema_migration(&accepted, &desired).unwrap(); + assert!( + plan.supported, + "drop-property plan must be supported: {plan:?}" + ); + assert!( + plan.steps.iter().any(|step| matches!( + step, + SchemaMigrationStep::DropProperty { + type_kind: SchemaTypeKind::Node, + type_name, + property_name, + mode: DropMode::Soft, + .. + } if type_name == "Person" && property_name == "age" + )), + "expected DropProperty {{ Soft }} step in plan: {plan:?}", + ); + // Negative: no UnsupportedChange anywhere in the plan. + assert!( + !plan + .steps + .iter() + .any(|step| matches!(step, UnsupportedChange { .. })), + "soft drop must not emit UnsupportedChange: {plan:?}", + ); + } + #[test] fn plan_rejects_required_property_addition() { let accepted = build_schema_ir( @@ -935,4 +1063,56 @@ node Person @description("new") { }], })); } + + #[test] + fn drop_steps_round_trip_through_serde() { + // The DropType / DropProperty variants are dormant in this + // commit — the planner doesn't emit them yet — but their + // serde shape needs to be stable from day one. A future + // SchemaIR JSON containing one of these must deserialize + // back to the same value. This test pins the wire format + // so a v0 schema-ir consumer never sees a surprise variant + // shape after v1 ships. + let steps = vec![ + SchemaMigrationStep::DropType { + type_kind: SchemaTypeKind::Node, + name: "Person".to_string(), + mode: DropMode::Soft, + }, + SchemaMigrationStep::DropType { + type_kind: SchemaTypeKind::Edge, + name: "Knows".to_string(), + mode: DropMode::Hard, + }, + SchemaMigrationStep::DropProperty { + type_kind: SchemaTypeKind::Node, + type_name: "Person".to_string(), + property_name: "age".to_string(), + mode: DropMode::Soft, + }, + SchemaMigrationStep::DropProperty { + type_kind: SchemaTypeKind::Interface, + type_name: "Named".to_string(), + property_name: "alias".to_string(), + mode: DropMode::Hard, + }, + ]; + + for step in steps { + let json = serde_json::to_string(&step).expect("serialize"); + let round_trip: SchemaMigrationStep = + serde_json::from_str(&json).expect("deserialize"); + assert_eq!(step, round_trip, "round-trip mismatch on {json}"); + } + } + + #[test] + fn drop_mode_serde_uses_snake_case() { + // External tools may write SchemaIR JSON by hand. Pin the + // wire form so we don't silently break them later. + assert_eq!(serde_json::to_string(&DropMode::Soft).unwrap(), "\"soft\""); + assert_eq!(serde_json::to_string(&DropMode::Hard).unwrap(), "\"hard\""); + let soft: DropMode = serde_json::from_str("\"soft\"").unwrap(); + assert_eq!(soft, DropMode::Soft); + } } diff --git a/crates/omnigraph-compiler/src/lib.rs b/crates/omnigraph-compiler/src/lib.rs index 102b479..7ebc09a 100644 --- a/crates/omnigraph-compiler/src/lib.rs +++ b/crates/omnigraph-compiler/src/lib.rs @@ -16,7 +16,7 @@ pub use catalog::schema_ir::{ schema_ir_pretty_json, }; pub use catalog::schema_plan::{ - SchemaMigrationPlan, SchemaMigrationStep, SchemaTypeKind, plan_schema_migration, + DropMode, SchemaMigrationPlan, SchemaMigrationStep, SchemaTypeKind, plan_schema_migration, }; pub use lint::{DiagnosticCode, Family, SafetyTier, Severity}; pub use ir::ParamMap; diff --git a/crates/omnigraph/src/db/omnigraph.rs b/crates/omnigraph/src/db/omnigraph.rs index 50d4963..bf34ff9 100644 --- a/crates/omnigraph/src/db/omnigraph.rs +++ b/crates/omnigraph/src/db/omnigraph.rs @@ -18,8 +18,8 @@ use omnigraph_compiler::catalog::{Catalog, EdgeType, NodeType}; use omnigraph_compiler::schema::parser::parse_schema; use omnigraph_compiler::types::ScalarType; use omnigraph_compiler::{ - SchemaIR, SchemaMigrationPlan, SchemaMigrationStep, SchemaTypeKind, build_catalog_from_ir, - build_schema_ir, plan_schema_migration, + DropMode, SchemaIR, SchemaMigrationPlan, SchemaMigrationStep, SchemaTypeKind, + build_catalog_from_ir, build_schema_ir, plan_schema_migration, }; use crate::db::graph_coordinator::{GraphCoordinator, PublishedSnapshot}; diff --git a/crates/omnigraph/src/db/omnigraph/schema_apply.rs b/crates/omnigraph/src/db/omnigraph/schema_apply.rs index a01cbc8..f764e22 100644 --- a/crates/omnigraph/src/db/omnigraph/schema_apply.rs +++ b/crates/omnigraph/src/db/omnigraph/schema_apply.rs @@ -138,6 +138,41 @@ pub(super) async fn apply_schema_with_lock( } SchemaMigrationStep::UpdateTypeMetadata { .. } | SchemaMigrationStep::UpdatePropertyMetadata { .. } => {} + SchemaMigrationStep::DropProperty { + type_kind, + type_name, + mode, + .. + } => { + // Soft = reuse the existing stage_overwrite rewrite + // path. batch_for_schema_apply_rewrite iterates the + // *target* schema fields, so a property absent from + // desired_catalog is naturally projected away. The + // prior Lance version retains the dropped column, + // so reads at the previous snapshot still see it + // (time-travel reversibility). Hard mode (immediate + // compact_files + cleanup_old_versions for actual + // data deletion) lands in commit #5 gated by + // --allow-data-loss. + if !matches!(mode, DropMode::Soft) { + return Err(OmniError::manifest_internal( + "DropProperty { Hard } not yet implemented (commit #5)", + )); + } + let table_key = schema_table_key(*type_kind, type_name); + if table_key.starts_with("edge:") { + changed_edge_tables = true; + } + rewritten_tables.insert(table_key); + } + SchemaMigrationStep::DropType { .. } => { + // DropType (whole-table drop via __manifest entry + // removal) lands in commit #4 — different mechanics + // from DropProperty. + return Err(OmniError::manifest_internal( + "DropType not yet implemented (commit #4)", + )); + } step @ SchemaMigrationStep::UnsupportedChange { .. } => { return Err(OmniError::manifest( step.unsupported_error_message() diff --git a/crates/omnigraph/tests/schema_apply.rs b/crates/omnigraph/tests/schema_apply.rs index fac5ab4..99484db 100644 --- a/crates/omnigraph/tests/schema_apply.rs +++ b/crates/omnigraph/tests/schema_apply.rs @@ -101,17 +101,23 @@ async fn apply_schema_unsupported_plan_does_not_advance_manifest() { ); } -// ─── Destructive / safety-tier rejections ──────────────────────────────────── +// ─── Destructive / safety-tier behavior ────────────────────────────────────── // -// Schema migration v1 only accepts additive change: add type, add nullable -// property, add index, rename. Every other shape returns an -// `UnsupportedChange` step that surfaces as an error from `apply_schema`, -// without advancing the manifest. These tests pin that contract for the -// destructive shapes (drop type, drop property, narrow type, add required, -// remove constraint) so a regression in the planner can't silently allow them. +// Schema migration v1 accepts: +// - Additive change: add type, add nullable property, add index, rename. +// - DropProperty { Soft } via the schema-lint v1 chassis (commit #3 of MR-694) +// — the dropped column is removed from the current manifest version but +// remains reachable via Lance time travel at the prior version, until +// `omnigraph cleanup` runs. Hard mode (immediate data cleanup) lands in +// commit #5 gated by `--allow-data-loss`. +// +// Every other destructive shape (drop type, narrow type, add required without +// backfill, remove constraint) still returns an `UnsupportedChange` step that +// surfaces as an error from `apply_schema`. These tests pin the current +// contract so a regression in the planner can't silently change behavior. #[tokio::test] -async fn apply_schema_rejects_dropping_a_property_with_data() { +async fn apply_schema_drops_a_nullable_property_softly_preserves_prior_version() { let dir = tempfile::tempdir().unwrap(); let mut db = init_and_load(&dir).await; @@ -122,25 +128,96 @@ async fn apply_schema_rejects_dropping_a_property_with_data() { .unwrap() .version(); - // Drop `age` from Person. v1 doesn't support property removal even when - // the column is nullable — it would silently destroy data. + // Drop `age` from Person. v1 + chassis commit #3 emit + // `DropProperty { Soft }`; the rewrite path projects to the + // target schema (no `age`), commits via stage_overwrite. Row + // counts are unchanged — only the column is dropped from the + // current schema view. let desired = TEST_SCHEMA.replace(" age: I32?\n", ""); - let err = db.apply_schema(&desired).await.unwrap_err(); - let msg = err.to_string(); + + // Confirm the plan emits DropProperty { Soft } (not UnsupportedChange). + let plan = db.plan_schema(&desired).await.unwrap(); + assert!(plan.supported, "drop-property plan must be supported"); assert!( - msg.contains("OG-DS-104"), - "expected schema-lint code OG-DS-104 in error, got: {msg}" + plan.steps.iter().any(|step| matches!( + step, + SchemaMigrationStep::DropProperty { + type_kind: SchemaTypeKind::Node, + type_name, + property_name, + mode: omnigraph_compiler::DropMode::Soft, + .. + } if type_name == "Person" && property_name == "age" + )), + "expected DropProperty {{ type=Person, property=age, mode=Soft }} in plan; got {plan:?}", ); - // Manifest didn't advance and existing rows are untouched. - assert_eq!( - db.snapshot_of(ReadTarget::branch("main")) - .await - .unwrap() - .version(), - before_version + let result = db.apply_schema(&desired).await.unwrap(); + assert!(result.supported); + assert!(result.applied); + + // Manifest advanced; row count unchanged. + let after_version = db + .snapshot_of(ReadTarget::branch("main")) + .await + .unwrap() + .version(); + assert!( + after_version > before_version, + "manifest version should advance after soft drop; before={before_version}, after={after_version}", ); assert_eq!(count_rows(&db, "node:Person").await, people_before); + + // (a) Current snapshot: `age` is gone from the dataset schema. + let current_snapshot = db.snapshot_of(ReadTarget::branch("main")).await.unwrap(); + let current_ds = current_snapshot.open("node:Person").await.unwrap(); + let current_fields = current_ds + .schema() + .fields + .iter() + .map(|f| f.name.clone()) + .collect::>(); + assert!( + !current_fields.iter().any(|f| f == "age"), + "current Person dataset schema must not include 'age' after soft drop; got fields {current_fields:?}", + ); + + // (b) Time travel: at the pre-drop manifest version, the prior + // Person dataset version still has `age`. Soft drop is reversible + // via Lance's version graph until `omnigraph cleanup` runs. + let pre_drop_snapshot = db.snapshot_at_version(before_version).await.unwrap(); + let pre_drop_ds = pre_drop_snapshot.open("node:Person").await.unwrap(); + let pre_drop_fields = pre_drop_ds + .schema() + .fields + .iter() + .map(|f| f.name.clone()) + .collect::>(); + assert!( + pre_drop_fields.iter().any(|f| f == "age"), + "pre-drop Person dataset schema must still include 'age' (time-travel reversibility); got fields {pre_drop_fields:?}", + ); + + // (c) Reopen consistency: close the engine, reopen, verify the + // drop is preserved (column still absent from current schema). + let uri = dir.path().to_str().unwrap().to_string(); + drop(db); + let reopened = Omnigraph::open(&uri).await.unwrap(); + let reopened_snapshot = reopened + .snapshot_of(ReadTarget::branch("main")) + .await + .unwrap(); + let reopened_ds = reopened_snapshot.open("node:Person").await.unwrap(); + let reopened_fields = reopened_ds + .schema() + .fields + .iter() + .map(|f| f.name.clone()) + .collect::>(); + assert!( + !reopened_fields.iter().any(|f| f == "age"), + "after reopen, Person dataset schema must still lack 'age'; got fields {reopened_fields:?}", + ); } #[tokio::test] diff --git a/docs/dev/index.md b/docs/dev/index.md index 3a2b674..504c277 100644 --- a/docs/dev/index.md +++ b/docs/dev/index.md @@ -51,6 +51,14 @@ constraints. User-facing behavior should still be documented through | Install and deployment packaging | [install.md](../user/install.md), [deployment.md](../user/deployment.md) | | Release history | [releases/](../releases/) | +## Active Implementation Plans + +Working documents for in-flight feature work. Removed when the work lands. + +| Area | Read | +|---|---| +| Schema-lint chassis v1 (MR-694) — `--allow-data-loss`, soft/hard drops | [schema-lint-v1-plan.md](schema-lint-v1-plan.md) | + ## Boundary Developer docs may mention implementation details, stale gaps, upstream Lance diff --git a/docs/dev/schema-lint-v1-plan.md b/docs/dev/schema-lint-v1-plan.md new file mode 100644 index 0000000..86eeb4b --- /dev/null +++ b/docs/dev/schema-lint-v1-plan.md @@ -0,0 +1,86 @@ +# Schema-lint chassis v1 — implementation plan + +Work-in-progress checklist for the next slice of the chassis. v0 (the code-tagged diagnostics layer, MR-694 first PR #87) shipped on `main`. v1 brings the chassis to **enforced behavior**: `--allow-data-loss` flag, the `Soft | Hard` mode dimension on drops, and the first real "destructive but supported" migration step. + +This document tracks scope so the PR can land in incremental commits without losing the thread. Delete after the work is merged. + +## Lance substrate alignment (revised 2026-05-13) + +After a substrate audit against the [Lance data-evolution guide](https://lance.org/guide/data_evolution/), the v1 plan was simplified. Lance's `drop_columns()` is **already metadata-only and reversible via time travel until cleanup**: + +> `drop_columns` is metadata-only and remains reversible as long as old versions are retained. After `compact_files()` rewrites data files and `cleanup_old_versions()` removes old manifests/files, removed data may become permanently unrecoverable. + +This means: +- **Soft mode = `Dataset::drop_columns([name])`**. No separate `tombstoned: bool` catalog field. Lance's version graph IS the tombstone. +- **Hard mode = `drop_columns()` + `compact_files()` + `cleanup_old_versions()`** (the existing `omnigraph cleanup` pipeline). +- **No `omnigraph schema unhide` command needed**. Undo is `omnigraph snapshot --at ` or `omnigraph branch create --from ` — the existing time-travel surface. + +Two commits dropped from the v1 plan as a result: the old commit 3 (tombstone fields on catalog IR) and commit 8 (unhide command). Net: ~250 LoC less surface, more substrate-aligned, fewer new concepts. + +The broader substrate migration (using Lance native APIs across all migration steps, not just drop) is tracked in **MR-948** — out of scope for this branch but linked from each commit below. + +## Done in this branch so far + +- [x] **Commit 1** — `SchemaMigrationStep::diagnostic()` helper + CLI plan output displays tier alongside the code: `unsupported change on node:Person.age [OG-DS-104, destructive]: ...`. No behavior change. All 11 existing `schema_apply` tests still pass. +- [x] **Commit 2** — `DropMode { Soft, Hard }` enum + dormant `DropType` and `DropProperty` variants on `SchemaMigrationStep`. Apply path has an exhaustive-match arm returning `manifest_internal` if either variant arrives via deserialization. Serde round-trip pinned for stable wire shape. + +## Next commits (in order) + +### Commit 3 — Planner emits `DropProperty { Soft }` + apply calls `Dataset::drop_columns` + +Replaces the earlier "tombstone fields on catalog IR" commit. No catalog IR changes needed — Lance handles the tombstone via its version graph. + +- [ ] In `plan_properties`'s leftover-property branch: emit `DropProperty { Soft }` instead of `UnsupportedChange` for OG-DS-104. +- [ ] Same for node-type removal (`plan_nodes` leftover → `DropType { Soft }`, OG-DS-102) and edge-type removal (`plan_edges` leftover → `DropType { Soft }`, OG-DS-103). +- [ ] `apply_schema_with_lock` handles `DropProperty { Soft }`: calls `Dataset::drop_columns(&[property_name])` and commits via the staged-write path. **Substrate primitive: Lance metadata-only commit.** +- [ ] `apply_schema_with_lock` handles `DropType { Soft }`: marks the table tombstoned in `__manifest` (data files retained). Reversible via branch / snapshot restore. +- [ ] Recovery sidecar: standard `catalog_only` discipline — the Lance commit IS the recoverable unit. +- [ ] CLI plan output renders the new variants with mode visible. +- [ ] Integration test (extends `tests/schema_apply.rs`): remove a property, assert apply succeeds, row count preserved, current-version schema query no longer surfaces the property, prior-version time-travel query still sees it. + +### Commit 4 — Convert PR #62 destructive-rejection tests + +- [ ] `tests/schema_apply.rs`'s 6 PR #62 tests currently assert "removing X fails with `OG-DS-XXX`". Convert each to: + - **Without `--allow-data-loss`**: soft drop succeeds (Lance metadata-only, rows preserved in current version, recoverable via time travel). + - **With `--allow-data-loss`**: hard drop succeeds (column data deleted after compact + cleanup) — covered by commit 5. +- [ ] Add a new test that asserts **time-travel reversibility**: drop a column, query at prior version, verify the column is still present in that snapshot. + +### Commit 5 — `--allow-data-loss` CLI flag + `Hard` mode + +- [ ] Add `--allow-data-loss` boolean flag to `omnigraph schema apply` and `omnigraph schema plan` (plan shows what would happen if applied with the flag). +- [ ] Thread through to `apply_schema_with_lock(.., allow_data_loss: bool)`. +- [ ] Planner: when `--allow-data-loss` is set, emit `Hard` mode instead of `Soft` for drop paths. +- [ ] Apply path for `Hard` mode `DropProperty`: `drop_columns()` + `compact_files()` + `cleanup_old_versions()`. **Substrate primitives:** Lance's existing cleanup pipeline; same APIs `omnigraph cleanup` already uses. +- [ ] Apply path for `Hard` mode `DropType`: remove the manifest entry + drop the Lance dataset. +- [ ] Recovery sidecar discipline: `full_rewrite` (the cleanup phase is the rewrite). +- [ ] Integration tests: hard drop deletes data; without flag, hard drop is impossible (planner only emits soft). + +## Open questions + +- **`Hard` mode cleanup: inline vs. deferred.** Should `--allow-data-loss` on apply run `compact_files` + `cleanup_old_versions` **inline** (operator gets data deletion immediately) or **defer** to the next `omnigraph cleanup` run (operator's existing pipeline)? Recommend **inline** for ergonomic guarantee — the flag is explicit consent and operators who said "yes data loss" expect data to actually be gone after the apply returns. +- **Query-level enforcement on dropped types**: should a `match { $p: DroppedType }` query fail at parse time, lint time, or runtime? Recommend: lint warning at parse time (new code in the QL family); runtime returns empty result (Lance no longer has the column). Different from before: no "tombstoned" state to surface — the type is genuinely gone in the current version's catalog. +- **`Hard` mode for `DropType` data forensics**: dataset deletion via Lance is irreversible after `cleanup_old_versions`. Operators who want forensics should take a snapshot or tag first. Document this in `docs/schema-language.md` once this lands. No special escape hatch in the apply path. + +## Not in v1 scope (deferred) + +- Severity config in `omnigraph.yaml` (per-rule `error` / `warn` / `force`). +- `@allow(OG-XXX-NNN, "rationale")` suppression directives. +- Pre-migration checks (MR-941). +- CD / VE / LK / NM family rules (MR-942..MR-945). +- CI integration (MR-946). +- The remaining 12 of 17 `UnsupportedChange` paths still untagged with codes (interface removal, edge endpoint change, edge cardinality change, etc.). Each goes through its own MR-XXX issue. +- **Substrate alignment of non-drop migration steps** (Lance native `add_columns`, `alter_columns` for renames, type casts, defaults). Tracked in **MR-948**. Today's `AddProperty` and `RenameProperty` still go through `stage_overwrite`; v1 doesn't change that. + +## What changed from the original plan + +For posterity / reviewers comparing against the initial plan committed in commit 1: + +| Was | Now | Why | +|---|---|---| +| Commit 3: `tombstoned: bool` on `NodeIR`/`EdgeIR`/`PropertyIR` | **Removed.** No catalog IR change needed. | Lance's `drop_columns()` is metadata-only; Lance's version graph is the tombstone. | +| Commit 5: apply path writes `tombstoned: true` into catalog | Apply path calls `Dataset::drop_columns([name])` | Substrate-aligned. Lance handles the metadata commit. | +| Commit 7 Hard mode: `stage_overwrite` removing the column | `drop_columns` + `compact_files` + `cleanup_old_versions` | Substrate-aligned. Reuses `omnigraph cleanup` pipeline. | +| Commit 8: `omnigraph schema unhide ` | **Removed.** | Time travel is the undo. `omnigraph snapshot --at ` reaches the pre-drop version. | +| 8 commits total | 5 commits total | Smaller surface, fewer new concepts. | + +The chassis types (`DropMode` enum, `DropType` / `DropProperty` variants — commit 2) are kept exactly as designed; only the implementation strategy changed.