From 044ed4601964fab172cc1bb472de61e72f6d3150 Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Fri, 1 May 2026 22:45:38 +0200 Subject: [PATCH] chore: scrub Linear ticket numbers and review-bot mentions from code comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OmniGraph is OSS; internal Linear ticket references and code-review-bot mentions in source-code comments don't help external readers and leak internal tooling. Replace ticket numbers (MR-XXX) with descriptive prose, drop linear.app URLs, and remove inline mentions of Cursor/Bugbot/Cubic/Codex review threads. Scope is limited to source-code comments (`crates/`). Docs under `docs/` keep their MR-XXX references — those are part of the established change-history narrative for in-repo docs and don't require a Linear account to find context for. No behavior changes; no public API changes. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/omnigraph-cli/tests/cli.rs | 2 +- crates/omnigraph-cli/tests/system_local.rs | 20 ++-- crates/omnigraph-cli/tests/system_remote.rs | 2 +- crates/omnigraph-server/tests/server.rs | 10 +- crates/omnigraph/src/db/omnigraph.rs | 14 +-- crates/omnigraph/src/db/omnigraph/optimize.rs | 4 +- .../src/db/omnigraph/schema_apply.rs | 6 +- crates/omnigraph/src/db/run_registry.rs | 16 +-- crates/omnigraph/src/exec/mutation.rs | 23 ++-- crates/omnigraph/src/exec/staging.rs | 4 +- crates/omnigraph/src/loader/mod.rs | 17 ++- crates/omnigraph/src/table_store.rs | 44 ++++---- crates/omnigraph/tests/consistency.rs | 11 +- crates/omnigraph/tests/failpoints.rs | 3 +- crates/omnigraph/tests/runs.rs | 102 +++++++++--------- crates/omnigraph/tests/s3_storage.rs | 4 +- crates/omnigraph/tests/staged_writes.rs | 44 ++++---- 17 files changed, 164 insertions(+), 162 deletions(-) diff --git a/crates/omnigraph-cli/tests/cli.rs b/crates/omnigraph-cli/tests/cli.rs index b1c1398..9dc7338 100644 --- a/crates/omnigraph-cli/tests/cli.rs +++ b/crates/omnigraph-cli/tests/cli.rs @@ -1905,7 +1905,7 @@ fn cli_fails_for_invalid_merge_requests() { ); } -// MR-771: `omnigraph run list/show/publish/abort` subcommands removed +// `omnigraph run list/show/publish/abort` subcommands removed // alongside the run state machine. Direct-to-target writes leave nothing // for these CLIs to manage. Audit history is now visible via // `omnigraph commit list` reading the commit graph. diff --git a/crates/omnigraph-cli/tests/system_local.rs b/crates/omnigraph-cli/tests/system_local.rs index f5e75eb..d890603 100644 --- a/crates/omnigraph-cli/tests/system_local.rs +++ b/crates/omnigraph-cli/tests/system_local.rs @@ -307,7 +307,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() { assert_eq!(main_read["row_count"], 1); assert_eq!(main_read["rows"][0]["p.name"], "Zoe"); - // MR-771: `omnigraph run list` removed. Audit visible via commit list. + // `omnigraph run list` removed. Audit visible via commit list. let commits_payload = parse_stdout_json(&output_success( cli() .arg("commit") @@ -597,8 +597,8 @@ fn local_cli_failed_load_keeps_target_state_unchanged() { snapshot_table_row_count(&repo, "edge:Knows"), knows_rows_before ); - // MR-771: failed loads no longer leave a RunRecord. The atomicity - // guarantee is verified above (target tables are unchanged). + // Failed loads leave no run record (the run lifecycle has been + // removed); atomicity is verified above by the unchanged target. } #[test] @@ -631,9 +631,8 @@ fn local_cli_failed_change_keeps_target_state_unchanged() { .arg("--json"), )); assert_eq!(friends_payload["row_count"], 2); - // MR-771: failed mutations no longer leave a RunRecord. The atomicity - // guarantee is verified above (the friends_of read above shows main - // unchanged). + // Failed mutations leave no run record (the run lifecycle has been + // removed); atomicity is verified above by the unchanged target. } #[test] @@ -941,12 +940,13 @@ query vector_search($q: String) { assert_eq!(result["rows"][0]["d.slug"], "alpha-doc"); } -// MR-771: the publisher CAS conflict shape is verified end-to-end at the -// engine level in `crates/omnigraph/tests/runs.rs::concurrent_writers_one_succeeds_one_gets_expected_version_mismatch` +// The publisher CAS conflict shape is verified end-to-end at the engine +// level in +// `crates/omnigraph/tests/runs.rs::concurrent_writers_one_succeeds_one_gets_expected_version_mismatch` // and at the HTTP boundary in // `crates/omnigraph-server/tests/server.rs::change_conflict_returns_manifest_conflict_409`. -// The pre-MR-771 CLI-level race was timing-dependent; with direct-publish -// the surface is the same engine path the unit test already covers. +// A CLI-level race would be timing-dependent; with direct-publish the +// surface is the same engine path the unit test already covers. #[test] fn local_cli_policy_tooling_is_end_to_end_while_local_writes_stay_unenforced() { diff --git a/crates/omnigraph-cli/tests/system_remote.rs b/crates/omnigraph-cli/tests/system_remote.rs index cecd725..15f3a6f 100644 --- a/crates/omnigraph-cli/tests/system_remote.rs +++ b/crates/omnigraph-cli/tests/system_remote.rs @@ -192,7 +192,7 @@ query insert_person($name: String, $age: I32) { assert_eq!(local_verify["row_count"], 1); assert_eq!(local_verify["rows"][0]["p.name"], "Mina"); - // MR-771: `run publish` / `run list` removed. Direct-to-target writes + // `run publish` / `run list` removed. Direct-to-target writes // already landed via the change call above; the commit graph is now // the audit surface (verified separately by `commit list`). } diff --git a/crates/omnigraph-server/tests/server.rs b/crates/omnigraph-server/tests/server.rs index 60d4043..f8d6b8d 100644 --- a/crates/omnigraph-server/tests/server.rs +++ b/crates/omnigraph-server/tests/server.rs @@ -1346,7 +1346,7 @@ async fn policy_blocks_non_admin_merge_to_main_and_allows_admin() { #[tokio::test(flavor = "multi_thread")] async fn authenticated_change_stamps_actor_on_commits() { - // MR-771: with the Run state machine removed, actor_id is recorded + // With the Run state machine removed, actor_id is recorded // directly on the commit graph (no intermediate run record). let (_temp, app) = app_for_loaded_repo_with_auth_tokens(&[("act-andrew", "token-one")]).await; @@ -2108,10 +2108,10 @@ query vector_search_string($q: String) { #[tokio::test(flavor = "multi_thread")] async fn change_conflict_returns_manifest_conflict_409() { - // MR-771: a write that races with another writer surfaces as HTTP 409 - // with a structured `manifest_conflict` body — `table_key`, `expected`, - // and `actual` — so clients can detect-and-retry without parsing the - // message. (Replaces the old run-publish merge-conflict shape.) + // A write that races with another writer surfaces as HTTP 409 with + // a structured `manifest_conflict` body — `table_key`, `expected`, + // and `actual` — so clients can detect-and-retry without parsing + // the message. let temp = init_loaded_repo().await; let repo = repo_path(temp.path()); diff --git a/crates/omnigraph/src/db/omnigraph.rs b/crates/omnigraph/src/db/omnigraph.rs index 01cf551..a924b1f 100644 --- a/crates/omnigraph/src/db/omnigraph.rs +++ b/crates/omnigraph/src/db/omnigraph.rs @@ -1430,12 +1430,12 @@ edge WorksAt: Person -> Company #[tokio::test] async fn test_apply_schema_succeeds_after_load() { - // MR-670 + MR-674: schema apply used to be blocked by leftover - // __run__ branches. MR-670 added a defense-in-depth filter that - // skips internal system branches. MR-674 made run branches - // ephemeral on every terminal state, so in practice no __run__ - // branch survives publish — but the filter still guards the - // invariant. + // Historical: schema apply used to be blocked by leftover + // `__run__` branches. A defense-in-depth filter now skips + // internal system branches, and run branches were made + // ephemeral on every terminal state — so in practice no + // `__run__` branch survives publish. The filter still guards + // the invariant. let dir = tempfile::tempdir().unwrap(); let uri = dir.path().to_str().unwrap(); let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap(); @@ -1451,7 +1451,7 @@ edge WorksAt: Person -> Company let all_branches = db.coordinator.all_branches().await.unwrap(); assert!( !all_branches.iter().any(|b| is_internal_run_branch(b)), - "MR-674: run branch should be deleted after publish, got: {:?}", + "run branch should be deleted after publish, got: {:?}", all_branches ); diff --git a/crates/omnigraph/src/db/omnigraph/optimize.rs b/crates/omnigraph/src/db/omnigraph/optimize.rs index 32889a0..21050c1 100644 --- a/crates/omnigraph/src/db/omnigraph/optimize.rs +++ b/crates/omnigraph/src/db/omnigraph/optimize.rs @@ -15,8 +15,8 @@ //! retention. Destructive to version history — callers should gate this //! behind an explicit confirm flag at the CLI layer. //! -//! Both walk every node + edge table on the `main` branch. Run branches are -//! ephemeral by design (MR-670 / MR-674) so we do not optimize them. +//! Both walk every node + edge table on the `main` branch. Run branches +//! are ephemeral by design so we do not optimize them. use std::time::Duration; diff --git a/crates/omnigraph/src/db/omnigraph/schema_apply.rs b/crates/omnigraph/src/db/omnigraph/schema_apply.rs index b294260..b096dbd 100644 --- a/crates/omnigraph/src/db/omnigraph/schema_apply.rs +++ b/crates/omnigraph/src/db/omnigraph/schema_apply.rs @@ -34,9 +34,9 @@ pub(super) async fn apply_schema_with_lock( let branches = db.coordinator.all_branches().await?; // Skip `main` and internal system branches. The schema-apply lock branch // is excluded because it is the cluster-wide schema-apply serializer. - // `__run__*` branches are no longer created (MR-771); the filter remains - // as defense-in-depth for legacy repos with leftover staging branches — - // MR-770 will sweep them and this guard can go. + // `__run__*` branches are no longer created; the filter remains as + // defense-in-depth for legacy repos with leftover staging branches. + // A future production sweep will let this guard go. let blocking_branches = branches .into_iter() .filter(|branch| branch != "main" && !is_internal_system_branch(branch)) diff --git a/crates/omnigraph/src/db/run_registry.rs b/crates/omnigraph/src/db/run_registry.rs index 9f1c376..ee3d336 100644 --- a/crates/omnigraph/src/db/run_registry.rs +++ b/crates/omnigraph/src/db/run_registry.rs @@ -1,12 +1,12 @@ -// Run state-machine has been removed (MR-771). Mutations now write directly -// to target tables and use the publisher's `expected_table_versions` CAS for -// cross-table OCC; the `__run__` staging branches and `_graph_runs.lance` -// state machine no longer exist. +// The Run state machine has been removed. Mutations now write directly +// to target tables and use the publisher's `expected_table_versions` +// CAS for cross-table OCC; `__run__` staging branches and the +// `_graph_runs.lance` state machine no longer exist. // -// What remains is the branch-name predicate, kept as a defense-in-depth guard -// against users naming a public branch `__run__*`. MR-770 owns the production -// sweep of legacy `_graph_runs.lance` rows and stale `__run__*` branches; once -// that lands the predicate (and this file) can go too. +// What remains is the branch-name predicate, kept as a defense-in-depth +// guard against users naming a public branch `__run__*`. A future +// production sweep of legacy `_graph_runs.lance` rows and stale +// `__run__*` branches will let this predicate (and this file) go too. pub(crate) const INTERNAL_RUN_BRANCH_PREFIX: &str = "__run__"; diff --git a/crates/omnigraph/src/exec/mutation.rs b/crates/omnigraph/src/exec/mutation.rs index a588c68..e996241 100644 --- a/crates/omnigraph/src/exec/mutation.rs +++ b/crates/omnigraph/src/exec/mutation.rs @@ -590,9 +590,9 @@ use super::staging::{MutationStaging, PendingMode}; /// is the source of truth for "where is Lance HEAD right now on /// this table within this query." /// -/// The `inline_committed` reopen branch closes the cursor-bot HIGH -/// "multi-delete fails on same table" finding. The branch goes away -/// once Lance exposes a two-phase delete API +/// The `inline_committed` reopen branch closes the multi-delete-on-same-table +/// failure path that pre-staged-write engines inherited. The branch goes +/// away once Lance exposes a two-phase delete API /// ([lance-format/lance#6658](https://github.com/lance-format/lance/issues/6658)) /// and we can stage deletes on the same path as inserts/updates. async fn open_table_for_mutation( @@ -633,9 +633,9 @@ async fn open_table_for_mutation( /// D₂ parse-time check: a single mutation query is either insert/update-only /// or delete-only. Mixed → reject before any I/O. /// -/// Reason: under the staged-write rewire (MR-794 step 2+), inserts and -/// updates accumulate in memory and commit at end-of-query, while deletes -/// still inline-commit (Lance lacks a public two-phase delete in 4.0.0). +/// Reason: under the staged-write writer, inserts and updates +/// accumulate in memory and commit at end-of-query, while deletes still +/// inline-commit (Lance lacks a public two-phase delete in 4.0.0). /// Mixing creates ordering hazards (same-row insert→delete becomes a no-op /// because the staged insert isn't visible to delete; cascading deletes /// of just-inserted edges break referential integrity by silent design). @@ -661,7 +661,7 @@ fn enforce_no_mixed_destructive_constructive( "mutation '{}' on the same query mixes inserts/updates and deletes; \ split into separate mutations: (1) inserts and updates, then (2) deletes. \ This restriction lifts when Lance exposes a two-phase delete API \ - (tracked: MR-793 / Lance-upstream).", + (tracked: lance-format/lance#6658).", ir.name ))); } @@ -706,11 +706,10 @@ impl Omnigraph { ) -> Result { self.ensure_schema_state_valid().await?; let requested = Self::normalize_branch_name(branch)?; - // Reject internal `__run__*` / system-prefixed branches at the public - // write boundary. The pre-MR-771 path got this guard transitively via - // `begin_run`'s `ensure_public_branch_ref` call; the direct-publish - // path needs to assert it explicitly so a caller can't write to - // legacy or system staging branches by passing the prefix verbatim. + // Reject internal `__run__*` / system-prefixed branches at the + // public write boundary. Direct-publish paths assert this + // explicitly so a caller can't write to legacy or system + // staging branches by passing the prefix verbatim. if let Some(name) = requested.as_deref() { crate::db::ensure_public_branch_ref(name, "mutate")?; } diff --git a/crates/omnigraph/src/exec/staging.rs b/crates/omnigraph/src/exec/staging.rs index 92ddfac..2c7eae3 100644 --- a/crates/omnigraph/src/exec/staging.rs +++ b/crates/omnigraph/src/exec/staging.rs @@ -1,4 +1,4 @@ -//! Per-query staging accumulator for direct-publish writes (MR-794 step 2+). +//! Per-query staging accumulator for direct-publish writes. //! //! `MutationStaging` accumulates per-table input batches in memory during a //! `mutate_as` or `load` query, then at end-of-query commits each touched @@ -74,7 +74,7 @@ pub(crate) struct StagedTablePath { /// Per-query staging state. /// -/// Replaces the post-MR-771 inline-commit `MutationStaging.latest` map with +/// Replaces the legacy inline-commit `MutationStaging.latest` map with /// an in-memory accumulator that defers all Lance HEAD advances to /// end-of-query. After this rewire the bug class "Lance HEAD drifts ahead /// of `__manifest`" is unreachable in `mutate_as` and `load` for inserts diff --git a/crates/omnigraph/src/loader/mod.rs b/crates/omnigraph/src/loader/mod.rs index 374adc9..f275938 100644 --- a/crates/omnigraph/src/loader/mod.rs +++ b/crates/omnigraph/src/loader/mod.rs @@ -155,11 +155,10 @@ impl Omnigraph { pub async fn load(&mut self, branch: &str, data: &str, mode: LoadMode) -> Result { self.ensure_schema_state_valid().await?; - // Reject internal `__run__*` / system-prefixed branches at the public - // write boundary. The pre-MR-771 path got this guard transitively via - // `begin_run`'s `ensure_public_branch_ref` call; the direct-publish - // path needs to assert it explicitly so a caller can't write to - // legacy or system staging branches by passing the prefix verbatim. + // Reject internal `__run__*` / system-prefixed branches at the + // public write boundary. Direct-publish paths assert this + // explicitly so a caller can't write to legacy or system + // staging branches by passing the prefix verbatim. crate::db::ensure_public_branch_ref(branch, "load")?; // Branch convention: `None` represents `main`. Re-normalizing to // `Some("main")` here would route the publisher commit through a @@ -1561,10 +1560,10 @@ pub(crate) async fn validate_edge_cardinality( /// Lance version). /// /// `mode` controls dedup behavior. `LoadMode::Merge` passes `Some("id")` -/// so committed edges that the load is *updating* (same edge id, possibly -/// changed `src`) are not double-counted (Cubic P1 finding on PR #68). -/// `LoadMode::Append` passes `None` because each line generates a fresh -/// ULID id that never collides with committed. +/// so committed edges that the load is *updating* (same edge id, +/// possibly changed `src`) are not double-counted. `LoadMode::Append` +/// passes `None` because each line generates a fresh ULID id that +/// never collides with committed. async fn validate_edge_cardinality_with_pending_loader( db: &Omnigraph, branch: Option<&str>, diff --git a/crates/omnigraph/src/table_store.rs b/crates/omnigraph/src/table_store.rs index b0ecc18..adc7c2b 100644 --- a/crates/omnigraph/src/table_store.rs +++ b/crates/omnigraph/src/table_store.rs @@ -40,12 +40,12 @@ pub struct DeleteState { /// A Lance write that has produced fragment files on object storage but is /// not yet committed to the dataset's manifest. The staged-write primitives -/// are defined here for later integration in `MutationStaging` -/// (`exec/mutation.rs`) and the loader (`loader/mod.rs`) — those rewires -/// land in [MR-794](https://linear.app/modernrelay/issue/MR-794) step 2+. -/// The intent: defer Lance commits to end-of-query so a mid-query failure -/// leaves the touched table at the pre-mutation HEAD instead of drifting -/// ahead. +/// are consumed by `MutationStaging` (`exec/staging.rs`, +/// `exec/mutation.rs`) and the bulk loader (`loader/mod.rs`). The +/// intent: defer Lance commits to end-of-query so a mid-query failure +/// leaves the touched table at the pre-mutation HEAD instead of +/// drifting ahead. See `docs/runs.md` for the publisher-CAS contract +/// this builds on. /// /// `transaction` is opaque from our side — Lance owns its semantics. We /// commit it via `CommitBuilder::execute(transaction)` (see @@ -545,7 +545,7 @@ impl TableStore { }) } - // ─── Staged-write API (MR-794) ─────────────────────────────────────────── + // ─── Staged-write API ──────────────────────────────────────────────────── // // These primitives wrap Lance's distributed-write API: each call writes // fragment files to object storage but does NOT advance the dataset's @@ -672,16 +672,16 @@ impl TableStore { /// /// This is intrinsic to the underlying Lance API: there is no public /// way to make `MergeInsertBuilder` see uncommitted fragments. The - /// engine's mutation path enforces the rule "per touched table: all - /// stage_append OR exactly one stage_merge_insert" at parse time - /// (the D₂′ check landing with [MR-794](https://linear.app/modernrelay/issue/MR-794) - /// step 2+ in `exec/mutation.rs`). Multi-table queries and append-chains - /// remain safe; only chained merges on a single table are rejected. + /// engine's `MutationStaging` accumulator works around this by + /// concatenating per-table batches in memory and issuing exactly + /// one `stage_merge_insert` per touched table at end-of-query (with + /// last-write-wins dedupe by id) — see `exec/staging.rs`. Direct + /// callers of this primitive must respect the contract themselves. /// /// Lift path: either a Lance API extension that lets /// `MergeInsertBuilder` accept additional staged fragments, or an /// in-memory pre-merge here that folds prior staged batches into the - /// input stream. See `docs/runs.md` and MR-793. + /// input stream. See `docs/runs.md`. pub async fn stage_merge_insert( &self, ds: Dataset, @@ -780,10 +780,10 @@ impl TableStore { /// filtered scan even when their data would match. Staged-fragment /// rows are silently absent from the result. `scanner.use_stats(false)` /// does not fix this in lance 4.0.0. Callers needing correct filtered - /// reads against staged data should use a different strategy (the - /// engine's MR-794 step 2+ design uses in-memory pending-batch - /// accumulation + DataFusion `MemTable` instead — see - /// `.context/mr-794-step2-design.md`). + /// reads against staged data should use a different strategy — the + /// engine's `MutationStaging` accumulator unions in-memory pending + /// batches with the committed scan via DataFusion `MemTable` (see + /// `scan_with_pending`). /// /// This method remains on the surface for primitive-level testing /// (basic stage + scan correctness without filters works) and for @@ -824,10 +824,10 @@ impl TableStore { /// Scan committed via Lance + apply the same filter to in-memory /// pending batches via DataFusion `MemTable`, concat the two result /// streams. The replacement for `scan_with_staged` in engine code: - /// MR-794 step 2+ accumulates input batches in memory and unions - /// them with the committed snapshot at read time, sidestepping the - /// `Scanner::with_fragments` filter-pushdown limitation documented - /// on `scan_with_staged`. + /// the staged-write writer accumulates input batches in memory and + /// unions them with the committed snapshot at read time, + /// sidestepping the `Scanner::with_fragments` filter-pushdown + /// limitation documented on `scan_with_staged`. /// /// `committed_ds` should be opened at the pre-mutation /// `expected_version` (the same version captured in `MutationStaging::expected_versions` @@ -1251,7 +1251,7 @@ fn filter_out_rows_where_string_in( /// Apply `projection` and `filter` to in-memory pending batches via a /// fresh DataFusion `SessionContext`. Used by `scan_with_pending` for -/// the read-your-writes side of MR-794's in-memory accumulator. +/// the read-your-writes side of the in-memory staging accumulator. /// /// `pending_batches` must be non-empty (the caller short-circuits on /// empty). diff --git a/crates/omnigraph/tests/consistency.rs b/crates/omnigraph/tests/consistency.rs index 205893c..63dc3f7 100644 --- a/crates/omnigraph/tests/consistency.rs +++ b/crates/omnigraph/tests/consistency.rs @@ -522,11 +522,12 @@ query high_value() { #[tokio::test] async fn stale_handle_public_mutation_must_refresh_then_retry() { - // MR-771: with the Run state machine removed, the engine no longer - // auto-rebases stale-handle mutations onto the latest target head. The - // publisher's `expected_table_versions` CAS makes the contract explicit - // — a stale writer fails loudly with `ExpectedVersionMismatch` and the - // client decides whether to refresh-and-retry. + // With the Run state machine removed, the engine no longer + // auto-rebases stale-handle mutations onto the latest target head. + // The publisher's `expected_table_versions` CAS makes the contract + // explicit — a stale writer fails loudly with + // `ExpectedVersionMismatch` and the client decides whether to + // refresh-and-retry. let dir = tempfile::tempdir().unwrap(); let _db = init_and_load(&dir).await; drop(_db); diff --git a/crates/omnigraph/tests/failpoints.rs b/crates/omnigraph/tests/failpoints.rs index 3adc15d..f16ed59 100644 --- a/crates/omnigraph/tests/failpoints.rs +++ b/crates/omnigraph/tests/failpoints.rs @@ -140,7 +140,8 @@ async fn schema_apply_recovers_partial_rename() { assert_no_staging_files(dir.path()); } -/// Pin the documented "finalize → publisher residual" from MR-794. +/// Pin the documented "finalize → publisher residual" of the +/// staged-write commit path. /// /// `MutationStaging::finalize` runs `commit_staged` per touched table /// sequentially before the publisher commits the manifest. Lance has no diff --git a/crates/omnigraph/tests/runs.rs b/crates/omnigraph/tests/runs.rs index 74bafc1..4e363bf 100644 --- a/crates/omnigraph/tests/runs.rs +++ b/crates/omnigraph/tests/runs.rs @@ -1,4 +1,4 @@ -//! Tests for the direct-to-target write path (MR-771: Run state machine +//! Tests for the direct-to-target write path (Run state machine //! removed). The Run/`__run__` staging branch / RunRecord state machine no //! longer exists; mutations and loads write directly to target tables and //! commit once via the publisher's `expected_table_versions` CAS. @@ -161,17 +161,17 @@ async fn multi_statement_mutation_is_atomic_with_read_your_writes() { assert_eq!(friends.num_rows(), 1); } -/// Mid-query partial failure: op-1 stages a Person insert, op-2 fails on -/// referential integrity (validate_edge_insert_endpoints). Under the -/// MR-794 staged-write rewire, op-1's batch lives in the in-memory +/// Mid-query partial failure: op-1 stages a Person insert, op-2 fails +/// on referential integrity (validate_edge_insert_endpoints). Under +/// the staged-write writer, op-1's batch lives in the in-memory /// accumulator and never reaches Lance — Lance HEAD on `node:Person` /// stays at the pre-mutation version. The publisher never publishes, /// the manifest never advances, and the next mutation against the same /// table proceeds normally (no `ExpectedVersionMismatch`). /// -/// This test pins the post-MR-794 contract: -/// - Failed multi-statement mutation surfaces a clear error, no manifest -/// commit, no observable state change. +/// Pins the staged-write contract: +/// - Failed multi-statement mutation surfaces a clear error, no +/// manifest commit, no observable state change. /// - The touched tables stay queryable and writable from the next /// query — Lance HEAD has not drifted. #[tokio::test] @@ -228,7 +228,7 @@ async fn partial_failure_leaves_target_queryable_and_unblocks_next_mutation() { &mixed_params(&[("$name", "Frank")], &[("$age", 33)]), ) .await - .expect("next mutation on the touched table must succeed under MR-794"); + .expect("next mutation on the touched table must succeed under the staged-write writer"); assert_eq!( result.affected_nodes, 1, "follow-up insert should report 1 affected node" @@ -251,7 +251,7 @@ async fn partial_failure_leaves_target_queryable_and_unblocks_next_mutation() { /// success and one `ExpectedVersionMismatch`. The replacement for the old /// `concurrent_conflicting_run_publish_fails_cleanly` test — the OCC fence /// has moved from a graph-level run-publish merge into the publisher's -/// per-table CAS (MR-766 + MR-771). +/// per-table CAS. /// /// Drives the race by interleaving two handles that captured the same /// pre-write manifest snapshot: A commits first; B's commit then sees @@ -323,7 +323,7 @@ async fn concurrent_writers_one_succeeds_one_gets_expected_version_mismatch() { ); } -/// The cancellation hole that motivated MR-771: dropping a mutation future +/// The cancellation hole that motivated removing the Run state machine: dropping a mutation future /// mid-flight must not leave any graph-level state behind. With the run /// state machine gone, only orphaned Lance fragments can remain — and those /// are reclaimed by `omnigraph cleanup`. @@ -381,7 +381,7 @@ async fn cancelled_mutation_future_leaves_no_state() { // `is_internal_system_branch`, so a runtime "no `__run__` branches" check // would be vacuous. The structural property that no `__run__` branches // can ever be created is enforced by deletion of `begin_run` etc. in - // MR-771 (verified by the build itself — those symbols no longer exist). + // (verified by the build itself — those symbols no longer exist). // // (1) The branch list is unchanged: cancellation/completion cannot // synthesize new public branches. @@ -448,9 +448,10 @@ async fn repeated_loads_do_not_accumulate_branches() { assert_eq!(db.branch_list().await.unwrap(), vec!["main".to_string()]); } -/// User code must not be able to write to internal `__run__*` names. The -/// branch-name guard predicate is kept as defense-in-depth (MR-770 will -/// remove it once production legacy branches are swept). +/// User code must not be able to write to internal `__run__*` names. +/// The branch-name guard predicate is kept as defense-in-depth; it +/// will be removed once a future production sweep retires the legacy +/// branches. #[tokio::test] async fn public_branch_apis_reject_internal_run_refs() { let dir = tempfile::tempdir().unwrap(); @@ -480,11 +481,11 @@ async fn public_branch_apis_reject_internal_run_refs() { ); } -// ─── MR-794: staged-write rewire — additional contract tests ──────────────── +// ─── Staged-write rewire — additional contract tests ─────────────────────── -/// Mutation queries used only by the MR-794 tests below. Kept in the test -/// file (not in helpers' shared `MUTATION_QUERIES`) to keep their scope -/// local to the staged-write coverage. +/// Mutation queries used only by the staged-write tests below. Kept in +/// the test file (not in helpers' shared `MUTATION_QUERIES`) to keep +/// their scope local to the staged-write coverage. const STAGED_QUERIES: &str = r#" query insert_two_persons($a_name: String, $a_age: I32, $b_name: String, $b_age: I32) { insert Person { name: $a_name, age: $a_age } @@ -852,10 +853,10 @@ edge WorksAt: Person -> Company @card(0..1) ); } -// ─── PR #68 review-comment fixes — pinned coverage ────────────────────────── +// ─── Chained-mutation correctness — pinned coverage ───────────────────────── -/// Codex P1 / Cubic P1 #1: chained `update` ops in one query must respect -/// each previous op's view of the rows. Without merge-shadow semantics on +/// Chained `update` ops in one query must respect each previous op's +/// view of the rows. Without merge-shadow semantics on /// `scan_with_pending`, the second update sees the stale committed value /// (the first update's row still appears in the Lance scan because the /// pending side hasn't committed), the predicate matches it, and the @@ -932,8 +933,8 @@ async fn chained_updates_with_overlapping_predicate_respects_intermediate_value( ); } -/// Cursor Bugbot HIGH: two `delete` ops on the same node table in one -/// query. Pre-fix, op-2's `open_table_for_mutation` went through +/// Two `delete` ops on the same node table in one query. Pre-fix, +/// op-2's `open_table_for_mutation` went through /// `open_for_mutation_on_branch` which trips `ensure_expected_version` /// (Lance HEAD has advanced past the manifest's pinned version after /// op-1's inline-commit, but the manifest hasn't moved). Post-fix, @@ -984,7 +985,7 @@ async fn multi_statement_delete_on_same_node_table() { } } -/// Cursor Bugbot HIGH (cascade variant): deleting a node cascades to its +/// Cascade-then-explicit variant: deleting a node cascades to its /// edges, advancing Lance HEAD on the edge table. A subsequent /// `delete ` op in the same query must reopen at the /// post-cascade-commit version of the edge table — not trip @@ -1030,11 +1031,10 @@ query cascade_then_explicit($name: String, $other: String) { ); } -/// Codex P2 / Cursor Bugbot LOW / Cubic P2: the engine cardinality path -/// must enforce `min` bounds. Pre-fix the engine path silently dropped -/// the min check (a `let _ = card.min;` line). The loader path always -/// enforced both. Post-fix, both paths route through -/// `enforce_cardinality_bounds` which checks both bounds. +/// The engine cardinality path must enforce `min` bounds. Pre-fix the +/// engine path silently dropped the min check (a `let _ = card.min;` +/// line). The loader path always enforced both. Post-fix, both paths +/// route through `enforce_cardinality_bounds` which checks both bounds. /// /// Build a custom schema with `Knows: Person -> Person @card(2..*)`. /// Inserting a single Knows edge violates min=2. The mutation path must @@ -1085,8 +1085,8 @@ query add_friend($from: String, $to: String) { ); } -/// Cubic P1 #2: `LoadMode::Merge` on edges must NOT double-count the -/// committed edge AND its updated pending replacement. Build a custom +/// `LoadMode::Merge` on edges must NOT double-count the committed +/// edge AND its updated pending replacement. Build a custom /// schema where WorksAt has @card(0..1). Seed Alice with one WorksAt to /// Acme. Then Merge-load the SAME edge id (so it's an update, not an /// insert) pointing Alice's WorksAt at Bigco. Cardinality must count @@ -1132,12 +1132,11 @@ edge WorksAt: Person -> Company @card(0..1) assert_eq!(count_rows(&db, "edge:WorksAt").await, 1); } -/// Cubic P2 (follow-up to PR #68 review of commit 3223b51): a Merge load -/// whose input has TWO rows with the same edge id must be deduped at -/// cardinality-count time, not just at finalize. Without dedup, two -/// pending rows count twice → spurious `@card` violation. With dedup -/// (last-occurrence-wins, mirroring `dedupe_merge_batches_by_id`), the -/// pending side counts once. +/// A Merge load whose input has TWO rows with the same edge id must be +/// deduped at cardinality-count time, not just at finalize. Without +/// dedup, two pending rows count twice → spurious `@card` violation. +/// With dedup (last-occurrence-wins, mirroring +/// `dedupe_merge_batches_by_id`), the pending side counts once. /// /// This is a separate path from `load_merge_mode_dedupes_edge_for_cardinality_count` /// (which dedupes committed-vs-pending). Here we verify pending-vs-pending @@ -1182,11 +1181,11 @@ edge WorksAt: Person -> Company @card(0..1) assert_eq!(count_rows(&db, "edge:WorksAt").await, 1); } -/// Cubic P2 (follow-up): `scan_with_pending` must reject a call where -/// `key_column` is requested but the projection omits that column. -/// Without the up-front check, the helper silently degraded to union -/// semantics — letting a chained-update bug slip through unnoticed. -/// This test verifies the contract is enforced at the API boundary. +/// `scan_with_pending` must reject a call where `key_column` is +/// requested but the projection omits that column. Without the +/// up-front check, the helper silently degraded to union semantics — +/// letting a chained-update bug slip through unnoticed. This test +/// verifies the contract is enforced at the API boundary. #[tokio::test] async fn scan_with_pending_rejects_key_column_missing_from_projection() { use arrow_array::{RecordBatch, StringArray}; @@ -1275,20 +1274,19 @@ async fn scan_with_pending_rejects_key_column_missing_from_projection() { ); } -/// Cursor Bugbot Medium (follow-up on commit 052b6e6): the -/// `PendingTable.schema` field is captured from the first `append_batch` -/// call and never updated. On a blob-bearing table, an `insert` -/// produces a full-schema batch (blob columns included) and an `update` -/// that doesn't assign every blob produces a subset-schema batch. Mixed -/// in one query, the second `append_batch` would silently push an +/// `PendingTable.schema` is captured from the first `append_batch` call +/// and never updated. On a blob-bearing table, an `insert` produces a +/// full-schema batch (blob columns included) and an `update` that +/// doesn't assign every blob produces a subset-schema batch. Mixed in +/// one query, the second `append_batch` would silently push an /// incompatible batch — the mismatch surfaced eventually at /// `concat_batches`/MemTable construction inside finalize, but the /// failure point was distant from the offending op. /// -/// Post-fix: `append_batch` validates the new batch's schema against -/// the existing accumulator's schema and returns a typed error -/// directing the caller to split the mutation. The error fires at the -/// second op (the update), not at end-of-query. +/// `append_batch` validates the new batch's schema against the existing +/// accumulator's schema and returns a typed error directing the caller +/// to split the mutation. The error fires at the second op (the +/// update), not at end-of-query. #[tokio::test] async fn append_batch_rejects_mismatched_schema_in_blob_table_at_offending_op() { use omnigraph::loader::{LoadMode, load_jsonl}; diff --git a/crates/omnigraph/tests/s3_storage.rs b/crates/omnigraph/tests/s3_storage.rs index 3a3d2be..5b90022 100644 --- a/crates/omnigraph/tests/s3_storage.rs +++ b/crates/omnigraph/tests/s3_storage.rs @@ -44,7 +44,7 @@ async fn s3_compatible_repo_lifecycle_works() { .await .unwrap(); - // Direct-to-target load (MR-771): no run lifecycle, single publisher + // Direct-to-target load: no run lifecycle, single publisher // commit lands the row. reopened .load( @@ -153,7 +153,7 @@ async fn s3_public_load_uses_hidden_run_and_publishes() { .await .unwrap(); - // Direct-to-target writes (MR-771): no run state machine, just the + // Direct-to-target writes: no run state machine, just the // published commit lands the row. Verify by reopening and reading. let mut reopened = Omnigraph::open(&uri).await.unwrap(); let loaded = query_main( diff --git a/crates/omnigraph/tests/staged_writes.rs b/crates/omnigraph/tests/staged_writes.rs index 811ccaf..1f8822d 100644 --- a/crates/omnigraph/tests/staged_writes.rs +++ b/crates/omnigraph/tests/staged_writes.rs @@ -1,20 +1,21 @@ -//! Primitive-level tests for `TableStore`'s staged-write API -//! (MR-794 step 1). These exercise `stage_append`, `stage_merge_insert`, -//! `scan_with_staged`, and `count_rows_with_staged` directly against a -//! Lance dataset — no Omnigraph engine involved. The engine-level rewire -//! (MR-794 step 2+) lives in `tests/runs.rs` once it lands. +//! Primitive-level tests for `TableStore`'s staged-write API. These +//! exercise `stage_append`, `stage_merge_insert`, `scan_with_staged`, +//! and `count_rows_with_staged` directly against a Lance dataset — no +//! Omnigraph engine involved. The engine-level use of these primitives +//! is exercised by `tests/runs.rs`. //! //! Test surface here: //! 1. `stage_append` + `scan_with_staged` shows committed + staged data //! without duplicates. //! 2. `stage_merge_insert` of a row that supersedes a committed fragment -//! surfaces only the rewritten row, not both — the -//! `removed_fragment_ids` dedup landed in PR #66's `730631c`. -//! 3. **Documented contract**: chained `stage_merge_insert` calls on the -//! same dataset whose source rows share keys produce duplicate rows in -//! `scan_with_staged`. The engine's parse-time D₂′ check (MR-794 step -//! 2+) prevents callers from triggering this; this test pins the -//! primitive's behavior so a future change either (a) preserves it or +//! surfaces only the rewritten row, not both, via the +//! `removed_fragment_ids` dedup contract. +//! 3. **Documented contract**: chained `stage_merge_insert` calls on +//! the same dataset whose source rows share keys produce duplicate +//! rows in `scan_with_staged`. The engine's accumulator dedupes by +//! id at finalize time so this primitive-level pitfall doesn't +//! surface in production paths; this test pins the primitive's +//! behavior so a future change either (a) preserves it or //! (b) consciously fixes it (and updates this test). use arrow_array::{Array, Int32Array, RecordBatch, StringArray, UInt64Array}; @@ -120,8 +121,9 @@ async fn stage_merge_insert_dedupes_superseded_committed_fragment() { assert!( !staged.removed_fragment_ids.is_empty(), "merge_insert that rewrites a committed row must set removed_fragment_ids \ - (this is the dedup invariant from PR #66 commit 730631c — its absence \ - was caught by Cubic/Cursor/Codex on PR #66)" + so the scan-with-staged composer can shadow the superseded committed \ + fragment — without it, the committed row and its rewrite both appear, \ + producing duplicates by key" ); // scan_with_staged: alice appears exactly once, with the new age. @@ -347,11 +349,11 @@ async fn stage_merge_insert_then_commit_persists_merged_view() { /// silently absent. `scanner.use_stats(false)` does not bypass this in /// lance 4.0.0. /// -/// This test pins the actual behavior so a future change either preserves -/// it (and updates the doc) or fixes it (and rewrites this test). The -/// engine's MR-794 step 2+ design uses in-memory pending-batch -/// accumulation + DataFusion `MemTable` for read-your-writes instead, so -/// production code is unaffected. +/// This test pins the actual behavior so a future change either +/// preserves it (and updates the doc) or fixes it (and rewrites this +/// test). The engine's `MutationStaging` accumulator unions in-memory +/// pending batches with the committed scan via DataFusion `MemTable` +/// for read-your-writes instead, so production code is unaffected. #[tokio::test] async fn scan_with_staged_with_filter_silently_drops_staged_rows() { let dir = tempfile::tempdir().unwrap(); @@ -485,6 +487,8 @@ async fn chained_stage_merge_insert_with_shared_key_documents_duplicate_behavior here because this assertion failed: either (a) the primitive was \ improved to dedupe across stages (good — update to assert == 1) \ or (b) something subtler broke (investigate before changing the \ - assertion). See PR #67 Codex P1 thread + .context/mr-794-step2-design.md §3.1." + assertion). The engine's MutationStaging accumulator dedupes by \ + id at finalize time so this primitive-level pitfall doesn't \ + surface in production paths — see exec/staging.rs." ); }