chore: scrub Linear ticket numbers and review-bot mentions from code comments

OmniGraph is OSS; internal Linear ticket references and code-review-bot
mentions in source-code comments don't help external readers and leak
internal tooling. Replace ticket numbers (MR-XXX) with descriptive
prose, drop linear.app URLs, and remove inline mentions of
Cursor/Bugbot/Cubic/Codex review threads.

Scope is limited to source-code comments (`crates/`). Docs under
`docs/` keep their MR-XXX references — those are part of the
established change-history narrative for in-repo docs and don't
require a Linear account to find context for.

No behavior changes; no public API changes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ragnor Comerford 2026-05-01 22:45:38 +02:00
parent ea16c74329
commit 044ed46019
No known key found for this signature in database
17 changed files with 164 additions and 162 deletions

View file

@ -1905,7 +1905,7 @@ fn cli_fails_for_invalid_merge_requests() {
);
}
// MR-771: `omnigraph run list/show/publish/abort` subcommands removed
// `omnigraph run list/show/publish/abort` subcommands removed
// alongside the run state machine. Direct-to-target writes leave nothing
// for these CLIs to manage. Audit history is now visible via
// `omnigraph commit list` reading the commit graph.

View file

@ -307,7 +307,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
assert_eq!(main_read["row_count"], 1);
assert_eq!(main_read["rows"][0]["p.name"], "Zoe");
// MR-771: `omnigraph run list` removed. Audit visible via commit list.
// `omnigraph run list` removed. Audit visible via commit list.
let commits_payload = parse_stdout_json(&output_success(
cli()
.arg("commit")
@ -597,8 +597,8 @@ fn local_cli_failed_load_keeps_target_state_unchanged() {
snapshot_table_row_count(&repo, "edge:Knows"),
knows_rows_before
);
// MR-771: failed loads no longer leave a RunRecord. The atomicity
// guarantee is verified above (target tables are unchanged).
// Failed loads leave no run record (the run lifecycle has been
// removed); atomicity is verified above by the unchanged target.
}
#[test]
@ -631,9 +631,8 @@ fn local_cli_failed_change_keeps_target_state_unchanged() {
.arg("--json"),
));
assert_eq!(friends_payload["row_count"], 2);
// MR-771: failed mutations no longer leave a RunRecord. The atomicity
// guarantee is verified above (the friends_of read above shows main
// unchanged).
// Failed mutations leave no run record (the run lifecycle has been
// removed); atomicity is verified above by the unchanged target.
}
#[test]
@ -941,12 +940,13 @@ query vector_search($q: String) {
assert_eq!(result["rows"][0]["d.slug"], "alpha-doc");
}
// MR-771: the publisher CAS conflict shape is verified end-to-end at the
// engine level in `crates/omnigraph/tests/runs.rs::concurrent_writers_one_succeeds_one_gets_expected_version_mismatch`
// The publisher CAS conflict shape is verified end-to-end at the engine
// level in
// `crates/omnigraph/tests/runs.rs::concurrent_writers_one_succeeds_one_gets_expected_version_mismatch`
// and at the HTTP boundary in
// `crates/omnigraph-server/tests/server.rs::change_conflict_returns_manifest_conflict_409`.
// The pre-MR-771 CLI-level race was timing-dependent; with direct-publish
// the surface is the same engine path the unit test already covers.
// A CLI-level race would be timing-dependent; with direct-publish the
// surface is the same engine path the unit test already covers.
#[test]
fn local_cli_policy_tooling_is_end_to_end_while_local_writes_stay_unenforced() {

View file

@ -192,7 +192,7 @@ query insert_person($name: String, $age: I32) {
assert_eq!(local_verify["row_count"], 1);
assert_eq!(local_verify["rows"][0]["p.name"], "Mina");
// MR-771: `run publish` / `run list` removed. Direct-to-target writes
// `run publish` / `run list` removed. Direct-to-target writes
// already landed via the change call above; the commit graph is now
// the audit surface (verified separately by `commit list`).
}

View file

@ -1346,7 +1346,7 @@ async fn policy_blocks_non_admin_merge_to_main_and_allows_admin() {
#[tokio::test(flavor = "multi_thread")]
async fn authenticated_change_stamps_actor_on_commits() {
// MR-771: with the Run state machine removed, actor_id is recorded
// With the Run state machine removed, actor_id is recorded
// directly on the commit graph (no intermediate run record).
let (_temp, app) = app_for_loaded_repo_with_auth_tokens(&[("act-andrew", "token-one")]).await;
@ -2108,10 +2108,10 @@ query vector_search_string($q: String) {
#[tokio::test(flavor = "multi_thread")]
async fn change_conflict_returns_manifest_conflict_409() {
// MR-771: a write that races with another writer surfaces as HTTP 409
// with a structured `manifest_conflict` body — `table_key`, `expected`,
// and `actual` — so clients can detect-and-retry without parsing the
// message. (Replaces the old run-publish merge-conflict shape.)
// A write that races with another writer surfaces as HTTP 409 with
// a structured `manifest_conflict` body — `table_key`, `expected`,
// and `actual` — so clients can detect-and-retry without parsing
// the message.
let temp = init_loaded_repo().await;
let repo = repo_path(temp.path());

View file

@ -1430,12 +1430,12 @@ edge WorksAt: Person -> Company
#[tokio::test]
async fn test_apply_schema_succeeds_after_load() {
// MR-670 + MR-674: schema apply used to be blocked by leftover
// __run__ branches. MR-670 added a defense-in-depth filter that
// skips internal system branches. MR-674 made run branches
// ephemeral on every terminal state, so in practice no __run__
// branch survives publish — but the filter still guards the
// invariant.
// Historical: schema apply used to be blocked by leftover
// `__run__` branches. A defense-in-depth filter now skips
// internal system branches, and run branches were made
// ephemeral on every terminal state — so in practice no
// `__run__` branch survives publish. The filter still guards
// the invariant.
let dir = tempfile::tempdir().unwrap();
let uri = dir.path().to_str().unwrap();
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
@ -1451,7 +1451,7 @@ edge WorksAt: Person -> Company
let all_branches = db.coordinator.all_branches().await.unwrap();
assert!(
!all_branches.iter().any(|b| is_internal_run_branch(b)),
"MR-674: run branch should be deleted after publish, got: {:?}",
"run branch should be deleted after publish, got: {:?}",
all_branches
);

View file

@ -15,8 +15,8 @@
//! retention. Destructive to version history — callers should gate this
//! behind an explicit confirm flag at the CLI layer.
//!
//! Both walk every node + edge table on the `main` branch. Run branches are
//! ephemeral by design (MR-670 / MR-674) so we do not optimize them.
//! Both walk every node + edge table on the `main` branch. Run branches
//! are ephemeral by design so we do not optimize them.
use std::time::Duration;

View file

@ -34,9 +34,9 @@ pub(super) async fn apply_schema_with_lock(
let branches = db.coordinator.all_branches().await?;
// Skip `main` and internal system branches. The schema-apply lock branch
// is excluded because it is the cluster-wide schema-apply serializer.
// `__run__*` branches are no longer created (MR-771); the filter remains
// as defense-in-depth for legacy repos with leftover staging branches
// MR-770 will sweep them and this guard can go.
// `__run__*` branches are no longer created; the filter remains as
// defense-in-depth for legacy repos with leftover staging branches.
// A future production sweep will let this guard go.
let blocking_branches = branches
.into_iter()
.filter(|branch| branch != "main" && !is_internal_system_branch(branch))

View file

@ -1,12 +1,12 @@
// Run state-machine has been removed (MR-771). Mutations now write directly
// to target tables and use the publisher's `expected_table_versions` CAS for
// cross-table OCC; the `__run__<id>` staging branches and `_graph_runs.lance`
// state machine no longer exist.
// The Run state machine has been removed. Mutations now write directly
// to target tables and use the publisher's `expected_table_versions`
// CAS for cross-table OCC; `__run__<id>` staging branches and the
// `_graph_runs.lance` state machine no longer exist.
//
// What remains is the branch-name predicate, kept as a defense-in-depth guard
// against users naming a public branch `__run__*`. MR-770 owns the production
// sweep of legacy `_graph_runs.lance` rows and stale `__run__*` branches; once
// that lands the predicate (and this file) can go too.
// What remains is the branch-name predicate, kept as a defense-in-depth
// guard against users naming a public branch `__run__*`. A future
// production sweep of legacy `_graph_runs.lance` rows and stale
// `__run__*` branches will let this predicate (and this file) go too.
pub(crate) const INTERNAL_RUN_BRANCH_PREFIX: &str = "__run__";

View file

@ -590,9 +590,9 @@ use super::staging::{MutationStaging, PendingMode};
/// is the source of truth for "where is Lance HEAD right now on
/// this table within this query."
///
/// The `inline_committed` reopen branch closes the cursor-bot HIGH
/// "multi-delete fails on same table" finding. The branch goes away
/// once Lance exposes a two-phase delete API
/// The `inline_committed` reopen branch closes the multi-delete-on-same-table
/// failure path that pre-staged-write engines inherited. The branch goes
/// away once Lance exposes a two-phase delete API
/// ([lance-format/lance#6658](https://github.com/lance-format/lance/issues/6658))
/// and we can stage deletes on the same path as inserts/updates.
async fn open_table_for_mutation(
@ -633,9 +633,9 @@ async fn open_table_for_mutation(
/// D₂ parse-time check: a single mutation query is either insert/update-only
/// or delete-only. Mixed → reject before any I/O.
///
/// Reason: under the staged-write rewire (MR-794 step 2+), inserts and
/// updates accumulate in memory and commit at end-of-query, while deletes
/// still inline-commit (Lance lacks a public two-phase delete in 4.0.0).
/// Reason: under the staged-write writer, inserts and updates
/// accumulate in memory and commit at end-of-query, while deletes still
/// inline-commit (Lance lacks a public two-phase delete in 4.0.0).
/// Mixing creates ordering hazards (same-row insert→delete becomes a no-op
/// because the staged insert isn't visible to delete; cascading deletes
/// of just-inserted edges break referential integrity by silent design).
@ -661,7 +661,7 @@ fn enforce_no_mixed_destructive_constructive(
"mutation '{}' on the same query mixes inserts/updates and deletes; \
split into separate mutations: (1) inserts and updates, then (2) deletes. \
This restriction lifts when Lance exposes a two-phase delete API \
(tracked: MR-793 / Lance-upstream).",
(tracked: lance-format/lance#6658).",
ir.name
)));
}
@ -706,11 +706,10 @@ impl Omnigraph {
) -> Result<MutationResult> {
self.ensure_schema_state_valid().await?;
let requested = Self::normalize_branch_name(branch)?;
// Reject internal `__run__*` / system-prefixed branches at the public
// write boundary. The pre-MR-771 path got this guard transitively via
// `begin_run`'s `ensure_public_branch_ref` call; the direct-publish
// path needs to assert it explicitly so a caller can't write to
// legacy or system staging branches by passing the prefix verbatim.
// Reject internal `__run__*` / system-prefixed branches at the
// public write boundary. Direct-publish paths assert this
// explicitly so a caller can't write to legacy or system
// staging branches by passing the prefix verbatim.
if let Some(name) = requested.as_deref() {
crate::db::ensure_public_branch_ref(name, "mutate")?;
}

View file

@ -1,4 +1,4 @@
//! Per-query staging accumulator for direct-publish writes (MR-794 step 2+).
//! Per-query staging accumulator for direct-publish writes.
//!
//! `MutationStaging` accumulates per-table input batches in memory during a
//! `mutate_as` or `load` query, then at end-of-query commits each touched
@ -74,7 +74,7 @@ pub(crate) struct StagedTablePath {
/// Per-query staging state.
///
/// Replaces the post-MR-771 inline-commit `MutationStaging.latest` map with
/// Replaces the legacy inline-commit `MutationStaging.latest` map with
/// an in-memory accumulator that defers all Lance HEAD advances to
/// end-of-query. After this rewire the bug class "Lance HEAD drifts ahead
/// of `__manifest`" is unreachable in `mutate_as` and `load` for inserts

View file

@ -155,11 +155,10 @@ impl Omnigraph {
pub async fn load(&mut self, branch: &str, data: &str, mode: LoadMode) -> Result<LoadResult> {
self.ensure_schema_state_valid().await?;
// Reject internal `__run__*` / system-prefixed branches at the public
// write boundary. The pre-MR-771 path got this guard transitively via
// `begin_run`'s `ensure_public_branch_ref` call; the direct-publish
// path needs to assert it explicitly so a caller can't write to
// legacy or system staging branches by passing the prefix verbatim.
// Reject internal `__run__*` / system-prefixed branches at the
// public write boundary. Direct-publish paths assert this
// explicitly so a caller can't write to legacy or system
// staging branches by passing the prefix verbatim.
crate::db::ensure_public_branch_ref(branch, "load")?;
// Branch convention: `None` represents `main`. Re-normalizing to
// `Some("main")` here would route the publisher commit through a
@ -1561,10 +1560,10 @@ pub(crate) async fn validate_edge_cardinality(
/// Lance version).
///
/// `mode` controls dedup behavior. `LoadMode::Merge` passes `Some("id")`
/// so committed edges that the load is *updating* (same edge id, possibly
/// changed `src`) are not double-counted (Cubic P1 finding on PR #68).
/// `LoadMode::Append` passes `None` because each line generates a fresh
/// ULID id that never collides with committed.
/// so committed edges that the load is *updating* (same edge id,
/// possibly changed `src`) are not double-counted. `LoadMode::Append`
/// passes `None` because each line generates a fresh ULID id that
/// never collides with committed.
async fn validate_edge_cardinality_with_pending_loader(
db: &Omnigraph,
branch: Option<&str>,

View file

@ -40,12 +40,12 @@ pub struct DeleteState {
/// A Lance write that has produced fragment files on object storage but is
/// not yet committed to the dataset's manifest. The staged-write primitives
/// are defined here for later integration in `MutationStaging`
/// (`exec/mutation.rs`) and the loader (`loader/mod.rs`) — those rewires
/// land in [MR-794](https://linear.app/modernrelay/issue/MR-794) step 2+.
/// The intent: defer Lance commits to end-of-query so a mid-query failure
/// leaves the touched table at the pre-mutation HEAD instead of drifting
/// ahead.
/// are consumed by `MutationStaging` (`exec/staging.rs`,
/// `exec/mutation.rs`) and the bulk loader (`loader/mod.rs`). The
/// intent: defer Lance commits to end-of-query so a mid-query failure
/// leaves the touched table at the pre-mutation HEAD instead of
/// drifting ahead. See `docs/runs.md` for the publisher-CAS contract
/// this builds on.
///
/// `transaction` is opaque from our side — Lance owns its semantics. We
/// commit it via `CommitBuilder::execute(transaction)` (see
@ -545,7 +545,7 @@ impl TableStore {
})
}
// ─── Staged-write API (MR-794) ───────────────────────────────────────────
// ─── Staged-write API ────────────────────────────────────────────────────
//
// These primitives wrap Lance's distributed-write API: each call writes
// fragment files to object storage but does NOT advance the dataset's
@ -672,16 +672,16 @@ impl TableStore {
///
/// This is intrinsic to the underlying Lance API: there is no public
/// way to make `MergeInsertBuilder` see uncommitted fragments. The
/// engine's mutation path enforces the rule "per touched table: all
/// stage_append OR exactly one stage_merge_insert" at parse time
/// (the D₂ check landing with [MR-794](https://linear.app/modernrelay/issue/MR-794)
/// step 2+ in `exec/mutation.rs`). Multi-table queries and append-chains
/// remain safe; only chained merges on a single table are rejected.
/// engine's `MutationStaging` accumulator works around this by
/// concatenating per-table batches in memory and issuing exactly
/// one `stage_merge_insert` per touched table at end-of-query (with
/// last-write-wins dedupe by id) — see `exec/staging.rs`. Direct
/// callers of this primitive must respect the contract themselves.
///
/// Lift path: either a Lance API extension that lets
/// `MergeInsertBuilder` accept additional staged fragments, or an
/// in-memory pre-merge here that folds prior staged batches into the
/// input stream. See `docs/runs.md` and MR-793.
/// input stream. See `docs/runs.md`.
pub async fn stage_merge_insert(
&self,
ds: Dataset,
@ -780,10 +780,10 @@ impl TableStore {
/// filtered scan even when their data would match. Staged-fragment
/// rows are silently absent from the result. `scanner.use_stats(false)`
/// does not fix this in lance 4.0.0. Callers needing correct filtered
/// reads against staged data should use a different strategy (the
/// engine's MR-794 step 2+ design uses in-memory pending-batch
/// accumulation + DataFusion `MemTable` instead — see
/// `.context/mr-794-step2-design.md`).
/// reads against staged data should use a different strategy the
/// engine's `MutationStaging` accumulator unions in-memory pending
/// batches with the committed scan via DataFusion `MemTable` (see
/// `scan_with_pending`).
///
/// This method remains on the surface for primitive-level testing
/// (basic stage + scan correctness without filters works) and for
@ -824,10 +824,10 @@ impl TableStore {
/// Scan committed via Lance + apply the same filter to in-memory
/// pending batches via DataFusion `MemTable`, concat the two result
/// streams. The replacement for `scan_with_staged` in engine code:
/// MR-794 step 2+ accumulates input batches in memory and unions
/// them with the committed snapshot at read time, sidestepping the
/// `Scanner::with_fragments` filter-pushdown limitation documented
/// on `scan_with_staged`.
/// the staged-write writer accumulates input batches in memory and
/// unions them with the committed snapshot at read time,
/// sidestepping the `Scanner::with_fragments` filter-pushdown
/// limitation documented on `scan_with_staged`.
///
/// `committed_ds` should be opened at the pre-mutation
/// `expected_version` (the same version captured in `MutationStaging::expected_versions`
@ -1251,7 +1251,7 @@ fn filter_out_rows_where_string_in(
/// Apply `projection` and `filter` to in-memory pending batches via a
/// fresh DataFusion `SessionContext`. Used by `scan_with_pending` for
/// the read-your-writes side of MR-794's in-memory accumulator.
/// the read-your-writes side of the in-memory staging accumulator.
///
/// `pending_batches` must be non-empty (the caller short-circuits on
/// empty).

View file

@ -522,11 +522,12 @@ query high_value() {
#[tokio::test]
async fn stale_handle_public_mutation_must_refresh_then_retry() {
// MR-771: with the Run state machine removed, the engine no longer
// auto-rebases stale-handle mutations onto the latest target head. The
// publisher's `expected_table_versions` CAS makes the contract explicit
// — a stale writer fails loudly with `ExpectedVersionMismatch` and the
// client decides whether to refresh-and-retry.
// With the Run state machine removed, the engine no longer
// auto-rebases stale-handle mutations onto the latest target head.
// The publisher's `expected_table_versions` CAS makes the contract
// explicit — a stale writer fails loudly with
// `ExpectedVersionMismatch` and the client decides whether to
// refresh-and-retry.
let dir = tempfile::tempdir().unwrap();
let _db = init_and_load(&dir).await;
drop(_db);

View file

@ -140,7 +140,8 @@ async fn schema_apply_recovers_partial_rename() {
assert_no_staging_files(dir.path());
}
/// Pin the documented "finalize → publisher residual" from MR-794.
/// Pin the documented "finalize → publisher residual" of the
/// staged-write commit path.
///
/// `MutationStaging::finalize` runs `commit_staged` per touched table
/// sequentially before the publisher commits the manifest. Lance has no

View file

@ -1,4 +1,4 @@
//! Tests for the direct-to-target write path (MR-771: Run state machine
//! Tests for the direct-to-target write path (Run state machine
//! removed). The Run/`__run__` staging branch / RunRecord state machine no
//! longer exists; mutations and loads write directly to target tables and
//! commit once via the publisher's `expected_table_versions` CAS.
@ -161,17 +161,17 @@ async fn multi_statement_mutation_is_atomic_with_read_your_writes() {
assert_eq!(friends.num_rows(), 1);
}
/// Mid-query partial failure: op-1 stages a Person insert, op-2 fails on
/// referential integrity (validate_edge_insert_endpoints). Under the
/// MR-794 staged-write rewire, op-1's batch lives in the in-memory
/// Mid-query partial failure: op-1 stages a Person insert, op-2 fails
/// on referential integrity (validate_edge_insert_endpoints). Under
/// the staged-write writer, op-1's batch lives in the in-memory
/// accumulator and never reaches Lance — Lance HEAD on `node:Person`
/// stays at the pre-mutation version. The publisher never publishes,
/// the manifest never advances, and the next mutation against the same
/// table proceeds normally (no `ExpectedVersionMismatch`).
///
/// This test pins the post-MR-794 contract:
/// - Failed multi-statement mutation surfaces a clear error, no manifest
/// commit, no observable state change.
/// Pins the staged-write contract:
/// - Failed multi-statement mutation surfaces a clear error, no
/// manifest commit, no observable state change.
/// - The touched tables stay queryable and writable from the next
/// query — Lance HEAD has not drifted.
#[tokio::test]
@ -228,7 +228,7 @@ async fn partial_failure_leaves_target_queryable_and_unblocks_next_mutation() {
&mixed_params(&[("$name", "Frank")], &[("$age", 33)]),
)
.await
.expect("next mutation on the touched table must succeed under MR-794");
.expect("next mutation on the touched table must succeed under the staged-write writer");
assert_eq!(
result.affected_nodes, 1,
"follow-up insert should report 1 affected node"
@ -251,7 +251,7 @@ async fn partial_failure_leaves_target_queryable_and_unblocks_next_mutation() {
/// success and one `ExpectedVersionMismatch`. The replacement for the old
/// `concurrent_conflicting_run_publish_fails_cleanly` test — the OCC fence
/// has moved from a graph-level run-publish merge into the publisher's
/// per-table CAS (MR-766 + MR-771).
/// per-table CAS.
///
/// Drives the race by interleaving two handles that captured the same
/// pre-write manifest snapshot: A commits first; B's commit then sees
@ -323,7 +323,7 @@ async fn concurrent_writers_one_succeeds_one_gets_expected_version_mismatch() {
);
}
/// The cancellation hole that motivated MR-771: dropping a mutation future
/// The cancellation hole that motivated removing the Run state machine: dropping a mutation future
/// mid-flight must not leave any graph-level state behind. With the run
/// state machine gone, only orphaned Lance fragments can remain — and those
/// are reclaimed by `omnigraph cleanup`.
@ -381,7 +381,7 @@ async fn cancelled_mutation_future_leaves_no_state() {
// `is_internal_system_branch`, so a runtime "no `__run__` branches" check
// would be vacuous. The structural property that no `__run__` branches
// can ever be created is enforced by deletion of `begin_run` etc. in
// MR-771 (verified by the build itself — those symbols no longer exist).
// (verified by the build itself — those symbols no longer exist).
//
// (1) The branch list is unchanged: cancellation/completion cannot
// synthesize new public branches.
@ -448,9 +448,10 @@ async fn repeated_loads_do_not_accumulate_branches() {
assert_eq!(db.branch_list().await.unwrap(), vec!["main".to_string()]);
}
/// User code must not be able to write to internal `__run__*` names. The
/// branch-name guard predicate is kept as defense-in-depth (MR-770 will
/// remove it once production legacy branches are swept).
/// User code must not be able to write to internal `__run__*` names.
/// The branch-name guard predicate is kept as defense-in-depth; it
/// will be removed once a future production sweep retires the legacy
/// branches.
#[tokio::test]
async fn public_branch_apis_reject_internal_run_refs() {
let dir = tempfile::tempdir().unwrap();
@ -480,11 +481,11 @@ async fn public_branch_apis_reject_internal_run_refs() {
);
}
// ─── MR-794: staged-write rewire — additional contract tests ────────────────
// ─── Staged-write rewire — additional contract tests ───────────────────────
/// Mutation queries used only by the MR-794 tests below. Kept in the test
/// file (not in helpers' shared `MUTATION_QUERIES`) to keep their scope
/// local to the staged-write coverage.
/// Mutation queries used only by the staged-write tests below. Kept in
/// the test file (not in helpers' shared `MUTATION_QUERIES`) to keep
/// their scope local to the staged-write coverage.
const STAGED_QUERIES: &str = r#"
query insert_two_persons($a_name: String, $a_age: I32, $b_name: String, $b_age: I32) {
insert Person { name: $a_name, age: $a_age }
@ -852,10 +853,10 @@ edge WorksAt: Person -> Company @card(0..1)
);
}
// ─── PR #68 review-comment fixes — pinned coverage ─────────────────────────
// ─── Chained-mutation correctness — pinned coverage ─────────────────────────
/// Codex P1 / Cubic P1 #1: chained `update` ops in one query must respect
/// each previous op's view of the rows. Without merge-shadow semantics on
/// Chained `update` ops in one query must respect each previous op's
/// view of the rows. Without merge-shadow semantics on
/// `scan_with_pending`, the second update sees the stale committed value
/// (the first update's row still appears in the Lance scan because the
/// pending side hasn't committed), the predicate matches it, and the
@ -932,8 +933,8 @@ async fn chained_updates_with_overlapping_predicate_respects_intermediate_value(
);
}
/// Cursor Bugbot HIGH: two `delete` ops on the same node table in one
/// query. Pre-fix, op-2's `open_table_for_mutation` went through
/// Two `delete` ops on the same node table in one query. Pre-fix,
/// op-2's `open_table_for_mutation` went through
/// `open_for_mutation_on_branch` which trips `ensure_expected_version`
/// (Lance HEAD has advanced past the manifest's pinned version after
/// op-1's inline-commit, but the manifest hasn't moved). Post-fix,
@ -984,7 +985,7 @@ async fn multi_statement_delete_on_same_node_table() {
}
}
/// Cursor Bugbot HIGH (cascade variant): deleting a node cascades to its
/// Cascade-then-explicit variant: deleting a node cascades to its
/// edges, advancing Lance HEAD on the edge table. A subsequent
/// `delete <Edge>` op in the same query must reopen at the
/// post-cascade-commit version of the edge table — not trip
@ -1030,11 +1031,10 @@ query cascade_then_explicit($name: String, $other: String) {
);
}
/// Codex P2 / Cursor Bugbot LOW / Cubic P2: the engine cardinality path
/// must enforce `min` bounds. Pre-fix the engine path silently dropped
/// the min check (a `let _ = card.min;` line). The loader path always
/// enforced both. Post-fix, both paths route through
/// `enforce_cardinality_bounds` which checks both bounds.
/// The engine cardinality path must enforce `min` bounds. Pre-fix the
/// engine path silently dropped the min check (a `let _ = card.min;`
/// line). The loader path always enforced both. Post-fix, both paths
/// route through `enforce_cardinality_bounds` which checks both bounds.
///
/// Build a custom schema with `Knows: Person -> Person @card(2..*)`.
/// Inserting a single Knows edge violates min=2. The mutation path must
@ -1085,8 +1085,8 @@ query add_friend($from: String, $to: String) {
);
}
/// Cubic P1 #2: `LoadMode::Merge` on edges must NOT double-count the
/// committed edge AND its updated pending replacement. Build a custom
/// `LoadMode::Merge` on edges must NOT double-count the committed
/// edge AND its updated pending replacement. Build a custom
/// schema where WorksAt has @card(0..1). Seed Alice with one WorksAt to
/// Acme. Then Merge-load the SAME edge id (so it's an update, not an
/// insert) pointing Alice's WorksAt at Bigco. Cardinality must count
@ -1132,12 +1132,11 @@ edge WorksAt: Person -> Company @card(0..1)
assert_eq!(count_rows(&db, "edge:WorksAt").await, 1);
}
/// Cubic P2 (follow-up to PR #68 review of commit 3223b51): a Merge load
/// whose input has TWO rows with the same edge id must be deduped at
/// cardinality-count time, not just at finalize. Without dedup, two
/// pending rows count twice → spurious `@card` violation. With dedup
/// (last-occurrence-wins, mirroring `dedupe_merge_batches_by_id`), the
/// pending side counts once.
/// A Merge load whose input has TWO rows with the same edge id must be
/// deduped at cardinality-count time, not just at finalize. Without
/// dedup, two pending rows count twice → spurious `@card` violation.
/// With dedup (last-occurrence-wins, mirroring
/// `dedupe_merge_batches_by_id`), the pending side counts once.
///
/// This is a separate path from `load_merge_mode_dedupes_edge_for_cardinality_count`
/// (which dedupes committed-vs-pending). Here we verify pending-vs-pending
@ -1182,11 +1181,11 @@ edge WorksAt: Person -> Company @card(0..1)
assert_eq!(count_rows(&db, "edge:WorksAt").await, 1);
}
/// Cubic P2 (follow-up): `scan_with_pending` must reject a call where
/// `key_column` is requested but the projection omits that column.
/// Without the up-front check, the helper silently degraded to union
/// semantics — letting a chained-update bug slip through unnoticed.
/// This test verifies the contract is enforced at the API boundary.
/// `scan_with_pending` must reject a call where `key_column` is
/// requested but the projection omits that column. Without the
/// up-front check, the helper silently degraded to union semantics —
/// letting a chained-update bug slip through unnoticed. This test
/// verifies the contract is enforced at the API boundary.
#[tokio::test]
async fn scan_with_pending_rejects_key_column_missing_from_projection() {
use arrow_array::{RecordBatch, StringArray};
@ -1275,20 +1274,19 @@ async fn scan_with_pending_rejects_key_column_missing_from_projection() {
);
}
/// Cursor Bugbot Medium (follow-up on commit 052b6e6): the
/// `PendingTable.schema` field is captured from the first `append_batch`
/// call and never updated. On a blob-bearing table, an `insert`
/// produces a full-schema batch (blob columns included) and an `update`
/// that doesn't assign every blob produces a subset-schema batch. Mixed
/// in one query, the second `append_batch` would silently push an
/// `PendingTable.schema` is captured from the first `append_batch` call
/// and never updated. On a blob-bearing table, an `insert` produces a
/// full-schema batch (blob columns included) and an `update` that
/// doesn't assign every blob produces a subset-schema batch. Mixed in
/// one query, the second `append_batch` would silently push an
/// incompatible batch — the mismatch surfaced eventually at
/// `concat_batches`/MemTable construction inside finalize, but the
/// failure point was distant from the offending op.
///
/// Post-fix: `append_batch` validates the new batch's schema against
/// the existing accumulator's schema and returns a typed error
/// directing the caller to split the mutation. The error fires at the
/// second op (the update), not at end-of-query.
/// `append_batch` validates the new batch's schema against the existing
/// accumulator's schema and returns a typed error directing the caller
/// to split the mutation. The error fires at the second op (the
/// update), not at end-of-query.
#[tokio::test]
async fn append_batch_rejects_mismatched_schema_in_blob_table_at_offending_op() {
use omnigraph::loader::{LoadMode, load_jsonl};

View file

@ -44,7 +44,7 @@ async fn s3_compatible_repo_lifecycle_works() {
.await
.unwrap();
// Direct-to-target load (MR-771): no run lifecycle, single publisher
// Direct-to-target load: no run lifecycle, single publisher
// commit lands the row.
reopened
.load(
@ -153,7 +153,7 @@ async fn s3_public_load_uses_hidden_run_and_publishes() {
.await
.unwrap();
// Direct-to-target writes (MR-771): no run state machine, just the
// Direct-to-target writes: no run state machine, just the
// published commit lands the row. Verify by reopening and reading.
let mut reopened = Omnigraph::open(&uri).await.unwrap();
let loaded = query_main(

View file

@ -1,20 +1,21 @@
//! Primitive-level tests for `TableStore`'s staged-write API
//! (MR-794 step 1). These exercise `stage_append`, `stage_merge_insert`,
//! `scan_with_staged`, and `count_rows_with_staged` directly against a
//! Lance dataset — no Omnigraph engine involved. The engine-level rewire
//! (MR-794 step 2+) lives in `tests/runs.rs` once it lands.
//! Primitive-level tests for `TableStore`'s staged-write API. These
//! exercise `stage_append`, `stage_merge_insert`, `scan_with_staged`,
//! and `count_rows_with_staged` directly against a Lance dataset — no
//! Omnigraph engine involved. The engine-level use of these primitives
//! is exercised by `tests/runs.rs`.
//!
//! Test surface here:
//! 1. `stage_append` + `scan_with_staged` shows committed + staged data
//! without duplicates.
//! 2. `stage_merge_insert` of a row that supersedes a committed fragment
//! surfaces only the rewritten row, not both — the
//! `removed_fragment_ids` dedup landed in PR #66's `730631c`.
//! 3. **Documented contract**: chained `stage_merge_insert` calls on the
//! same dataset whose source rows share keys produce duplicate rows in
//! `scan_with_staged`. The engine's parse-time D₂ check (MR-794 step
//! 2+) prevents callers from triggering this; this test pins the
//! primitive's behavior so a future change either (a) preserves it or
//! surfaces only the rewritten row, not both, via the
//! `removed_fragment_ids` dedup contract.
//! 3. **Documented contract**: chained `stage_merge_insert` calls on
//! the same dataset whose source rows share keys produce duplicate
//! rows in `scan_with_staged`. The engine's accumulator dedupes by
//! id at finalize time so this primitive-level pitfall doesn't
//! surface in production paths; this test pins the primitive's
//! behavior so a future change either (a) preserves it or
//! (b) consciously fixes it (and updates this test).
use arrow_array::{Array, Int32Array, RecordBatch, StringArray, UInt64Array};
@ -120,8 +121,9 @@ async fn stage_merge_insert_dedupes_superseded_committed_fragment() {
assert!(
!staged.removed_fragment_ids.is_empty(),
"merge_insert that rewrites a committed row must set removed_fragment_ids \
(this is the dedup invariant from PR #66 commit 730631c its absence \
was caught by Cubic/Cursor/Codex on PR #66)"
so the scan-with-staged composer can shadow the superseded committed \
fragment without it, the committed row and its rewrite both appear, \
producing duplicates by key"
);
// scan_with_staged: alice appears exactly once, with the new age.
@ -347,11 +349,11 @@ async fn stage_merge_insert_then_commit_persists_merged_view() {
/// silently absent. `scanner.use_stats(false)` does not bypass this in
/// lance 4.0.0.
///
/// This test pins the actual behavior so a future change either preserves
/// it (and updates the doc) or fixes it (and rewrites this test). The
/// engine's MR-794 step 2+ design uses in-memory pending-batch
/// accumulation + DataFusion `MemTable` for read-your-writes instead, so
/// production code is unaffected.
/// This test pins the actual behavior so a future change either
/// preserves it (and updates the doc) or fixes it (and rewrites this
/// test). The engine's `MutationStaging` accumulator unions in-memory
/// pending batches with the committed scan via DataFusion `MemTable`
/// for read-your-writes instead, so production code is unaffected.
#[tokio::test]
async fn scan_with_staged_with_filter_silently_drops_staged_rows() {
let dir = tempfile::tempdir().unwrap();
@ -485,6 +487,8 @@ async fn chained_stage_merge_insert_with_shared_key_documents_duplicate_behavior
here because this assertion failed: either (a) the primitive was \
improved to dedupe across stages (good update to assert == 1) \
or (b) something subtler broke (investigate before changing the \
assertion). See PR #67 Codex P1 thread + .context/mr-794-step2-design.md §3.1."
assertion). The engine's MutationStaging accumulator dedupes by \
id at finalize time so this primitive-level pitfall doesn't \
surface in production paths see exec/staging.rs."
);
}