mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-09 01:35:18 +02:00
chore: scrub Linear ticket numbers and review-bot mentions from code comments
OmniGraph is OSS; internal Linear ticket references and code-review-bot mentions in source-code comments don't help external readers and leak internal tooling. Replace ticket numbers (MR-XXX) with descriptive prose, drop linear.app URLs, and remove inline mentions of Cursor/Bugbot/Cubic/Codex review threads. Scope is limited to source-code comments (`crates/`). Docs under `docs/` keep their MR-XXX references — those are part of the established change-history narrative for in-repo docs and don't require a Linear account to find context for. No behavior changes; no public API changes. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
ea16c74329
commit
044ed46019
17 changed files with 164 additions and 162 deletions
|
|
@ -1905,7 +1905,7 @@ fn cli_fails_for_invalid_merge_requests() {
|
|||
);
|
||||
}
|
||||
|
||||
// MR-771: `omnigraph run list/show/publish/abort` subcommands removed
|
||||
// `omnigraph run list/show/publish/abort` subcommands removed
|
||||
// alongside the run state machine. Direct-to-target writes leave nothing
|
||||
// for these CLIs to manage. Audit history is now visible via
|
||||
// `omnigraph commit list` reading the commit graph.
|
||||
|
|
|
|||
|
|
@ -307,7 +307,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
|
|||
assert_eq!(main_read["row_count"], 1);
|
||||
assert_eq!(main_read["rows"][0]["p.name"], "Zoe");
|
||||
|
||||
// MR-771: `omnigraph run list` removed. Audit visible via commit list.
|
||||
// `omnigraph run list` removed. Audit visible via commit list.
|
||||
let commits_payload = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("commit")
|
||||
|
|
@ -597,8 +597,8 @@ fn local_cli_failed_load_keeps_target_state_unchanged() {
|
|||
snapshot_table_row_count(&repo, "edge:Knows"),
|
||||
knows_rows_before
|
||||
);
|
||||
// MR-771: failed loads no longer leave a RunRecord. The atomicity
|
||||
// guarantee is verified above (target tables are unchanged).
|
||||
// Failed loads leave no run record (the run lifecycle has been
|
||||
// removed); atomicity is verified above by the unchanged target.
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -631,9 +631,8 @@ fn local_cli_failed_change_keeps_target_state_unchanged() {
|
|||
.arg("--json"),
|
||||
));
|
||||
assert_eq!(friends_payload["row_count"], 2);
|
||||
// MR-771: failed mutations no longer leave a RunRecord. The atomicity
|
||||
// guarantee is verified above (the friends_of read above shows main
|
||||
// unchanged).
|
||||
// Failed mutations leave no run record (the run lifecycle has been
|
||||
// removed); atomicity is verified above by the unchanged target.
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -941,12 +940,13 @@ query vector_search($q: String) {
|
|||
assert_eq!(result["rows"][0]["d.slug"], "alpha-doc");
|
||||
}
|
||||
|
||||
// MR-771: the publisher CAS conflict shape is verified end-to-end at the
|
||||
// engine level in `crates/omnigraph/tests/runs.rs::concurrent_writers_one_succeeds_one_gets_expected_version_mismatch`
|
||||
// The publisher CAS conflict shape is verified end-to-end at the engine
|
||||
// level in
|
||||
// `crates/omnigraph/tests/runs.rs::concurrent_writers_one_succeeds_one_gets_expected_version_mismatch`
|
||||
// and at the HTTP boundary in
|
||||
// `crates/omnigraph-server/tests/server.rs::change_conflict_returns_manifest_conflict_409`.
|
||||
// The pre-MR-771 CLI-level race was timing-dependent; with direct-publish
|
||||
// the surface is the same engine path the unit test already covers.
|
||||
// A CLI-level race would be timing-dependent; with direct-publish the
|
||||
// surface is the same engine path the unit test already covers.
|
||||
|
||||
#[test]
|
||||
fn local_cli_policy_tooling_is_end_to_end_while_local_writes_stay_unenforced() {
|
||||
|
|
|
|||
|
|
@ -192,7 +192,7 @@ query insert_person($name: String, $age: I32) {
|
|||
assert_eq!(local_verify["row_count"], 1);
|
||||
assert_eq!(local_verify["rows"][0]["p.name"], "Mina");
|
||||
|
||||
// MR-771: `run publish` / `run list` removed. Direct-to-target writes
|
||||
// `run publish` / `run list` removed. Direct-to-target writes
|
||||
// already landed via the change call above; the commit graph is now
|
||||
// the audit surface (verified separately by `commit list`).
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1346,7 +1346,7 @@ async fn policy_blocks_non_admin_merge_to_main_and_allows_admin() {
|
|||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn authenticated_change_stamps_actor_on_commits() {
|
||||
// MR-771: with the Run state machine removed, actor_id is recorded
|
||||
// With the Run state machine removed, actor_id is recorded
|
||||
// directly on the commit graph (no intermediate run record).
|
||||
let (_temp, app) = app_for_loaded_repo_with_auth_tokens(&[("act-andrew", "token-one")]).await;
|
||||
|
||||
|
|
@ -2108,10 +2108,10 @@ query vector_search_string($q: String) {
|
|||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn change_conflict_returns_manifest_conflict_409() {
|
||||
// MR-771: a write that races with another writer surfaces as HTTP 409
|
||||
// with a structured `manifest_conflict` body — `table_key`, `expected`,
|
||||
// and `actual` — so clients can detect-and-retry without parsing the
|
||||
// message. (Replaces the old run-publish merge-conflict shape.)
|
||||
// A write that races with another writer surfaces as HTTP 409 with
|
||||
// a structured `manifest_conflict` body — `table_key`, `expected`,
|
||||
// and `actual` — so clients can detect-and-retry without parsing
|
||||
// the message.
|
||||
let temp = init_loaded_repo().await;
|
||||
let repo = repo_path(temp.path());
|
||||
|
||||
|
|
|
|||
|
|
@ -1430,12 +1430,12 @@ edge WorksAt: Person -> Company
|
|||
|
||||
#[tokio::test]
|
||||
async fn test_apply_schema_succeeds_after_load() {
|
||||
// MR-670 + MR-674: schema apply used to be blocked by leftover
|
||||
// __run__ branches. MR-670 added a defense-in-depth filter that
|
||||
// skips internal system branches. MR-674 made run branches
|
||||
// ephemeral on every terminal state, so in practice no __run__
|
||||
// branch survives publish — but the filter still guards the
|
||||
// invariant.
|
||||
// Historical: schema apply used to be blocked by leftover
|
||||
// `__run__` branches. A defense-in-depth filter now skips
|
||||
// internal system branches, and run branches were made
|
||||
// ephemeral on every terminal state — so in practice no
|
||||
// `__run__` branch survives publish. The filter still guards
|
||||
// the invariant.
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
||||
|
|
@ -1451,7 +1451,7 @@ edge WorksAt: Person -> Company
|
|||
let all_branches = db.coordinator.all_branches().await.unwrap();
|
||||
assert!(
|
||||
!all_branches.iter().any(|b| is_internal_run_branch(b)),
|
||||
"MR-674: run branch should be deleted after publish, got: {:?}",
|
||||
"run branch should be deleted after publish, got: {:?}",
|
||||
all_branches
|
||||
);
|
||||
|
||||
|
|
|
|||
|
|
@ -15,8 +15,8 @@
|
|||
//! retention. Destructive to version history — callers should gate this
|
||||
//! behind an explicit confirm flag at the CLI layer.
|
||||
//!
|
||||
//! Both walk every node + edge table on the `main` branch. Run branches are
|
||||
//! ephemeral by design (MR-670 / MR-674) so we do not optimize them.
|
||||
//! Both walk every node + edge table on the `main` branch. Run branches
|
||||
//! are ephemeral by design so we do not optimize them.
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
|
|
|
|||
|
|
@ -34,9 +34,9 @@ pub(super) async fn apply_schema_with_lock(
|
|||
let branches = db.coordinator.all_branches().await?;
|
||||
// Skip `main` and internal system branches. The schema-apply lock branch
|
||||
// is excluded because it is the cluster-wide schema-apply serializer.
|
||||
// `__run__*` branches are no longer created (MR-771); the filter remains
|
||||
// as defense-in-depth for legacy repos with leftover staging branches —
|
||||
// MR-770 will sweep them and this guard can go.
|
||||
// `__run__*` branches are no longer created; the filter remains as
|
||||
// defense-in-depth for legacy repos with leftover staging branches.
|
||||
// A future production sweep will let this guard go.
|
||||
let blocking_branches = branches
|
||||
.into_iter()
|
||||
.filter(|branch| branch != "main" && !is_internal_system_branch(branch))
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
// Run state-machine has been removed (MR-771). Mutations now write directly
|
||||
// to target tables and use the publisher's `expected_table_versions` CAS for
|
||||
// cross-table OCC; the `__run__<id>` staging branches and `_graph_runs.lance`
|
||||
// state machine no longer exist.
|
||||
// The Run state machine has been removed. Mutations now write directly
|
||||
// to target tables and use the publisher's `expected_table_versions`
|
||||
// CAS for cross-table OCC; `__run__<id>` staging branches and the
|
||||
// `_graph_runs.lance` state machine no longer exist.
|
||||
//
|
||||
// What remains is the branch-name predicate, kept as a defense-in-depth guard
|
||||
// against users naming a public branch `__run__*`. MR-770 owns the production
|
||||
// sweep of legacy `_graph_runs.lance` rows and stale `__run__*` branches; once
|
||||
// that lands the predicate (and this file) can go too.
|
||||
// What remains is the branch-name predicate, kept as a defense-in-depth
|
||||
// guard against users naming a public branch `__run__*`. A future
|
||||
// production sweep of legacy `_graph_runs.lance` rows and stale
|
||||
// `__run__*` branches will let this predicate (and this file) go too.
|
||||
|
||||
pub(crate) const INTERNAL_RUN_BRANCH_PREFIX: &str = "__run__";
|
||||
|
||||
|
|
|
|||
|
|
@ -590,9 +590,9 @@ use super::staging::{MutationStaging, PendingMode};
|
|||
/// is the source of truth for "where is Lance HEAD right now on
|
||||
/// this table within this query."
|
||||
///
|
||||
/// The `inline_committed` reopen branch closes the cursor-bot HIGH
|
||||
/// "multi-delete fails on same table" finding. The branch goes away
|
||||
/// once Lance exposes a two-phase delete API
|
||||
/// The `inline_committed` reopen branch closes the multi-delete-on-same-table
|
||||
/// failure path that pre-staged-write engines inherited. The branch goes
|
||||
/// away once Lance exposes a two-phase delete API
|
||||
/// ([lance-format/lance#6658](https://github.com/lance-format/lance/issues/6658))
|
||||
/// and we can stage deletes on the same path as inserts/updates.
|
||||
async fn open_table_for_mutation(
|
||||
|
|
@ -633,9 +633,9 @@ async fn open_table_for_mutation(
|
|||
/// D₂ parse-time check: a single mutation query is either insert/update-only
|
||||
/// or delete-only. Mixed → reject before any I/O.
|
||||
///
|
||||
/// Reason: under the staged-write rewire (MR-794 step 2+), inserts and
|
||||
/// updates accumulate in memory and commit at end-of-query, while deletes
|
||||
/// still inline-commit (Lance lacks a public two-phase delete in 4.0.0).
|
||||
/// Reason: under the staged-write writer, inserts and updates
|
||||
/// accumulate in memory and commit at end-of-query, while deletes still
|
||||
/// inline-commit (Lance lacks a public two-phase delete in 4.0.0).
|
||||
/// Mixing creates ordering hazards (same-row insert→delete becomes a no-op
|
||||
/// because the staged insert isn't visible to delete; cascading deletes
|
||||
/// of just-inserted edges break referential integrity by silent design).
|
||||
|
|
@ -661,7 +661,7 @@ fn enforce_no_mixed_destructive_constructive(
|
|||
"mutation '{}' on the same query mixes inserts/updates and deletes; \
|
||||
split into separate mutations: (1) inserts and updates, then (2) deletes. \
|
||||
This restriction lifts when Lance exposes a two-phase delete API \
|
||||
(tracked: MR-793 / Lance-upstream).",
|
||||
(tracked: lance-format/lance#6658).",
|
||||
ir.name
|
||||
)));
|
||||
}
|
||||
|
|
@ -706,11 +706,10 @@ impl Omnigraph {
|
|||
) -> Result<MutationResult> {
|
||||
self.ensure_schema_state_valid().await?;
|
||||
let requested = Self::normalize_branch_name(branch)?;
|
||||
// Reject internal `__run__*` / system-prefixed branches at the public
|
||||
// write boundary. The pre-MR-771 path got this guard transitively via
|
||||
// `begin_run`'s `ensure_public_branch_ref` call; the direct-publish
|
||||
// path needs to assert it explicitly so a caller can't write to
|
||||
// legacy or system staging branches by passing the prefix verbatim.
|
||||
// Reject internal `__run__*` / system-prefixed branches at the
|
||||
// public write boundary. Direct-publish paths assert this
|
||||
// explicitly so a caller can't write to legacy or system
|
||||
// staging branches by passing the prefix verbatim.
|
||||
if let Some(name) = requested.as_deref() {
|
||||
crate::db::ensure_public_branch_ref(name, "mutate")?;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
//! Per-query staging accumulator for direct-publish writes (MR-794 step 2+).
|
||||
//! Per-query staging accumulator for direct-publish writes.
|
||||
//!
|
||||
//! `MutationStaging` accumulates per-table input batches in memory during a
|
||||
//! `mutate_as` or `load` query, then at end-of-query commits each touched
|
||||
|
|
@ -74,7 +74,7 @@ pub(crate) struct StagedTablePath {
|
|||
|
||||
/// Per-query staging state.
|
||||
///
|
||||
/// Replaces the post-MR-771 inline-commit `MutationStaging.latest` map with
|
||||
/// Replaces the legacy inline-commit `MutationStaging.latest` map with
|
||||
/// an in-memory accumulator that defers all Lance HEAD advances to
|
||||
/// end-of-query. After this rewire the bug class "Lance HEAD drifts ahead
|
||||
/// of `__manifest`" is unreachable in `mutate_as` and `load` for inserts
|
||||
|
|
|
|||
|
|
@ -155,11 +155,10 @@ impl Omnigraph {
|
|||
|
||||
pub async fn load(&mut self, branch: &str, data: &str, mode: LoadMode) -> Result<LoadResult> {
|
||||
self.ensure_schema_state_valid().await?;
|
||||
// Reject internal `__run__*` / system-prefixed branches at the public
|
||||
// write boundary. The pre-MR-771 path got this guard transitively via
|
||||
// `begin_run`'s `ensure_public_branch_ref` call; the direct-publish
|
||||
// path needs to assert it explicitly so a caller can't write to
|
||||
// legacy or system staging branches by passing the prefix verbatim.
|
||||
// Reject internal `__run__*` / system-prefixed branches at the
|
||||
// public write boundary. Direct-publish paths assert this
|
||||
// explicitly so a caller can't write to legacy or system
|
||||
// staging branches by passing the prefix verbatim.
|
||||
crate::db::ensure_public_branch_ref(branch, "load")?;
|
||||
// Branch convention: `None` represents `main`. Re-normalizing to
|
||||
// `Some("main")` here would route the publisher commit through a
|
||||
|
|
@ -1561,10 +1560,10 @@ pub(crate) async fn validate_edge_cardinality(
|
|||
/// Lance version).
|
||||
///
|
||||
/// `mode` controls dedup behavior. `LoadMode::Merge` passes `Some("id")`
|
||||
/// so committed edges that the load is *updating* (same edge id, possibly
|
||||
/// changed `src`) are not double-counted (Cubic P1 finding on PR #68).
|
||||
/// `LoadMode::Append` passes `None` because each line generates a fresh
|
||||
/// ULID id that never collides with committed.
|
||||
/// so committed edges that the load is *updating* (same edge id,
|
||||
/// possibly changed `src`) are not double-counted. `LoadMode::Append`
|
||||
/// passes `None` because each line generates a fresh ULID id that
|
||||
/// never collides with committed.
|
||||
async fn validate_edge_cardinality_with_pending_loader(
|
||||
db: &Omnigraph,
|
||||
branch: Option<&str>,
|
||||
|
|
|
|||
|
|
@ -40,12 +40,12 @@ pub struct DeleteState {
|
|||
|
||||
/// A Lance write that has produced fragment files on object storage but is
|
||||
/// not yet committed to the dataset's manifest. The staged-write primitives
|
||||
/// are defined here for later integration in `MutationStaging`
|
||||
/// (`exec/mutation.rs`) and the loader (`loader/mod.rs`) — those rewires
|
||||
/// land in [MR-794](https://linear.app/modernrelay/issue/MR-794) step 2+.
|
||||
/// The intent: defer Lance commits to end-of-query so a mid-query failure
|
||||
/// leaves the touched table at the pre-mutation HEAD instead of drifting
|
||||
/// ahead.
|
||||
/// are consumed by `MutationStaging` (`exec/staging.rs`,
|
||||
/// `exec/mutation.rs`) and the bulk loader (`loader/mod.rs`). The
|
||||
/// intent: defer Lance commits to end-of-query so a mid-query failure
|
||||
/// leaves the touched table at the pre-mutation HEAD instead of
|
||||
/// drifting ahead. See `docs/runs.md` for the publisher-CAS contract
|
||||
/// this builds on.
|
||||
///
|
||||
/// `transaction` is opaque from our side — Lance owns its semantics. We
|
||||
/// commit it via `CommitBuilder::execute(transaction)` (see
|
||||
|
|
@ -545,7 +545,7 @@ impl TableStore {
|
|||
})
|
||||
}
|
||||
|
||||
// ─── Staged-write API (MR-794) ───────────────────────────────────────────
|
||||
// ─── Staged-write API ────────────────────────────────────────────────────
|
||||
//
|
||||
// These primitives wrap Lance's distributed-write API: each call writes
|
||||
// fragment files to object storage but does NOT advance the dataset's
|
||||
|
|
@ -672,16 +672,16 @@ impl TableStore {
|
|||
///
|
||||
/// This is intrinsic to the underlying Lance API: there is no public
|
||||
/// way to make `MergeInsertBuilder` see uncommitted fragments. The
|
||||
/// engine's mutation path enforces the rule "per touched table: all
|
||||
/// stage_append OR exactly one stage_merge_insert" at parse time
|
||||
/// (the D₂′ check landing with [MR-794](https://linear.app/modernrelay/issue/MR-794)
|
||||
/// step 2+ in `exec/mutation.rs`). Multi-table queries and append-chains
|
||||
/// remain safe; only chained merges on a single table are rejected.
|
||||
/// engine's `MutationStaging` accumulator works around this by
|
||||
/// concatenating per-table batches in memory and issuing exactly
|
||||
/// one `stage_merge_insert` per touched table at end-of-query (with
|
||||
/// last-write-wins dedupe by id) — see `exec/staging.rs`. Direct
|
||||
/// callers of this primitive must respect the contract themselves.
|
||||
///
|
||||
/// Lift path: either a Lance API extension that lets
|
||||
/// `MergeInsertBuilder` accept additional staged fragments, or an
|
||||
/// in-memory pre-merge here that folds prior staged batches into the
|
||||
/// input stream. See `docs/runs.md` and MR-793.
|
||||
/// input stream. See `docs/runs.md`.
|
||||
pub async fn stage_merge_insert(
|
||||
&self,
|
||||
ds: Dataset,
|
||||
|
|
@ -780,10 +780,10 @@ impl TableStore {
|
|||
/// filtered scan even when their data would match. Staged-fragment
|
||||
/// rows are silently absent from the result. `scanner.use_stats(false)`
|
||||
/// does not fix this in lance 4.0.0. Callers needing correct filtered
|
||||
/// reads against staged data should use a different strategy (the
|
||||
/// engine's MR-794 step 2+ design uses in-memory pending-batch
|
||||
/// accumulation + DataFusion `MemTable` instead — see
|
||||
/// `.context/mr-794-step2-design.md`).
|
||||
/// reads against staged data should use a different strategy — the
|
||||
/// engine's `MutationStaging` accumulator unions in-memory pending
|
||||
/// batches with the committed scan via DataFusion `MemTable` (see
|
||||
/// `scan_with_pending`).
|
||||
///
|
||||
/// This method remains on the surface for primitive-level testing
|
||||
/// (basic stage + scan correctness without filters works) and for
|
||||
|
|
@ -824,10 +824,10 @@ impl TableStore {
|
|||
/// Scan committed via Lance + apply the same filter to in-memory
|
||||
/// pending batches via DataFusion `MemTable`, concat the two result
|
||||
/// streams. The replacement for `scan_with_staged` in engine code:
|
||||
/// MR-794 step 2+ accumulates input batches in memory and unions
|
||||
/// them with the committed snapshot at read time, sidestepping the
|
||||
/// `Scanner::with_fragments` filter-pushdown limitation documented
|
||||
/// on `scan_with_staged`.
|
||||
/// the staged-write writer accumulates input batches in memory and
|
||||
/// unions them with the committed snapshot at read time,
|
||||
/// sidestepping the `Scanner::with_fragments` filter-pushdown
|
||||
/// limitation documented on `scan_with_staged`.
|
||||
///
|
||||
/// `committed_ds` should be opened at the pre-mutation
|
||||
/// `expected_version` (the same version captured in `MutationStaging::expected_versions`
|
||||
|
|
@ -1251,7 +1251,7 @@ fn filter_out_rows_where_string_in(
|
|||
|
||||
/// Apply `projection` and `filter` to in-memory pending batches via a
|
||||
/// fresh DataFusion `SessionContext`. Used by `scan_with_pending` for
|
||||
/// the read-your-writes side of MR-794's in-memory accumulator.
|
||||
/// the read-your-writes side of the in-memory staging accumulator.
|
||||
///
|
||||
/// `pending_batches` must be non-empty (the caller short-circuits on
|
||||
/// empty).
|
||||
|
|
|
|||
|
|
@ -522,11 +522,12 @@ query high_value() {
|
|||
|
||||
#[tokio::test]
|
||||
async fn stale_handle_public_mutation_must_refresh_then_retry() {
|
||||
// MR-771: with the Run state machine removed, the engine no longer
|
||||
// auto-rebases stale-handle mutations onto the latest target head. The
|
||||
// publisher's `expected_table_versions` CAS makes the contract explicit
|
||||
// — a stale writer fails loudly with `ExpectedVersionMismatch` and the
|
||||
// client decides whether to refresh-and-retry.
|
||||
// With the Run state machine removed, the engine no longer
|
||||
// auto-rebases stale-handle mutations onto the latest target head.
|
||||
// The publisher's `expected_table_versions` CAS makes the contract
|
||||
// explicit — a stale writer fails loudly with
|
||||
// `ExpectedVersionMismatch` and the client decides whether to
|
||||
// refresh-and-retry.
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let _db = init_and_load(&dir).await;
|
||||
drop(_db);
|
||||
|
|
|
|||
|
|
@ -140,7 +140,8 @@ async fn schema_apply_recovers_partial_rename() {
|
|||
assert_no_staging_files(dir.path());
|
||||
}
|
||||
|
||||
/// Pin the documented "finalize → publisher residual" from MR-794.
|
||||
/// Pin the documented "finalize → publisher residual" of the
|
||||
/// staged-write commit path.
|
||||
///
|
||||
/// `MutationStaging::finalize` runs `commit_staged` per touched table
|
||||
/// sequentially before the publisher commits the manifest. Lance has no
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
//! Tests for the direct-to-target write path (MR-771: Run state machine
|
||||
//! Tests for the direct-to-target write path (Run state machine
|
||||
//! removed). The Run/`__run__` staging branch / RunRecord state machine no
|
||||
//! longer exists; mutations and loads write directly to target tables and
|
||||
//! commit once via the publisher's `expected_table_versions` CAS.
|
||||
|
|
@ -161,17 +161,17 @@ async fn multi_statement_mutation_is_atomic_with_read_your_writes() {
|
|||
assert_eq!(friends.num_rows(), 1);
|
||||
}
|
||||
|
||||
/// Mid-query partial failure: op-1 stages a Person insert, op-2 fails on
|
||||
/// referential integrity (validate_edge_insert_endpoints). Under the
|
||||
/// MR-794 staged-write rewire, op-1's batch lives in the in-memory
|
||||
/// Mid-query partial failure: op-1 stages a Person insert, op-2 fails
|
||||
/// on referential integrity (validate_edge_insert_endpoints). Under
|
||||
/// the staged-write writer, op-1's batch lives in the in-memory
|
||||
/// accumulator and never reaches Lance — Lance HEAD on `node:Person`
|
||||
/// stays at the pre-mutation version. The publisher never publishes,
|
||||
/// the manifest never advances, and the next mutation against the same
|
||||
/// table proceeds normally (no `ExpectedVersionMismatch`).
|
||||
///
|
||||
/// This test pins the post-MR-794 contract:
|
||||
/// - Failed multi-statement mutation surfaces a clear error, no manifest
|
||||
/// commit, no observable state change.
|
||||
/// Pins the staged-write contract:
|
||||
/// - Failed multi-statement mutation surfaces a clear error, no
|
||||
/// manifest commit, no observable state change.
|
||||
/// - The touched tables stay queryable and writable from the next
|
||||
/// query — Lance HEAD has not drifted.
|
||||
#[tokio::test]
|
||||
|
|
@ -228,7 +228,7 @@ async fn partial_failure_leaves_target_queryable_and_unblocks_next_mutation() {
|
|||
&mixed_params(&[("$name", "Frank")], &[("$age", 33)]),
|
||||
)
|
||||
.await
|
||||
.expect("next mutation on the touched table must succeed under MR-794");
|
||||
.expect("next mutation on the touched table must succeed under the staged-write writer");
|
||||
assert_eq!(
|
||||
result.affected_nodes, 1,
|
||||
"follow-up insert should report 1 affected node"
|
||||
|
|
@ -251,7 +251,7 @@ async fn partial_failure_leaves_target_queryable_and_unblocks_next_mutation() {
|
|||
/// success and one `ExpectedVersionMismatch`. The replacement for the old
|
||||
/// `concurrent_conflicting_run_publish_fails_cleanly` test — the OCC fence
|
||||
/// has moved from a graph-level run-publish merge into the publisher's
|
||||
/// per-table CAS (MR-766 + MR-771).
|
||||
/// per-table CAS.
|
||||
///
|
||||
/// Drives the race by interleaving two handles that captured the same
|
||||
/// pre-write manifest snapshot: A commits first; B's commit then sees
|
||||
|
|
@ -323,7 +323,7 @@ async fn concurrent_writers_one_succeeds_one_gets_expected_version_mismatch() {
|
|||
);
|
||||
}
|
||||
|
||||
/// The cancellation hole that motivated MR-771: dropping a mutation future
|
||||
/// The cancellation hole that motivated removing the Run state machine: dropping a mutation future
|
||||
/// mid-flight must not leave any graph-level state behind. With the run
|
||||
/// state machine gone, only orphaned Lance fragments can remain — and those
|
||||
/// are reclaimed by `omnigraph cleanup`.
|
||||
|
|
@ -381,7 +381,7 @@ async fn cancelled_mutation_future_leaves_no_state() {
|
|||
// `is_internal_system_branch`, so a runtime "no `__run__` branches" check
|
||||
// would be vacuous. The structural property that no `__run__` branches
|
||||
// can ever be created is enforced by deletion of `begin_run` etc. in
|
||||
// MR-771 (verified by the build itself — those symbols no longer exist).
|
||||
// (verified by the build itself — those symbols no longer exist).
|
||||
//
|
||||
// (1) The branch list is unchanged: cancellation/completion cannot
|
||||
// synthesize new public branches.
|
||||
|
|
@ -448,9 +448,10 @@ async fn repeated_loads_do_not_accumulate_branches() {
|
|||
assert_eq!(db.branch_list().await.unwrap(), vec!["main".to_string()]);
|
||||
}
|
||||
|
||||
/// User code must not be able to write to internal `__run__*` names. The
|
||||
/// branch-name guard predicate is kept as defense-in-depth (MR-770 will
|
||||
/// remove it once production legacy branches are swept).
|
||||
/// User code must not be able to write to internal `__run__*` names.
|
||||
/// The branch-name guard predicate is kept as defense-in-depth; it
|
||||
/// will be removed once a future production sweep retires the legacy
|
||||
/// branches.
|
||||
#[tokio::test]
|
||||
async fn public_branch_apis_reject_internal_run_refs() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
|
|
@ -480,11 +481,11 @@ async fn public_branch_apis_reject_internal_run_refs() {
|
|||
);
|
||||
}
|
||||
|
||||
// ─── MR-794: staged-write rewire — additional contract tests ────────────────
|
||||
// ─── Staged-write rewire — additional contract tests ───────────────────────
|
||||
|
||||
/// Mutation queries used only by the MR-794 tests below. Kept in the test
|
||||
/// file (not in helpers' shared `MUTATION_QUERIES`) to keep their scope
|
||||
/// local to the staged-write coverage.
|
||||
/// Mutation queries used only by the staged-write tests below. Kept in
|
||||
/// the test file (not in helpers' shared `MUTATION_QUERIES`) to keep
|
||||
/// their scope local to the staged-write coverage.
|
||||
const STAGED_QUERIES: &str = r#"
|
||||
query insert_two_persons($a_name: String, $a_age: I32, $b_name: String, $b_age: I32) {
|
||||
insert Person { name: $a_name, age: $a_age }
|
||||
|
|
@ -852,10 +853,10 @@ edge WorksAt: Person -> Company @card(0..1)
|
|||
);
|
||||
}
|
||||
|
||||
// ─── PR #68 review-comment fixes — pinned coverage ──────────────────────────
|
||||
// ─── Chained-mutation correctness — pinned coverage ─────────────────────────
|
||||
|
||||
/// Codex P1 / Cubic P1 #1: chained `update` ops in one query must respect
|
||||
/// each previous op's view of the rows. Without merge-shadow semantics on
|
||||
/// Chained `update` ops in one query must respect each previous op's
|
||||
/// view of the rows. Without merge-shadow semantics on
|
||||
/// `scan_with_pending`, the second update sees the stale committed value
|
||||
/// (the first update's row still appears in the Lance scan because the
|
||||
/// pending side hasn't committed), the predicate matches it, and the
|
||||
|
|
@ -932,8 +933,8 @@ async fn chained_updates_with_overlapping_predicate_respects_intermediate_value(
|
|||
);
|
||||
}
|
||||
|
||||
/// Cursor Bugbot HIGH: two `delete` ops on the same node table in one
|
||||
/// query. Pre-fix, op-2's `open_table_for_mutation` went through
|
||||
/// Two `delete` ops on the same node table in one query. Pre-fix,
|
||||
/// op-2's `open_table_for_mutation` went through
|
||||
/// `open_for_mutation_on_branch` which trips `ensure_expected_version`
|
||||
/// (Lance HEAD has advanced past the manifest's pinned version after
|
||||
/// op-1's inline-commit, but the manifest hasn't moved). Post-fix,
|
||||
|
|
@ -984,7 +985,7 @@ async fn multi_statement_delete_on_same_node_table() {
|
|||
}
|
||||
}
|
||||
|
||||
/// Cursor Bugbot HIGH (cascade variant): deleting a node cascades to its
|
||||
/// Cascade-then-explicit variant: deleting a node cascades to its
|
||||
/// edges, advancing Lance HEAD on the edge table. A subsequent
|
||||
/// `delete <Edge>` op in the same query must reopen at the
|
||||
/// post-cascade-commit version of the edge table — not trip
|
||||
|
|
@ -1030,11 +1031,10 @@ query cascade_then_explicit($name: String, $other: String) {
|
|||
);
|
||||
}
|
||||
|
||||
/// Codex P2 / Cursor Bugbot LOW / Cubic P2: the engine cardinality path
|
||||
/// must enforce `min` bounds. Pre-fix the engine path silently dropped
|
||||
/// the min check (a `let _ = card.min;` line). The loader path always
|
||||
/// enforced both. Post-fix, both paths route through
|
||||
/// `enforce_cardinality_bounds` which checks both bounds.
|
||||
/// The engine cardinality path must enforce `min` bounds. Pre-fix the
|
||||
/// engine path silently dropped the min check (a `let _ = card.min;`
|
||||
/// line). The loader path always enforced both. Post-fix, both paths
|
||||
/// route through `enforce_cardinality_bounds` which checks both bounds.
|
||||
///
|
||||
/// Build a custom schema with `Knows: Person -> Person @card(2..*)`.
|
||||
/// Inserting a single Knows edge violates min=2. The mutation path must
|
||||
|
|
@ -1085,8 +1085,8 @@ query add_friend($from: String, $to: String) {
|
|||
);
|
||||
}
|
||||
|
||||
/// Cubic P1 #2: `LoadMode::Merge` on edges must NOT double-count the
|
||||
/// committed edge AND its updated pending replacement. Build a custom
|
||||
/// `LoadMode::Merge` on edges must NOT double-count the committed
|
||||
/// edge AND its updated pending replacement. Build a custom
|
||||
/// schema where WorksAt has @card(0..1). Seed Alice with one WorksAt to
|
||||
/// Acme. Then Merge-load the SAME edge id (so it's an update, not an
|
||||
/// insert) pointing Alice's WorksAt at Bigco. Cardinality must count
|
||||
|
|
@ -1132,12 +1132,11 @@ edge WorksAt: Person -> Company @card(0..1)
|
|||
assert_eq!(count_rows(&db, "edge:WorksAt").await, 1);
|
||||
}
|
||||
|
||||
/// Cubic P2 (follow-up to PR #68 review of commit 3223b51): a Merge load
|
||||
/// whose input has TWO rows with the same edge id must be deduped at
|
||||
/// cardinality-count time, not just at finalize. Without dedup, two
|
||||
/// pending rows count twice → spurious `@card` violation. With dedup
|
||||
/// (last-occurrence-wins, mirroring `dedupe_merge_batches_by_id`), the
|
||||
/// pending side counts once.
|
||||
/// A Merge load whose input has TWO rows with the same edge id must be
|
||||
/// deduped at cardinality-count time, not just at finalize. Without
|
||||
/// dedup, two pending rows count twice → spurious `@card` violation.
|
||||
/// With dedup (last-occurrence-wins, mirroring
|
||||
/// `dedupe_merge_batches_by_id`), the pending side counts once.
|
||||
///
|
||||
/// This is a separate path from `load_merge_mode_dedupes_edge_for_cardinality_count`
|
||||
/// (which dedupes committed-vs-pending). Here we verify pending-vs-pending
|
||||
|
|
@ -1182,11 +1181,11 @@ edge WorksAt: Person -> Company @card(0..1)
|
|||
assert_eq!(count_rows(&db, "edge:WorksAt").await, 1);
|
||||
}
|
||||
|
||||
/// Cubic P2 (follow-up): `scan_with_pending` must reject a call where
|
||||
/// `key_column` is requested but the projection omits that column.
|
||||
/// Without the up-front check, the helper silently degraded to union
|
||||
/// semantics — letting a chained-update bug slip through unnoticed.
|
||||
/// This test verifies the contract is enforced at the API boundary.
|
||||
/// `scan_with_pending` must reject a call where `key_column` is
|
||||
/// requested but the projection omits that column. Without the
|
||||
/// up-front check, the helper silently degraded to union semantics —
|
||||
/// letting a chained-update bug slip through unnoticed. This test
|
||||
/// verifies the contract is enforced at the API boundary.
|
||||
#[tokio::test]
|
||||
async fn scan_with_pending_rejects_key_column_missing_from_projection() {
|
||||
use arrow_array::{RecordBatch, StringArray};
|
||||
|
|
@ -1275,20 +1274,19 @@ async fn scan_with_pending_rejects_key_column_missing_from_projection() {
|
|||
);
|
||||
}
|
||||
|
||||
/// Cursor Bugbot Medium (follow-up on commit 052b6e6): the
|
||||
/// `PendingTable.schema` field is captured from the first `append_batch`
|
||||
/// call and never updated. On a blob-bearing table, an `insert`
|
||||
/// produces a full-schema batch (blob columns included) and an `update`
|
||||
/// that doesn't assign every blob produces a subset-schema batch. Mixed
|
||||
/// in one query, the second `append_batch` would silently push an
|
||||
/// `PendingTable.schema` is captured from the first `append_batch` call
|
||||
/// and never updated. On a blob-bearing table, an `insert` produces a
|
||||
/// full-schema batch (blob columns included) and an `update` that
|
||||
/// doesn't assign every blob produces a subset-schema batch. Mixed in
|
||||
/// one query, the second `append_batch` would silently push an
|
||||
/// incompatible batch — the mismatch surfaced eventually at
|
||||
/// `concat_batches`/MemTable construction inside finalize, but the
|
||||
/// failure point was distant from the offending op.
|
||||
///
|
||||
/// Post-fix: `append_batch` validates the new batch's schema against
|
||||
/// the existing accumulator's schema and returns a typed error
|
||||
/// directing the caller to split the mutation. The error fires at the
|
||||
/// second op (the update), not at end-of-query.
|
||||
/// `append_batch` validates the new batch's schema against the existing
|
||||
/// accumulator's schema and returns a typed error directing the caller
|
||||
/// to split the mutation. The error fires at the second op (the
|
||||
/// update), not at end-of-query.
|
||||
#[tokio::test]
|
||||
async fn append_batch_rejects_mismatched_schema_in_blob_table_at_offending_op() {
|
||||
use omnigraph::loader::{LoadMode, load_jsonl};
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ async fn s3_compatible_repo_lifecycle_works() {
|
|||
.await
|
||||
.unwrap();
|
||||
|
||||
// Direct-to-target load (MR-771): no run lifecycle, single publisher
|
||||
// Direct-to-target load: no run lifecycle, single publisher
|
||||
// commit lands the row.
|
||||
reopened
|
||||
.load(
|
||||
|
|
@ -153,7 +153,7 @@ async fn s3_public_load_uses_hidden_run_and_publishes() {
|
|||
.await
|
||||
.unwrap();
|
||||
|
||||
// Direct-to-target writes (MR-771): no run state machine, just the
|
||||
// Direct-to-target writes: no run state machine, just the
|
||||
// published commit lands the row. Verify by reopening and reading.
|
||||
let mut reopened = Omnigraph::open(&uri).await.unwrap();
|
||||
let loaded = query_main(
|
||||
|
|
|
|||
|
|
@ -1,20 +1,21 @@
|
|||
//! Primitive-level tests for `TableStore`'s staged-write API
|
||||
//! (MR-794 step 1). These exercise `stage_append`, `stage_merge_insert`,
|
||||
//! `scan_with_staged`, and `count_rows_with_staged` directly against a
|
||||
//! Lance dataset — no Omnigraph engine involved. The engine-level rewire
|
||||
//! (MR-794 step 2+) lives in `tests/runs.rs` once it lands.
|
||||
//! Primitive-level tests for `TableStore`'s staged-write API. These
|
||||
//! exercise `stage_append`, `stage_merge_insert`, `scan_with_staged`,
|
||||
//! and `count_rows_with_staged` directly against a Lance dataset — no
|
||||
//! Omnigraph engine involved. The engine-level use of these primitives
|
||||
//! is exercised by `tests/runs.rs`.
|
||||
//!
|
||||
//! Test surface here:
|
||||
//! 1. `stage_append` + `scan_with_staged` shows committed + staged data
|
||||
//! without duplicates.
|
||||
//! 2. `stage_merge_insert` of a row that supersedes a committed fragment
|
||||
//! surfaces only the rewritten row, not both — the
|
||||
//! `removed_fragment_ids` dedup landed in PR #66's `730631c`.
|
||||
//! 3. **Documented contract**: chained `stage_merge_insert` calls on the
|
||||
//! same dataset whose source rows share keys produce duplicate rows in
|
||||
//! `scan_with_staged`. The engine's parse-time D₂′ check (MR-794 step
|
||||
//! 2+) prevents callers from triggering this; this test pins the
|
||||
//! primitive's behavior so a future change either (a) preserves it or
|
||||
//! surfaces only the rewritten row, not both, via the
|
||||
//! `removed_fragment_ids` dedup contract.
|
||||
//! 3. **Documented contract**: chained `stage_merge_insert` calls on
|
||||
//! the same dataset whose source rows share keys produce duplicate
|
||||
//! rows in `scan_with_staged`. The engine's accumulator dedupes by
|
||||
//! id at finalize time so this primitive-level pitfall doesn't
|
||||
//! surface in production paths; this test pins the primitive's
|
||||
//! behavior so a future change either (a) preserves it or
|
||||
//! (b) consciously fixes it (and updates this test).
|
||||
|
||||
use arrow_array::{Array, Int32Array, RecordBatch, StringArray, UInt64Array};
|
||||
|
|
@ -120,8 +121,9 @@ async fn stage_merge_insert_dedupes_superseded_committed_fragment() {
|
|||
assert!(
|
||||
!staged.removed_fragment_ids.is_empty(),
|
||||
"merge_insert that rewrites a committed row must set removed_fragment_ids \
|
||||
(this is the dedup invariant from PR #66 commit 730631c — its absence \
|
||||
was caught by Cubic/Cursor/Codex on PR #66)"
|
||||
so the scan-with-staged composer can shadow the superseded committed \
|
||||
fragment — without it, the committed row and its rewrite both appear, \
|
||||
producing duplicates by key"
|
||||
);
|
||||
|
||||
// scan_with_staged: alice appears exactly once, with the new age.
|
||||
|
|
@ -347,11 +349,11 @@ async fn stage_merge_insert_then_commit_persists_merged_view() {
|
|||
/// silently absent. `scanner.use_stats(false)` does not bypass this in
|
||||
/// lance 4.0.0.
|
||||
///
|
||||
/// This test pins the actual behavior so a future change either preserves
|
||||
/// it (and updates the doc) or fixes it (and rewrites this test). The
|
||||
/// engine's MR-794 step 2+ design uses in-memory pending-batch
|
||||
/// accumulation + DataFusion `MemTable` for read-your-writes instead, so
|
||||
/// production code is unaffected.
|
||||
/// This test pins the actual behavior so a future change either
|
||||
/// preserves it (and updates the doc) or fixes it (and rewrites this
|
||||
/// test). The engine's `MutationStaging` accumulator unions in-memory
|
||||
/// pending batches with the committed scan via DataFusion `MemTable`
|
||||
/// for read-your-writes instead, so production code is unaffected.
|
||||
#[tokio::test]
|
||||
async fn scan_with_staged_with_filter_silently_drops_staged_rows() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
|
|
@ -485,6 +487,8 @@ async fn chained_stage_merge_insert_with_shared_key_documents_duplicate_behavior
|
|||
here because this assertion failed: either (a) the primitive was \
|
||||
improved to dedupe across stages (good — update to assert == 1) \
|
||||
or (b) something subtler broke (investigate before changing the \
|
||||
assertion). See PR #67 Codex P1 thread + .context/mr-794-step2-design.md §3.1."
|
||||
assertion). The engine's MutationStaging accumulator dedupes by \
|
||||
id at finalize time so this primitive-level pitfall doesn't \
|
||||
surface in production paths — see exec/staging.rs."
|
||||
);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue