mr-686: bundle PR 0/1a/1b foundation + PR 2 catalog/schema_source ArcSwap

Bundles the working-tree state from the prior session (PR 0 bench harness,
PR 1a audit_actor_id removal, PR 1b WriteQueueManager + writer integration)
together with the first half of PR 2's interior-mutability foundation
(catalog and schema_source wrapped in Arc<ArcSwap<...>>). The two streams
overlap in 7 of the same files, so splitting them via git add -p was
impractical. Subsequent PR 2 steps land as separate atomic commits.

PR 0 — server-level concurrent /change bench harness
  - crates/omnigraph-server/examples/bench_concurrent_http.rs (new)
  - .context/bench-results/{baseline-main,after-pr1}/ (gitignored)

PR 1a — drop the audit_actor_id field, thread actor_id per call
  - removed Omnigraph::audit_actor_id and the swap-restore patterns in
    mutation.rs, merge.rs, loader/mod.rs
  - actor_id: Option<&str> threaded through MutationStaging::finalize,
    mutate_with_current_actor, ingest_with_current_actor,
    branch_merge_impl, branch_merge_on_current_target,
    commit_prepared_updates*, record_merge_commit,
    commit_updates_on_branch_with_expected
  - apply_schema and ensure_indices_for_branch pass None (system-attributed)

PR 1b — per-(table_key, branch) write queue + revalidation + sidecar
  - new crates/omnigraph/src/db/write_queue.rs with WriteQueueManager,
    acquire/acquire_many, sorted+deduped acquisition; 6 unit tests
  - Arc<WriteQueueManager> field on Omnigraph + db.write_queue() accessor
  - MutationStaging::finalize split into stage_all (Phase A, no queue)
    and StagedMutation::commit_all (Phase B, acquire_many + revalidate
    pins + sidecar + commit_staged); queue guards stay held across the
    manifest publish
  - delete-only mutations now emit recovery sidecars; revalidation
    extended to inline_committed tables
  - branch_merge_on_current_target, apply_schema_with_lock, and
    ensure_indices_for_branch acquire per-table queues for their
    touched tables

PR 2 Step B (partial) — catalog and schema_source via ArcSwap
  - catalog: Catalog -> Arc<ArcSwap<Catalog>>
  - schema_source: String -> Arc<ArcSwap<String>>
  - public accessors return Arc<Catalog> / Arc<String>; readers bind
    locally where the borrow has to outlive an expression
  - new pub(crate) store_catalog / store_schema_source helpers replace
    the field assignments in apply_schema and reload_schema_if_source_changed
  - 117 tests across lifecycle/end_to_end/branching/runs pass; engine
    lib + workspace compile clean

Coordinator wrap (Mutex) and the &mut self -> &self engine API
conversion follow in subsequent commits.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ragnor Comerford 2026-05-07 16:22:38 +02:00
parent cd780e2d37
commit fcb47620d3
No known key found for this signature in database
15 changed files with 1041 additions and 183 deletions

View file

@ -90,13 +90,8 @@ impl Omnigraph {
mode: LoadMode,
actor_id: Option<&str>,
) -> Result<IngestResult> {
let previous_actor = self.audit_actor_id.clone();
self.audit_actor_id = actor_id.map(str::to_string);
let result = self
.ingest_with_current_actor(branch, from, data, mode)
.await;
self.audit_actor_id = previous_actor;
result
self.ingest_with_current_actor(branch, from, data, mode, actor_id)
.await
}
pub async fn ingest_file(
@ -127,6 +122,7 @@ impl Omnigraph {
from: Option<&str>,
data: &str,
mode: LoadMode,
actor_id: Option<&str>,
) -> Result<IngestResult> {
self.ensure_schema_state_valid().await?;
let target_branch =
@ -143,7 +139,7 @@ impl Omnigraph {
.await?;
}
let result = self.load(&target_branch, data, mode).await?;
let result = self.load_as(&target_branch, data, mode, actor_id).await?;
Ok(IngestResult {
branch: target_branch,
base_branch,
@ -154,6 +150,16 @@ impl Omnigraph {
}
/// Loads `data` into `branch` using `mode`.
///
/// Thin wrapper that forwards to `load_as`, passing `None` as the
/// `actor_id`; call `load_as` directly to supply an actor id.
pub async fn load(&mut self, branch: &str, data: &str, mode: LoadMode) -> Result<LoadResult> {
self.load_as(branch, data, mode, None).await
}
pub async fn load_as(
&mut self,
branch: &str,
data: &str,
mode: LoadMode,
actor_id: Option<&str>,
) -> Result<LoadResult> {
self.ensure_schema_state_valid().await?;
// Reject internal `__run__*` / system-prefixed branches at the
// public write boundary. Direct-publish paths assert this
@ -169,7 +175,7 @@ impl Omnigraph {
// Direct-to-target writes: no Run state machine, no `__run__` staging
// branch. Cross-table OCC is enforced by the publisher's
// `expected_table_versions` CAS inside `load_jsonl_reader`.
self.load_direct_on_branch(requested.as_deref(), data, mode)
self.load_direct_on_branch(requested.as_deref(), data, mode, actor_id)
.await
}
@ -188,9 +194,10 @@ impl Omnigraph {
branch: Option<&str>,
data: &str,
mode: LoadMode,
actor_id: Option<&str>,
) -> Result<LoadResult> {
let reader = BufReader::new(Cursor::new(data.as_bytes()));
load_jsonl_reader(self, branch, reader, mode).await
load_jsonl_reader(self, branch, reader, mode, actor_id).await
}
}
@ -232,6 +239,7 @@ async fn load_jsonl_reader<R: BufRead>(
branch: Option<&str>,
reader: R,
mode: LoadMode,
actor_id: Option<&str>,
) -> Result<LoadResult> {
let catalog = db.catalog().clone();
@ -537,15 +545,19 @@ async fn load_jsonl_reader<R: BufRead>(
// Phase 4: Atomic manifest commit with publisher-level OCC.
if use_staging {
let (updates, expected_versions, sidecar_handle) = staging
.finalize(db, branch, crate::db::manifest::SidecarKind::Load)
let staged = staging.stage_all(db, branch).await?;
// `_queue_guards` holds per-(table_key, branch) write queues
// across the manifest publish below — see exec/mutation.rs for
// the rationale (interleaving prevention).
let (updates, expected_versions, sidecar_handle, _queue_guards) = staged
.commit_all(db, branch, crate::db::manifest::SidecarKind::Load, actor_id)
.await?;
// Same finalize → publisher residual as mutations: per-table
// staged commits have advanced Lance HEAD, but the manifest
// publish has not run yet. Reuse the mutation failpoint name so
// one failpoint pins the shared `MutationStaging` boundary.
crate::failpoints::maybe_fail("mutation.post_finalize_pre_publisher")?;
db.commit_updates_on_branch_with_expected(branch, &updates, &expected_versions)
db.commit_updates_on_branch_with_expected(branch, &updates, &expected_versions, actor_id)
.await?;
// The recovery sidecar protects the per-table commit_staged →
// manifest publish window. Phase C succeeded — clean up
@ -574,6 +586,7 @@ async fn load_jsonl_reader<R: BufRead>(
branch,
&overwrite_updates,
&overwrite_expected,
actor_id,
)
.await?;
}