refactor(engine): remove the legacy __run__ branch guard (MR-770)

With the v2→v3 migration sweeping stale `__run__*` branches off `__manifest`
on first read-write open, the defense-in-depth `is_internal_run_branch` guard
is no longer needed.

- delete `db/run_registry.rs`; drop the module + re-export from `db/mod.rs`
- collapse `is_internal_system_branch` to the schema-apply-lock check only
- `ensure_public_branch_ref`: drop the run-ref rejection; `__run__*` is now an
  ordinary branch name
- `branch_merge`: reject `is_internal_system_branch` (was run-only) so the
  schema-apply lock is rejected consistently with create/delete — a small,
  deliberate tightening
- update the inline schema-apply test + the writes integration tests
  (`public_branch_apis_reject_internal_run_refs` →
  `public_branch_apis_reject_internal_system_refs`, which also asserts
  `__run__*` now creates successfully)
- docs: flip the "pending production sweep / defense-in-depth" notes to
  "auto-swept by the v2→v3 migration"; document the read-only-open limitation

Known residual: the inert `_graph_runs.lance` / `_graph_run_actors.lance` bytes
remain until a `StorageAdapter::delete_prefix` primitive lands.
This commit is contained in:
Ragnor Comerford 2026-05-30 22:53:10 +02:00
parent 1aab951c25
commit 4ed2313a80
No known key found for this signature in database
11 changed files with 46 additions and 60 deletions

View file

@ -3,7 +3,6 @@ pub mod graph_coordinator;
pub mod manifest;
mod omnigraph;
mod recovery_audit;
mod run_registry;
mod schema_state;
pub(crate) mod write_queue;
@ -15,7 +14,6 @@ pub use omnigraph::{
CleanupPolicyOptions, InitOptions, MergeOutcome, Omnigraph, OpenMode, SchemaApplyOptions,
SchemaApplyResult, TableCleanupStats, TableOptimizeStats,
};
pub(crate) use run_registry::is_internal_run_branch;
pub(crate) const SCHEMA_APPLY_LOCK_BRANCH: &str = "__schema_apply_lock__";
@ -69,5 +67,8 @@ pub(crate) fn is_schema_apply_lock_branch(name: &str) -> bool {
}
pub(crate) fn is_internal_system_branch(name: &str) -> bool {
is_internal_run_branch(name) || is_schema_apply_lock_branch(name)
// Legacy `__run__*` staging branches (Run state machine, removed MR-771)
// are swept off `__manifest` by the v2→v3 internal-schema migration, so the
// only internal branch the engine still creates is the schema-apply lock.
is_schema_apply_lock_branch(name)
}

View file

@ -1444,12 +1444,6 @@ pub(crate) fn normalize_branch_name(branch: &str) -> Result<Option<String>> {
}
pub(crate) fn ensure_public_branch_ref(branch: &str, operation: &str) -> Result<()> {
if super::is_internal_run_branch(branch) {
return Err(OmniError::manifest(format!(
"{} does not allow internal run ref '{}'",
operation, branch
)));
}
if is_internal_system_branch(branch) {
return Err(OmniError::manifest(format!(
"{} does not allow internal system ref '{}'",
@ -1853,7 +1847,6 @@ fn json_value_from_array(array: &dyn Array, row: usize) -> Result<serde_json::Va
#[cfg(test)]
mod tests {
use super::*;
use crate::db::is_internal_run_branch;
use crate::db::manifest::ManifestCoordinator;
use async_trait::async_trait;
use serde_json::Value;
@ -2191,11 +2184,11 @@ edge WorksAt: Person -> Company
#[tokio::test]
async fn test_apply_schema_succeeds_after_load() {
// Historical: schema apply used to be blocked by leftover
// `__run__` branches. A defense-in-depth filter now skips
// internal system branches, and run branches were made
// ephemeral on every terminal state — so in practice no
// `__run__` branch survives publish. The filter still guards
// the invariant.
// `__run__` branches. The Run state machine was removed in
// MR-771, so a fresh graph never creates a `__run__` branch;
// legacy ones are swept by the v2→v3 manifest migration. This
// asserts the invariant a current graph upholds: publish leaves
// no `__run__` branch behind, so schema apply proceeds.
let dir = tempfile::tempdir().unwrap();
let uri = dir.path().to_str().unwrap();
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
@ -2210,8 +2203,8 @@ edge WorksAt: Person -> Company
let all_branches = db.coordinator.read().await.all_branches().await.unwrap();
assert!(
!all_branches.iter().any(|b| is_internal_run_branch(b)),
"run branch should be deleted after publish, got: {:?}",
!all_branches.iter().any(|b| b.starts_with("__run__")),
"no __run__ branch should exist after publish, got: {:?}",
all_branches
);

View file

@ -1,16 +0,0 @@
// The Run state machine has been removed. Mutations now write directly
// to target tables and use the publisher's `expected_table_versions`
// CAS for cross-table OCC; `__run__<id>` staging branches and the
// `_graph_runs.lance` state machine no longer exist.
//
// What remains is the branch-name predicate, kept as a defense-in-depth
// guard against users naming a public branch `__run__*`. A future
// production sweep of legacy `_graph_runs.lance` rows and stale
// `__run__*` branches will let this predicate (and this file) go too.
pub(crate) const INTERNAL_RUN_BRANCH_PREFIX: &str = "__run__";
pub(crate) fn is_internal_run_branch(name: &str) -> bool {
name.trim_start_matches('/')
.starts_with(INTERNAL_RUN_BRANCH_PREFIX)
}

View file

@ -1087,9 +1087,9 @@ impl Omnigraph {
target: &str,
actor_id: Option<&str>,
) -> Result<MergeOutcome> {
if is_internal_run_branch(source) || is_internal_run_branch(target) {
if is_internal_system_branch(source) || is_internal_system_branch(target) {
return Err(OmniError::manifest(format!(
"branch_merge does not allow internal run refs ('{}' -> '{}')",
"branch_merge does not allow internal system refs ('{}' -> '{}')",
source, target
)));
}

View file

@ -35,7 +35,7 @@ use time::format_description::well_known::Rfc3339;
use crate::db::commit_graph::CommitGraph;
use crate::db::manifest::ManifestCoordinator;
use crate::db::{MergeOutcome, Omnigraph, is_internal_run_branch};
use crate::db::{MergeOutcome, Omnigraph, is_internal_system_branch};
use crate::db::{ReadTarget, Snapshot};
use crate::embedding::EmbeddingClient;
use crate::error::{MergeConflict, MergeConflictKind, OmniError, Result};

View file

@ -371,11 +371,10 @@ async fn cancelled_mutation_future_leaves_no_state() {
// Cancel-safety property: no graph-level run/staging state remains.
//
// Note: `branch_list()` already filters `__run__*` via
// `is_internal_system_branch`, so a runtime "no `__run__` branches" check
// would be vacuous. The structural property that no `__run__` branches
// can ever be created is enforced by deletion of `begin_run` etc. in
// (verified by the build itself — those symbols no longer exist).
// No `__run__` branches can ever be created: the Run state machine
// (`begin_run` etc.) was deleted in MR-771 — verified by the build itself,
// those symbols no longer exist. Any legacy `__run__*` branch on an
// upgraded graph is swept by the v2→v3 manifest migration.
//
// (1) The branch list is unchanged: cancellation/completion cannot
// synthesize new public branches.
@ -442,34 +441,40 @@ async fn repeated_loads_do_not_accumulate_branches() {
assert_eq!(db.branch_list().await.unwrap(), vec!["main".to_string()]);
}
/// User code must not be able to write to internal `__run__*` names.
/// The branch-name guard predicate is kept as defense-in-depth; it
/// will be removed once a future production sweep retires the legacy
/// branches.
/// After MR-770, `__run__*` is an ordinary branch name — the Run state machine
/// and its `is_internal_run_branch` guard are gone. The surviving internal-ref
/// guard still rejects the active `__schema_apply_lock__` branch on the public
/// create/merge APIs.
#[tokio::test]
async fn public_branch_apis_reject_internal_run_refs() {
async fn public_branch_apis_reject_internal_system_refs() {
let dir = tempfile::tempdir().unwrap();
let mut db = init_and_load(&dir).await;
let create_err = db.branch_create("__run__synthetic").await.unwrap_err();
// `__run__*` is no longer reserved — creating it now succeeds.
db.branch_create("__run__formerly_reserved")
.await
.expect("__run__ prefix is a normal branch name post-MR-770");
// The schema-apply lock branch is still rejected on public branch APIs.
let create_err = db.branch_create("__schema_apply_lock__").await.unwrap_err();
let OmniError::Manifest(err) = create_err else {
panic!("expected Manifest error");
};
assert!(
err.message.contains("internal run ref"),
err.message.contains("internal system ref"),
"unexpected error: {}",
err.message
);
let merge_err = db
.branch_merge("__run__synthetic", "main")
.branch_merge("__schema_apply_lock__", "main")
.await
.unwrap_err();
let OmniError::Manifest(err) = merge_err else {
panic!("expected Manifest error");
};
assert!(
err.message.contains("internal run refs"),
err.message.contains("internal system refs"),
"unexpected error: {}",
err.message
);