MR-771: demote Run to direct-publish via expected_table_versions CAS

mutate_as and load now write directly to target tables and call the publisher once at the end with per-table expected versions; the Run state machine, _graph_runs.lance writers, __run__ staging branches, and server /runs/* endpoints are removed. Multi-statement mutations remain atomic at the manifest level via an in-memory MutationStaging accumulator that gives read-your-writes within a query and a single publish at the end. Concurrent-writer conflicts surface as ExpectedVersionMismatch (HTTP 409 manifest_conflict) instead of the old DivergentUpdate merge shape. Documents one known limitation in docs/runs.md: a multi-statement mid-query failure where op-N writes a Lance fragment and op-N+1 fails leaves Lance HEAD ahead of the manifest until a follow-up introduces per-table Lance branches. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-18 02:24:27 +02:00 · 2026-04-30 08:52:50 +02:00 · 2026-04-30 08:52:50 +02:00 · 35be20cb05
commit 35be20cb05
parent 4e5374a85e
28 changed files with 1188 additions and 3216 deletions
--- a/crates/omnigraph-server/src/api.rs
+++ b/crates/omnigraph-server/src/api.rs
@ -1,6 +1,4 @@
-use omnigraph::db::{
-    GraphCommit, MergeOutcome, ReadTarget, RunRecord, SchemaApplyResult, Snapshot,
-};
+use omnigraph::db::{GraphCommit, MergeOutcome, ReadTarget, SchemaApplyResult, Snapshot};
 use omnigraph::error::{MergeConflict, MergeConflictKind};
 use omnigraph::loader::{IngestResult, LoadMode};
 use omnigraph_compiler::SchemaMigrationStep;
@ -41,30 +39,6 @@ pub struct SnapshotOutput {
    pub tables: Vec<SnapshotTableOutput>,
 }

-#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
-pub struct RunOutput {
-    pub run_id: String,
-    pub target_branch: String,
-    pub run_branch: String,
-    pub base_snapshot_id: String,
-    pub base_manifest_version: u64,
-    pub operation_hash: Option<String>,
-    pub actor_id: Option<String>,
-    pub status: String,
-    pub published_snapshot_id: Option<String>,
-    /// Run creation time as Unix epoch microseconds.
-    #[schema(example = 1714000000000000i64)]
-    pub created_at: i64,
-    /// Last status change as Unix epoch microseconds.
-    #[schema(example = 1714000000000000i64)]
-    pub updated_at: i64,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
-pub struct RunListOutput {
-    pub runs: Vec<RunOutput>,
-}
-
 #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
 pub struct BranchCreateRequest {
    /// Parent branch to fork from. Defaults to `main`.
@ -368,6 +342,17 @@ pub enum ErrorCode {
    Internal,
 }

+/// Structured details for a publisher-level OCC failure. Surfaces alongside
+/// HTTP 409 when a write was rejected because the caller's pre-write view of
+/// one table's manifest version was stale relative to the current head. The
+/// expected/actual fields tell the client which table to refresh.
+#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
+pub struct ManifestConflictOutput {
+    pub table_key: String,
+    pub expected: u64,
+    pub actual: u64,
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
 pub struct ErrorOutput {
    pub error: String,
@ -375,6 +360,12 @@ pub struct ErrorOutput {
    pub code: Option<ErrorCode>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub merge_conflicts: Vec<MergeConflictOutput>,
+    /// Set when the conflict is a publisher CAS rejection
+    /// (`ManifestConflictDetails::ExpectedVersionMismatch`). The caller's
+    /// pre-write view of `table_key` was at version `expected` but the
+    /// manifest is now at `actual`. Refresh and retry.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub manifest_conflict: Option<ManifestConflictOutput>,
 }

 pub fn snapshot_payload(branch: &str, snapshot: &Snapshot) -> SnapshotOutput {
@ -408,22 +399,6 @@ pub fn schema_apply_output(uri: &str, result: SchemaApplyResult) -> SchemaApplyO
    }
 }

-pub fn run_output(run: &RunRecord) -> RunOutput {
-    RunOutput {
-        run_id: run.run_id.as_str().to_string(),
-        target_branch: run.target_branch.clone(),
-        run_branch: run.run_branch.clone(),
-        base_snapshot_id: run.base_snapshot_id.as_str().to_string(),
-        base_manifest_version: run.base_manifest_version,
-        operation_hash: run.operation_hash.clone(),
-        actor_id: run.actor_id.clone(),
-        status: run.status.as_str().to_string(),
-        published_snapshot_id: run.published_snapshot_id.clone(),
-        created_at: run.created_at,
-        updated_at: run.updated_at,
-    }
-}
-
 pub fn commit_output(commit: &GraphCommit) -> CommitOutput {
    CommitOutput {
        graph_commit_id: commit.graph_commit_id.clone(),
--- a/crates/omnigraph-server/src/lib.rs
+++ b/crates/omnigraph-server/src/lib.rs
@ -14,8 +14,8 @@ use api::{
    BranchCreateOutput, BranchCreateRequest, BranchDeleteOutput, BranchListOutput,
    BranchMergeOutput, BranchMergeRequest, ChangeOutput, ChangeRequest, CommitListOutput,
    CommitListQuery, ErrorCode, ErrorOutput, ExportRequest, HealthOutput, IngestOutput,
-    IngestRequest, ReadOutput, ReadRequest, RunListOutput, SchemaApplyOutput, SchemaApplyRequest,
-    SchemaOutput, SnapshotQuery, ingest_output, schema_apply_output, snapshot_payload,
+    IngestRequest, ReadOutput, ReadRequest, SchemaApplyOutput, SchemaApplyRequest, SchemaOutput,
+    SnapshotQuery, ingest_output, schema_apply_output, snapshot_payload,
 };
 use axum::body::{Body, Bytes};
 use axum::extract::DefaultBodyLimit;
@ -33,8 +33,8 @@ pub use config::{
    load_config,
 };
 use futures::stream;
-use omnigraph::db::{Omnigraph, ReadTarget, RunId};
-use omnigraph::error::{ManifestErrorKind, OmniError};
+use omnigraph::db::{Omnigraph, ReadTarget};
+use omnigraph::error::{ManifestConflictDetails, ManifestErrorKind, OmniError};
 use omnigraph_compiler::json_params_to_param_map;
 use omnigraph_compiler::query::parser::parse_query;
 use omnigraph_compiler::{JsonParamMode, ParamMap};
@ -82,10 +82,6 @@ fn hash_bearer_token(token: &str) -> BearerTokenHash {
        server_branch_create,
        server_branch_delete,
        server_branch_merge,
-        server_run_list,
-        server_run_show,
-        server_run_publish,
-        server_run_abort,
        server_commit_list,
        server_commit_show,
    ),
@ -159,6 +155,7 @@ pub struct ApiError {
    code: ErrorCode,
    message: String,
    merge_conflicts: Vec<api::MergeConflictOutput>,
+    manifest_conflict: Option<api::ManifestConflictOutput>,
 }

 impl AppState {
@ -280,6 +277,7 @@ impl ApiError {
            code: ErrorCode::Unauthorized,
            message: message.into(),
            merge_conflicts: Vec::new(),
+            manifest_conflict: None,
        }
    }

@ -289,6 +287,7 @@ impl ApiError {
            code: ErrorCode::Forbidden,
            message: message.into(),
            merge_conflicts: Vec::new(),
+            manifest_conflict: None,
        }
    }

@ -298,6 +297,7 @@ impl ApiError {
            code: ErrorCode::BadRequest,
            message: message.into(),
            merge_conflicts: Vec::new(),
+            manifest_conflict: None,
        }
    }

@ -307,6 +307,7 @@ impl ApiError {
            code: ErrorCode::NotFound,
            message: message.into(),
            merge_conflicts: Vec::new(),
+            manifest_conflict: None,
        }
    }

@ -316,6 +317,7 @@ impl ApiError {
            code: ErrorCode::Conflict,
            message: message.into(),
            merge_conflicts: Vec::new(),
+            manifest_conflict: None,
        }
    }

@ -325,6 +327,7 @@ impl ApiError {
            code: ErrorCode::Internal,
            message: message.into(),
            merge_conflicts: Vec::new(),
+            manifest_conflict: None,
        }
    }

@ -334,6 +337,20 @@ impl ApiError {
            code: ErrorCode::Conflict,
            message: summarize_merge_conflicts(&conflicts),
            merge_conflicts: conflicts,
+            manifest_conflict: None,
+        }
+    }
+
+    fn manifest_version_conflict(
+        message: String,
+        details: api::ManifestConflictOutput,
+    ) -> Self {
+        Self {
+            status: StatusCode::CONFLICT,
+            code: ErrorCode::Conflict,
+            message,
+            merge_conflicts: Vec::new(),
+            manifest_conflict: Some(details),
        }
    }

@ -344,7 +361,21 @@ impl ApiError {
            OmniError::Manifest(err) => match err.kind {
                ManifestErrorKind::BadRequest => Self::bad_request(err.message),
                ManifestErrorKind::NotFound => Self::not_found(err.message),
-                ManifestErrorKind::Conflict => Self::conflict(err.message),
+                ManifestErrorKind::Conflict => match err.details {
+                    Some(ManifestConflictDetails::ExpectedVersionMismatch {
+                        table_key,
+                        expected,
+                        actual,
+                    }) => Self::manifest_version_conflict(
+                        err.message,
+                        api::ManifestConflictOutput {
+                            table_key,
+                            expected,
+                            actual,
+                        },
+                    ),
+                    _ => Self::conflict(err.message),
+                },
                ManifestErrorKind::Internal => Self::internal(err.message),
            },
            OmniError::MergeConflicts(conflicts) => Self::merge_conflict(
@ -395,6 +426,7 @@ impl IntoResponse for ApiError {
                error: self.message,
                code: Some(self.code),
                merge_conflicts: self.merge_conflicts,
+                manifest_conflict: self.manifest_conflict,
            }),
        )
            .into_response()
@ -443,10 +475,6 @@ pub fn build_app(state: AppState) -> Router {
        )
        .route("/branches/{branch}", delete(server_branch_delete))
        .route("/branches/merge", post(server_branch_merge))
-        .route("/runs", get(server_run_list))
-        .route("/runs/{run_id}", get(server_run_show))
-        .route("/runs/{run_id}/publish", post(server_run_publish))
-        .route("/runs/{run_id}/abort", post(server_run_abort))
        .route("/commits", get(server_commit_list))
        .route("/commits/{commit_id}", get(server_commit_show))
        .route_layer(middleware::from_fn_with_state(
@ -1219,203 +1247,6 @@ async fn server_branch_merge(
    }))
 }

-#[utoipa::path(
-    get,
-    path = "/runs",
-    tag = "runs",
-    operation_id = "listRuns",
-    responses(
-        (status = 200, description = "List of runs", body = RunListOutput),
-        (status = 401, description = "Unauthorized", body = ErrorOutput),
-        (status = 403, description = "Forbidden", body = ErrorOutput),
-    ),
-    security(("bearer_token" = [])),
-)]
-/// List all runs.
-///
-/// A run is an ephemeral branch produced by an agent or background job. The
-/// list includes pending, in-progress, published, and aborted runs across
-/// all target branches. Read-only.
-async fn server_run_list(
-    State(state): State<AppState>,
-    actor: Option<Extension<AuthenticatedActor>>,
-) -> std::result::Result<Json<RunListOutput>, ApiError> {
-    authorize_request(
-        &state,
-        actor.as_ref().map(|Extension(actor)| actor),
-        PolicyRequest {
-            actor_id: actor
-                .as_ref()
-                .map(|Extension(actor)| actor.as_str().to_string())
-                .unwrap_or_default(),
-            action: PolicyAction::Read,
-            branch: None,
-            target_branch: None,
-        },
-    )?;
-    let runs = {
-        let db = Arc::clone(&state.db).read_owned().await;
-        db.list_runs().await.map_err(ApiError::from_omni)?
-    };
-    Ok(Json(RunListOutput {
-        runs: runs.iter().map(api::run_output).collect(),
-    }))
-}
-
-#[utoipa::path(
-    get,
-    path = "/runs/{run_id}",
-    tag = "runs",
-    operation_id = "getRun",
-    params(
-        ("run_id" = String, Path, description = "Run identifier"),
-    ),
-    responses(
-        (status = 200, description = "Run details", body = api::RunOutput),
-        (status = 401, description = "Unauthorized", body = ErrorOutput),
-        (status = 403, description = "Forbidden", body = ErrorOutput),
-        (status = 404, description = "Run not found", body = ErrorOutput),
-    ),
-    security(("bearer_token" = [])),
-)]
-/// Get a single run.
-///
-/// Returns the run's status, target/run branches, base snapshot, and (if
-/// published) the resulting snapshot id. Read-only.
-async fn server_run_show(
-    State(state): State<AppState>,
-    actor: Option<Extension<AuthenticatedActor>>,
-    Path(run_id): Path<String>,
-) -> std::result::Result<Json<api::RunOutput>, ApiError> {
-    authorize_request(
-        &state,
-        actor.as_ref().map(|Extension(actor)| actor),
-        PolicyRequest {
-            actor_id: actor
-                .as_ref()
-                .map(|Extension(actor)| actor.as_str().to_string())
-                .unwrap_or_default(),
-            action: PolicyAction::Read,
-            branch: None,
-            target_branch: None,
-        },
-    )?;
-    let run = {
-        let db = Arc::clone(&state.db).read_owned().await;
-        db.get_run(&RunId::new(run_id))
-            .await
-            .map_err(ApiError::from_omni)?
-    };
-    Ok(Json(api::run_output(&run)))
-}
-
-#[utoipa::path(
-    post,
-    path = "/runs/{run_id}/publish",
-    tag = "runs",
-    operation_id = "publishRun",
-    params(
-        ("run_id" = String, Path, description = "Run identifier"),
-    ),
-    responses(
-        (status = 200, description = "Run published", body = api::RunOutput),
-        (status = 401, description = "Unauthorized", body = ErrorOutput),
-        (status = 403, description = "Forbidden", body = ErrorOutput),
-        (status = 404, description = "Run not found", body = ErrorOutput),
-    ),
-    security(("bearer_token" = [])),
-)]
-/// Publish a run to its target branch.
-///
-/// Promotes the run's snapshot onto its `target_branch` as a new commit. The
-/// run must be in a publishable state. **Destructive** to the target branch.
-async fn server_run_publish(
-    State(state): State<AppState>,
-    actor: Option<Extension<AuthenticatedActor>>,
-    Path(run_id): Path<String>,
-) -> std::result::Result<Json<api::RunOutput>, ApiError> {
-    let run_id = RunId::new(run_id);
-    let actor_id = actor.as_ref().map(|Extension(actor)| actor.as_str());
-    let target_branch = {
-        let db = Arc::clone(&state.db).read_owned().await;
-        db.get_run(&run_id)
-            .await
-            .map_err(ApiError::from_omni)?
-            .target_branch
-    };
-    authorize_request(
-        &state,
-        actor.as_ref().map(|Extension(actor)| actor),
-        PolicyRequest {
-            actor_id: actor_id.map(str::to_string).unwrap_or_default(),
-            action: PolicyAction::RunPublish,
-            branch: None,
-            target_branch: Some(target_branch),
-        },
-    )?;
-    let run = {
-        let mut db = Arc::clone(&state.db).write_owned().await;
-        db.publish_run_as(&run_id, actor_id)
-            .await
-            .map_err(ApiError::from_omni)?;
-        db.get_run(&run_id).await.map_err(ApiError::from_omni)?
-    };
-    Ok(Json(api::run_output(&run)))
-}
-
-#[utoipa::path(
-    post,
-    path = "/runs/{run_id}/abort",
-    tag = "runs",
-    operation_id = "abortRun",
-    params(
-        ("run_id" = String, Path, description = "Run identifier"),
-    ),
-    responses(
-        (status = 200, description = "Run aborted", body = api::RunOutput),
-        (status = 401, description = "Unauthorized", body = ErrorOutput),
-        (status = 403, description = "Forbidden", body = ErrorOutput),
-        (status = 404, description = "Run not found", body = ErrorOutput),
-    ),
-    security(("bearer_token" = [])),
-)]
-/// Abort a run.
-///
-/// Marks the run as aborted and releases its working branch. **Irreversible**:
-/// the run cannot be resumed once aborted.
-async fn server_run_abort(
-    State(state): State<AppState>,
-    actor: Option<Extension<AuthenticatedActor>>,
-    Path(run_id): Path<String>,
-) -> std::result::Result<Json<api::RunOutput>, ApiError> {
-    let run_id = RunId::new(run_id);
-    let target_branch = {
-        let db = Arc::clone(&state.db).read_owned().await;
-        db.get_run(&run_id)
-            .await
-            .map_err(ApiError::from_omni)?
-            .target_branch
-    };
-    authorize_request(
-        &state,
-        actor.as_ref().map(|Extension(actor)| actor),
-        PolicyRequest {
-            actor_id: actor
-                .as_ref()
-                .map(|Extension(actor)| actor.as_str().to_string())
-                .unwrap_or_default(),
-            action: PolicyAction::RunAbort,
-            branch: None,
-            target_branch: Some(target_branch),
-        },
-    )?;
-    let run = {
-        let mut db = Arc::clone(&state.db).write_owned().await;
-        db.abort_run(&run_id).await.map_err(ApiError::from_omni)?
-    };
-    Ok(Json(api::run_output(&run)))
-}
-
 #[utoipa::path(
    get,
    path = "/commits",
--- a/crates/omnigraph-server/src/policy.rs
+++ b/crates/omnigraph-server/src/policy.rs
@ -23,8 +23,6 @@ pub enum PolicyAction {
    BranchCreate,
    BranchDelete,
    BranchMerge,
-    RunPublish,
-    RunAbort,
    Admin,
 }

@ -38,8 +36,6 @@ impl PolicyAction {
            Self::BranchCreate => "branch_create",
            Self::BranchDelete => "branch_delete",
            Self::BranchMerge => "branch_merge",
-            Self::RunPublish => "run_publish",
-            Self::RunAbort => "run_abort",
            Self::Admin => "admin",
        }
    }
@ -51,12 +47,7 @@ impl PolicyAction {
    fn uses_target_branch_scope(self) -> bool {
        matches!(
            self,
-            Self::BranchCreate
-                | Self::SchemaApply
-                | Self::BranchDelete
-                | Self::BranchMerge
-                | Self::RunPublish
-                | Self::RunAbort
+            Self::BranchCreate | Self::SchemaApply | Self::BranchDelete | Self::BranchMerge
        )
    }
 }
@ -79,8 +70,6 @@ impl FromStr for PolicyAction {
            "branch_create" => Ok(Self::BranchCreate),
            "branch_delete" => Ok(Self::BranchDelete),
            "branch_merge" => Ok(Self::BranchMerge),
-            "run_publish" => Ok(Self::RunPublish),
-            "run_abort" => Ok(Self::RunAbort),
            "admin" => Ok(Self::Admin),
            other => bail!("unknown policy action '{other}'"),
        }
@ -599,8 +588,6 @@ namespace Omnigraph {
    action "branch_create" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
    action "branch_delete" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
    action "branch_merge" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
-    action "run_publish" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
-    action "run_abort" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
    action "admin" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
 }
 "#
@ -732,7 +719,7 @@ rules:
  - id: admins-promote
    allow:
      actors: { group: admins }
-      actions: [branch_delete, branch_merge, run_publish]
+      actions: [branch_delete, branch_merge]
      target_branch_scope: protected
 "#,
        )