mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-21 02:28:07 +02:00
441 lines
17 KiB
Rust
441 lines
17 KiB
Rust
//! The recovery sweep: RFC-004's roll-forward-only sidecar
//! classification (moved verbatim from lib.rs in the modularization).
|
|
|
|
use super::*;
|
|
|
|
/// Recovery sweep (RFC-004 §D3): runs at the start of every state-mutating
|
|
/// cluster command, under the state lock, before the command's own work.
|
|
/// Roll-forward-only — the engine's own sidecars make each graph-level
|
|
/// operation atomic within the graph, so the cluster never rolls a graph
|
|
/// back; it converges the ledger to observable reality or refuses loudly.
|
|
/// Mutations ride the calling command's CAS-checked state write; completed
|
|
/// sidecars are deleted only after that write lands.
|
|
pub(crate) async fn sweep_recovery_sidecars(
|
|
backend: &ClusterStore,
|
|
state: &mut ClusterState,
|
|
diagnostics: &mut Vec<Diagnostic>,
|
|
) -> SweepOutcome {
|
|
let mut outcome = SweepOutcome::default();
|
|
for (path, sidecar) in backend.list_recovery_sidecars(diagnostics).await {
|
|
match sidecar.kind {
|
|
RecoverySidecarKind::GraphCreate => {
|
|
sweep_graph_create_sidecar(
|
|
backend,
|
|
path,
|
|
sidecar,
|
|
state,
|
|
diagnostics,
|
|
&mut outcome,
|
|
)
|
|
.await;
|
|
}
|
|
RecoverySidecarKind::SchemaApply => {
|
|
sweep_schema_apply_sidecar(path, sidecar, state, diagnostics, &mut outcome).await;
|
|
}
|
|
RecoverySidecarKind::GraphDelete => {
|
|
sweep_graph_delete_sidecar(
|
|
backend,
|
|
path,
|
|
sidecar,
|
|
state,
|
|
diagnostics,
|
|
&mut outcome,
|
|
)
|
|
.await;
|
|
}
|
|
}
|
|
}
|
|
outcome
|
|
}
|
|
|
|
pub(crate) async fn sweep_graph_create_sidecar(
|
|
backend: &ClusterStore,
|
|
path: String,
|
|
sidecar: RecoverySidecar,
|
|
state: &mut ClusterState,
|
|
diagnostics: &mut Vec<Diagnostic>,
|
|
outcome: &mut SweepOutcome,
|
|
) {
|
|
let graph_address = graph_address(&sidecar.graph_id);
|
|
let schema_addr = schema_address(&sidecar.graph_id);
|
|
|
|
// Row 1: nothing moved — the init never landed. The sidecar is pure
|
|
// intent; retire it (deferred to the command's post-CAS cleanup, like
|
|
// every other completed sidecar — a failed CAS simply re-sweeps it) and
|
|
// let the command's own plan re-propose the create.
|
|
if !backend.graph_root_exists(&sidecar.graph_uri).await {
|
|
outcome.completed_sidecars.push(path);
|
|
return;
|
|
}
|
|
|
|
match Omnigraph::open_read_only(&sidecar.graph_uri).await {
|
|
Ok(db) => {
|
|
let live_digest = sha256_hex(db.schema_source().as_bytes());
|
|
let recorded = state
|
|
.applied_revision
|
|
.resources
|
|
.get(&schema_addr)
|
|
.map(|resource| resource.digest.clone());
|
|
if recorded.as_deref() == Some(live_digest.as_str()) {
|
|
// Row 2: crash fell between the state CAS and sidecar delete.
|
|
outcome.completed_sidecars.push(path);
|
|
} else if live_digest == sidecar.desired_schema_digest {
|
|
// Row 4: the create completed on the graph; roll the cluster
|
|
// state forward to observable reality.
|
|
state.applied_revision.resources.insert(
|
|
schema_addr.clone(),
|
|
StateResource {
|
|
digest: live_digest.clone(),
|
|
applies_to: None,
|
|
embedding_provider: None,
|
|
embedding_profile: None,
|
|
},
|
|
);
|
|
let query_digests = state_query_digests_for_graph(state, &sidecar.graph_id);
|
|
let embedding_provider = state_graph_embedding_provider(state, &sidecar.graph_id);
|
|
let embedding_provider_digest =
|
|
state_embedding_provider_digest(state, embedding_provider.as_deref());
|
|
let composite = graph_digest(
|
|
&sidecar.graph_id,
|
|
Some(&live_digest),
|
|
Some(&query_digests),
|
|
embedding_provider.as_deref(),
|
|
embedding_provider_digest.as_ref(),
|
|
);
|
|
state.applied_revision.resources.insert(
|
|
graph_address.clone(),
|
|
StateResource {
|
|
digest: composite,
|
|
applies_to: None,
|
|
embedding_provider,
|
|
embedding_profile: None,
|
|
},
|
|
);
|
|
set_resource_status_applied(state, &graph_address);
|
|
set_resource_status_applied(state, &schema_addr);
|
|
state.recovery_records.insert(
|
|
sidecar.operation_id.clone(),
|
|
json!({
|
|
"kind": "graph_create",
|
|
"graph_id": sidecar.graph_id,
|
|
"outcome": "rolled_forward",
|
|
"recovered_at": now_rfc3339(),
|
|
"actor": sidecar.actor,
|
|
}),
|
|
);
|
|
diagnostics.push(Diagnostic::warning(
|
|
"cluster_recovery_rolled_forward",
|
|
graph_address.clone(),
|
|
"an interrupted graph create had completed on the graph; cluster state was rolled forward to match",
|
|
));
|
|
outcome.completed_sidecars.push(path);
|
|
} else {
|
|
// Row 6: the graph moved to something the sidecar did not
|
|
// intend. Refuse to guess; require refresh + operator re-plan.
|
|
set_resource_status(
|
|
state,
|
|
&graph_address,
|
|
ResourceLifecycleStatus::Drifted,
|
|
"actual_applied_state_pending",
|
|
"graph state does not match the interrupted operation; run `cluster refresh` and re-plan",
|
|
);
|
|
set_resource_status(
|
|
state,
|
|
&schema_addr,
|
|
ResourceLifecycleStatus::Drifted,
|
|
"actual_applied_state_pending",
|
|
"graph state does not match the interrupted operation; run `cluster refresh` and re-plan",
|
|
);
|
|
diagnostics.push(Diagnostic::warning(
|
|
"cluster_recovery_pending",
|
|
graph_address.clone(),
|
|
"an interrupted graph create left unexpected graph state; graph-moving work is blocked until repaired",
|
|
));
|
|
outcome.pending_graphs.insert(sidecar.graph_id.clone());
|
|
}
|
|
}
|
|
Err(err) => {
|
|
// Row 5: partial root (the engine's documented init gap). Never
|
|
// auto-delete — reconciler deletes are the same data-loss class
|
|
// as human deletes; the operator removes the root explicitly.
|
|
set_resource_status(
|
|
state,
|
|
&graph_address,
|
|
ResourceLifecycleStatus::Error,
|
|
"graph_create_incomplete",
|
|
"graph root exists but cannot be opened; remove the graph root and re-run `cluster apply`",
|
|
);
|
|
set_resource_status(
|
|
state,
|
|
&schema_addr,
|
|
ResourceLifecycleStatus::Error,
|
|
"graph_create_incomplete",
|
|
"graph root exists but cannot be opened; remove the graph root and re-run `cluster apply`",
|
|
);
|
|
diagnostics.push(Diagnostic::error(
|
|
"graph_create_incomplete",
|
|
graph_address.clone(),
|
|
format!(
|
|
"graph root '{}' exists but cannot be opened ({err}); remove the graph root and re-run `cluster apply`",
|
|
sidecar.graph_uri
|
|
),
|
|
));
|
|
outcome.pending_graphs.insert(sidecar.graph_id.clone());
|
|
}
|
|
}
|
|
}
|
|
|
|
pub(crate) async fn sweep_schema_apply_sidecar(
|
|
path: String,
|
|
sidecar: RecoverySidecar,
|
|
state: &mut ClusterState,
|
|
diagnostics: &mut Vec<Diagnostic>,
|
|
outcome: &mut SweepOutcome,
|
|
) {
|
|
let graph_address = graph_address(&sidecar.graph_id);
|
|
let schema_addr = schema_address(&sidecar.graph_id);
|
|
|
|
// Digest-based classification: robust to unrelated manifest movement;
|
|
// the sidecar's version pins stay forensic.
|
|
let live_digest = match Omnigraph::open_read_only(&sidecar.graph_uri).await {
|
|
Ok(db) => sha256_hex(db.schema_source().as_bytes()),
|
|
Err(err) => {
|
|
// Cannot verify the interrupted operation — refuse to guess.
|
|
diagnostics.push(Diagnostic::warning(
|
|
"cluster_recovery_pending",
|
|
graph_address.clone(),
|
|
format!(
|
|
"an interrupted schema apply cannot be verified (graph '{}' did not open: {err}); graph-moving work is blocked until repaired",
|
|
sidecar.graph_uri
|
|
),
|
|
));
|
|
outcome.pending_graphs.insert(sidecar.graph_id.clone());
|
|
return;
|
|
}
|
|
};
|
|
|
|
let recorded = state
|
|
.applied_revision
|
|
.resources
|
|
.get(&schema_addr)
|
|
.map(|resource| resource.digest.clone());
|
|
if recorded.as_deref() == Some(live_digest.as_str()) {
|
|
// Ledger consistent with the live graph (the apply never landed, or
|
|
// landed and was recorded): the sidecar is stale intent — retire it.
|
|
outcome.completed_sidecars.push(path);
|
|
} else if live_digest == sidecar.desired_schema_digest {
|
|
// RFC-004 §D3 row 3: the schema apply completed on the graph; roll
|
|
// the cluster state forward to observable reality.
|
|
state.applied_revision.resources.insert(
|
|
schema_addr.clone(),
|
|
StateResource {
|
|
digest: live_digest.clone(),
|
|
applies_to: None,
|
|
embedding_provider: None,
|
|
embedding_profile: None,
|
|
},
|
|
);
|
|
let query_digests = state_query_digests_for_graph(state, &sidecar.graph_id);
|
|
let embedding_provider = state_graph_embedding_provider(state, &sidecar.graph_id);
|
|
let embedding_provider_digest =
|
|
state_embedding_provider_digest(state, embedding_provider.as_deref());
|
|
let composite = graph_digest(
|
|
&sidecar.graph_id,
|
|
Some(&live_digest),
|
|
Some(&query_digests),
|
|
embedding_provider.as_deref(),
|
|
embedding_provider_digest.as_ref(),
|
|
);
|
|
state.applied_revision.resources.insert(
|
|
graph_address.clone(),
|
|
StateResource {
|
|
digest: composite,
|
|
applies_to: None,
|
|
embedding_provider,
|
|
embedding_profile: None,
|
|
},
|
|
);
|
|
set_resource_status_applied(state, &graph_address);
|
|
set_resource_status_applied(state, &schema_addr);
|
|
state.recovery_records.insert(
|
|
sidecar.operation_id.clone(),
|
|
json!({
|
|
"kind": "schema_apply",
|
|
"graph_id": sidecar.graph_id,
|
|
"outcome": "rolled_forward",
|
|
"recovered_at": now_rfc3339(),
|
|
"actor": sidecar.actor,
|
|
}),
|
|
);
|
|
diagnostics.push(Diagnostic::warning(
|
|
"cluster_recovery_rolled_forward",
|
|
graph_address.clone(),
|
|
"an interrupted schema apply had completed on the graph; cluster state was rolled forward to match",
|
|
));
|
|
outcome.completed_sidecars.push(path);
|
|
} else {
|
|
// Row 6: live schema is neither the recorded nor the desired digest.
|
|
set_resource_status(
|
|
state,
|
|
&graph_address,
|
|
ResourceLifecycleStatus::Drifted,
|
|
"actual_applied_state_pending",
|
|
"graph state does not match the interrupted operation; run `cluster refresh` and re-plan",
|
|
);
|
|
set_resource_status(
|
|
state,
|
|
&schema_addr,
|
|
ResourceLifecycleStatus::Drifted,
|
|
"actual_applied_state_pending",
|
|
"graph state does not match the interrupted operation; run `cluster refresh` and re-plan",
|
|
);
|
|
diagnostics.push(Diagnostic::warning(
|
|
"cluster_recovery_pending",
|
|
graph_address.clone(),
|
|
"an interrupted schema apply left unexpected graph state; graph-moving work is blocked until repaired",
|
|
));
|
|
outcome.pending_graphs.insert(sidecar.graph_id.clone());
|
|
}
|
|
}
|
|
|
|
pub(crate) async fn sweep_graph_delete_sidecar(
|
|
backend: &ClusterStore,
|
|
path: String,
|
|
sidecar: RecoverySidecar,
|
|
state: &mut ClusterState,
|
|
diagnostics: &mut Vec<Diagnostic>,
|
|
outcome: &mut SweepOutcome,
|
|
) {
|
|
let graph_address = graph_address(&sidecar.graph_id);
|
|
|
|
if backend.graph_root_exists(&sidecar.graph_uri).await {
|
|
// Row 8: the delete never completed. Prefix removal is idempotent and
|
|
// works on partial roots, so the repair is simply the re-proposed,
|
|
// still-approved delete on a later run — retire the stale intent.
|
|
diagnostics.push(Diagnostic::warning(
|
|
"graph_delete_incomplete",
|
|
graph_address,
|
|
"a previous graph delete did not complete; it will be re-proposed by plan and can be retried under its approval",
|
|
));
|
|
outcome.completed_sidecars.push(path);
|
|
return;
|
|
}
|
|
|
|
if !state
|
|
.applied_revision
|
|
.resources
|
|
.contains_key(&graph_address)
|
|
{
|
|
// Row 7: already tombstoned (or never recorded); crash fell between
|
|
// the state CAS and sidecar delete.
|
|
outcome.completed_sidecars.push(path);
|
|
return;
|
|
}
|
|
|
|
// Row 7b: the root is gone, the ledger is stale — roll forward the
|
|
// tombstone, consume the approval the sidecar carries, audit.
|
|
tombstone_graph_subtree(
|
|
state,
|
|
&sidecar.graph_id,
|
|
sidecar.approval_id.as_deref(),
|
|
sidecar.actor.as_deref(),
|
|
);
|
|
state.recovery_records.insert(
|
|
sidecar.operation_id.clone(),
|
|
json!({
|
|
"kind": "graph_delete",
|
|
"graph_id": sidecar.graph_id,
|
|
"outcome": "rolled_forward",
|
|
"recovered_at": now_rfc3339(),
|
|
"actor": sidecar.actor,
|
|
}),
|
|
);
|
|
if let Some(approval_id) = &sidecar.approval_id {
|
|
record_approval_consumed(state, approval_id, &sidecar.operation_id);
|
|
outcome.consumed_approvals.push(approval_id.clone());
|
|
}
|
|
diagnostics.push(Diagnostic::warning(
|
|
"cluster_recovery_rolled_forward",
|
|
graph_address,
|
|
"an interrupted graph delete had completed on disk; cluster state was rolled forward to match",
|
|
));
|
|
outcome.completed_sidecars.push(path);
|
|
}
|
|
|
|
/// Remove a graph's subtree (graph, schema, queries) from the ledger and
|
|
/// leave a tombstone observation. Idempotent.
|
|
pub(crate) fn tombstone_graph_subtree(
|
|
state: &mut ClusterState,
|
|
graph_id: &str,
|
|
approval_id: Option<&str>,
|
|
actor: Option<&str>,
|
|
) {
|
|
let graph_addr = graph_address(graph_id);
|
|
let schema_addr = schema_address(graph_id);
|
|
let query_prefix = format!("query.{graph_id}.");
|
|
state.applied_revision.resources.remove(&graph_addr);
|
|
state.applied_revision.resources.remove(&schema_addr);
|
|
state
|
|
.applied_revision
|
|
.resources
|
|
.retain(|address, _| !address.starts_with(&query_prefix));
|
|
state.resource_statuses.remove(&graph_addr);
|
|
state.resource_statuses.remove(&schema_addr);
|
|
state
|
|
.resource_statuses
|
|
.retain(|address, _| !address.starts_with(&query_prefix));
|
|
state.observations.insert(
|
|
graph_addr,
|
|
json!({
|
|
"kind": "tombstone",
|
|
"deleted_at": now_rfc3339(),
|
|
"approval_id": approval_id,
|
|
"actor": actor,
|
|
}),
|
|
);
|
|
}
|
|
|
|
/// Record approval consumption in the state ledger. The artifact FILE is
|
|
/// rewritten with consumed_at only after the state write lands, so a failed
|
|
/// CAS leaves the approval valid for the retry.
|
|
pub(crate) fn record_approval_consumed(
|
|
state: &mut ClusterState,
|
|
approval_id: &str,
|
|
operation_id: &str,
|
|
) {
|
|
state.approval_records.insert(
|
|
approval_id.to_string(),
|
|
json!({
|
|
"consumed_at": now_rfc3339(),
|
|
"consumed_by_operation": operation_id,
|
|
}),
|
|
);
|
|
}
|
|
|
|
/// Mark approval artifact files consumed on disk (post-CAS).
|
|
pub(crate) async fn mark_approvals_consumed(backend: &ClusterStore, approval_ids: &[String]) {
|
|
if approval_ids.is_empty() {
|
|
return;
|
|
}
|
|
let mut sink = Vec::new();
|
|
for (_, mut artifact) in backend.list_approval_artifacts(&mut sink).await {
|
|
if approval_ids.contains(&artifact.approval_id) && artifact.consumed_at.is_none() {
|
|
artifact.consumed_at = Some(now_rfc3339());
|
|
let _ = backend.write_approval_artifact(&artifact).await;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Read-only commands report pending sidecars without acting on them.
|
|
pub(crate) async fn warn_pending_recovery_sidecars(
|
|
backend: &ClusterStore,
|
|
diagnostics: &mut Vec<Diagnostic>,
|
|
) {
|
|
for location in backend.list_recovery_sidecar_locations(diagnostics).await {
|
|
diagnostics.push(Diagnostic::warning(
|
|
"cluster_recovery_pending",
|
|
location,
|
|
"a recovery sidecar from an interrupted apply is pending; the next state-mutating command will classify it",
|
|
));
|
|
}
|
|
}
|