mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-24 02:38:06 +02:00
Merge branch 'main' into ragnorc/omnigraph-mcp-crate
Bring the MCP feature branch up to date with main (14 commits). One conflict — compiler/parser.rs: main's `NanoError` → `CompilerError` rename vs this branch's `@mcp` / per-param `@description` parser additions; resolved by keeping the new parsing under the renamed error type. The CLI `queries list` change (#280, surfacing `@description`/`@instruction`) auto-merged with this branch's `mcp_expose`/`tool_name` columns.
This commit is contained in:
commit
fbf455a250
110 changed files with 6396 additions and 2511 deletions
|
|
@ -873,6 +873,25 @@ pub(crate) async fn execute_queries_validate(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Render a stored-query annotation as the lines shown under its
/// `queries list` entry.
///
/// The first line of `value` is placed behind a `{label}: ` prefix. Every
/// further line of a multiline `@description`/`@instruction` value (GQ string
/// literals admit newlines) is indented by the prefix's length so it aligns
/// under the first instead of breaking back to the left margin.
///
/// `str::split` always yields at least one item, so the result is never
/// empty: an empty `value` renders as the bare prefix.
fn query_annotation_lines(label: &str, value: &str) -> Vec<String> {
    let prefix = format!(" {label}: ");
    // Byte length equals column width here because the labels passed by the
    // visible callers ("description", "instruction") are ASCII.
    let continuation = " ".repeat(prefix.len());
    value
        .split('\n')
        .enumerate()
        .map(|(index, line)| {
            let lead = if index == 0 { &prefix } else { &continuation };
            format!("{lead}{line}")
        })
        .collect()
}

/// Print a stored-query annotation under its `queries list` entry.
///
/// `label` is the annotation name and `value` its (possibly multiline) text.
/// Continuation lines are indented to align under the first so the catalog
/// stays readable instead of breaking the left margin.
fn print_query_annotation(label: &str, value: &str) {
    for line in query_annotation_lines(label, value) {
        println!("{line}");
    }
}
|
||||
|
||||
/// `queries list --cluster <dir>` (RFC-011): list the catalog's stored queries.
|
||||
/// With `--graph`, scope to one graph.
|
||||
pub(crate) async fn execute_queries_list(
|
||||
|
|
@ -891,6 +910,8 @@ pub(crate) async fn execute_queries_list(
|
|||
mcp_expose: q.is_exposed(),
|
||||
tool_name: q.decl.mcp.tool_name.clone(),
|
||||
mutation: q.is_mutation(),
|
||||
description: q.decl.description.clone(),
|
||||
instruction: q.decl.instruction.clone(),
|
||||
params: q
|
||||
.decl
|
||||
.params
|
||||
|
|
@ -931,6 +952,12 @@ pub(crate) async fn execute_queries_list(
|
|||
String::new()
|
||||
};
|
||||
println!("{kind} {}({params}){mcp}", q.name);
|
||||
if let Some(description) = &q.description {
|
||||
print_query_annotation("description", description);
|
||||
}
|
||||
if let Some(instruction) = &q.instruction {
|
||||
print_query_annotation("instruction", instruction);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
|
|
|
|||
|
|
@ -1050,7 +1050,7 @@ async fn main() -> Result<()> {
|
|||
// The actor attributes graph-moving operations (sidecars,
|
||||
// audit entries, engine schema-apply commits). Cluster FACTS
|
||||
// stay unlayered; the operator's identity resolves --as flag
|
||||
// first, then the per-operator omnigraph.yaml `cli.actor`.
|
||||
// first, then per-operator config `operator.actor`.
|
||||
let actor = resolve_cluster_actor(cli.as_actor.as_deref())?;
|
||||
let output = apply_config_dir_with_options(config, ApplyOptions { actor }).await;
|
||||
finish_cluster_apply(&output, json)?;
|
||||
|
|
@ -1062,7 +1062,7 @@ async fn main() -> Result<()> {
|
|||
} => {
|
||||
let Some(approver) = resolve_cluster_actor(cli.as_actor.as_deref())? else {
|
||||
bail!(
|
||||
"`cluster approve` requires an approver: pass the global --as <ACTOR> flag or set `cli.actor` in your omnigraph.yaml — an approval without an approver is meaningless"
|
||||
"`cluster approve` requires an approver: pass the global --as <ACTOR> flag or set `operator.actor` in ~/.omnigraph/config.yaml — an approval without an approver is meaningless"
|
||||
);
|
||||
};
|
||||
let output = approve_config_dir(config, &resource, &approver).await;
|
||||
|
|
|
|||
|
|
@ -849,6 +849,13 @@ pub(crate) struct QueriesListItem {
|
|||
pub(crate) mcp_expose: bool,
|
||||
pub(crate) tool_name: Option<String>,
|
||||
pub(crate) mutation: bool,
|
||||
/// `@description` from the query declaration — what the query is for.
|
||||
/// Carried so the CLI catalog matches the HTTP `GET /queries` surface.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) description: Option<String>,
|
||||
/// `@instruction` from the query declaration — how/when to invoke it.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) instruction: Option<String>,
|
||||
pub(crate) params: Vec<QueriesParam>,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -796,6 +796,10 @@ fn cluster_approve_uses_operator_actor_fallback() {
|
|||
);
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
assert!(stderr.contains("--as"), "{stderr}");
|
||||
assert!(stderr.contains("operator.actor"), "{stderr}");
|
||||
assert!(stderr.contains("config.yaml"), "{stderr}");
|
||||
assert!(!stderr.contains("cli.actor"), "{stderr}");
|
||||
assert!(!stderr.contains("omnigraph.yaml"), "{stderr}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -231,6 +231,125 @@ fn queries_list_prints_registered_query() {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
fn queries_list_surfaces_description_and_instruction() {
    // A stored query's `@description`/`@instruction` tell an agent or operator
    // what the query does and how to invoke it. `queries list` has to show
    // both in its human output and in its `--json` output, matching the HTTP
    // `GET /queries` surface.
    let cluster = converged_cluster_with_query(
        "described.gq",
        "query described($name: String) \
         @description(\"Find a person by exact name.\") \
         @instruction(\"Use for exact lookups; prefer search for fuzzy matches.\") \
         { match { $p: Person { name: $name } } return { $p.age } }",
        " described:\n file: ./described.gq\n",
    );

    // Human-readable listing: one labelled line per annotation.
    let human_run = output_success(
        cli().arg("queries").arg("list").arg("--cluster").arg(cluster.path()),
    );
    let human = stdout_string(&human_run);
    let expected_lines = [
        ("description", "description: Find a person by exact name."),
        (
            "instruction",
            "instruction: Use for exact lookups; prefer search for fuzzy matches.",
        ),
    ];
    for (annotation, expected_line) in expected_lines {
        assert!(
            human.contains(expected_line),
            "human list must show @{annotation}; stdout:\n{human}"
        );
    }

    // Machine-readable listing: both annotations are fields on the entry.
    let json_run = output_success(
        cli()
            .arg("queries")
            .arg("list")
            .arg("--cluster")
            .arg(cluster.path())
            .arg("--json"),
    );
    let payload: serde_json::Value = serde_json::from_slice(&json_run.stdout).unwrap();
    let catalog = payload["queries"].as_array().unwrap();
    let described = catalog
        .iter()
        .find(|query| query["name"] == "described")
        .unwrap();
    assert_eq!(described["description"], "Find a person by exact name.");
    assert_eq!(
        described["instruction"],
        "Use for exact lookups; prefer search for fuzzy matches."
    );
}
|
||||
|
||||
#[test]
fn queries_list_indents_multiline_annotation_continuation() {
    // A GQ string literal may contain newlines, so an annotation value can
    // span several lines. In human output the second and later lines must be
    // indented under the first rather than restarting at the left margin.
    let cluster = converged_cluster_with_query(
        "multi.gq",
        "query multi($name: String) \
         @description(\"line one\\nline two\") \
         { match { $p: Person { name: $name } } return { $p.age } }",
        " multi:\n file: ./multi.gq\n",
    );
    let listing = output_success(
        cli().arg("queries").arg("list").arg("--cluster").arg(cluster.path()),
    );
    let rendered = stdout_string(&listing);
    // The expected text covers the labelled first line plus the indented
    // continuation that follows it.
    let expected = " description: line one\n line two";
    assert!(
        rendered.contains(expected),
        "multiline annotation must indent the continuation; stdout:\n{rendered}"
    );
}
|
||||
|
||||
#[test]
fn queries_list_omits_annotations_when_absent() {
    // Counterpart to the "surfaces annotations" test: a query declaring
    // neither annotation must add no lines to the human listing and must leave
    // both JSON fields out, so the catalog never shows empty
    // `description:`/`instruction:` entries.
    let cluster = converged_cluster_with_query(
        "bare.gq",
        "query bare() { match { $p: Person } return { $p.name } }",
        " bare:\n file: ./bare.gq\n",
    );

    // Human listing: the query itself appears, with no annotation labels.
    let human_run = output_success(
        cli().arg("queries").arg("list").arg("--cluster").arg(cluster.path()),
    );
    let human = stdout_string(&human_run);
    assert!(human.contains("bare()"), "stdout:\n{human}");
    let shows_annotation = human.contains("description:") || human.contains("instruction:");
    assert!(
        !shows_annotation,
        "a query without annotations prints no annotation lines; stdout:\n{human}"
    );

    // JSON listing: the keys are absent altogether, not present as null.
    let json_run = output_success(
        cli()
            .arg("queries")
            .arg("list")
            .arg("--cluster")
            .arg(cluster.path())
            .arg("--json"),
    );
    let payload: serde_json::Value = serde_json::from_slice(&json_run.stdout).unwrap();
    let catalog = payload["queries"].as_array().unwrap();
    let bare = catalog
        .iter()
        .find(|query| query["name"] == "bare")
        .unwrap();
    let both_absent = ["description", "instruction"]
        .iter()
        .all(|key| bare.get(*key).is_none());
    assert!(
        both_absent,
        "a query without annotations omits both JSON fields: {bare}"
    );
}
|
||||
|
||||
#[test]
|
||||
fn queries_validate_requires_a_cluster() {
|
||||
// RFC-011: with no --cluster (and no cluster profile), the command errors
|
||||
|
|
|
|||
|
|
@ -10,8 +10,8 @@ documentation = "https://docs.rs/omnigraph-cluster"
|
|||
|
||||
[features]
|
||||
# Fault-injection hooks for the apply protocol (crash-mid-apply, CAS-race
|
||||
# tests). Deliberately does NOT enable omnigraph/failpoints.
|
||||
failpoints = ["dep:fail", "fail/failpoints"]
|
||||
# tests), including cluster/engine boundary failures.
|
||||
failpoints = ["dep:fail", "fail/failpoints", "omnigraph/failpoints"]
|
||||
|
||||
[dependencies]
|
||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.7.0" }
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ pub(crate) fn diff_resources(
|
|||
disposition: None,
|
||||
reason: None,
|
||||
binding_change: false,
|
||||
metadata_change: None,
|
||||
migration: None,
|
||||
}),
|
||||
Some(before) if before != after => changes.push(PlanChange {
|
||||
|
|
@ -28,6 +29,7 @@ pub(crate) fn diff_resources(
|
|||
disposition: None,
|
||||
reason: None,
|
||||
binding_change: false,
|
||||
metadata_change: None,
|
||||
migration: None,
|
||||
}),
|
||||
Some(_) => {}
|
||||
|
|
@ -43,6 +45,7 @@ pub(crate) fn diff_resources(
|
|||
disposition: None,
|
||||
reason: None,
|
||||
binding_change: false,
|
||||
metadata_change: None,
|
||||
migration: None,
|
||||
});
|
||||
}
|
||||
|
|
@ -82,6 +85,47 @@ pub(crate) fn append_policy_binding_changes(
|
|||
disposition: None,
|
||||
reason: None,
|
||||
binding_change: true,
|
||||
metadata_change: Some(PlanMetadataChange::PolicyBindings),
|
||||
migration: None,
|
||||
});
|
||||
}
|
||||
changes.sort_by(|a, b| a.resource.cmp(&b.resource));
|
||||
}
|
||||
|
||||
/// Metadata-only embedding provider changes: the provider digest is unchanged
|
||||
/// but the applied state predates storing the profile body needed by
|
||||
/// config-free serving. This mirrors policy binding backfill instead of
|
||||
/// hiding a serving-time failure behind a no-op plan.
|
||||
pub(crate) fn append_embedding_profile_changes(
|
||||
changes: &mut Vec<PlanChange>,
|
||||
prior_state: Option<&ClusterState>,
|
||||
desired: &DesiredCluster,
|
||||
) {
|
||||
let Some(state) = prior_state else {
|
||||
return; // no state: provider Creates carry profiles already
|
||||
};
|
||||
for (address, desired_profile) in &desired.embedding_providers {
|
||||
if changes
|
||||
.iter()
|
||||
.any(|change| change.resource.as_str() == address.as_str())
|
||||
{
|
||||
continue; // content change already covers it
|
||||
}
|
||||
let Some(entry) = state.applied_revision.resources.get(address) else {
|
||||
continue; // not applied yet: the Create covers it
|
||||
};
|
||||
if entry.embedding_profile.as_ref() == Some(desired_profile) {
|
||||
continue;
|
||||
}
|
||||
changes.push(PlanChange {
|
||||
resource: address.clone(),
|
||||
operation: PlanOperation::Update,
|
||||
before_digest: Some(entry.digest.clone()),
|
||||
after_digest: Some(entry.digest.clone()),
|
||||
disposition: None,
|
||||
reason: None,
|
||||
binding_change: false,
|
||||
metadata_change: Some(PlanMetadataChange::EmbeddingProfile),
|
||||
migration: None,
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,9 +33,9 @@ use config::{
|
|||
validate_id, validate_query_source,
|
||||
};
|
||||
use diff::{
|
||||
FailedGraphOrigin, ResourceKind, append_policy_binding_changes, approved_resources,
|
||||
classify_changes, compute_approvals, compute_blast_radius, demote_dependents_of_failed_graphs,
|
||||
diff_resources, resource_kind,
|
||||
FailedGraphOrigin, ResourceKind, append_embedding_profile_changes,
|
||||
append_policy_binding_changes, approved_resources, classify_changes, compute_approvals,
|
||||
compute_blast_radius, demote_dependents_of_failed_graphs, diff_resources, resource_kind,
|
||||
};
|
||||
pub use serve::{
|
||||
ServingGraph, ServingPolicy, ServingQuery, ServingSnapshot, cluster_graph_ids,
|
||||
|
|
@ -160,7 +160,7 @@ pub async fn plan_config_dir(config_dir: impl AsRef<Path>) -> PlanOutput {
|
|||
|
||||
// Plan is read-only: pending sidecars are reported, never acted on
|
||||
// (RFC-004 open question 3 keeps read-only commands warn-only).
|
||||
warn_pending_recovery_sidecars(&desired.config_dir, &mut diagnostics);
|
||||
warn_pending_recovery_sidecars(&backend, &mut diagnostics).await;
|
||||
|
||||
let mut prior_resources = BTreeMap::new();
|
||||
let mut prior_state: Option<ClusterState> = None;
|
||||
|
|
@ -183,6 +183,7 @@ pub async fn plan_config_dir(config_dir: impl AsRef<Path>) -> PlanOutput {
|
|||
};
|
||||
if !has_errors(&diagnostics) {
|
||||
append_policy_binding_changes(&mut changes, prior_state.as_ref(), &desired);
|
||||
append_embedding_profile_changes(&mut changes, prior_state.as_ref(), &desired);
|
||||
}
|
||||
// Plan previews dispositions without sweeping; a pending recovery is
|
||||
// surfaced as the cluster_recovery_pending warning above instead.
|
||||
|
|
@ -404,6 +405,7 @@ pub async fn apply_config_dir_with_options(
|
|||
let prior_resources = state_resource_digests(&state);
|
||||
let mut changes = diff_resources(&prior_resources, &desired.resource_digests);
|
||||
append_policy_binding_changes(&mut changes, Some(&state), &desired);
|
||||
append_embedding_profile_changes(&mut changes, Some(&state), &desired);
|
||||
let approval_artifacts = backend.list_approval_artifacts(&mut diagnostics).await;
|
||||
let approved = approved_resources(
|
||||
&approval_artifacts,
|
||||
|
|
@ -639,42 +641,9 @@ pub async fn apply_config_dir_with_options(
|
|||
continue;
|
||||
}
|
||||
};
|
||||
let observed_manifest_version = match db.snapshot_of(ReadTarget::branch("main")).await {
|
||||
Ok(snapshot) => Some(snapshot.version()),
|
||||
Err(_) => None,
|
||||
};
|
||||
let mut sidecar = RecoverySidecar {
|
||||
schema_version: 1,
|
||||
operation_id: Ulid::new().to_string(),
|
||||
started_at: now_rfc3339(),
|
||||
actor: options.actor.clone(),
|
||||
kind: RecoverySidecarKind::SchemaApply,
|
||||
graph_id: graph_id.clone(),
|
||||
graph_uri: graph_uri.clone(),
|
||||
observed_manifest_version,
|
||||
expected_manifest_version: None,
|
||||
desired_schema_digest: desired_graph.schema_digest.clone(),
|
||||
state_cas_base: expected_cas.clone(),
|
||||
approval_id: None,
|
||||
};
|
||||
let sidecar_path = match backend.write_recovery_sidecar(&sidecar).await {
|
||||
Ok(path) => path,
|
||||
Err(diagnostic) => {
|
||||
diagnostics.push(diagnostic);
|
||||
failed_graphs.insert(graph_id.clone(), FailedGraphOrigin::SchemaApply);
|
||||
graph_moving_aborted = true;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if let Err(diagnostic) = failpoints::maybe_fail("cluster_apply.before_schema_apply") {
|
||||
// Simulated crash before the engine call: the sidecar stays; the
|
||||
// sweep retires it next run (ledger still consistent with live).
|
||||
diagnostics.push(diagnostic);
|
||||
failed_graphs.insert(graph_id.clone(), FailedGraphOrigin::SchemaApply);
|
||||
graph_moving_aborted = true;
|
||||
continue;
|
||||
}
|
||||
// Re-read + digest-verify the desired schema source under the lock.
|
||||
// Re-read + digest-verify the desired schema source before the
|
||||
// cluster sidecar exists. Parser/planner rejections cannot have
|
||||
// moved graph state, so they must not leave recovery work behind.
|
||||
let schema_source = source_paths
|
||||
.get(schema_address(graph_id).as_str())
|
||||
.ok_or_else(|| {
|
||||
|
|
@ -708,12 +677,64 @@ pub async fn apply_config_dir_with_options(
|
|||
Ok(source) => source,
|
||||
Err(diagnostic) => {
|
||||
diagnostics.push(diagnostic);
|
||||
backend.delete_object(&sidecar_path).await; // nothing moved
|
||||
failed_graphs.insert(graph_id.clone(), FailedGraphOrigin::SchemaApply);
|
||||
graph_moving_aborted = true;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if let Err(err) = db
|
||||
.preview_schema_apply_with_options(&schema_source, SchemaApplyOptions::default())
|
||||
.await
|
||||
{
|
||||
diagnostics.push(Diagnostic::error(
|
||||
"schema_apply_failed",
|
||||
schema_address(graph_id),
|
||||
format!("schema apply is not supported on '{graph_uri}': {err}"),
|
||||
));
|
||||
failed_graphs.insert(graph_id.clone(), FailedGraphOrigin::SchemaApply);
|
||||
graph_moving_aborted = true;
|
||||
continue;
|
||||
}
|
||||
let observed_manifest_version = match db.snapshot_of(ReadTarget::branch("main")).await {
|
||||
Ok(snapshot) => Some(snapshot.version()),
|
||||
Err(_) => None,
|
||||
};
|
||||
let recorded_schema_digest = state
|
||||
.applied_revision
|
||||
.resources
|
||||
.get(&schema_address(graph_id))
|
||||
.map(|entry| entry.digest.clone());
|
||||
let mut sidecar = RecoverySidecar {
|
||||
schema_version: 1,
|
||||
operation_id: Ulid::new().to_string(),
|
||||
started_at: now_rfc3339(),
|
||||
actor: options.actor.clone(),
|
||||
kind: RecoverySidecarKind::SchemaApply,
|
||||
graph_id: graph_id.clone(),
|
||||
graph_uri: graph_uri.clone(),
|
||||
observed_manifest_version,
|
||||
expected_manifest_version: None,
|
||||
desired_schema_digest: desired_graph.schema_digest.clone(),
|
||||
state_cas_base: expected_cas.clone(),
|
||||
approval_id: None,
|
||||
};
|
||||
let sidecar_path = match backend.write_recovery_sidecar(&sidecar).await {
|
||||
Ok(path) => path,
|
||||
Err(diagnostic) => {
|
||||
diagnostics.push(diagnostic);
|
||||
failed_graphs.insert(graph_id.clone(), FailedGraphOrigin::SchemaApply);
|
||||
graph_moving_aborted = true;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if let Err(diagnostic) = failpoints::maybe_fail("cluster_apply.before_schema_apply") {
|
||||
// Simulated crash before the engine call: the sidecar stays; the
|
||||
// sweep retires it next run (ledger still consistent with live).
|
||||
diagnostics.push(diagnostic);
|
||||
failed_graphs.insert(graph_id.clone(), FailedGraphOrigin::SchemaApply);
|
||||
graph_moving_aborted = true;
|
||||
continue;
|
||||
}
|
||||
// Soft drops only: allow_data_loss stays false until the approval
|
||||
// artifacts of stage 4C exist (RFC-004 §D4).
|
||||
match db
|
||||
|
|
@ -736,8 +757,29 @@ pub async fn apply_config_dir_with_options(
|
|||
schema_address(graph_id),
|
||||
format!("schema apply failed on '{graph_uri}': {err}"),
|
||||
));
|
||||
// Sidecar stays; the sweep retires it (live digest unchanged
|
||||
// == ledger consistent) or flags real movement.
|
||||
if live_schema_matches_recorded_digest(
|
||||
&graph_uri,
|
||||
recorded_schema_digest.as_deref(),
|
||||
observed_manifest_version,
|
||||
)
|
||||
.await
|
||||
{
|
||||
// Pre-movement rejection: nothing moved, so retire the
|
||||
// sidecar eagerly. A delete failure leaves it safe (the
|
||||
// graph is quarantined until the next sweep), but surface
|
||||
// it so an operator isn't left debugging a silent stick.
|
||||
if let Err(err) = backend.try_delete_object(&sidecar_path).await {
|
||||
diagnostics.push(Diagnostic::warning(
|
||||
"recovery_sidecar_cleanup_failed",
|
||||
sidecar_path.clone(),
|
||||
format!(
|
||||
"could not delete the stale recovery sidecar after a pre-movement \
|
||||
schema-apply rejection; graph `{graph_id}` stays quarantined until \
|
||||
a state-mutating cluster command sweeps it: {err}"
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
failed_graphs.insert(graph_id.clone(), FailedGraphOrigin::SchemaApply);
|
||||
graph_moving_aborted = true;
|
||||
continue;
|
||||
|
|
@ -1022,6 +1064,7 @@ pub async fn apply_config_dir_with_options(
|
|||
&desired.resource_digests,
|
||||
);
|
||||
append_policy_binding_changes(&mut residual, Some(&new_state), &desired);
|
||||
append_embedding_profile_changes(&mut residual, Some(&new_state), &desired);
|
||||
let converged = residual.is_empty();
|
||||
if converged {
|
||||
new_state.applied_revision.config_digest = Some(desired.config_digest.clone());
|
||||
|
|
@ -1260,7 +1303,7 @@ pub async fn status_config_dir(config_dir: impl AsRef<Path>) -> StatusOutput {
|
|||
backend
|
||||
.observe_lock(&mut observations, &mut diagnostics)
|
||||
.await;
|
||||
warn_pending_recovery_sidecars(&parsed.config_dir, &mut diagnostics);
|
||||
warn_pending_recovery_sidecars(&backend, &mut diagnostics).await;
|
||||
|
||||
let mut resource_digests = BTreeMap::new();
|
||||
let mut resource_statuses = BTreeMap::new();
|
||||
|
|
@ -1939,6 +1982,29 @@ fn embedding_provider_digest(profile: &EmbeddingProviderConfig) -> String {
|
|||
sha256_hex(input.as_bytes())
|
||||
}
|
||||
|
||||
async fn live_schema_matches_recorded_digest(
|
||||
graph_uri: &str,
|
||||
recorded_schema_digest: Option<&str>,
|
||||
observed_manifest_version: Option<u64>,
|
||||
) -> bool {
|
||||
let Some(recorded_schema_digest) = recorded_schema_digest else {
|
||||
return false;
|
||||
};
|
||||
let Some(observed_manifest_version) = observed_manifest_version else {
|
||||
return false;
|
||||
};
|
||||
let Ok(db) = Omnigraph::open_read_only(graph_uri).await else {
|
||||
return false;
|
||||
};
|
||||
let Ok(snapshot) = db.snapshot_of(ReadTarget::branch("main")).await else {
|
||||
return false;
|
||||
};
|
||||
if snapshot.version() != observed_manifest_version {
|
||||
return false;
|
||||
}
|
||||
sha256_hex(db.schema_source().as_bytes()) == recorded_schema_digest
|
||||
}
|
||||
|
||||
fn desired_config_digest(
|
||||
raw: &RawClusterConfig,
|
||||
resource_digests: &BTreeMap<String, String>,
|
||||
|
|
|
|||
|
|
@ -37,11 +37,14 @@ pub struct ServingSnapshot {
|
|||
pub graphs: Vec<ServingGraph>,
|
||||
pub queries: Vec<ServingQuery>,
|
||||
pub policies: Vec<ServingPolicy>,
|
||||
pub diagnostics: Vec<Diagnostic>,
|
||||
}
|
||||
|
||||
/// Read the applied revision as a serving snapshot — the read-only loader for
|
||||
/// the Phase-5 server boot. All-or-nothing per RFC-005 §D4: every readiness
|
||||
/// failure is collected and the whole snapshot refused; no partial serving.
|
||||
/// the Phase-5 server boot. Cluster-global readiness failures are still
|
||||
/// all-or-nothing, but graph-attributed pending recovery sidecars quarantine
|
||||
/// only that graph so healthy graphs can continue serving. This loader never
|
||||
/// runs a recovery sweep.
|
||||
/// Takes no lock: the state file is replaced atomically, so this reads a
|
||||
/// consistent point-in-time ledger.
|
||||
pub async fn read_serving_snapshot(
|
||||
|
|
@ -190,19 +193,44 @@ async fn read_snapshot_with_store(
|
|||
backend: ClusterStore,
|
||||
) -> Result<ServingSnapshot, Vec<Diagnostic>> {
|
||||
let mut diagnostics: Vec<Diagnostic> = Vec::new();
|
||||
let mut startup_diagnostics: Vec<Diagnostic> = Vec::new();
|
||||
let mut quarantined_graphs: BTreeSet<String> = BTreeSet::new();
|
||||
|
||||
// A ledger a sweep is about to rewrite must not start serving.
|
||||
// Do not sweep at serve time. Valid graph-attributed sidecars quarantine
|
||||
// that graph; malformed/unattributable sidecars remain cluster-fatal
|
||||
// because serving cannot prove their blast radius.
|
||||
let sidecar_diag_start = diagnostics.len();
|
||||
let sidecars = backend.list_recovery_sidecars(&mut diagnostics).await;
|
||||
if !sidecars.is_empty() {
|
||||
diagnostics.push(Diagnostic::error(
|
||||
// Every diagnostic `list_recovery_sidecars` appends is a genuine
|
||||
// read/parse/version failure (emitted as a warning by `store::list_json_dir`)
|
||||
// whose blast radius serving cannot prove — promote each to a cluster-fatal
|
||||
// error. This depends on that listing only ever emitting failure diagnostics;
|
||||
// if it grows a benign/informational one, promote by code instead.
|
||||
for diagnostic in diagnostics.iter_mut().skip(sidecar_diag_start) {
|
||||
diagnostic.severity = DiagnosticSeverity::Error;
|
||||
}
|
||||
for (path, sidecar) in sidecars {
|
||||
if sidecar.graph_id.trim().is_empty() {
|
||||
diagnostics.push(Diagnostic::error(
|
||||
"cluster_recovery_unattributed",
|
||||
path,
|
||||
"recovery sidecar has no graph id; run a state-mutating cluster command to sweep it before serving",
|
||||
));
|
||||
continue;
|
||||
}
|
||||
quarantined_graphs.insert(sidecar.graph_id.clone());
|
||||
startup_diagnostics.push(Diagnostic::warning(
|
||||
"cluster_recovery_pending",
|
||||
CLUSTER_RECOVERIES_DIR,
|
||||
graph_address(&sidecar.graph_id),
|
||||
format!(
|
||||
"{} interrupted operation(s) await recovery; run any state-mutating cluster command (e.g. `cluster apply`) to sweep, then retry",
|
||||
sidecars.len()
|
||||
"graph `{}` is quarantined because interrupted operation `{}` awaits recovery; run any state-mutating cluster command (e.g. `cluster apply`) to sweep",
|
||||
sidecar.graph_id, sidecar.operation_id
|
||||
),
|
||||
));
|
||||
}
|
||||
if has_errors(&diagnostics) {
|
||||
return Err(diagnostics);
|
||||
}
|
||||
|
||||
let mut observations = backend.observations();
|
||||
let state = match backend.read_state(&mut observations).await {
|
||||
|
|
@ -223,14 +251,29 @@ async fn read_snapshot_with_store(
|
|||
}
|
||||
};
|
||||
let Some(state) = state else {
|
||||
diagnostics.extend(startup_diagnostics);
|
||||
return Err(diagnostics);
|
||||
};
|
||||
|
||||
let required_embedding_providers: BTreeSet<String> = state
|
||||
.applied_revision
|
||||
.resources
|
||||
.iter()
|
||||
.filter_map(|(address, entry)| match resource_kind(address) {
|
||||
ResourceKind::Graph(graph_id) if !quarantined_graphs.contains(&graph_id) => {
|
||||
entry.embedding_provider.clone()
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
let mut embedding_profiles: BTreeMap<String, EmbeddingProviderConfig> = BTreeMap::new();
|
||||
for (address, entry) in &state.applied_revision.resources {
|
||||
if !matches!(resource_kind(address), ResourceKind::EmbeddingProvider(_)) {
|
||||
continue;
|
||||
}
|
||||
if !required_embedding_providers.contains(address) {
|
||||
continue;
|
||||
}
|
||||
let Some(profile) = entry.embedding_profile.clone() else {
|
||||
diagnostics.push(Diagnostic::error(
|
||||
"embedding_provider_profile_missing",
|
||||
|
|
@ -256,9 +299,14 @@ async fn read_snapshot_with_store(
|
|||
let mut graphs = Vec::new();
|
||||
let mut queries = Vec::new();
|
||||
let mut policies = Vec::new();
|
||||
let mut saw_applied_graph = false;
|
||||
for (address, entry) in &state.applied_revision.resources {
|
||||
match resource_kind(address) {
|
||||
ResourceKind::Graph(graph_id) => {
|
||||
saw_applied_graph = true;
|
||||
if quarantined_graphs.contains(&graph_id) {
|
||||
continue;
|
||||
}
|
||||
let embedding = match entry.embedding_provider.as_deref() {
|
||||
Some(provider_address) => match resource_kind(provider_address) {
|
||||
ResourceKind::EmbeddingProvider(_) => {
|
||||
|
|
@ -300,6 +348,9 @@ async fn read_snapshot_with_store(
|
|||
let ResourceKind::Query { graph, name } = &kind else {
|
||||
unreachable!()
|
||||
};
|
||||
if quarantined_graphs.contains(graph) {
|
||||
continue;
|
||||
}
|
||||
match backend
|
||||
.read_verified_payload(&kind, &entry.digest, address)
|
||||
.await
|
||||
|
|
@ -324,6 +375,17 @@ async fn read_snapshot_with_store(
|
|||
));
|
||||
continue;
|
||||
};
|
||||
let applies_to: Vec<String> = applies_to
|
||||
.into_iter()
|
||||
.filter(|binding| {
|
||||
binding
|
||||
.strip_prefix("graph.")
|
||||
.is_none_or(|graph| !quarantined_graphs.contains(graph))
|
||||
})
|
||||
.collect();
|
||||
if applies_to.is_empty() {
|
||||
continue;
|
||||
}
|
||||
match backend
|
||||
.read_verified_payload(&kind, &entry.digest, address)
|
||||
.await
|
||||
|
|
@ -342,19 +404,29 @@ async fn read_snapshot_with_store(
|
|||
}
|
||||
|
||||
if graphs.is_empty() {
|
||||
diagnostics.push(Diagnostic::error(
|
||||
"cluster_empty",
|
||||
CLUSTER_STATE_FILE,
|
||||
"the applied revision records no graphs; apply a cluster with at least one graph before serving from it",
|
||||
));
|
||||
if saw_applied_graph && !quarantined_graphs.is_empty() {
|
||||
diagnostics.push(Diagnostic::error(
|
||||
"cluster_no_healthy_graphs",
|
||||
CLUSTER_RECOVERIES_DIR,
|
||||
"all applied graphs are quarantined by pending recovery sidecars; run any state-mutating cluster command (e.g. `cluster apply`) to sweep, then retry",
|
||||
));
|
||||
} else {
|
||||
diagnostics.push(Diagnostic::error(
|
||||
"cluster_empty",
|
||||
CLUSTER_STATE_FILE,
|
||||
"the applied revision records no graphs; apply a cluster with at least one graph before serving from it",
|
||||
));
|
||||
}
|
||||
}
|
||||
if has_errors(&diagnostics) {
|
||||
diagnostics.extend(startup_diagnostics);
|
||||
return Err(diagnostics);
|
||||
}
|
||||
Ok(ServingSnapshot {
|
||||
graphs,
|
||||
queries,
|
||||
policies,
|
||||
diagnostics: startup_diagnostics,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -250,7 +250,14 @@ impl ClusterStore {
|
|||
/// Best-effort object removal (sidecar retirement after a CAS lands,
|
||||
/// lock cleanup) — failures are recoverable by the next sweep.
|
||||
pub(crate) async fn delete_object(&self, uri: &str) {
|
||||
let _ = self.adapter.delete(uri).await;
|
||||
let _ = self.try_delete_object(uri).await;
|
||||
}
|
||||
|
||||
/// Like `delete_object` but surfaces the failure, so a caller that depends
|
||||
/// on the deletion (e.g. the pre-movement sidecar cleanup fast-path) can
|
||||
/// report it as a diagnostic instead of silently leaving stale state.
|
||||
pub(crate) async fn try_delete_object(&self, uri: &str) -> Result<(), String> {
|
||||
self.adapter.delete(uri).await.map_err(|err| err.to_string())
|
||||
}
|
||||
|
||||
/// Recursive prefix delete for graph roots (approved deletes). Idempotent;
|
||||
|
|
@ -321,6 +328,32 @@ impl ClusterStore {
|
|||
|
||||
// ---- recovery sidecars ----
|
||||
|
||||
pub(crate) async fn list_recovery_sidecar_locations(
|
||||
&self,
|
||||
diagnostics: &mut Vec<Diagnostic>,
|
||||
) -> Vec<String> {
|
||||
let dir_uri = self.uri(CLUSTER_RECOVERIES_DIR);
|
||||
let mut uris = match self.adapter.list_dir(&dir_uri).await {
|
||||
Ok(uris) => uris,
|
||||
Err(err) => {
|
||||
diagnostics.push(Diagnostic::warning(
|
||||
"recovery_sidecar_read_error",
|
||||
CLUSTER_RECOVERIES_DIR,
|
||||
format!("could not list '{CLUSTER_RECOVERIES_DIR}': {err}"),
|
||||
));
|
||||
return Vec::new();
|
||||
}
|
||||
};
|
||||
uris.retain(|uri| uri.ends_with(".json"));
|
||||
uris.sort();
|
||||
uris.into_iter()
|
||||
.map(|uri| {
|
||||
let name = uri.rsplit_once('/').map_or(uri.as_str(), |(_, name)| name);
|
||||
format!("{}/{name}", self.display(CLUSTER_RECOVERIES_DIR))
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub(crate) async fn list_recovery_sidecars(
|
||||
&self,
|
||||
diagnostics: &mut Vec<Diagnostic>,
|
||||
|
|
|
|||
|
|
@ -427,21 +427,14 @@ pub(crate) async fn mark_approvals_consumed(backend: &ClusterStore, approval_ids
|
|||
}
|
||||
|
||||
/// Read-only commands report pending sidecars without acting on them.
|
||||
pub(crate) fn warn_pending_recovery_sidecars(config_dir: &Path, diagnostics: &mut Vec<Diagnostic>) {
|
||||
let recoveries_dir = config_dir.join(CLUSTER_RECOVERIES_DIR);
|
||||
let Ok(entries) = fs::read_dir(&recoveries_dir) else {
|
||||
return;
|
||||
};
|
||||
let mut names: Vec<String> = entries
|
||||
.flatten()
|
||||
.filter(|entry| entry.path().extension().is_some_and(|ext| ext == "json"))
|
||||
.map(|entry| entry.file_name().to_string_lossy().into_owned())
|
||||
.collect();
|
||||
names.sort();
|
||||
for name in names {
|
||||
pub(crate) async fn warn_pending_recovery_sidecars(
|
||||
backend: &ClusterStore,
|
||||
diagnostics: &mut Vec<Diagnostic>,
|
||||
) {
|
||||
for location in backend.list_recovery_sidecar_locations(diagnostics).await {
|
||||
diagnostics.push(Diagnostic::warning(
|
||||
"cluster_recovery_pending",
|
||||
format!("{CLUSTER_RECOVERIES_DIR}/{name}"),
|
||||
location,
|
||||
"a recovery sidecar from an interrupted apply is pending; the next state-mutating command will classify it",
|
||||
));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1174,6 +1174,19 @@ graphs:
|
|||
.unwrap()
|
||||
}
|
||||
|
||||
fn recovery_sidecars(config_dir: &Path) -> Vec<std::path::PathBuf> {
|
||||
let dir = config_dir.join(CLUSTER_RECOVERIES_DIR);
|
||||
if !dir.exists() {
|
||||
return Vec::new();
|
||||
}
|
||||
let mut sidecars: Vec<_> = fs::read_dir(dir)
|
||||
.unwrap()
|
||||
.map(|entry| entry.unwrap().path())
|
||||
.collect();
|
||||
sidecars.sort();
|
||||
sidecars
|
||||
}
|
||||
|
||||
fn query_payload_path(config_dir: &Path, digest: &str) -> std::path::PathBuf {
|
||||
config_dir
|
||||
.join(CLUSTER_RESOURCES_DIR)
|
||||
|
|
@ -1586,8 +1599,17 @@ graphs:
|
|||
state["applied_revision"]["resources"]["schema.knowledge"]["digest"],
|
||||
desired.resource_digests["schema.knowledge"]
|
||||
);
|
||||
// Second run: the sweep retires the stale sidecar (ledger consistent)
|
||||
// and the run fails just as loudly — idempotent loudness.
|
||||
let db = Omnigraph::open_read_only(&derived_graph_uri(dir.path(), "knowledge"))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(db.schema_source().as_str(), SCHEMA);
|
||||
assert!(
|
||||
recovery_sidecars(dir.path()).is_empty(),
|
||||
"{:?}",
|
||||
recovery_sidecars(dir.path())
|
||||
);
|
||||
// Second run fails just as loudly and still leaves no sidecar because
|
||||
// the engine preview rejects before graph state can move.
|
||||
let second = apply_config_dir(dir.path()).await;
|
||||
assert!(!second.ok);
|
||||
assert!(
|
||||
|
|
@ -1596,6 +1618,45 @@ graphs:
|
|||
.iter()
|
||||
.any(|diagnostic| diagnostic.code == "schema_apply_failed")
|
||||
);
|
||||
assert!(
|
||||
recovery_sidecars(dir.path()).is_empty(),
|
||||
"{:?}",
|
||||
recovery_sidecars(dir.path())
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn apply_schema_update_blocked_by_non_main_branch_leaves_no_sidecar() {
|
||||
let dir = fixture();
|
||||
init_derived_graph(dir.path()).await;
|
||||
write_applyable_state(dir.path());
|
||||
let graph_uri = derived_graph_uri(dir.path(), "knowledge");
|
||||
let db = Omnigraph::open(&graph_uri).await.unwrap();
|
||||
db.branch_create("feature").await.unwrap();
|
||||
drop(db);
|
||||
let before_state = read_state_json(dir.path());
|
||||
fs::write(dir.path().join("people.pg"), SCHEMA_V2).unwrap();
|
||||
|
||||
let out = apply_config_dir(dir.path()).await;
|
||||
assert!(!out.ok);
|
||||
assert!(out.diagnostics.iter().any(|diagnostic| {
|
||||
diagnostic.code == "schema_apply_failed"
|
||||
&& diagnostic
|
||||
.message
|
||||
.contains("schema apply requires a graph with only main")
|
||||
}));
|
||||
assert!(
|
||||
recovery_sidecars(dir.path()).is_empty(),
|
||||
"{:?}",
|
||||
recovery_sidecars(dir.path())
|
||||
);
|
||||
let after_state = read_state_json(dir.path());
|
||||
assert_eq!(
|
||||
after_state["applied_revision"]["resources"],
|
||||
before_state["applied_revision"]["resources"]
|
||||
);
|
||||
let reopened = Omnigraph::open_read_only(&graph_uri).await.unwrap();
|
||||
assert_eq!(reopened.schema_source().as_str(), SCHEMA);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -2964,6 +3025,10 @@ policies:
|
|||
.find(|change| change.resource == "policy.base")
|
||||
.expect("binding change must be visible in plan");
|
||||
assert!(change.binding_change);
|
||||
assert_eq!(
|
||||
change.metadata_change,
|
||||
Some(PlanMetadataChange::PolicyBindings)
|
||||
);
|
||||
assert_eq!(change.operation, PlanOperation::Update);
|
||||
assert_eq!(change.before_digest, change.after_digest);
|
||||
|
||||
|
|
@ -3002,9 +3067,9 @@ policies:
|
|||
|
||||
let plan = plan_config_dir(dir.path()).await;
|
||||
assert!(
|
||||
plan.changes
|
||||
.iter()
|
||||
.any(|change| change.resource == "policy.base" && change.binding_change),
|
||||
plan.changes.iter().any(|change| change.resource == "policy.base"
|
||||
&& change.binding_change
|
||||
&& change.metadata_change == Some(PlanMetadataChange::PolicyBindings)),
|
||||
"{plan:?}"
|
||||
);
|
||||
let out = apply_config_dir(dir.path()).await;
|
||||
|
|
@ -3016,6 +3081,52 @@ policies:
|
|||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn pre_5a_state_backfills_embedding_profile() {
|
||||
let dir = fixture();
|
||||
init_derived_graph(dir.path()).await;
|
||||
write_mock_embedding_cluster(dir.path(), "recorded-x");
|
||||
write_applyable_state(dir.path());
|
||||
let converge = apply_config_dir(dir.path()).await;
|
||||
assert!(converge.converged, "{converge:?}");
|
||||
|
||||
let mut state = read_state_json(dir.path());
|
||||
state["applied_revision"]["resources"]["provider.embedding.default"]
|
||||
.as_object_mut()
|
||||
.unwrap()
|
||||
.remove("embedding_profile");
|
||||
fs::write(
|
||||
dir.path().join(CLUSTER_STATE_FILE),
|
||||
serde_json::to_string_pretty(&state).unwrap(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let plan = plan_config_dir(dir.path()).await;
|
||||
let change = plan
|
||||
.changes
|
||||
.iter()
|
||||
.find(|change| change.resource == "provider.embedding.default")
|
||||
.expect("embedding profile backfill must be visible in plan");
|
||||
assert_eq!(change.operation, PlanOperation::Update);
|
||||
assert_eq!(change.before_digest, change.after_digest);
|
||||
assert_eq!(
|
||||
change.metadata_change,
|
||||
Some(PlanMetadataChange::EmbeddingProfile)
|
||||
);
|
||||
|
||||
let out = apply_config_dir(dir.path()).await;
|
||||
assert!(out.ok && out.converged, "{out:?}");
|
||||
let healed = read_state_json(dir.path());
|
||||
assert_eq!(
|
||||
healed["applied_revision"]["resources"]["provider.embedding.default"]
|
||||
["embedding_profile"]["model"],
|
||||
serde_json::json!("recorded-x")
|
||||
);
|
||||
let snapshot = read_serving_snapshot(dir.path()).await.unwrap();
|
||||
let profile = snapshot.graphs[0].embedding.as_ref().unwrap();
|
||||
assert_eq!(profile.model.as_deref(), Some("recorded-x"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn bindings_survive_refresh() {
|
||||
let dir = fixture();
|
||||
|
|
@ -3189,9 +3300,92 @@ policies:
|
|||
|
||||
let err = read_serving_snapshot(dir.path()).await.unwrap_err();
|
||||
assert!(
|
||||
err.iter().any(|diagnostic| diagnostic.code == "cluster_recovery_pending"),
|
||||
err.iter()
|
||||
.any(|diagnostic| diagnostic.code == "cluster_no_healthy_graphs"),
|
||||
"{err:?}"
|
||||
);
|
||||
assert!(
|
||||
err.iter().any(|diagnostic| {
|
||||
diagnostic.code == "cluster_recovery_pending"
|
||||
&& diagnostic.path == "graph.knowledge"
|
||||
}),
|
||||
"{err:?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn serving_snapshot_quarantines_one_graph_with_pending_recovery() {
|
||||
let dir = fixture();
|
||||
fs::write(
|
||||
dir.path().join(CLUSTER_CONFIG_FILE),
|
||||
r#"
|
||||
version: 1
|
||||
metadata:
|
||||
name: test
|
||||
state:
|
||||
backend: cluster
|
||||
lock: true
|
||||
graphs:
|
||||
knowledge:
|
||||
schema: ./people.pg
|
||||
archive:
|
||||
schema: ./people.pg
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
let graph_dir = dir.path().join(CLUSTER_GRAPHS_DIR);
|
||||
fs::create_dir_all(&graph_dir).unwrap();
|
||||
Omnigraph::init(
|
||||
graph_dir.join("knowledge.omni").to_string_lossy().as_ref(),
|
||||
SCHEMA,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
Omnigraph::init(
|
||||
graph_dir.join("archive.omni").to_string_lossy().as_ref(),
|
||||
SCHEMA,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let desired = validate_config_dir(dir.path());
|
||||
assert!(desired.ok, "{:?}", desired.diagnostics);
|
||||
let schema_digest = desired.resource_digests["schema.knowledge"].clone();
|
||||
let empty_queries = BTreeMap::new();
|
||||
let knowledge_digest = graph_digest(
|
||||
"knowledge",
|
||||
Some(&schema_digest),
|
||||
Some(&empty_queries),
|
||||
None,
|
||||
None,
|
||||
);
|
||||
let archive_digest = graph_digest(
|
||||
"archive",
|
||||
Some(&schema_digest),
|
||||
Some(&empty_queries),
|
||||
None,
|
||||
None,
|
||||
);
|
||||
write_state_resources(
|
||||
dir.path(),
|
||||
&[
|
||||
("graph.knowledge", knowledge_digest.as_str()),
|
||||
("schema.knowledge", schema_digest.as_str()),
|
||||
("graph.archive", archive_digest.as_str()),
|
||||
("schema.archive", schema_digest.as_str()),
|
||||
],
|
||||
);
|
||||
write_schema_apply_sidecar(dir.path(), "knowledge", "whatever", "01SERVE2");
|
||||
|
||||
let snapshot = read_serving_snapshot(dir.path()).await.unwrap();
|
||||
assert_eq!(snapshot.graphs.len(), 1);
|
||||
assert_eq!(snapshot.graphs[0].graph_id, "archive");
|
||||
assert!(snapshot.queries.is_empty());
|
||||
assert!(snapshot.policies.is_empty());
|
||||
assert!(snapshot.diagnostics.iter().any(|diagnostic| {
|
||||
diagnostic.code == "cluster_recovery_pending"
|
||||
&& diagnostic.path == "graph.knowledge"
|
||||
&& diagnostic.severity == DiagnosticSeverity::Warning
|
||||
}));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -3375,6 +3569,96 @@ policies:
|
|||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn read_only_commands_ignore_missing_recovery_sidecar_dir() {
|
||||
let dir = fixture();
|
||||
write_applyable_state(dir.path());
|
||||
assert!(!dir.path().join(CLUSTER_RECOVERIES_DIR).exists());
|
||||
|
||||
let status = status_config_dir(dir.path()).await;
|
||||
assert!(status.ok, "{:?}", status.diagnostics);
|
||||
assert!(
|
||||
!status.diagnostics.iter().any(|diagnostic| matches!(
|
||||
diagnostic.code.as_str(),
|
||||
"recovery_sidecar_read_error" | "cluster_recovery_pending"
|
||||
)),
|
||||
"{:?}",
|
||||
status.diagnostics
|
||||
);
|
||||
|
||||
let plan = plan_config_dir(dir.path()).await;
|
||||
assert!(plan.ok, "{:?}", plan.diagnostics);
|
||||
assert!(
|
||||
!plan.diagnostics.iter().any(|diagnostic| matches!(
|
||||
diagnostic.code.as_str(),
|
||||
"recovery_sidecar_read_error" | "cluster_recovery_pending"
|
||||
)),
|
||||
"{:?}",
|
||||
plan.diagnostics
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn read_only_commands_warn_on_pending_recovery_sidecar_in_storage_root() {
|
||||
let dir = fixture();
|
||||
let storage = tempfile::tempdir().unwrap();
|
||||
let storage_path = storage.path().to_string_lossy().to_string();
|
||||
let mut config = fs::read_to_string(dir.path().join(CLUSTER_CONFIG_FILE)).unwrap();
|
||||
config = config.replace(
|
||||
"version: 1\n",
|
||||
&format!("version: 1\nstorage: {storage_path}\n"),
|
||||
);
|
||||
fs::write(dir.path().join(CLUSTER_CONFIG_FILE), config).unwrap();
|
||||
|
||||
let desired = validate_config_dir(dir.path());
|
||||
assert!(desired.ok, "{:?}", desired.diagnostics);
|
||||
let schema_digest = desired
|
||||
.resource_digests
|
||||
.get("schema.knowledge")
|
||||
.unwrap()
|
||||
.clone();
|
||||
let graph_composite = graph_digest(
|
||||
"knowledge",
|
||||
Some(&schema_digest),
|
||||
Some(&BTreeMap::new()),
|
||||
None,
|
||||
None,
|
||||
);
|
||||
write_state_resources(
|
||||
storage.path(),
|
||||
&[
|
||||
("graph.knowledge", graph_composite.as_str()),
|
||||
("schema.knowledge", schema_digest.as_str()),
|
||||
],
|
||||
);
|
||||
write_create_sidecar(storage.path(), "knowledge", "irrelevant", "01STORAGE");
|
||||
|
||||
let status = status_config_dir(dir.path()).await;
|
||||
assert!(status.ok, "{:?}", status.diagnostics);
|
||||
assert!(
|
||||
status
|
||||
.diagnostics
|
||||
.iter()
|
||||
.any(|diagnostic| diagnostic.code == "cluster_recovery_pending"
|
||||
&& diagnostic.path.contains("01STORAGE.json")),
|
||||
"{:?}",
|
||||
status.diagnostics
|
||||
);
|
||||
|
||||
let plan = plan_config_dir(dir.path()).await;
|
||||
assert!(plan.ok, "{:?}", plan.diagnostics);
|
||||
assert!(
|
||||
plan.diagnostics
|
||||
.iter()
|
||||
.any(|diagnostic| diagnostic.code == "cluster_recovery_pending"
|
||||
&& diagnostic.path.contains("01STORAGE.json")),
|
||||
"{:?}",
|
||||
plan.diagnostics
|
||||
);
|
||||
|
||||
assert!(!dir.path().join(CLUSTER_RECOVERIES_DIR).exists());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn plan_annotates_apply_dispositions() {
|
||||
let dir = fixture();
|
||||
|
|
|
|||
|
|
@ -176,6 +176,10 @@ pub struct PlanChange {
|
|||
/// pre-5A backfill case).
|
||||
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
|
||||
pub binding_change: bool,
|
||||
/// Metadata-only updates whose resource content digest is unchanged but
|
||||
/// whose applied ledger metadata needs to converge.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub metadata_change: Option<PlanMetadataChange>,
|
||||
/// For schema updates: the engine's migration plan against the live
|
||||
/// graph (RFC-004 §D7's data-aware preview). Absent when the preview is
|
||||
/// unavailable (warning `schema_preview_unavailable`).
|
||||
|
|
@ -183,6 +187,13 @@ pub struct PlanChange {
|
|||
pub migration: Option<SchemaMigrationPlan>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum PlanMetadataChange {
|
||||
PolicyBindings,
|
||||
EmbeddingProfile,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
|
||||
pub struct BlastRadius {
|
||||
pub resource: String,
|
||||
|
|
|
|||
|
|
@ -13,8 +13,9 @@ use std::fs;
|
|||
use std::path::{Path, PathBuf};
|
||||
|
||||
use fail::FailScenario;
|
||||
use omnigraph_cluster::failpoints::ScopedFailPoint;
|
||||
use omnigraph::db::Omnigraph;
|
||||
use omnigraph::failpoints::ScopedFailPoint as EngineScopedFailPoint;
|
||||
use omnigraph_cluster::failpoints::ScopedFailPoint;
|
||||
use omnigraph_cluster::{
|
||||
ApplyOptions, apply_config_dir, apply_config_dir_with_options, approve_config_dir,
|
||||
validate_config_dir,
|
||||
|
|
@ -178,13 +179,12 @@ async fn apply_cas_race_surfaces_state_cas_mismatch() {
|
|||
// after apply read it but before apply writes. RAII-guarded so a panic
|
||||
// inside apply cannot leak the callback into the global registry.
|
||||
let race_path = state_path(dir.path());
|
||||
let failpoint =
|
||||
ScopedFailPoint::with_callback("cluster_apply.before_state_write", move || {
|
||||
let mut state: serde_json::Value =
|
||||
serde_json::from_str(&fs::read_to_string(&race_path).unwrap()).unwrap();
|
||||
state["state_revision"] = serde_json::json!(99);
|
||||
fs::write(&race_path, serde_json::to_string_pretty(&state).unwrap()).unwrap();
|
||||
});
|
||||
let failpoint = ScopedFailPoint::with_callback("cluster_apply.before_state_write", move || {
|
||||
let mut state: serde_json::Value =
|
||||
serde_json::from_str(&fs::read_to_string(&race_path).unwrap()).unwrap();
|
||||
state["state_revision"] = serde_json::json!(99);
|
||||
fs::write(&race_path, serde_json::to_string_pretty(&state).unwrap()).unwrap();
|
||||
});
|
||||
|
||||
let out = apply_config_dir(dir.path()).await;
|
||||
drop(failpoint);
|
||||
|
|
@ -336,10 +336,9 @@ async fn create_crash_after_init_rolls_state_forward() {
|
|||
);
|
||||
assert!(recovered.converged);
|
||||
assert!(recovery_sidecars(dir.path()).is_empty());
|
||||
let state: serde_json::Value = serde_json::from_str(
|
||||
&fs::read_to_string(dir.path().join("__cluster/state.json")).unwrap(),
|
||||
)
|
||||
.unwrap();
|
||||
let state: serde_json::Value =
|
||||
serde_json::from_str(&fs::read_to_string(dir.path().join("__cluster/state.json")).unwrap())
|
||||
.unwrap();
|
||||
assert!(
|
||||
state["recovery_records"]
|
||||
.as_object()
|
||||
|
|
@ -422,6 +421,105 @@ async fn schema_crash_before_apply_recovers_via_sweep() {
|
|||
scenario.teardown();
|
||||
}
|
||||
|
||||
/// Engine apply fails after cluster preview and sidecar creation, but before
|
||||
/// the graph manifest moves. The defensive cleanup proof should remove the
|
||||
/// cluster sidecar immediately so a pre-movement error cannot brick boot.
|
||||
#[tokio::test]
|
||||
async fn schema_apply_error_before_graph_movement_removes_sidecar() {
|
||||
let scenario = FailScenario::setup();
|
||||
let dir = fixture();
|
||||
converge_with_live_graph(dir.path()).await;
|
||||
let pre_digest = live_schema_digest(dir.path()).await;
|
||||
fs::write(dir.path().join("people.pg"), SCHEMA_V2).unwrap();
|
||||
|
||||
{
|
||||
let _failpoint = EngineScopedFailPoint::new("schema_apply.before_staging_write", "return");
|
||||
let out = apply_config_dir(dir.path()).await;
|
||||
assert!(!out.ok);
|
||||
assert!(
|
||||
out.diagnostics
|
||||
.iter()
|
||||
.any(|diagnostic| diagnostic.code == "schema_apply_failed"),
|
||||
"{:?}",
|
||||
out.diagnostics
|
||||
);
|
||||
assert_eq!(live_schema_digest(dir.path()).await, pre_digest);
|
||||
assert!(
|
||||
recovery_sidecars(dir.path()).is_empty(),
|
||||
"{:?}",
|
||||
recovery_sidecars(dir.path())
|
||||
);
|
||||
}
|
||||
|
||||
let recovered = apply_config_dir(dir.path()).await;
|
||||
assert!(recovered.ok && recovered.converged, "{recovered:?}");
|
||||
assert!(recovery_sidecars(dir.path()).is_empty());
|
||||
assert_ne!(live_schema_digest(dir.path()).await, pre_digest);
|
||||
scenario.teardown();
|
||||
}
|
||||
|
||||
/// Engine apply fails after the graph manifest moved. The cluster cannot
|
||||
/// prove this is a pre-movement failure, so the sidecar must survive for
|
||||
/// explicit recovery/quarantine instead of being cleaned up defensively.
|
||||
#[tokio::test]
|
||||
async fn schema_apply_error_after_graph_movement_keeps_sidecar() {
|
||||
let scenario = FailScenario::setup();
|
||||
let dir = fixture();
|
||||
converge_with_live_graph(dir.path()).await;
|
||||
let pre_digest = live_schema_digest(dir.path()).await;
|
||||
fs::write(dir.path().join("people.pg"), SCHEMA_V2).unwrap();
|
||||
let desired = validate_config_dir(dir.path());
|
||||
let v2_digest = desired.resource_digests["schema.knowledge"].clone();
|
||||
|
||||
{
|
||||
let _failpoint = EngineScopedFailPoint::new("schema_apply.after_manifest_commit", "return");
|
||||
let out = apply_config_dir(dir.path()).await;
|
||||
assert!(!out.ok);
|
||||
assert!(
|
||||
out.diagnostics
|
||||
.iter()
|
||||
.any(|diagnostic| diagnostic.code == "schema_apply_failed"),
|
||||
"{:?}",
|
||||
out.diagnostics
|
||||
);
|
||||
// Read-only opens do not run engine schema-state recovery, so the
|
||||
// schema file still reads as the old digest even though the manifest
|
||||
// has moved. The cluster sidecar must remain because movement was
|
||||
// detected by the fallback manifest-version proof.
|
||||
assert_eq!(live_schema_digest(dir.path()).await, pre_digest);
|
||||
let sidecars = recovery_sidecars(dir.path());
|
||||
assert_eq!(sidecars.len(), 1, "{sidecars:?}");
|
||||
let sidecar: serde_json::Value =
|
||||
serde_json::from_str(&fs::read_to_string(&sidecars[0]).unwrap()).unwrap();
|
||||
assert_eq!(sidecar["kind"], "schema_apply");
|
||||
assert!(sidecar["expected_manifest_version"].is_null(), "{sidecar}");
|
||||
}
|
||||
|
||||
let uri = dir.path().join("graphs/knowledge.omni");
|
||||
let db = Omnigraph::open(uri.to_string_lossy().as_ref())
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
db.schema_source().as_str(),
|
||||
SCHEMA_V2,
|
||||
"read-write open should complete engine schema-state recovery"
|
||||
);
|
||||
drop(db);
|
||||
assert_eq!(live_schema_digest(dir.path()).await, v2_digest);
|
||||
|
||||
let recovered = apply_config_dir(dir.path()).await;
|
||||
assert!(recovered.ok, "{:?}", recovered.diagnostics);
|
||||
assert!(
|
||||
recovered
|
||||
.diagnostics
|
||||
.iter()
|
||||
.any(|diagnostic| diagnostic.code == "cluster_recovery_rolled_forward")
|
||||
);
|
||||
assert!(recovered.converged);
|
||||
assert!(recovery_sidecars(dir.path()).is_empty());
|
||||
scenario.teardown();
|
||||
}
|
||||
|
||||
/// Crash after the engine schema apply, before the state CAS: the manifest
|
||||
/// moved, the ledger is stale, nothing acknowledged; the next run's sweep
|
||||
/// rolls the ledger forward with an audit entry and the run converges.
|
||||
|
|
@ -447,7 +545,10 @@ async fn schema_crash_after_apply_rolls_state_forward() {
|
|||
assert_eq!(sidecars.len(), 1);
|
||||
let sidecar: serde_json::Value =
|
||||
serde_json::from_str(&fs::read_to_string(&sidecars[0]).unwrap()).unwrap();
|
||||
assert!(sidecar["expected_manifest_version"].is_number(), "{sidecar}");
|
||||
assert!(
|
||||
sidecar["expected_manifest_version"].is_number(),
|
||||
"{sidecar}"
|
||||
);
|
||||
}
|
||||
|
||||
let recovered = apply_config_dir(dir.path()).await;
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ use std::sync::Arc;
|
|||
|
||||
use arrow_schema::{DataType, Field, Schema, SchemaRef};
|
||||
|
||||
use crate::error::{NanoError, Result};
|
||||
use crate::error::{CompilerError, Result};
|
||||
use crate::schema::ast::{Cardinality, Constraint, ConstraintBound, SchemaDecl, SchemaFile};
|
||||
use crate::types::{PropType, ScalarType};
|
||||
|
||||
|
|
@ -151,7 +151,7 @@ pub fn build_catalog(schema: &SchemaFile) -> Result<Catalog> {
|
|||
for decl in &schema.declarations {
|
||||
if let SchemaDecl::Node(node) = decl {
|
||||
if node_types.contains_key(&node.name) {
|
||||
return Err(NanoError::Catalog(format!(
|
||||
return Err(CompilerError::Catalog(format!(
|
||||
"duplicate node type: {}",
|
||||
node.name
|
||||
)));
|
||||
|
|
@ -250,19 +250,19 @@ pub fn build_catalog(schema: &SchemaFile) -> Result<Catalog> {
|
|||
for decl in &schema.declarations {
|
||||
if let SchemaDecl::Edge(edge) = decl {
|
||||
if edge_types.contains_key(&edge.name) {
|
||||
return Err(NanoError::Catalog(format!(
|
||||
return Err(CompilerError::Catalog(format!(
|
||||
"duplicate edge type: {}",
|
||||
edge.name
|
||||
)));
|
||||
}
|
||||
if !node_types.contains_key(&edge.from_type) {
|
||||
return Err(NanoError::Catalog(format!(
|
||||
return Err(CompilerError::Catalog(format!(
|
||||
"edge {} references unknown source type: {}",
|
||||
edge.name, edge.from_type
|
||||
)));
|
||||
}
|
||||
if !node_types.contains_key(&edge.to_type) {
|
||||
return Err(NanoError::Catalog(format!(
|
||||
return Err(CompilerError::Catalog(format!(
|
||||
"edge {} references unknown target type: {}",
|
||||
edge.name, edge.to_type
|
||||
)));
|
||||
|
|
@ -302,7 +302,7 @@ pub fn build_catalog(schema: &SchemaFile) -> Result<Catalog> {
|
|||
if let Some(existing) = edge_name_index.get(&normalized_name)
|
||||
&& existing != &edge.name
|
||||
{
|
||||
return Err(NanoError::Catalog(format!(
|
||||
return Err(CompilerError::Catalog(format!(
|
||||
"edge name collision after case folding: '{}' conflicts with '{}'",
|
||||
edge.name, existing
|
||||
)));
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ use serde::{Deserialize, Serialize};
|
|||
use sha2::{Digest, Sha256};
|
||||
|
||||
use crate::catalog::{Catalog, build_catalog};
|
||||
use crate::error::{NanoError, Result};
|
||||
use crate::error::{CompilerError, Result};
|
||||
use crate::schema::ast::{Annotation, Cardinality, Constraint, PropDecl, SchemaDecl, SchemaFile};
|
||||
use crate::types::PropType;
|
||||
|
||||
|
|
@ -119,7 +119,7 @@ pub fn build_schema_ir(schema: &SchemaFile) -> Result<SchemaIR> {
|
|||
|
||||
pub fn build_catalog_from_ir(ir: &SchemaIR) -> Result<Catalog> {
|
||||
if ir.ir_version != SCHEMA_IR_VERSION {
|
||||
return Err(NanoError::Catalog(format!(
|
||||
return Err(CompilerError::Catalog(format!(
|
||||
"unsupported schema ir_version {} (expected {})",
|
||||
ir.ir_version, SCHEMA_IR_VERSION
|
||||
)));
|
||||
|
|
@ -167,12 +167,12 @@ pub fn build_catalog_from_ir(ir: &SchemaIR) -> Result<Catalog> {
|
|||
|
||||
pub fn schema_ir_json(ir: &SchemaIR) -> Result<String> {
|
||||
serde_json::to_string(ir)
|
||||
.map_err(|err| NanoError::Catalog(format!("serialize schema ir error: {}", err)))
|
||||
.map_err(|err| CompilerError::Catalog(format!("serialize schema ir error: {}", err)))
|
||||
}
|
||||
|
||||
pub fn schema_ir_pretty_json(ir: &SchemaIR) -> Result<String> {
|
||||
serde_json::to_string_pretty(ir)
|
||||
.map_err(|err| NanoError::Catalog(format!("serialize schema ir error: {}", err)))
|
||||
.map_err(|err| CompilerError::Catalog(format!("serialize schema ir error: {}", err)))
|
||||
}
|
||||
|
||||
pub fn schema_ir_hash(ir: &SchemaIR) -> Result<String> {
|
||||
|
|
@ -228,7 +228,7 @@ fn canonical_properties(
|
|||
.map(|property| {
|
||||
let prop_id = stable_prop_id(&owner_key, &property.name);
|
||||
if let Some(previous) = seen_prop_ids.insert(prop_id, property.name.clone()) {
|
||||
return Err(NanoError::Catalog(format!(
|
||||
return Err(CompilerError::Catalog(format!(
|
||||
"property id collision on {}: '{}' and '{}' both hash to {}",
|
||||
owner_name, previous, property.name, prop_id
|
||||
)));
|
||||
|
|
@ -308,7 +308,7 @@ fn check_type_id_collision(
|
|||
name: &str,
|
||||
) -> Result<()> {
|
||||
if let Some(previous) = seen_type_ids.insert(type_id, name.to_string()) {
|
||||
return Err(NanoError::Catalog(format!(
|
||||
return Err(CompilerError::Catalog(format!(
|
||||
"type id collision: '{}' and '{}' both hash to {}",
|
||||
previous, name, type_id
|
||||
)));
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ pub fn decode_string_literal(raw: &str) -> Result<String> {
|
|||
|
||||
let escaped = chars
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("unterminated escape sequence".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("unterminated escape sequence".to_string()))?;
|
||||
match escaped {
|
||||
'"' => decoded.push('"'),
|
||||
'\\' => decoded.push('\\'),
|
||||
|
|
@ -63,7 +63,7 @@ pub fn decode_string_literal(raw: &str) -> Result<String> {
|
|||
'r' => decoded.push('\r'),
|
||||
't' => decoded.push('\t'),
|
||||
other => {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"unsupported escape sequence: \\{}",
|
||||
other
|
||||
)));
|
||||
|
|
@ -75,7 +75,7 @@ pub fn decode_string_literal(raw: &str) -> Result<String> {
|
|||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum NanoError {
|
||||
pub enum CompilerError {
|
||||
#[error("parse error: {0}")]
|
||||
Parse(String),
|
||||
|
||||
|
|
@ -118,11 +118,16 @@ pub enum NanoError {
|
|||
Manifest(String),
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, NanoError>;
|
||||
#[deprecated(note = "use CompilerError")]
|
||||
pub type NanoError = CompilerError;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, CompilerError>;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{SourceSpan, decode_string_literal, render_span};
|
||||
use std::path::Path;
|
||||
|
||||
use super::{CompilerError, SourceSpan, decode_string_literal, render_span};
|
||||
|
||||
#[test]
|
||||
fn source_span_preserves_zero_width() {
|
||||
|
|
@ -143,4 +148,77 @@ mod tests {
|
|||
let decoded = decode_string_literal("\"a\\n\\r\\t\\\\\\\"b\"").unwrap();
|
||||
assert_eq!(decoded, "a\n\r\t\\\"b");
|
||||
}
|
||||
|
||||
/// Pins the rendered text of the `Parse` variant.
#[test]
fn compiler_error_parse_display_is_stable() {
    let rendered = CompilerError::Parse("bad token".to_string()).to_string();
    assert_eq!(rendered, "parse error: bad token");
}
|
||||
|
||||
/// The deprecated legacy alias must still construct variants and render the
/// same text as the renamed type. `deprecated` is allowed here because using
/// the alias is the point of the test.
#[allow(deprecated)]
#[test]
fn legacy_nano_error_alias_constructs_variants() {
    let rendered = super::NanoError::Parse("bad token".to_string()).to_string();
    assert_eq!(rendered, "parse error: bad token");
}
|
||||
|
||||
/// Scans every `.rs` file in the workspace for the legacy error type name.
///
/// The compiler crate's `error.rs` must contain it exactly twice; every other
/// file must not contain it at all. Each violation is collected and reported
/// together in a single assertion.
#[test]
fn legacy_name_is_confined_to_alias_and_compatibility_test() {
    // Built from two halves so this test does not itself add an occurrence
    // of the name it is counting.
    let legacy_name = ["Nano", "Error"].concat();
    // Two levels above the crate manifest directory.
    let workspace_root = Path::new(env!("CARGO_MANIFEST_DIR"))
        .ancestors()
        .nth(2)
        .expect("compiler crate should live under crates/");
    let allowed_file = workspace_root.join("crates/omnigraph-compiler/src/error.rs");
    let mut offenders = Vec::new();

    visit_rs_files(workspace_root, &mut |path| {
        let text = std::fs::read_to_string(path).expect("source file should be readable");
        let count = text.matches(&legacy_name).count();
        let complaint = if path == allowed_file {
            (count != 2).then(|| {
                format!(
                    "{} contains {count} legacy-name occurrences; expected exactly 2",
                    display_path(workspace_root, path)
                )
            })
        } else {
            (count > 0).then(|| {
                format!(
                    "{} contains {count} legacy-name occurrence(s)",
                    display_path(workspace_root, path)
                )
            })
        };
        if let Some(line) = complaint {
            offenders.push(line);
        }
    });

    assert!(
        offenders.is_empty(),
        "legacy compiler error name should stay compatibility-only:\n{}",
        offenders.join("\n")
    );
}
|
||||
|
||||
/// Recursively walks `dir` and calls `visit` on every file whose extension is
/// `rs`. Directories named `.git` or `target` are not descended into.
///
/// Panics if a directory or one of its entries cannot be read.
fn visit_rs_files(dir: &Path, visit: &mut impl FnMut(&Path)) {
    let entries = std::fs::read_dir(dir).expect("source directory should be readable");
    for entry in entries {
        let path = entry.expect("source entry should be readable").path();

        if !path.is_dir() {
            if path.extension().and_then(|ext| ext.to_str()) == Some("rs") {
                visit(&path);
            }
            continue;
        }

        let skipped = matches!(
            path.file_name().and_then(|name| name.to_str()),
            Some(".git" | "target")
        );
        if !skipped {
            visit_rs_files(&path, visit);
        }
    }
}
|
||||
|
||||
/// Renders `path` relative to `root` for messages; when `path` is not under
/// `root` it is rendered unchanged. Non-UTF-8 parts are converted lossily.
fn display_path(root: &Path, path: &Path) -> String {
    let shown = match path.strip_prefix(root) {
        Ok(relative) => relative,
        Err(_) => path,
    };
    shown.to_string_lossy().into_owned()
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ pub fn lower_query(
|
|||
type_ctx: &TypeContext,
|
||||
) -> Result<QueryIR> {
|
||||
if !query.mutations.is_empty() {
|
||||
return Err(crate::error::NanoError::Plan(
|
||||
return Err(crate::error::CompilerError::Plan(
|
||||
"cannot lower mutation query with read-query lowerer".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -62,7 +62,7 @@ pub fn lower_query(
|
|||
|
||||
pub fn lower_mutation_query(query: &QueryDecl) -> Result<MutationIR> {
|
||||
if query.mutations.is_empty() {
|
||||
return Err(crate::error::NanoError::Plan(
|
||||
return Err(crate::error::CompilerError::Plan(
|
||||
"query does not contain a mutation body".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -261,7 +261,7 @@ fn lower_clauses(
|
|||
let edge = catalog
|
||||
.lookup_edge_by_name(&traversal.edge_name)
|
||||
.ok_or_else(|| {
|
||||
crate::error::NanoError::Plan(format!(
|
||||
crate::error::CompilerError::Plan(format!(
|
||||
"lowering traversal referenced missing edge '{}' after typecheck",
|
||||
traversal.edge_name
|
||||
))
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ use pest::error::InputLocation;
|
|||
use pest_derive::Parser;
|
||||
|
||||
use crate::error::{
|
||||
NanoError, ParseDiagnostic, Result, SourceSpan, decode_string_literal, render_span,
|
||||
CompilerError, ParseDiagnostic, Result, SourceSpan, decode_string_literal, render_span,
|
||||
};
|
||||
|
||||
use super::ast::*;
|
||||
|
|
@ -13,7 +13,7 @@ use super::ast::*;
|
|||
struct QueryParser;
|
||||
|
||||
pub fn parse_query(input: &str) -> Result<QueryFile> {
|
||||
parse_query_diagnostic(input).map_err(|e| NanoError::Parse(e.to_string()))
|
||||
parse_query_diagnostic(input).map_err(|e| CompilerError::Parse(e.to_string()))
|
||||
}
|
||||
|
||||
pub fn parse_query_diagnostic(input: &str) -> std::result::Result<QueryFile, ParseDiagnostic> {
|
||||
|
|
@ -24,7 +24,7 @@ pub fn parse_query_diagnostic(input: &str) -> std::result::Result<QueryFile, Par
|
|||
if let Rule::query_file = pair.as_rule() {
|
||||
for inner in pair.into_inner() {
|
||||
if let Rule::query_decl = inner.as_rule() {
|
||||
queries.push(parse_query_decl(inner).map_err(nano_error_to_diagnostic)?);
|
||||
queries.push(parse_query_decl(inner).map_err(compiler_error_to_diagnostic)?);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -40,7 +40,7 @@ fn pest_error_to_diagnostic(err: pest::error::Error<Rule>) -> ParseDiagnostic {
|
|||
ParseDiagnostic::new(err.to_string(), span)
|
||||
}
|
||||
|
||||
fn nano_error_to_diagnostic(err: NanoError) -> ParseDiagnostic {
|
||||
fn compiler_error_to_diagnostic(err: CompilerError) -> ParseDiagnostic {
|
||||
ParseDiagnostic::new(err.to_string(), None)
|
||||
}
|
||||
|
||||
|
|
@ -71,7 +71,7 @@ fn parse_query_decl(pair: pest::iterators::Pair<Rule>) -> Result<QueryDecl> {
|
|||
Rule::query_annotation => match parse_query_annotation(item)? {
|
||||
ParsedAnnotation::Description(value) => {
|
||||
if description.replace(value).is_some() {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"query `{}` cannot include duplicate @description annotations",
|
||||
name
|
||||
)));
|
||||
|
|
@ -79,7 +79,7 @@ fn parse_query_decl(pair: pest::iterators::Pair<Rule>) -> Result<QueryDecl> {
|
|||
}
|
||||
ParsedAnnotation::Instruction(value) => {
|
||||
if instruction.replace(value).is_some() {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"query `{}` cannot include duplicate @instruction annotations",
|
||||
name
|
||||
)));
|
||||
|
|
@ -87,7 +87,7 @@ fn parse_query_decl(pair: pest::iterators::Pair<Rule>) -> Result<QueryDecl> {
|
|||
}
|
||||
ParsedAnnotation::Mcp(value) => {
|
||||
if mcp_seen {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"query `{}` cannot include duplicate @mcp annotations",
|
||||
name
|
||||
)));
|
||||
|
|
@ -100,7 +100,7 @@ fn parse_query_decl(pair: pest::iterators::Pair<Rule>) -> Result<QueryDecl> {
|
|||
let body = item
|
||||
.into_inner()
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("query body cannot be empty".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("query body cannot be empty".to_string()))?;
|
||||
match body.as_rule() {
|
||||
Rule::read_query_body => {
|
||||
for section in body.into_inner() {
|
||||
|
|
@ -130,7 +130,7 @@ fn parse_query_decl(pair: pest::iterators::Pair<Rule>) -> Result<QueryDecl> {
|
|||
let int_pair = section.into_inner().next().unwrap();
|
||||
limit =
|
||||
Some(int_pair.as_str().parse::<u64>().map_err(|e| {
|
||||
NanoError::Parse(format!("invalid limit: {}", e))
|
||||
CompilerError::Parse(format!("invalid limit: {}", e))
|
||||
})?);
|
||||
}
|
||||
_ => {}
|
||||
|
|
@ -141,7 +141,7 @@ fn parse_query_decl(pair: pest::iterators::Pair<Rule>) -> Result<QueryDecl> {
|
|||
for mutation_pair in body.into_inner() {
|
||||
if let Rule::mutation_stmt = mutation_pair.as_rule() {
|
||||
let stmt = mutation_pair.into_inner().next().ok_or_else(|| {
|
||||
NanoError::Parse(
|
||||
CompilerError::Parse(
|
||||
"mutation statement cannot be empty".to_string(),
|
||||
)
|
||||
})?;
|
||||
|
|
@ -181,7 +181,7 @@ enum ParsedAnnotation {
|
|||
fn annotation_string(pair: pest::iterators::Pair<Rule>, what: &str) -> Result<String> {
|
||||
pair.into_inner()
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse(format!("{what} requires a string literal")))
|
||||
.ok_or_else(|| CompilerError::Parse(format!("{what} requires a string literal")))
|
||||
.map(|value| parse_string_lit(value.as_str()))?
|
||||
}
|
||||
|
||||
|
|
@ -189,7 +189,7 @@ fn parse_query_annotation(pair: pest::iterators::Pair<Rule>) -> Result<ParsedAnn
|
|||
let inner = pair
|
||||
.into_inner()
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("query annotation cannot be empty".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("query annotation cannot be empty".to_string()))?;
|
||||
match inner.as_rule() {
|
||||
Rule::description_annotation => Ok(ParsedAnnotation::Description(annotation_string(
|
||||
inner,
|
||||
|
|
@ -200,7 +200,7 @@ fn parse_query_annotation(pair: pest::iterators::Pair<Rule>) -> Result<ParsedAnn
|
|||
"@instruction",
|
||||
)?)),
|
||||
Rule::mcp_annotation => Ok(ParsedAnnotation::Mcp(parse_mcp_annotation(inner)?)),
|
||||
other => Err(NanoError::Parse(format!(
|
||||
other => Err(CompilerError::Parse(format!(
|
||||
"unexpected query annotation rule: {:?}",
|
||||
other
|
||||
))),
|
||||
|
|
@ -215,19 +215,19 @@ fn parse_mcp_annotation(pair: pest::iterators::Pair<Rule>) -> Result<McpQueryMet
|
|||
let kv = arg
|
||||
.into_inner()
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("@mcp argument cannot be empty".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("@mcp argument cannot be empty".to_string()))?;
|
||||
match kv.as_rule() {
|
||||
Rule::mcp_expose_arg => {
|
||||
let value = kv
|
||||
.into_inner()
|
||||
.next()
|
||||
.ok_or_else(|| {
|
||||
NanoError::Parse("@mcp expose requires a boolean".to_string())
|
||||
CompilerError::Parse("@mcp expose requires a boolean".to_string())
|
||||
})?
|
||||
.as_str()
|
||||
== "true";
|
||||
if meta.expose.replace(value).is_some() {
|
||||
return Err(NanoError::Parse(
|
||||
return Err(CompilerError::Parse(
|
||||
"@mcp cannot include duplicate `expose` arguments".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -237,17 +237,17 @@ fn parse_mcp_annotation(pair: pest::iterators::Pair<Rule>) -> Result<McpQueryMet
|
|||
.into_inner()
|
||||
.next()
|
||||
.ok_or_else(|| {
|
||||
NanoError::Parse("@mcp tool_name requires a string literal".to_string())
|
||||
CompilerError::Parse("@mcp tool_name requires a string literal".to_string())
|
||||
})
|
||||
.map(|value| parse_string_lit(value.as_str()))??;
|
||||
if meta.tool_name.replace(value).is_some() {
|
||||
return Err(NanoError::Parse(
|
||||
return Err(CompilerError::Parse(
|
||||
"@mcp cannot include duplicate `tool_name` arguments".to_string(),
|
||||
));
|
||||
}
|
||||
}
|
||||
other => {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"unexpected @mcp argument rule: {:?}",
|
||||
other
|
||||
)));
|
||||
|
|
@ -261,14 +261,14 @@ fn parse_param(pair: pest::iterators::Pair<Rule>) -> Result<Param> {
|
|||
let mut inner = pair.into_inner();
|
||||
let mut next = inner
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("parameter is missing a variable".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("parameter is missing a variable".to_string()))?;
|
||||
// Optional leading `@description("…")` documents the parameter.
|
||||
let mut description = None;
|
||||
if next.as_rule() == Rule::description_annotation {
|
||||
description = Some(annotation_string(next, "@description")?);
|
||||
next = inner
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("parameter is missing a variable".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("parameter is missing a variable".to_string()))?;
|
||||
}
|
||||
let var = next.as_str();
|
||||
let name = var.strip_prefix('$').unwrap_or(var).to_string();
|
||||
|
|
@ -277,25 +277,25 @@ fn parse_param(pair: pest::iterators::Pair<Rule>) -> Result<Param> {
|
|||
let mut type_inner = type_ref.into_inner();
|
||||
let core = type_inner
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("parameter type is missing".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("parameter type is missing".to_string()))?;
|
||||
let base = match core.as_rule() {
|
||||
Rule::base_type => core.as_str().to_string(),
|
||||
Rule::list_type => {
|
||||
let inner = core
|
||||
.into_inner()
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("list type missing item type".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("list type missing item type".to_string()))?;
|
||||
format!("[{}]", inner.as_str().trim())
|
||||
}
|
||||
Rule::vector_type => {
|
||||
let vector = core
|
||||
.into_inner()
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("Vector type missing dimension".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("Vector type missing dimension".to_string()))?;
|
||||
format!("Vector({})", vector.as_str().trim())
|
||||
}
|
||||
other => {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"unexpected param type rule: {:?}",
|
||||
other
|
||||
)));
|
||||
|
|
@ -326,7 +326,7 @@ fn parse_clause(pair: pest::iterators::Pair<Rule>) -> Result<Clause> {
|
|||
}
|
||||
Ok(Clause::Negation(clauses))
|
||||
}
|
||||
_ => Err(NanoError::Parse(format!(
|
||||
_ => Err(CompilerError::Parse(format!(
|
||||
"unexpected clause rule: {:?}",
|
||||
inner.as_rule()
|
||||
))),
|
||||
|
|
@ -337,13 +337,13 @@ fn parse_text_search_clause(pair: pest::iterators::Pair<Rule>) -> Result<Clause>
|
|||
let inner = pair
|
||||
.into_inner()
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("text search clause cannot be empty".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("text search clause cannot be empty".to_string()))?;
|
||||
let expr = match inner.as_rule() {
|
||||
Rule::search_call => parse_search_call(inner)?,
|
||||
Rule::fuzzy_call => parse_fuzzy_call(inner)?,
|
||||
Rule::match_text_call => parse_match_text_call(inner)?,
|
||||
other => {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"unexpected text search clause rule: {:?}",
|
||||
other
|
||||
)));
|
||||
|
|
@ -395,7 +395,7 @@ fn parse_mutation_stmt(pair: pest::iterators::Pair<Rule>) -> Result<Mutation> {
|
|||
Rule::insert_stmt => parse_insert_mutation(pair).map(Mutation::Insert),
|
||||
Rule::update_stmt => parse_update_mutation(pair).map(Mutation::Update),
|
||||
Rule::delete_stmt => parse_delete_mutation(pair).map(Mutation::Delete),
|
||||
other => Err(NanoError::Parse(format!(
|
||||
other => Err(CompilerError::Parse(format!(
|
||||
"unexpected mutation statement rule: {:?}",
|
||||
other
|
||||
))),
|
||||
|
|
@ -433,7 +433,7 @@ fn parse_update_mutation(pair: pest::iterators::Pair<Rule>) -> Result<UpdateMuta
|
|||
}
|
||||
|
||||
let predicate = predicate.ok_or_else(|| {
|
||||
NanoError::Parse("update mutation requires a where predicate".to_string())
|
||||
CompilerError::Parse("update mutation requires a where predicate".to_string())
|
||||
})?;
|
||||
|
||||
Ok(UpdateMutation {
|
||||
|
|
@ -448,7 +448,7 @@ fn parse_delete_mutation(pair: pest::iterators::Pair<Rule>) -> Result<DeleteMuta
|
|||
let type_name = inner.next().unwrap().as_str().to_string();
|
||||
let predicate = inner
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("delete mutation requires a where predicate".to_string()))
|
||||
.ok_or_else(|| CompilerError::Parse("delete mutation requires a where predicate".to_string()))
|
||||
.and_then(parse_mutation_predicate)?;
|
||||
Ok(DeleteMutation {
|
||||
type_name,
|
||||
|
|
@ -486,7 +486,7 @@ fn parse_match_value(pair: pest::iterators::Pair<Rule>) -> Result<MatchValue> {
|
|||
}
|
||||
Rule::now_call => Ok(MatchValue::Now),
|
||||
Rule::literal => Ok(MatchValue::Literal(parse_literal(value_inner)?)),
|
||||
_ => Err(NanoError::Parse(format!(
|
||||
_ => Err(CompilerError::Parse(format!(
|
||||
"unexpected match value: {:?}",
|
||||
value_inner.as_rule()
|
||||
))),
|
||||
|
|
@ -508,7 +508,7 @@ fn parse_traversal(pair: pest::iterators::Pair<Rule>) -> Result<Traversal> {
|
|||
max_hops = max;
|
||||
inner
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("traversal missing destination variable".to_string()))?
|
||||
.ok_or_else(|| CompilerError::Parse("traversal missing destination variable".to_string()))?
|
||||
} else {
|
||||
next
|
||||
};
|
||||
|
|
@ -529,16 +529,16 @@ fn parse_traversal_bounds(pair: pest::iterators::Pair<Rule>) -> Result<(u32, Opt
|
|||
let mut inner = pair.into_inner();
|
||||
let min = inner
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("traversal bound missing min hop".to_string()))?
|
||||
.ok_or_else(|| CompilerError::Parse("traversal bound missing min hop".to_string()))?
|
||||
.as_str()
|
||||
.parse::<u32>()
|
||||
.map_err(|e| NanoError::Parse(format!("invalid traversal min bound: {}", e)))?;
|
||||
.map_err(|e| CompilerError::Parse(format!("invalid traversal min bound: {}", e)))?;
|
||||
let max = inner
|
||||
.next()
|
||||
.map(|p| {
|
||||
p.as_str()
|
||||
.parse::<u32>()
|
||||
.map_err(|e| NanoError::Parse(format!("invalid traversal max bound: {}", e)))
|
||||
.map_err(|e| CompilerError::Parse(format!("invalid traversal max bound: {}", e)))
|
||||
})
|
||||
.transpose()?;
|
||||
Ok((min, max))
|
||||
|
|
@ -577,7 +577,7 @@ fn parse_expr(pair: pest::iterators::Pair<Rule>) -> Result<Expr> {
|
|||
"avg" => AggFunc::Avg,
|
||||
"min" => AggFunc::Min,
|
||||
"max" => AggFunc::Max,
|
||||
other => return Err(NanoError::Parse(format!("unknown aggregate: {}", other))),
|
||||
other => return Err(CompilerError::Parse(format!("unknown aggregate: {}", other))),
|
||||
};
|
||||
let arg = parse_expr(parts.next().unwrap())?;
|
||||
Ok(Expr::Aggregate {
|
||||
|
|
@ -592,7 +592,7 @@ fn parse_expr(pair: pest::iterators::Pair<Rule>) -> Result<Expr> {
|
|||
Rule::bm25_call => parse_bm25_call(inner),
|
||||
Rule::rrf_call => parse_rrf_call(inner),
|
||||
Rule::ident => Ok(Expr::AliasRef(inner.as_str().to_string())),
|
||||
_ => Err(NanoError::Parse(format!(
|
||||
_ => Err(CompilerError::Parse(format!(
|
||||
"unexpected expr rule: {:?}",
|
||||
inner.as_rule()
|
||||
))),
|
||||
|
|
@ -603,12 +603,12 @@ fn parse_search_call(pair: pest::iterators::Pair<Rule>) -> Result<Expr> {
|
|||
let mut args = pair.into_inner();
|
||||
let field = args
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("search() missing field argument".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("search() missing field argument".to_string()))?;
|
||||
let query = args
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("search() missing query argument".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("search() missing query argument".to_string()))?;
|
||||
if args.next().is_some() {
|
||||
return Err(NanoError::Parse(
|
||||
return Err(CompilerError::Parse(
|
||||
"search() accepts exactly 2 arguments".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -622,13 +622,13 @@ fn parse_fuzzy_call(pair: pest::iterators::Pair<Rule>) -> Result<Expr> {
|
|||
let mut args = pair.into_inner();
|
||||
let field = args
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("fuzzy() missing field argument".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("fuzzy() missing field argument".to_string()))?;
|
||||
let query = args
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("fuzzy() missing query argument".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("fuzzy() missing query argument".to_string()))?;
|
||||
let max_edits = args.next().map(parse_expr).transpose()?.map(Box::new);
|
||||
if args.next().is_some() {
|
||||
return Err(NanoError::Parse(
|
||||
return Err(CompilerError::Parse(
|
||||
"fuzzy() accepts at most 3 arguments".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -643,12 +643,12 @@ fn parse_match_text_call(pair: pest::iterators::Pair<Rule>) -> Result<Expr> {
|
|||
let mut args = pair.into_inner();
|
||||
let field = args
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("match_text() missing field argument".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("match_text() missing field argument".to_string()))?;
|
||||
let query = args
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("match_text() missing query argument".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("match_text() missing query argument".to_string()))?;
|
||||
if args.next().is_some() {
|
||||
return Err(NanoError::Parse(
|
||||
return Err(CompilerError::Parse(
|
||||
"match_text() accepts exactly 2 arguments".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -662,12 +662,12 @@ fn parse_bm25_call(pair: pest::iterators::Pair<Rule>) -> Result<Expr> {
|
|||
let mut args = pair.into_inner();
|
||||
let field = args
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("bm25() missing field argument".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("bm25() missing field argument".to_string()))?;
|
||||
let query = args
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("bm25() missing query argument".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("bm25() missing query argument".to_string()))?;
|
||||
if args.next().is_some() {
|
||||
return Err(NanoError::Parse(
|
||||
return Err(CompilerError::Parse(
|
||||
"bm25() accepts exactly 2 arguments".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -681,14 +681,14 @@ fn parse_rank_expr(pair: pest::iterators::Pair<Rule>) -> Result<Expr> {
|
|||
let inner = if pair.as_rule() == Rule::rank_expr {
|
||||
pair.into_inner()
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("rank expression cannot be empty".to_string()))?
|
||||
.ok_or_else(|| CompilerError::Parse("rank expression cannot be empty".to_string()))?
|
||||
} else {
|
||||
pair
|
||||
};
|
||||
match inner.as_rule() {
|
||||
Rule::nearest_ordering => parse_nearest_ordering(inner),
|
||||
Rule::bm25_call => parse_bm25_call(inner),
|
||||
other => Err(NanoError::Parse(format!(
|
||||
other => Err(CompilerError::Parse(format!(
|
||||
"rrf() rank expression must be nearest(...) or bm25(...), got {:?}",
|
||||
other
|
||||
))),
|
||||
|
|
@ -699,13 +699,13 @@ fn parse_rrf_call(pair: pest::iterators::Pair<Rule>) -> Result<Expr> {
|
|||
let mut args = pair.into_inner();
|
||||
let primary = args
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("rrf() missing primary rank expression".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("rrf() missing primary rank expression".to_string()))?;
|
||||
let secondary = args
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("rrf() missing secondary rank expression".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("rrf() missing secondary rank expression".to_string()))?;
|
||||
let k = args.next().map(parse_expr).transpose()?.map(Box::new);
|
||||
if args.next().is_some() {
|
||||
return Err(NanoError::Parse(
|
||||
return Err(CompilerError::Parse(
|
||||
"rrf() accepts at most 3 arguments".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -724,7 +724,7 @@ fn parse_comp_op(pair: pest::iterators::Pair<Rule>) -> Result<CompOp> {
|
|||
"<" => Ok(CompOp::Lt),
|
||||
">=" => Ok(CompOp::Ge),
|
||||
"<=" => Ok(CompOp::Le),
|
||||
other => Err(NanoError::Parse(format!("unknown operator: {}", other))),
|
||||
other => Err(CompilerError::Parse(format!("unknown operator: {}", other))),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -743,14 +743,14 @@ fn parse_literal(pair: pest::iterators::Pair<Rule>) -> Result<Literal> {
|
|||
let n: i64 = inner
|
||||
.as_str()
|
||||
.parse()
|
||||
.map_err(|e| NanoError::Parse(format!("invalid integer: {}", e)))?;
|
||||
.map_err(|e| CompilerError::Parse(format!("invalid integer: {}", e)))?;
|
||||
Ok(Literal::Integer(n))
|
||||
}
|
||||
Rule::float_lit => {
|
||||
let f: f64 = inner
|
||||
.as_str()
|
||||
.parse()
|
||||
.map_err(|e| NanoError::Parse(format!("invalid float: {}", e)))?;
|
||||
.map_err(|e| CompilerError::Parse(format!("invalid float: {}", e)))?;
|
||||
Ok(Literal::Float(f))
|
||||
}
|
||||
Rule::bool_lit => {
|
||||
|
|
@ -758,7 +758,7 @@ fn parse_literal(pair: pest::iterators::Pair<Rule>) -> Result<Literal> {
|
|||
"true" => true,
|
||||
"false" => false,
|
||||
other => {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"invalid boolean literal: {}",
|
||||
other
|
||||
)));
|
||||
|
|
@ -771,7 +771,7 @@ fn parse_literal(pair: pest::iterators::Pair<Rule>) -> Result<Literal> {
|
|||
.into_inner()
|
||||
.next()
|
||||
.map(|s| parse_string_lit(s.as_str()))
|
||||
.ok_or_else(|| NanoError::Parse("date literal requires a string".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("date literal requires a string".to_string()))?;
|
||||
Ok(Literal::Date(date_str?))
|
||||
}
|
||||
Rule::datetime_lit => {
|
||||
|
|
@ -780,7 +780,7 @@ fn parse_literal(pair: pest::iterators::Pair<Rule>) -> Result<Literal> {
|
|||
.next()
|
||||
.map(|s| parse_string_lit(s.as_str()))
|
||||
.ok_or_else(|| {
|
||||
NanoError::Parse("datetime literal requires a string".to_string())
|
||||
CompilerError::Parse("datetime literal requires a string".to_string())
|
||||
})?;
|
||||
Ok(Literal::DateTime(dt_str?))
|
||||
}
|
||||
|
|
@ -793,7 +793,7 @@ fn parse_literal(pair: pest::iterators::Pair<Rule>) -> Result<Literal> {
|
|||
}
|
||||
Ok(Literal::List(items))
|
||||
}
|
||||
_ => Err(NanoError::Parse(format!(
|
||||
_ => Err(CompilerError::Parse(format!(
|
||||
"unexpected literal: {:?}",
|
||||
inner.as_rule()
|
||||
))),
|
||||
|
|
@ -816,14 +816,14 @@ fn parse_ordering(pair: pest::iterators::Pair<Rule>) -> Result<Ordering> {
|
|||
let mut inner = pair.into_inner();
|
||||
let first = inner
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("ordering cannot be empty".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("ordering cannot be empty".to_string()))?;
|
||||
let (expr, descending) = match first.as_rule() {
|
||||
Rule::nearest_ordering => (parse_nearest_ordering(first)?, false),
|
||||
Rule::expr => {
|
||||
let expr = parse_expr(first)?;
|
||||
let direction = inner.next().map(|p| p.as_str().to_string());
|
||||
if matches!(expr, Expr::Nearest { .. }) && direction.is_some() {
|
||||
return Err(NanoError::Parse(
|
||||
return Err(CompilerError::Parse(
|
||||
"nearest() ordering does not accept asc/desc modifiers".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -831,7 +831,7 @@ fn parse_ordering(pair: pest::iterators::Pair<Rule>) -> Result<Ordering> {
|
|||
(expr, descending)
|
||||
}
|
||||
other => {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"unexpected ordering rule: {:?}",
|
||||
other
|
||||
)));
|
||||
|
|
@ -845,22 +845,22 @@ fn parse_nearest_ordering(pair: pest::iterators::Pair<Rule>) -> Result<Expr> {
|
|||
let mut inner = pair.into_inner();
|
||||
let prop = inner
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("nearest() missing property".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("nearest() missing property".to_string()))?;
|
||||
let mut prop_parts = prop.into_inner();
|
||||
let var = prop_parts
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("nearest() missing variable".to_string()))?
|
||||
.ok_or_else(|| CompilerError::Parse("nearest() missing variable".to_string()))?
|
||||
.as_str();
|
||||
let variable = var.strip_prefix('$').unwrap_or(var).to_string();
|
||||
let property = prop_parts
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("nearest() missing property name".to_string()))?
|
||||
.ok_or_else(|| CompilerError::Parse("nearest() missing property name".to_string()))?
|
||||
.as_str()
|
||||
.to_string();
|
||||
|
||||
let query = inner
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("nearest() missing query expression".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("nearest() missing query expression".to_string()))?;
|
||||
Ok(Expr::Nearest {
|
||||
variable,
|
||||
property,
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ use std::sync::Arc;
|
|||
use arrow_schema::{DataType, Field, Schema, SchemaRef};
|
||||
|
||||
use crate::catalog::Catalog;
|
||||
use crate::error::{NanoError, Result};
|
||||
use crate::error::{CompilerError, Result};
|
||||
use crate::types::{Direction, PropType, ScalarType};
|
||||
|
||||
use super::ast::*;
|
||||
|
|
@ -82,7 +82,7 @@ pub fn typecheck_query_decl(catalog: &Catalog, query: &QueryDecl) -> Result<Chec
|
|||
|
||||
pub fn typecheck_query(catalog: &Catalog, query: &QueryDecl) -> Result<TypeContext> {
|
||||
if !query.mutations.is_empty() {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"mutation query cannot be typechecked with read-query API".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -115,14 +115,14 @@ fn parse_declared_param_types(params: &[Param]) -> Result<HashMap<String, PropTy
|
|||
let mut out = HashMap::with_capacity(params.len());
|
||||
for p in params {
|
||||
if p.name == NOW_PARAM_NAME {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"parameter name `${}` is reserved for runtime timestamp injection",
|
||||
NOW_PARAM_NAME
|
||||
)));
|
||||
}
|
||||
let prop_type =
|
||||
PropType::from_param_type_name(&p.type_name, p.nullable).ok_or_else(|| {
|
||||
NanoError::Type(format!(
|
||||
CompilerError::Type(format!(
|
||||
"unknown parameter type `{}` for `${}`",
|
||||
p.type_name, p.name
|
||||
))
|
||||
|
|
@ -168,12 +168,12 @@ fn typecheck_read_query(catalog: &Catalog, query: &QueryDecl) -> Result<TypeCont
|
|||
.iter()
|
||||
.any(|ord| expr_contains_rrf_with_aliases(&ord.expr, &alias_exprs));
|
||||
if has_rrf && query.limit.is_none() {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T21: rrf ordering requires a limit clause".to_string(),
|
||||
));
|
||||
}
|
||||
if has_standalone_nearest && query.limit.is_none() {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T17: nearest ordering requires a limit clause".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -183,7 +183,7 @@ fn typecheck_read_query(catalog: &Catalog, query: &QueryDecl) -> Result<TypeCont
|
|||
.iter()
|
||||
.any(|ord| matches!(ord.expr, Expr::AliasRef(_)))
|
||||
{
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T18: alias-based ordering is not supported together with nearest in phase 1"
|
||||
.to_string(),
|
||||
));
|
||||
|
|
@ -201,7 +201,7 @@ fn typecheck_read_query(catalog: &Catalog, query: &QueryDecl) -> Result<TypeCont
|
|||
match &proj.expr {
|
||||
Expr::PropAccess { .. } | Expr::Variable(_) => {}
|
||||
_ => {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T9: non-aggregate expressions in an aggregate query must be \
|
||||
property accesses or variables"
|
||||
.to_string(),
|
||||
|
|
@ -221,7 +221,7 @@ fn typecheck_mutation(catalog: &Catalog, mutation: &Mutation, params: &[Param])
|
|||
match mutation {
|
||||
Mutation::Insert(insert) => {
|
||||
if insert.assignments.is_empty() {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T10: insert mutation requires at least one assignment".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -235,7 +235,7 @@ fn typecheck_mutation(catalog: &Catalog, mutation: &Mutation, params: &[Param])
|
|||
.properties
|
||||
.get(&assignment.property)
|
||||
.ok_or_else(|| {
|
||||
NanoError::Type(format!(
|
||||
CompilerError::Type(format!(
|
||||
"T11: type `{}` has no property `{}`",
|
||||
insert.type_name, assignment.property
|
||||
))
|
||||
|
|
@ -265,13 +265,13 @@ fn typecheck_mutation(catalog: &Catalog, mutation: &Mutation, params: &[Param])
|
|||
if assigned_props.contains(embed.source.as_str()) {
|
||||
continue;
|
||||
}
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T12: insert for `{}` must provide non-nullable property `{}` or @embed source `{}`",
|
||||
insert.type_name, prop_name, embed.source
|
||||
)));
|
||||
}
|
||||
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T12: insert for `{}` must provide non-nullable property `{}`",
|
||||
insert.type_name, prop_name
|
||||
)));
|
||||
|
|
@ -308,7 +308,7 @@ fn typecheck_mutation(catalog: &Catalog, mutation: &Mutation, params: &[Param])
|
|||
.properties
|
||||
.get(&assignment.property)
|
||||
.ok_or_else(|| {
|
||||
NanoError::Type(format!(
|
||||
CompilerError::Type(format!(
|
||||
"T11: type `{}` has no property `{}`",
|
||||
insert.type_name, assignment.property
|
||||
))
|
||||
|
|
@ -324,13 +324,13 @@ fn typecheck_mutation(catalog: &Catalog, mutation: &Mutation, params: &[Param])
|
|||
}
|
||||
|
||||
if !has_from {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T12: insert for `{}` must provide required endpoint `from`",
|
||||
insert.type_name
|
||||
)));
|
||||
}
|
||||
if !has_to {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T12: insert for `{}` must provide required endpoint `to`",
|
||||
insert.type_name
|
||||
)));
|
||||
|
|
@ -341,7 +341,7 @@ fn typecheck_mutation(catalog: &Catalog, mutation: &Mutation, params: &[Param])
|
|||
continue;
|
||||
}
|
||||
if !insert.assignments.iter().any(|a| &a.property == prop_name) {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T12: insert for `{}` must provide non-nullable property `{}`",
|
||||
insert.type_name, prop_name
|
||||
)));
|
||||
|
|
@ -350,7 +350,7 @@ fn typecheck_mutation(catalog: &Catalog, mutation: &Mutation, params: &[Param])
|
|||
return Ok(insert.type_name.clone());
|
||||
}
|
||||
|
||||
Err(NanoError::Type(format!(
|
||||
Err(CompilerError::Type(format!(
|
||||
"T10: unknown node/edge type `{}`",
|
||||
insert.type_name
|
||||
)))
|
||||
|
|
@ -359,19 +359,19 @@ fn typecheck_mutation(catalog: &Catalog, mutation: &Mutation, params: &[Param])
|
|||
let node_type = if let Some(node_type) = catalog.node_types.get(&update.type_name) {
|
||||
node_type
|
||||
} else if catalog.edge_types.contains_key(&update.type_name) {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T16: update mutation for edge type `{}` is not supported",
|
||||
update.type_name
|
||||
)));
|
||||
} else {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T10: unknown node/edge type `{}`",
|
||||
update.type_name
|
||||
)));
|
||||
};
|
||||
|
||||
if update.assignments.is_empty() {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T10: update mutation requires at least one assignment".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -383,7 +383,7 @@ fn typecheck_mutation(catalog: &Catalog, mutation: &Mutation, params: &[Param])
|
|||
.properties
|
||||
.get(&assignment.property)
|
||||
.ok_or_else(|| {
|
||||
NanoError::Type(format!(
|
||||
CompilerError::Type(format!(
|
||||
"T11: type `{}` has no property `{}`",
|
||||
update.type_name, assignment.property
|
||||
))
|
||||
|
|
@ -422,7 +422,7 @@ fn typecheck_mutation(catalog: &Catalog, mutation: &Mutation, params: &[Param])
|
|||
)?;
|
||||
Ok(delete.type_name.clone())
|
||||
} else {
|
||||
Err(NanoError::Type(format!(
|
||||
Err(CompilerError::Type(format!(
|
||||
"T10: unknown node/edge type `{}`",
|
||||
delete.type_name
|
||||
)))
|
||||
|
|
@ -435,7 +435,7 @@ fn ensure_no_duplicate_assignment_names(assignments: &[MutationAssignment]) -> R
|
|||
let mut seen = std::collections::HashSet::new();
|
||||
for assignment in assignments {
|
||||
if !seen.insert(&assignment.property) {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T13: duplicate assignment for property `{}`",
|
||||
assignment.property
|
||||
)));
|
||||
|
|
@ -454,13 +454,13 @@ fn typecheck_mutation_predicate(
|
|||
.properties
|
||||
.get(&predicate.property)
|
||||
.ok_or_else(|| {
|
||||
NanoError::Type(format!(
|
||||
CompilerError::Type(format!(
|
||||
"T11: type `{}` has no property `{}`",
|
||||
type_name, predicate.property
|
||||
))
|
||||
})?;
|
||||
if matches!(prop_type.scalar, ScalarType::Blob) {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T11: blob property `{}` cannot be used in WHERE predicates",
|
||||
predicate.property
|
||||
)));
|
||||
|
|
@ -493,7 +493,7 @@ fn typecheck_edge_mutation_predicate(
|
|||
.properties
|
||||
.get(&predicate.property)
|
||||
.ok_or_else(|| {
|
||||
NanoError::Type(format!(
|
||||
CompilerError::Type(format!(
|
||||
"T11: type `{}` has no property `{}`",
|
||||
type_name, predicate.property
|
||||
))
|
||||
|
|
@ -517,7 +517,7 @@ fn check_match_value_type(
|
|||
MatchValue::Literal(lit) => check_literal_type(lit, expected, property),
|
||||
MatchValue::Variable(v) => {
|
||||
let Some(actual) = params.get(v) else {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T14: mutation variable `${}` must be a declared query parameter",
|
||||
v
|
||||
)));
|
||||
|
|
@ -528,7 +528,7 @@ fn check_match_value_type(
|
|||
&& matches!(actual.scalar, ScalarType::String)
|
||||
&& !actual.list);
|
||||
if !compatible {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T7: cannot assign/compare {} with {} for property `{}`",
|
||||
actual.display_name(),
|
||||
expected.display_name(),
|
||||
|
|
@ -543,7 +543,7 @@ fn check_match_value_type(
|
|||
|
||||
fn check_now_match_value_type(expected: &PropType, property: &str) -> Result<()> {
|
||||
if expected.list || expected.scalar != ScalarType::DateTime {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T7: cannot assign/compare DateTime with {} for property `{}`",
|
||||
expected.display_name(),
|
||||
property
|
||||
|
|
@ -597,7 +597,7 @@ fn typecheck_clauses(
|
|||
}
|
||||
}
|
||||
if !has_outer {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T9: negation block must reference at least one outer-bound variable"
|
||||
.to_string(),
|
||||
));
|
||||
|
|
@ -616,7 +616,7 @@ fn typecheck_binding(
|
|||
) -> Result<()> {
|
||||
// T1: binding type must exist in catalog
|
||||
if !catalog.node_types.contains_key(&binding.type_name) {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T1: unknown node type `{}`",
|
||||
binding.type_name
|
||||
)));
|
||||
|
|
@ -627,14 +627,14 @@ fn typecheck_binding(
|
|||
// T2 + T3: property match fields must exist and have correct types
|
||||
for pm in &binding.prop_matches {
|
||||
let prop = node_type.properties.get(&pm.prop_name).ok_or_else(|| {
|
||||
NanoError::Type(format!(
|
||||
CompilerError::Type(format!(
|
||||
"T2: type `{}` has no property `{}`",
|
||||
binding.type_name, pm.prop_name
|
||||
))
|
||||
})?;
|
||||
|
||||
if matches!(prop.scalar, ScalarType::Blob) {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T3: blob property `{}.{}` cannot be used in match patterns",
|
||||
binding.type_name, pm.prop_name
|
||||
)));
|
||||
|
|
@ -658,7 +658,7 @@ fn typecheck_binding(
|
|||
if let Some(existing) = ctx.bindings.get(&binding.variable)
|
||||
&& existing.type_name != binding.type_name
|
||||
{
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"variable `${}` already bound to type `{}`, cannot rebind to `{}`",
|
||||
binding.variable, existing.type_name, binding.type_name
|
||||
)));
|
||||
|
|
@ -680,7 +680,7 @@ fn check_binding_literal_type(lit: &Literal, expected: &PropType, property: &str
|
|||
if expected.list {
|
||||
let lit_type = literal_type(lit)?;
|
||||
if lit_type.list {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T3: list equality is not supported for property `{}`; use a scalar value to match list membership",
|
||||
property
|
||||
)));
|
||||
|
|
@ -688,7 +688,7 @@ fn check_binding_literal_type(lit: &Literal, expected: &PropType, property: &str
|
|||
|
||||
let expected_member = PropType::scalar(expected.scalar, expected.nullable);
|
||||
if !types_compatible(&lit_type, &expected_member) {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T3: property `{}` has type {} but membership match got {}",
|
||||
property,
|
||||
expected.display_name(),
|
||||
|
|
@ -708,7 +708,7 @@ fn check_binding_variable_type(
|
|||
) -> Result<()> {
|
||||
if expected.list {
|
||||
if actual.list {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T7: list equality is not supported for property `{}`; use a scalar parameter for membership matching",
|
||||
property
|
||||
)));
|
||||
|
|
@ -716,7 +716,7 @@ fn check_binding_variable_type(
|
|||
|
||||
let expected_member = PropType::scalar(expected.scalar, expected.nullable);
|
||||
if !types_compatible(actual, &expected_member) {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T7: cannot compare {} membership against {} for property `{}`",
|
||||
actual.display_name(),
|
||||
expected.display_name(),
|
||||
|
|
@ -727,7 +727,7 @@ fn check_binding_variable_type(
|
|||
}
|
||||
|
||||
if !types_compatible(actual, expected) {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T7: cannot assign/compare {} with {} for property `{}`",
|
||||
actual.display_name(),
|
||||
expected.display_name(),
|
||||
|
|
@ -746,23 +746,23 @@ fn typecheck_traversal(
|
|||
let edge = catalog
|
||||
.lookup_edge_by_name(&traversal.edge_name)
|
||||
.ok_or_else(|| {
|
||||
NanoError::Type(format!("T4: unknown edge type `{}`", traversal.edge_name))
|
||||
CompilerError::Type(format!("T4: unknown edge type `{}`", traversal.edge_name))
|
||||
})?;
|
||||
|
||||
if traversal.min_hops == 0 {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T15: traversal min hop bound must be >= 1".to_string(),
|
||||
));
|
||||
}
|
||||
if let Some(max_hops) = traversal.max_hops {
|
||||
if max_hops < traversal.min_hops {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T15: invalid traversal bounds {{{},{}}}; max must be >= min",
|
||||
traversal.min_hops, max_hops
|
||||
)));
|
||||
}
|
||||
} else {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T15: unbounded traversal is disabled; use bounded traversal {min,max}".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -784,7 +784,7 @@ fn typecheck_traversal(
|
|||
// dst should be edge.from_type
|
||||
bind_traversal_endpoint(ctx, &traversal.dst, &edge.from_type, edge)?;
|
||||
} else {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T5: variable `${}` has type `{}`, which is not an endpoint of edge `{}: {} -> {}`",
|
||||
traversal.src, src_bv.type_name, edge.name, edge.from_type, edge.to_type
|
||||
)));
|
||||
|
|
@ -798,7 +798,7 @@ fn typecheck_traversal(
|
|||
direction = Direction::In;
|
||||
bind_traversal_endpoint(ctx, &traversal.src, &edge.to_type, edge)?;
|
||||
} else {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T5: variable `${}` has type `{}`, which is not an endpoint of edge `{}: {} -> {}`",
|
||||
traversal.dst, dst_bv.type_name, edge.name, edge.from_type, edge.to_type
|
||||
)));
|
||||
|
|
@ -833,7 +833,7 @@ fn bind_traversal_endpoint(
|
|||
}
|
||||
if let Some(existing) = ctx.bindings.get(var) {
|
||||
if existing.type_name != expected_type {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T5: variable `${}` has type `{}` but edge `{}` expects `{}`",
|
||||
var, existing.type_name, edge.name, expected_type
|
||||
)));
|
||||
|
|
@ -863,27 +863,27 @@ fn typecheck_filter(
|
|||
if let (ResolvedType::Scalar(l), ResolvedType::Scalar(r)) = (&left_type, &right_type) {
|
||||
if filter.op == CompOp::Contains {
|
||||
if !l.list {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T7: contains requires a list property on the left, got {}",
|
||||
l.display_name()
|
||||
)));
|
||||
}
|
||||
if r.list {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T7: contains requires a scalar right operand".to_string(),
|
||||
));
|
||||
}
|
||||
if matches!(l.scalar, ScalarType::Vector(_))
|
||||
|| matches!(r.scalar, ScalarType::Vector(_))
|
||||
{
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T7: vector membership filters are not supported".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let expected_member = PropType::scalar(l.scalar, l.nullable);
|
||||
if !types_compatible(&expected_member, r) {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T7: cannot test membership of {} in {}",
|
||||
r.display_name(),
|
||||
l.display_name()
|
||||
|
|
@ -894,29 +894,29 @@ fn typecheck_filter(
|
|||
|
||||
// T7: check type compatibility
|
||||
if l.list || r.list {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T7: list comparisons in filters are not supported; use `contains` for list membership".to_string(),
|
||||
));
|
||||
}
|
||||
if matches!(l.scalar, ScalarType::Vector(_)) || matches!(r.scalar, ScalarType::Vector(_)) {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T7: vector comparisons in filters are not supported".to_string(),
|
||||
));
|
||||
}
|
||||
if matches!(l.scalar, ScalarType::Blob) || matches!(r.scalar, ScalarType::Blob) {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T7: blob comparisons in filters are not supported".to_string(),
|
||||
));
|
||||
}
|
||||
if !types_compatible(l, r) {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T7: cannot compare {} with {}",
|
||||
l.display_name(),
|
||||
r.display_name()
|
||||
)));
|
||||
}
|
||||
} else {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T7: filter comparisons require scalar operands, got {} and {}",
|
||||
left_type.display_name(),
|
||||
right_type.display_name()
|
||||
|
|
@ -940,15 +940,15 @@ fn resolve_expr_type(
|
|||
Expr::PropAccess { variable, property } => {
|
||||
// T6: variable must be bound and property must exist
|
||||
let bv = ctx.bindings.get(variable).ok_or_else(|| {
|
||||
NanoError::Type(format!("T6: variable `${}` is not bound", variable))
|
||||
CompilerError::Type(format!("T6: variable `${}` is not bound", variable))
|
||||
})?;
|
||||
|
||||
let node_type = catalog.node_types.get(&bv.type_name).ok_or_else(|| {
|
||||
NanoError::Type(format!("T6: type `{}` not found in catalog", bv.type_name))
|
||||
CompilerError::Type(format!("T6: type `{}` not found in catalog", bv.type_name))
|
||||
})?;
|
||||
|
||||
let prop = node_type.properties.get(property).ok_or_else(|| {
|
||||
NanoError::Type(format!(
|
||||
CompilerError::Type(format!(
|
||||
"T6: type `{}` has no property `{}`",
|
||||
bv.type_name, property
|
||||
))
|
||||
|
|
@ -962,19 +962,19 @@ fn resolve_expr_type(
|
|||
query,
|
||||
} => {
|
||||
let node_binding = ctx.bindings.get(variable).ok_or_else(|| {
|
||||
NanoError::Type(format!("T15: variable `${}` is not bound", variable))
|
||||
CompilerError::Type(format!("T15: variable `${}` is not bound", variable))
|
||||
})?;
|
||||
let node_type = catalog
|
||||
.node_types
|
||||
.get(&node_binding.type_name)
|
||||
.ok_or_else(|| {
|
||||
NanoError::Type(format!(
|
||||
CompilerError::Type(format!(
|
||||
"T15: type `{}` not found in catalog",
|
||||
node_binding.type_name
|
||||
))
|
||||
})?;
|
||||
let prop_type = node_type.properties.get(property).ok_or_else(|| {
|
||||
NanoError::Type(format!(
|
||||
CompilerError::Type(format!(
|
||||
"T15: type `{}` has no property `{}`",
|
||||
node_binding.type_name, property
|
||||
))
|
||||
|
|
@ -982,7 +982,7 @@ fn resolve_expr_type(
|
|||
let vector_dim = match prop_type.scalar {
|
||||
ScalarType::Vector(dim) => dim,
|
||||
_ => {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T15: nearest requires a Vector property, got {}.{}: {}",
|
||||
node_binding.type_name,
|
||||
property,
|
||||
|
|
@ -991,7 +991,7 @@ fn resolve_expr_type(
|
|||
}
|
||||
};
|
||||
if prop_type.list {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T15: nearest does not support list-wrapped vectors".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -1000,7 +1000,7 @@ fn resolve_expr_type(
|
|||
&& let Some(dim) = numeric_vector_literal_dim(lit)
|
||||
{
|
||||
if dim != vector_dim {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T15: nearest vector dimension mismatch: property is Vector({}), query literal has {} elements",
|
||||
vector_dim, dim
|
||||
)));
|
||||
|
|
@ -1019,7 +1019,7 @@ fn resolve_expr_type(
|
|||
_ => unreachable!(),
|
||||
};
|
||||
if qdim != vector_dim {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T15: nearest vector dimension mismatch: property is Vector({}), query is Vector({})",
|
||||
vector_dim, qdim
|
||||
)));
|
||||
|
|
@ -1029,14 +1029,14 @@ fn resolve_expr_type(
|
|||
// query-time string embedding is supported by the runtime executor
|
||||
}
|
||||
ResolvedType::Scalar(s) => {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T15: nearest query must be Vector({}) or String, got {}",
|
||||
vector_dim,
|
||||
s.display_name()
|
||||
)));
|
||||
}
|
||||
_ => {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T15: nearest query must be a scalar expression".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -1052,13 +1052,13 @@ fn resolve_expr_type(
|
|||
match field_type {
|
||||
ResolvedType::Scalar(s) if s.scalar == ScalarType::String && !s.list => {}
|
||||
ResolvedType::Scalar(s) => {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T19: search field must be String, got {}",
|
||||
s.display_name()
|
||||
)));
|
||||
}
|
||||
_ => {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T19: search field must be a scalar String expression".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -1068,13 +1068,13 @@ fn resolve_expr_type(
|
|||
match query_type {
|
||||
ResolvedType::Scalar(s) if s.scalar == ScalarType::String && !s.list => {}
|
||||
ResolvedType::Scalar(s) => {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T19: search query must be String, got {}",
|
||||
s.display_name()
|
||||
)));
|
||||
}
|
||||
_ => {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T19: search query must be a scalar String expression".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -1094,13 +1094,13 @@ fn resolve_expr_type(
|
|||
match field_type {
|
||||
ResolvedType::Scalar(s) if s.scalar == ScalarType::String && !s.list => {}
|
||||
ResolvedType::Scalar(s) => {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T19: fuzzy field must be String, got {}",
|
||||
s.display_name()
|
||||
)));
|
||||
}
|
||||
_ => {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T19: fuzzy field must be a scalar String expression".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -1110,13 +1110,13 @@ fn resolve_expr_type(
|
|||
match query_type {
|
||||
ResolvedType::Scalar(s) if s.scalar == ScalarType::String && !s.list => {}
|
||||
ResolvedType::Scalar(s) => {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T19: fuzzy query must be String, got {}",
|
||||
s.display_name()
|
||||
)));
|
||||
}
|
||||
_ => {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T19: fuzzy query must be a scalar String expression".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -1135,13 +1135,13 @@ fn resolve_expr_type(
|
|||
| ScalarType::U64
|
||||
) => {}
|
||||
ResolvedType::Scalar(s) => {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T19: fuzzy max_edits must be an integer scalar, got {}",
|
||||
s.display_name()
|
||||
)));
|
||||
}
|
||||
_ => {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T19: fuzzy max_edits must be an integer scalar expression".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -1158,13 +1158,13 @@ fn resolve_expr_type(
|
|||
match field_type {
|
||||
ResolvedType::Scalar(s) if s.scalar == ScalarType::String && !s.list => {}
|
||||
ResolvedType::Scalar(s) => {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T20: match_text field must be String, got {}",
|
||||
s.display_name()
|
||||
)));
|
||||
}
|
||||
_ => {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T20: match_text field must be a scalar String expression".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -1174,13 +1174,13 @@ fn resolve_expr_type(
|
|||
match query_type {
|
||||
ResolvedType::Scalar(s) if s.scalar == ScalarType::String && !s.list => {}
|
||||
ResolvedType::Scalar(s) => {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T20: match_text query must be String, got {}",
|
||||
s.display_name()
|
||||
)));
|
||||
}
|
||||
_ => {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T20: match_text query must be a scalar String expression".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -1196,13 +1196,13 @@ fn resolve_expr_type(
|
|||
match field_type {
|
||||
ResolvedType::Scalar(s) if s.scalar == ScalarType::String && !s.list => {}
|
||||
ResolvedType::Scalar(s) => {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T20: bm25 field must be String, got {}",
|
||||
s.display_name()
|
||||
)));
|
||||
}
|
||||
_ => {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T20: bm25 field must be a scalar String expression".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -1212,13 +1212,13 @@ fn resolve_expr_type(
|
|||
match query_type {
|
||||
ResolvedType::Scalar(s) if s.scalar == ScalarType::String && !s.list => {}
|
||||
ResolvedType::Scalar(s) => {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T20: bm25 query must be String, got {}",
|
||||
s.display_name()
|
||||
)));
|
||||
}
|
||||
_ => {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T20: bm25 query must be a scalar String expression".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -1235,12 +1235,12 @@ fn resolve_expr_type(
|
|||
k,
|
||||
} => {
|
||||
if !matches!(primary.as_ref(), Expr::Nearest { .. } | Expr::Bm25 { .. }) {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T21: rrf primary expression must be nearest(...) or bm25(...)".to_string(),
|
||||
));
|
||||
}
|
||||
if !matches!(secondary.as_ref(), Expr::Nearest { .. } | Expr::Bm25 { .. }) {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T21: rrf secondary expression must be nearest(...) or bm25(...)".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -1252,13 +1252,13 @@ fn resolve_expr_type(
|
|||
match ty {
|
||||
ResolvedType::Scalar(s) if s.scalar == ScalarType::F64 && !s.list => {}
|
||||
ResolvedType::Scalar(s) => {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T21: rrf rank expressions must evaluate to F64, got {}",
|
||||
s.display_name()
|
||||
)));
|
||||
}
|
||||
_ => {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T21: rrf rank expressions must be scalar numeric expressions"
|
||||
.to_string(),
|
||||
));
|
||||
|
|
@ -1279,13 +1279,13 @@ fn resolve_expr_type(
|
|||
| ScalarType::U64
|
||||
) => {}
|
||||
ResolvedType::Scalar(s) => {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T21: rrf k must be an integer scalar, got {}",
|
||||
s.display_name()
|
||||
)));
|
||||
}
|
||||
_ => {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T21: rrf k must be an integer scalar expression".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -1293,7 +1293,7 @@ fn resolve_expr_type(
|
|||
if let Expr::Literal(Literal::Integer(v)) = k_expr.as_ref()
|
||||
&& *v <= 0
|
||||
{
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"T21: rrf k must be greater than 0".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -1311,7 +1311,7 @@ fn resolve_expr_type(
|
|||
} else if let Some(bv) = ctx.bindings.get(name) {
|
||||
Ok(ResolvedType::Node(bv.type_name.clone()))
|
||||
} else {
|
||||
Err(NanoError::Type(format!(
|
||||
Err(CompilerError::Type(format!(
|
||||
"variable `${}` is not bound",
|
||||
name
|
||||
)))
|
||||
|
|
@ -1327,7 +1327,7 @@ fn resolve_expr_type(
|
|||
if let ResolvedType::Scalar(s) = &arg_type
|
||||
&& (s.list || !s.scalar.is_numeric())
|
||||
{
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T8: {} requires numeric type, got {}",
|
||||
func,
|
||||
s.display_name()
|
||||
|
|
@ -1338,7 +1338,7 @@ fn resolve_expr_type(
|
|||
if let ResolvedType::Scalar(s) = &arg_type
|
||||
&& (s.list || (!s.scalar.is_numeric() && s.scalar != ScalarType::String))
|
||||
{
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T8: {} requires numeric or string type, got {}",
|
||||
func,
|
||||
s.display_name()
|
||||
|
|
@ -1420,7 +1420,7 @@ fn resolved_type_to_field_shape(
|
|||
ResolvedType::Scalar(prop_type) => Ok((prop_type.to_arrow(), prop_type.nullable)),
|
||||
ResolvedType::Node(type_name) => {
|
||||
let node_type = catalog.node_types.get(type_name).ok_or_else(|| {
|
||||
NanoError::Type(format!("type `{}` not found in catalog", type_name))
|
||||
CompilerError::Type(format!("type `{}` not found in catalog", type_name))
|
||||
})?;
|
||||
let fields: Vec<Field> = node_type
|
||||
.arrow_schema
|
||||
|
|
@ -1450,14 +1450,14 @@ fn literal_type(lit: &Literal) -> Result<PropType> {
|
|||
}
|
||||
let first = literal_type(&items[0])?;
|
||||
if first.list {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"nested list literals are not supported".to_string(),
|
||||
));
|
||||
}
|
||||
for item in items.iter().skip(1) {
|
||||
let item_type = literal_type(item)?;
|
||||
if item_type.list || !types_compatible(&first, &item_type) {
|
||||
return Err(NanoError::Type(
|
||||
return Err(CompilerError::Type(
|
||||
"list literal elements must share a compatible scalar type".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -1473,7 +1473,7 @@ fn check_literal_type(lit: &Literal, expected: &PropType, prop_name: &str) -> Re
|
|||
return if expected.nullable {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(NanoError::Type(format!(
|
||||
Err(CompilerError::Type(format!(
|
||||
"T3: property `{}` is non-nullable but got null",
|
||||
prop_name
|
||||
)))
|
||||
|
|
@ -1487,7 +1487,7 @@ fn check_literal_type(lit: &Literal, expected: &PropType, prop_name: &str) -> Re
|
|||
if actual_dim == expected_dim {
|
||||
return Ok(());
|
||||
}
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T3: property `{}` has type Vector({}) but got vector literal with {} elements",
|
||||
prop_name, expected_dim, actual_dim
|
||||
)));
|
||||
|
|
@ -1495,7 +1495,7 @@ fn check_literal_type(lit: &Literal, expected: &PropType, prop_name: &str) -> Re
|
|||
|
||||
let lit_type = literal_type(lit)?;
|
||||
if !types_compatible(&lit_type, expected) {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T3: property `{}` has type {} but got {}",
|
||||
prop_name,
|
||||
expected.display_name(),
|
||||
|
|
@ -1507,7 +1507,7 @@ fn check_literal_type(lit: &Literal, expected: &PropType, prop_name: &str) -> Re
|
|||
match lit {
|
||||
Literal::String(v) => {
|
||||
if !allowed.contains(v) {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T3: property `{}` expects one of [{}], got '{}'",
|
||||
prop_name,
|
||||
allowed.join(", "),
|
||||
|
|
@ -1520,7 +1520,7 @@ fn check_literal_type(lit: &Literal, expected: &PropType, prop_name: &str) -> Re
|
|||
match item {
|
||||
Literal::String(v) if allowed.contains(v) => {}
|
||||
Literal::String(v) => {
|
||||
return Err(NanoError::Type(format!(
|
||||
return Err(CompilerError::Type(format!(
|
||||
"T3: property `{}` expects one of [{}], got '{}'",
|
||||
prop_name,
|
||||
allowed.join(", "),
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ use std::fmt;
|
|||
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::error::NanoError;
|
||||
use crate::error::CompilerError;
|
||||
use crate::ir::ParamMap;
|
||||
use crate::json_output::{JS_MAX_SAFE_INTEGER_U64, is_js_safe_integer_i64};
|
||||
use crate::query::ast::{Literal, Param, QueryDecl};
|
||||
|
|
@ -17,7 +17,7 @@ pub enum JsonParamMode {
|
|||
|
||||
#[derive(Debug)]
|
||||
pub enum RunInputError {
|
||||
Core(NanoError),
|
||||
Core(CompilerError),
|
||||
Message(String),
|
||||
}
|
||||
|
||||
|
|
@ -45,8 +45,8 @@ impl Error for RunInputError {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<NanoError> for RunInputError {
|
||||
fn from(value: NanoError) -> Self {
|
||||
impl From<CompilerError> for RunInputError {
|
||||
fn from(value: CompilerError) -> Self {
|
||||
Self::Core(value)
|
||||
}
|
||||
}
|
||||
|
|
@ -120,7 +120,7 @@ impl ToParam for i64 {
|
|||
impl ToParam for isize {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
let value = i64::try_from(self).map_err(|_| {
|
||||
NanoError::Execution(format!(
|
||||
CompilerError::Execution(format!(
|
||||
"param value {} exceeds current engine range for numeric literals (max {})",
|
||||
self,
|
||||
i64::MAX
|
||||
|
|
@ -151,7 +151,7 @@ impl ToParam for u32 {
|
|||
impl ToParam for u64 {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
let value = i64::try_from(self).map_err(|_| {
|
||||
NanoError::Execution(format!(
|
||||
CompilerError::Execution(format!(
|
||||
"param value {} exceeds current engine range for numeric literals (max {})",
|
||||
self,
|
||||
i64::MAX
|
||||
|
|
@ -164,7 +164,7 @@ impl ToParam for u64 {
|
|||
impl ToParam for usize {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
let value = i64::try_from(self).map_err(|_| {
|
||||
NanoError::Execution(format!(
|
||||
CompilerError::Execution(format!(
|
||||
"param value {} exceeds current engine range for numeric literals (max {})",
|
||||
self,
|
||||
i64::MAX
|
||||
|
|
@ -177,7 +177,7 @@ impl ToParam for usize {
|
|||
impl ToParam for f32 {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
if !self.is_finite() {
|
||||
return Err(NanoError::Execution(format!(
|
||||
return Err(CompilerError::Execution(format!(
|
||||
"invalid float parameter {}",
|
||||
self
|
||||
)));
|
||||
|
|
@ -189,7 +189,7 @@ impl ToParam for f32 {
|
|||
impl ToParam for f64 {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
if !self.is_finite() {
|
||||
return Err(NanoError::Execution(format!(
|
||||
return Err(CompilerError::Execution(format!(
|
||||
"invalid float parameter {}",
|
||||
self
|
||||
)));
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ use arrow_ipc::writer::StreamWriter;
|
|||
use arrow_schema::{DataType, Field, Schema, SchemaRef};
|
||||
use serde::de::DeserializeOwned;
|
||||
|
||||
use crate::error::{NanoError, Result};
|
||||
use crate::error::{CompilerError, Result};
|
||||
use crate::json_output::{record_batches_to_json_rows, record_batches_to_rust_json_rows};
|
||||
|
||||
#[derive(Debug, Clone, Copy, Default)]
|
||||
|
|
@ -47,7 +47,7 @@ impl QueryResult {
|
|||
}
|
||||
|
||||
arrow_select::concat::concat_batches(&self.schema, &self.batches)
|
||||
.map_err(|err| NanoError::Execution(err.to_string()))
|
||||
.map_err(|err| CompilerError::Execution(err.to_string()))
|
||||
}
|
||||
|
||||
pub fn to_sdk_json(&self) -> serde_json::Value {
|
||||
|
|
@ -60,7 +60,7 @@ impl QueryResult {
|
|||
|
||||
pub fn deserialize<T: DeserializeOwned>(&self) -> Result<T> {
|
||||
serde_json::from_value(self.to_rust_json()).map_err(|err| {
|
||||
NanoError::Execution(format!("failed to deserialize query result: {}", err))
|
||||
CompilerError::Execution(format!("failed to deserialize query result: {}", err))
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ use pest::error::InputLocation;
|
|||
use pest_derive::Parser;
|
||||
|
||||
use crate::error::{
|
||||
NanoError, ParseDiagnostic, Result, SourceSpan, decode_string_literal, render_span,
|
||||
CompilerError, ParseDiagnostic, Result, SourceSpan, decode_string_literal, render_span,
|
||||
};
|
||||
use crate::types::{PropType, ScalarType};
|
||||
|
||||
|
|
@ -16,7 +16,7 @@ use super::ast::*;
|
|||
struct SchemaParser;
|
||||
|
||||
pub fn parse_schema(input: &str) -> Result<SchemaFile> {
|
||||
parse_schema_diagnostic(input).map_err(|e| NanoError::Parse(e.to_string()))
|
||||
parse_schema_diagnostic(input).map_err(|e| CompilerError::Parse(e.to_string()))
|
||||
}
|
||||
|
||||
pub fn parse_schema_diagnostic(input: &str) -> std::result::Result<SchemaFile, ParseDiagnostic> {
|
||||
|
|
@ -27,7 +27,8 @@ pub fn parse_schema_diagnostic(input: &str) -> std::result::Result<SchemaFile, P
|
|||
if pair.as_rule() == Rule::schema_file {
|
||||
for inner in pair.into_inner() {
|
||||
if let Rule::schema_decl = inner.as_rule() {
|
||||
declarations.push(parse_schema_decl(inner).map_err(nano_error_to_diagnostic)?);
|
||||
declarations
|
||||
.push(parse_schema_decl(inner).map_err(compiler_error_to_diagnostic)?);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -46,13 +47,13 @@ pub fn parse_schema_diagnostic(input: &str) -> std::result::Result<SchemaFile, P
|
|||
let iface_refs: Vec<&InterfaceDecl> = interfaces.iter().collect();
|
||||
for decl in &mut declarations {
|
||||
if let SchemaDecl::Node(node) = decl {
|
||||
resolve_interfaces(node, &iface_refs).map_err(nano_error_to_diagnostic)?;
|
||||
resolve_interfaces(node, &iface_refs).map_err(compiler_error_to_diagnostic)?;
|
||||
}
|
||||
}
|
||||
|
||||
let schema = SchemaFile { declarations };
|
||||
validate_schema_annotations(&schema).map_err(nano_error_to_diagnostic)?;
|
||||
validate_constraints(&schema).map_err(nano_error_to_diagnostic)?;
|
||||
validate_schema_annotations(&schema).map_err(compiler_error_to_diagnostic)?;
|
||||
validate_constraints(&schema).map_err(compiler_error_to_diagnostic)?;
|
||||
Ok(schema)
|
||||
}
|
||||
|
||||
|
|
@ -64,7 +65,7 @@ fn pest_error_to_diagnostic(err: pest::error::Error<Rule>) -> ParseDiagnostic {
|
|||
ParseDiagnostic::new(err.to_string(), span)
|
||||
}
|
||||
|
||||
fn nano_error_to_diagnostic(err: NanoError) -> ParseDiagnostic {
|
||||
fn compiler_error_to_diagnostic(err: CompilerError) -> ParseDiagnostic {
|
||||
ParseDiagnostic::new(err.to_string(), None)
|
||||
}
|
||||
|
||||
|
|
@ -74,7 +75,7 @@ fn parse_schema_decl(pair: pest::iterators::Pair<Rule>) -> Result<SchemaDecl> {
|
|||
Rule::interface_decl => Ok(SchemaDecl::Interface(parse_interface_decl(inner)?)),
|
||||
Rule::node_decl => Ok(SchemaDecl::Node(parse_node_decl(inner)?)),
|
||||
Rule::edge_decl => Ok(SchemaDecl::Edge(parse_edge_decl(inner)?)),
|
||||
_ => Err(NanoError::Parse(format!(
|
||||
_ => Err(CompilerError::Parse(format!(
|
||||
"unexpected rule: {:?}",
|
||||
inner.as_rule()
|
||||
))),
|
||||
|
|
@ -180,21 +181,20 @@ fn parse_cardinality(pair: pest::iterators::Pair<Rule>) -> Result<Cardinality> {
|
|||
let min_str = inner.next().unwrap().as_str();
|
||||
let min = min_str
|
||||
.parse::<u32>()
|
||||
.map_err(|_| NanoError::Parse(format!("invalid cardinality min: {}", min_str)))?;
|
||||
let max = if let Some(max_pair) = inner.next() {
|
||||
let max_str = max_pair.as_str();
|
||||
Some(
|
||||
max_str
|
||||
.parse::<u32>()
|
||||
.map_err(|_| NanoError::Parse(format!("invalid cardinality max: {}", max_str)))?,
|
||||
)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
.map_err(|_| CompilerError::Parse(format!("invalid cardinality min: {}", min_str)))?;
|
||||
let max =
|
||||
if let Some(max_pair) = inner.next() {
|
||||
let max_str = max_pair.as_str();
|
||||
Some(max_str.parse::<u32>().map_err(|_| {
|
||||
CompilerError::Parse(format!("invalid cardinality max: {}", max_str))
|
||||
})?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
if let Some(max_val) = max {
|
||||
if min > max_val {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"cardinality min ({}) exceeds max ({})",
|
||||
min, max_val
|
||||
)));
|
||||
|
|
@ -219,7 +219,7 @@ fn parse_body_constraint(pair: pest::iterators::Pair<Rule>) -> Result<Constraint
|
|||
.map(|a| extract_ident_from_constraint_arg(a))
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
if names.is_empty() {
|
||||
return Err(NanoError::Parse(
|
||||
return Err(CompilerError::Parse(
|
||||
"@key constraint requires at least one property name".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -228,7 +228,7 @@ fn parse_body_constraint(pair: pest::iterators::Pair<Rule>) -> Result<Constraint
|
|||
"unique" => {
|
||||
let names = extract_ident_list_from_args(args)?;
|
||||
if names.is_empty() {
|
||||
return Err(NanoError::Parse(
|
||||
return Err(CompilerError::Parse(
|
||||
"@unique constraint requires at least one property name".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -237,7 +237,7 @@ fn parse_body_constraint(pair: pest::iterators::Pair<Rule>) -> Result<Constraint
|
|||
"index" => {
|
||||
let names = extract_ident_list_from_args(args)?;
|
||||
if names.is_empty() {
|
||||
return Err(NanoError::Parse(
|
||||
return Err(CompilerError::Parse(
|
||||
"@index constraint requires at least one property name".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -246,7 +246,7 @@ fn parse_body_constraint(pair: pest::iterators::Pair<Rule>) -> Result<Constraint
|
|||
"range" => {
|
||||
// @range(prop, min..max)
|
||||
if args.len() < 2 {
|
||||
return Err(NanoError::Parse(
|
||||
return Err(CompilerError::Parse(
|
||||
"@range requires property name and bounds: @range(prop, min..max)".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -258,7 +258,7 @@ fn parse_body_constraint(pair: pest::iterators::Pair<Rule>) -> Result<Constraint
|
|||
"check" => {
|
||||
// @check(prop, "regex")
|
||||
if args.len() < 2 {
|
||||
return Err(NanoError::Parse(
|
||||
return Err(CompilerError::Parse(
|
||||
"@check requires property name and pattern: @check(prop, \"regex\")"
|
||||
.to_string(),
|
||||
));
|
||||
|
|
@ -267,7 +267,10 @@ fn parse_body_constraint(pair: pest::iterators::Pair<Rule>) -> Result<Constraint
|
|||
let pattern = extract_string_from_constraint_arg(&args[1])?;
|
||||
Ok(Constraint::Check { property, pattern })
|
||||
}
|
||||
other => Err(NanoError::Parse(format!("unknown constraint: @{}", other))),
|
||||
other => Err(CompilerError::Parse(format!(
|
||||
"unknown constraint: @{}",
|
||||
other
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -281,7 +284,7 @@ fn extract_ident_from_constraint_arg(pair: pest::iterators::Pair<Rule>) -> Resul
|
|||
return Ok(inner.as_str().to_string());
|
||||
}
|
||||
}
|
||||
Err(NanoError::Parse(
|
||||
Err(CompilerError::Parse(
|
||||
"expected property name in constraint".to_string(),
|
||||
))
|
||||
}
|
||||
|
|
@ -309,7 +312,7 @@ fn extract_string_from_constraint_arg(pair: &pest::iterators::Pair<Rule>) -> Res
|
|||
}
|
||||
|
||||
find_string(pair)?
|
||||
.ok_or_else(|| NanoError::Parse("expected string argument in constraint".to_string()))
|
||||
.ok_or_else(|| CompilerError::Parse("expected string argument in constraint".to_string()))
|
||||
}
|
||||
|
||||
fn extract_range_bounds(
|
||||
|
|
@ -327,7 +330,9 @@ fn extract_range_bounds(
|
|||
}
|
||||
}
|
||||
found.ok_or_else(|| {
|
||||
NanoError::Parse("expected range bounds (min..max) in @range constraint".to_string())
|
||||
CompilerError::Parse(
|
||||
"expected range bounds (min..max) in @range constraint".to_string(),
|
||||
)
|
||||
})?
|
||||
};
|
||||
|
||||
|
|
@ -378,7 +383,7 @@ fn parse_constraint_bound(pair: &pest::iterators::Pair<Rule>) -> Result<Constrai
|
|||
}
|
||||
}
|
||||
|
||||
Err(NanoError::Parse(format!(
|
||||
Err(CompilerError::Parse(format!(
|
||||
"invalid constraint bound: {}",
|
||||
text
|
||||
)))
|
||||
|
|
@ -411,7 +416,7 @@ fn resolve_interfaces(node: &mut NodeDecl, interfaces: &[&InterfaceDecl]) -> Res
|
|||
|
||||
for iface_name in &node.implements {
|
||||
let iface = interface_map.get(iface_name.as_str()).ok_or_else(|| {
|
||||
NanoError::Parse(format!(
|
||||
CompilerError::Parse(format!(
|
||||
"node {} implements unknown interface '{}'",
|
||||
node.name, iface_name
|
||||
))
|
||||
|
|
@ -421,7 +426,7 @@ fn resolve_interfaces(node: &mut NodeDecl, interfaces: &[&InterfaceDecl]) -> Res
|
|||
if let Some(existing) = node.properties.iter().find(|p| p.name == iface_prop.name) {
|
||||
// Property exists — verify type compatibility
|
||||
if existing.prop_type != iface_prop.prop_type {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"node {} property '{}' has type {} but interface {} declares it as {}",
|
||||
node.name,
|
||||
iface_prop.name,
|
||||
|
|
@ -472,36 +477,35 @@ fn parse_type_ref(pair: pest::iterators::Pair<Rule>) -> Result<PropType> {
|
|||
let mut inner = pair
|
||||
.into_inner()
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("type reference is missing core type".to_string()))?;
|
||||
.ok_or_else(|| CompilerError::Parse("type reference is missing core type".to_string()))?;
|
||||
if inner.as_rule() == Rule::core_type {
|
||||
inner = inner
|
||||
.into_inner()
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("type reference is missing core type".to_string()))?;
|
||||
inner = inner.into_inner().next().ok_or_else(|| {
|
||||
CompilerError::Parse("type reference is missing core type".to_string())
|
||||
})?;
|
||||
}
|
||||
|
||||
match inner.as_rule() {
|
||||
Rule::base_type => {
|
||||
let scalar = ScalarType::from_str_name(inner.as_str())
|
||||
.ok_or_else(|| NanoError::Parse(format!("unknown type: {}", inner.as_str())))?;
|
||||
.ok_or_else(|| CompilerError::Parse(format!("unknown type: {}", inner.as_str())))?;
|
||||
Ok(PropType::scalar(scalar, nullable))
|
||||
}
|
||||
Rule::vector_type => {
|
||||
let dim_text = inner
|
||||
.into_inner()
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("Vector type missing dimension".to_string()))?
|
||||
.ok_or_else(|| CompilerError::Parse("Vector type missing dimension".to_string()))?
|
||||
.as_str();
|
||||
let dim = dim_text
|
||||
.parse::<u32>()
|
||||
.map_err(|e| NanoError::Parse(format!("invalid Vector dimension: {}", e)))?;
|
||||
.map_err(|e| CompilerError::Parse(format!("invalid Vector dimension: {}", e)))?;
|
||||
if dim == 0 {
|
||||
return Err(NanoError::Parse(
|
||||
return Err(CompilerError::Parse(
|
||||
"Vector dimension must be greater than zero".to_string(),
|
||||
));
|
||||
}
|
||||
if dim > i32::MAX as u32 {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"Vector dimension {} exceeds maximum supported {}",
|
||||
dim,
|
||||
i32::MAX
|
||||
|
|
@ -510,15 +514,14 @@ fn parse_type_ref(pair: pest::iterators::Pair<Rule>) -> Result<PropType> {
|
|||
Ok(PropType::scalar(ScalarType::Vector(dim), nullable))
|
||||
}
|
||||
Rule::list_type => {
|
||||
let element = inner
|
||||
.into_inner()
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("list type missing element type".to_string()))?;
|
||||
let element = inner.into_inner().next().ok_or_else(|| {
|
||||
CompilerError::Parse("list type missing element type".to_string())
|
||||
})?;
|
||||
let scalar = ScalarType::from_str_name(element.as_str()).ok_or_else(|| {
|
||||
NanoError::Parse(format!("unknown list element type: {}", element.as_str()))
|
||||
CompilerError::Parse(format!("unknown list element type: {}", element.as_str()))
|
||||
})?;
|
||||
if matches!(scalar, ScalarType::Blob) {
|
||||
return Err(NanoError::Parse(
|
||||
return Err(CompilerError::Parse(
|
||||
"list of Blob is not supported".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -532,7 +535,7 @@ fn parse_type_ref(pair: pest::iterators::Pair<Rule>) -> Result<PropType> {
|
|||
}
|
||||
}
|
||||
if values.is_empty() {
|
||||
return Err(NanoError::Parse(
|
||||
return Err(CompilerError::Parse(
|
||||
"enum type must include at least one value".to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -540,13 +543,13 @@ fn parse_type_ref(pair: pest::iterators::Pair<Rule>) -> Result<PropType> {
|
|||
dedup.sort();
|
||||
dedup.dedup();
|
||||
if dedup.len() != values.len() {
|
||||
return Err(NanoError::Parse(
|
||||
return Err(CompilerError::Parse(
|
||||
"enum type cannot include duplicate values".to_string(),
|
||||
));
|
||||
}
|
||||
Ok(PropType::enum_type(values, nullable))
|
||||
}
|
||||
other => Err(NanoError::Parse(format!(
|
||||
other => Err(CompilerError::Parse(format!(
|
||||
"unexpected type rule: {:?}",
|
||||
other
|
||||
))),
|
||||
|
|
@ -595,19 +598,19 @@ fn validate_string_annotation(
|
|||
continue;
|
||||
}
|
||||
if seen {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"{} declares @{} multiple times",
|
||||
target, annotation
|
||||
)));
|
||||
}
|
||||
let value = ann.value.as_deref().ok_or_else(|| {
|
||||
NanoError::Parse(format!(
|
||||
CompilerError::Parse(format!(
|
||||
"@{} on {} requires a non-empty value",
|
||||
annotation, target
|
||||
))
|
||||
})?;
|
||||
if value.trim().is_empty() {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@{} on {} requires a non-empty value",
|
||||
annotation, target
|
||||
)));
|
||||
|
|
@ -631,7 +634,7 @@ fn validate_schema_annotations(schema: &SchemaFile) -> Result<()> {
|
|||
|| ann.name == "index"
|
||||
|| ann.name == "embed"
|
||||
{
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@{} is only supported on node properties or as body constraint (node {})",
|
||||
ann.name, node.name
|
||||
)));
|
||||
|
|
@ -660,7 +663,7 @@ fn validate_schema_annotations(schema: &SchemaFile) -> Result<()> {
|
|||
|| ann.name == "index"
|
||||
|| ann.name == "embed"
|
||||
{
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@{} is not supported on edges (edge {})",
|
||||
ann.name, edge.name
|
||||
)));
|
||||
|
|
@ -714,13 +717,13 @@ fn validate_property_annotations(
|
|||
|| ann.name == "index"
|
||||
|| ann.name == "embed")
|
||||
{
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@{} is not supported on list property {}.{}",
|
||||
ann.name, type_name, prop.name
|
||||
)));
|
||||
}
|
||||
if is_vector && (ann.name == "key" || ann.name == "unique") {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@{} is not supported on vector property {}.{}",
|
||||
ann.name, type_name, prop.name
|
||||
)));
|
||||
|
|
@ -731,13 +734,13 @@ fn validate_property_annotations(
|
|||
|| ann.name == "index"
|
||||
|| ann.name == "embed")
|
||||
{
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@{} is not supported on blob property {}.{}",
|
||||
ann.name, type_name, prop.name
|
||||
)));
|
||||
}
|
||||
if ann.name == "instruction" {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@instruction is only supported on node and edge types (property {}.{})",
|
||||
type_name, prop.name
|
||||
)));
|
||||
|
|
@ -745,7 +748,7 @@ fn validate_property_annotations(
|
|||
|
||||
// Edge-specific restrictions
|
||||
if is_edge && (ann.name == "key" || ann.name == "embed") {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@{} is not supported on edge properties (edge {}.{})",
|
||||
ann.name, type_name, prop.name
|
||||
)));
|
||||
|
|
@ -755,13 +758,13 @@ fn validate_property_annotations(
|
|||
match ann.name.as_str() {
|
||||
"key" => {
|
||||
if ann.value.is_some() {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@key on {}.{} does not accept a value",
|
||||
type_name, prop.name
|
||||
)));
|
||||
}
|
||||
if key_seen {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"property {}.{} declares @key multiple times",
|
||||
type_name, prop.name
|
||||
)));
|
||||
|
|
@ -770,13 +773,13 @@ fn validate_property_annotations(
|
|||
}
|
||||
"unique" => {
|
||||
if ann.value.is_some() {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@unique on {}.{} does not accept a value",
|
||||
type_name, prop.name
|
||||
)));
|
||||
}
|
||||
if unique_seen {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"property {}.{} declares @unique multiple times",
|
||||
type_name, prop.name
|
||||
)));
|
||||
|
|
@ -785,13 +788,13 @@ fn validate_property_annotations(
|
|||
}
|
||||
"index" => {
|
||||
if ann.value.is_some() {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@index on {}.{} does not accept a value",
|
||||
type_name, prop.name
|
||||
)));
|
||||
}
|
||||
if index_seen {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"property {}.{} declares @index multiple times",
|
||||
type_name, prop.name
|
||||
)));
|
||||
|
|
@ -800,7 +803,7 @@ fn validate_property_annotations(
|
|||
}
|
||||
"embed" => {
|
||||
if embed_seen {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"property {}.{} declares @embed multiple times",
|
||||
type_name, prop.name
|
||||
)));
|
||||
|
|
@ -808,20 +811,20 @@ fn validate_property_annotations(
|
|||
embed_seen = true;
|
||||
|
||||
if !is_vector {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@embed is only supported on vector properties ({}.{})",
|
||||
type_name, prop.name
|
||||
)));
|
||||
}
|
||||
|
||||
let source_prop = ann.value.as_deref().ok_or_else(|| {
|
||||
NanoError::Parse(format!(
|
||||
CompilerError::Parse(format!(
|
||||
"@embed on {}.{} requires a source property name",
|
||||
type_name, prop.name
|
||||
))
|
||||
})?;
|
||||
if source_prop.trim().is_empty() {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@embed on {}.{} requires a non-empty source property name",
|
||||
type_name, prop.name
|
||||
)));
|
||||
|
|
@ -831,14 +834,14 @@ fn validate_property_annotations(
|
|||
.iter()
|
||||
.find(|p| p.name == source_prop)
|
||||
.ok_or_else(|| {
|
||||
NanoError::Parse(format!(
|
||||
CompilerError::Parse(format!(
|
||||
"@embed on {}.{} references unknown source property {}",
|
||||
type_name, prop.name, source_prop
|
||||
))
|
||||
})?;
|
||||
if source_decl.prop_type.list || source_decl.prop_type.scalar != ScalarType::String
|
||||
{
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@embed source property {}.{} must be String",
|
||||
type_name, source_prop
|
||||
)));
|
||||
|
|
@ -848,7 +851,7 @@ fn validate_property_annotations(
|
|||
// a typo can't be silently ignored (it would never validate).
|
||||
for key in ann.kwargs.keys() {
|
||||
if key != "model" {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@embed on {}.{} has unknown argument '{}=' (only 'model' is supported)",
|
||||
type_name, prop.name, key
|
||||
)));
|
||||
|
|
@ -893,45 +896,45 @@ fn validate_type_constraints(
|
|||
match constraint {
|
||||
Constraint::Key(cols) => {
|
||||
if is_edge {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@key constraint is not supported on edges (edge {})",
|
||||
type_name
|
||||
)));
|
||||
}
|
||||
key_count += 1;
|
||||
if key_count > 1 {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"node type {} has multiple @key constraints; only one is supported",
|
||||
type_name
|
||||
)));
|
||||
}
|
||||
for col in cols {
|
||||
let prop = prop_names.get(col.as_str()).ok_or_else(|| {
|
||||
NanoError::Parse(format!(
|
||||
CompilerError::Parse(format!(
|
||||
"@key on {} references unknown property '{}'",
|
||||
type_name, col
|
||||
))
|
||||
})?;
|
||||
if prop.prop_type.nullable {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@key property {}.{} cannot be nullable",
|
||||
type_name, col
|
||||
)));
|
||||
}
|
||||
if prop.prop_type.list {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@key is not supported on list property {}.{}",
|
||||
type_name, col
|
||||
)));
|
||||
}
|
||||
if matches!(prop.prop_type.scalar, ScalarType::Vector(_)) {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@key is not supported on vector property {}.{}",
|
||||
type_name, col
|
||||
)));
|
||||
}
|
||||
if matches!(prop.prop_type.scalar, ScalarType::Blob) {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@key is not supported on blob property {}.{}",
|
||||
type_name, col
|
||||
)));
|
||||
|
|
@ -945,7 +948,7 @@ fn validate_type_constraints(
|
|||
continue;
|
||||
}
|
||||
if !prop_names.contains_key(col.as_str()) {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@unique on {} references unknown property '{}'",
|
||||
type_name, col
|
||||
)));
|
||||
|
|
@ -958,13 +961,13 @@ fn validate_type_constraints(
|
|||
continue;
|
||||
}
|
||||
let prop = prop_names.get(col.as_str()).ok_or_else(|| {
|
||||
NanoError::Parse(format!(
|
||||
CompilerError::Parse(format!(
|
||||
"@index on {} references unknown property '{}'",
|
||||
type_name, col
|
||||
))
|
||||
})?;
|
||||
if matches!(prop.prop_type.scalar, ScalarType::Blob) {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@index is not supported on blob property {}.{}",
|
||||
type_name, col
|
||||
)));
|
||||
|
|
@ -973,19 +976,19 @@ fn validate_type_constraints(
|
|||
}
|
||||
Constraint::Range { property, .. } => {
|
||||
if is_edge {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@range constraint is not supported on edges (edge {})",
|
||||
type_name
|
||||
)));
|
||||
}
|
||||
let prop = prop_names.get(property.as_str()).ok_or_else(|| {
|
||||
NanoError::Parse(format!(
|
||||
CompilerError::Parse(format!(
|
||||
"@range on {} references unknown property '{}'",
|
||||
type_name, property
|
||||
))
|
||||
})?;
|
||||
if !prop.prop_type.scalar.is_numeric() {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@range on {}.{} requires a numeric type, got {}",
|
||||
type_name,
|
||||
property,
|
||||
|
|
@ -995,19 +998,19 @@ fn validate_type_constraints(
|
|||
}
|
||||
Constraint::Check { property, .. } => {
|
||||
if is_edge {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@check constraint is not supported on edges (edge {})",
|
||||
type_name
|
||||
)));
|
||||
}
|
||||
let prop = prop_names.get(property.as_str()).ok_or_else(|| {
|
||||
NanoError::Parse(format!(
|
||||
CompilerError::Parse(format!(
|
||||
"@check on {} references unknown property '{}'",
|
||||
type_name, property
|
||||
))
|
||||
})?;
|
||||
if prop.prop_type.scalar != ScalarType::String {
|
||||
return Err(NanoError::Parse(format!(
|
||||
return Err(CompilerError::Parse(format!(
|
||||
"@check on {}.{} requires String type, got {}",
|
||||
type_name,
|
||||
property,
|
||||
|
|
|
|||
|
|
@ -2,9 +2,9 @@ pub mod api;
|
|||
mod handlers;
|
||||
mod mcp;
|
||||
mod settings;
|
||||
pub use settings::{load_server_settings, classify_server_runtime_state, ServerRuntimeState};
|
||||
use settings::*;
|
||||
use handlers::*;
|
||||
use settings::*;
|
||||
pub use settings::{ServerRuntimeState, classify_server_runtime_state, load_server_settings};
|
||||
pub mod auth;
|
||||
pub mod graph_id;
|
||||
pub mod identity;
|
||||
|
|
@ -30,10 +30,10 @@ use api::{
|
|||
BranchCreateOutput, BranchCreateRequest, BranchDeleteOutput, BranchListOutput,
|
||||
BranchMergeOutput, BranchMergeRequest, ChangeOutput, ChangeRequest, CommitListOutput,
|
||||
CommitListQuery, ErrorCode, ErrorOutput, ExportRequest, GraphInfo, GraphListResponse,
|
||||
HealthOutput, IngestOutput, IngestRequest, InvokeStoredQueryRequest,
|
||||
InvokeStoredQueryResponse, QueriesCatalogOutput, QueryRequest, ReadOutput, ReadRequest,
|
||||
SchemaApplyOutput, SchemaApplyRequest, SchemaOutput, SnapshotQuery, ingest_output,
|
||||
schema_apply_output, snapshot_payload,
|
||||
HealthOutput, IngestOutput, IngestRequest, InvokeStoredQueryRequest, InvokeStoredQueryResponse,
|
||||
QueriesCatalogOutput, QueryRequest, ReadOutput, ReadRequest, SchemaApplyOutput,
|
||||
SchemaApplyRequest, SchemaOutput, SnapshotQuery, ingest_output, schema_apply_output,
|
||||
snapshot_payload,
|
||||
};
|
||||
pub use auth::{AWS_SECRET_ENV, EnvOrFileTokenSource, TokenSource, resolve_token_source};
|
||||
use axum::body::{Body, Bytes};
|
||||
|
|
@ -167,6 +167,10 @@ pub struct ServerConfig {
|
|||
/// who set up auth and forgot the policy file would otherwise ship
|
||||
/// the illusion of protection.
|
||||
pub allow_unauthenticated: bool,
|
||||
/// Operator opt-in for fail-fast cluster boot. By default, graph-local
|
||||
/// startup failures quarantine that graph and healthy graphs still serve.
|
||||
/// When true, any quarantined or failed graph aborts startup.
|
||||
pub require_all_graphs: bool,
|
||||
}
|
||||
|
||||
/// What `load_server_settings` produces. RFC-011 cluster-only: the
|
||||
|
|
@ -316,7 +320,14 @@ impl AppState {
|
|||
) -> Self {
|
||||
let bearer_tokens = hash_bearer_tokens(bearer_tokens);
|
||||
let per_graph_policy = policy_engine.map(Arc::new);
|
||||
Self::build_single_mode(uri, db, bearer_tokens, per_graph_policy, Arc::new(workload), None)
|
||||
Self::build_single_mode(
|
||||
uri,
|
||||
db,
|
||||
bearer_tokens,
|
||||
per_graph_policy,
|
||||
Arc::new(workload),
|
||||
None,
|
||||
)
|
||||
}
|
||||
|
||||
/// Like `new_single`, but attaches a pre-validated stored-query
|
||||
|
|
@ -433,13 +444,8 @@ impl AppState {
|
|||
bearer_tokens: Vec<(String, String)>,
|
||||
policy_file: Option<&PathBuf>,
|
||||
) -> Result<Self> {
|
||||
Self::open_single_with_queries(
|
||||
uri,
|
||||
bearer_tokens,
|
||||
policy_file,
|
||||
QueryRegistry::default(),
|
||||
)
|
||||
.await
|
||||
Self::open_single_with_queries(uri, bearer_tokens, policy_file, QueryRegistry::default())
|
||||
.await
|
||||
}
|
||||
|
||||
/// Single-mode boot with a stored-query registry: open the engine,
|
||||
|
|
@ -522,8 +528,7 @@ impl AppState {
|
|||
// reserved id `default` — both the registry key and the URL
|
||||
// segment (`/graphs/default/...`).
|
||||
let uri = normalize_root_uri(&uri).unwrap_or(uri);
|
||||
let graph_id =
|
||||
GraphId::try_from("default").expect("'default' is a valid GraphId");
|
||||
let graph_id = GraphId::try_from("default").expect("'default' is a valid GraphId");
|
||||
let key = GraphKey::cluster(graph_id);
|
||||
let handle = Arc::new(GraphHandle {
|
||||
key,
|
||||
|
|
@ -950,15 +955,21 @@ pub fn build_app(state: AppState) -> Router {
|
|||
// flagged and their responses include RFC 9745 Deprecation +
|
||||
// RFC 8288 Link headers. Suppress the call-site warning for the
|
||||
// route registration itself.
|
||||
.route("/read", post({
|
||||
#[allow(deprecated)]
|
||||
server_read
|
||||
}))
|
||||
.route(
|
||||
"/read",
|
||||
post({
|
||||
#[allow(deprecated)]
|
||||
server_read
|
||||
}),
|
||||
)
|
||||
.route("/query", post(server_query))
|
||||
.route("/change", post({
|
||||
#[allow(deprecated)]
|
||||
server_change
|
||||
}))
|
||||
.route(
|
||||
"/change",
|
||||
post({
|
||||
#[allow(deprecated)]
|
||||
server_change
|
||||
}),
|
||||
)
|
||||
.route("/mutate", post(server_mutate))
|
||||
.route("/queries", get(server_list_queries))
|
||||
.route("/queries/{name}", post(server_invoke_query))
|
||||
|
|
@ -1080,7 +1091,14 @@ pub async fn serve(config: ServerConfig) -> Result<()> {
|
|||
config = %config_path.display(),
|
||||
"serving omnigraph"
|
||||
);
|
||||
open_multi_graph_state(graphs, tokens, server_policy.as_ref(), config_path).await?
|
||||
open_multi_graph_state(
|
||||
graphs,
|
||||
tokens,
|
||||
server_policy.as_ref(),
|
||||
config_path,
|
||||
config.require_all_graphs,
|
||||
)
|
||||
.await?
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -1109,9 +1127,9 @@ fn load_graph_policy(source: &PolicySource, graph_id: &str) -> Result<PolicyEngi
|
|||
}
|
||||
|
||||
/// Parallel open of every graph in the startup config, with bounded
|
||||
/// concurrency (`buffer_unordered(4)`). Fail-fast — the first open error
|
||||
/// aborts startup; other in-flight opens are dropped (their `Omnigraph`
|
||||
/// instances close cleanly via Arc drop).
|
||||
/// concurrency (`buffer_unordered(4)`). Graph-specific open failures
|
||||
/// quarantine that graph; startup succeeds as long as at least one graph
|
||||
/// opens.
|
||||
///
|
||||
/// The bound 4 is a rule-of-thumb for I/O-bound work. At N ≤ 10 this
|
||||
/// trades startup latency for a small amount of concurrent S3 / Lance
|
||||
|
|
@ -1121,8 +1139,9 @@ pub async fn open_multi_graph_state(
|
|||
tokens: Vec<(String, String)>,
|
||||
server_policy_source: Option<&PolicySource>,
|
||||
config_path: PathBuf,
|
||||
require_all_graphs: bool,
|
||||
) -> Result<AppState> {
|
||||
use futures::{StreamExt, TryStreamExt};
|
||||
use futures::StreamExt;
|
||||
|
||||
if graphs.is_empty() {
|
||||
bail!("multi-graph mode requires at least one graph in the `graphs:` map");
|
||||
|
|
@ -1134,21 +1153,48 @@ pub async fn open_multi_graph_state(
|
|||
// `Omnigraph::Server::"root"` entity at evaluation time.
|
||||
let server_policy = match server_policy_source {
|
||||
Some(PolicySource::File(path)) => Some(PolicyEngine::load_server(path)?),
|
||||
Some(PolicySource::Inline(source)) => {
|
||||
Some(PolicyEngine::load_server_from_source(source)?)
|
||||
}
|
||||
Some(PolicySource::Inline(source)) => Some(PolicyEngine::load_server_from_source(source)?),
|
||||
None => None,
|
||||
};
|
||||
|
||||
// `try_collect` propagates the first error eagerly, dropping every
|
||||
// in-flight open. `buffer_unordered + collect::<Vec<_>>` would drain
|
||||
// the stream before checking errors — incorrect for the docstring's
|
||||
// "fail-fast" claim and wasteful on S3-backed graphs.
|
||||
let handles: Vec<Arc<GraphHandle>> = futures::stream::iter(graphs.into_iter())
|
||||
.map(|cfg| async move { open_single_graph(cfg).await })
|
||||
let configured_graphs = graphs.len();
|
||||
let results = futures::stream::iter(graphs.into_iter())
|
||||
.map(|cfg| async move {
|
||||
let graph_id = cfg.graph_id.clone();
|
||||
open_single_graph(cfg).await.map_err(|err| (graph_id, err))
|
||||
})
|
||||
.buffer_unordered(4)
|
||||
.try_collect()
|
||||
.await?;
|
||||
.collect::<Vec<_>>()
|
||||
.await;
|
||||
let mut handles = Vec::new();
|
||||
let mut failed = 0usize;
|
||||
for result in results {
|
||||
match result {
|
||||
Ok(handle) => handles.push(handle),
|
||||
Err((graph_id, err)) => {
|
||||
failed += 1;
|
||||
warn!(
|
||||
graph_id = %graph_id,
|
||||
error = %err,
|
||||
"graph quarantined during startup"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
if require_all_graphs && failed > 0 {
|
||||
bail!(
|
||||
"strict multi-graph startup requires every graph to open ({} configured, {} failed)",
|
||||
configured_graphs,
|
||||
failed
|
||||
);
|
||||
}
|
||||
if handles.is_empty() {
|
||||
bail!(
|
||||
"no healthy graphs opened from multi-graph startup config ({} configured, {} failed)",
|
||||
configured_graphs,
|
||||
failed
|
||||
);
|
||||
}
|
||||
|
||||
let workload = workload::WorkloadController::from_env();
|
||||
let state = AppState::new_multi(handles, tokens, server_policy, workload, Some(config_path))
|
||||
|
|
|
|||
|
|
@ -22,6 +22,11 @@ struct Cli {
|
|||
/// Equivalent to setting `OMNIGRAPH_UNAUTHENTICATED=1`.
|
||||
#[arg(long)]
|
||||
unauthenticated: bool,
|
||||
/// Fail startup if any applied graph is quarantined or fails to open.
|
||||
/// By default, graph-local failures are logged and healthy graphs still
|
||||
/// serve. Equivalent to setting `OMNIGRAPH_REQUIRE_ALL_GRAPHS=1`.
|
||||
#[arg(long)]
|
||||
require_all_graphs: bool,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
|
|
@ -30,7 +35,12 @@ async fn main() -> Result<()> {
|
|||
init_tracing();
|
||||
|
||||
let cli = Cli::parse();
|
||||
let settings: ServerConfig =
|
||||
load_server_settings(cli.cluster.as_ref(), cli.bind, cli.unauthenticated).await?;
|
||||
let settings: ServerConfig = load_server_settings(
|
||||
cli.cluster.as_ref(),
|
||||
cli.bind,
|
||||
cli.unauthenticated,
|
||||
cli.require_all_graphs,
|
||||
)
|
||||
.await?;
|
||||
serve(settings).await
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ pub(crate) async fn load_cluster_settings(
|
|||
cluster_dir: &PathBuf,
|
||||
cli_bind: Option<String>,
|
||||
cli_allow_unauthenticated: bool,
|
||||
cli_require_all_graphs: bool,
|
||||
) -> Result<ServerConfig> {
|
||||
// `--cluster` accepts either a config directory (the ledger location is
|
||||
// resolved through cluster.yaml's `storage:` key) or a storage-root URI
|
||||
|
|
@ -28,11 +29,45 @@ pub(crate) async fn load_cluster_settings(
|
|||
.map_err(|diagnostics| {
|
||||
let details = diagnostics
|
||||
.iter()
|
||||
.map(|diagnostic| format!("[{}] {}: {}", diagnostic.code, diagnostic.path, diagnostic.message))
|
||||
.map(|diagnostic| {
|
||||
format!(
|
||||
"[{}] {}: {}",
|
||||
diagnostic.code, diagnostic.path, diagnostic.message
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n ");
|
||||
eyre!("the cluster at '{}' is not ready to serve:\n {details}", cluster_dir.display())
|
||||
eyre!(
|
||||
"the cluster at '{}' is not ready to serve:\n {details}",
|
||||
cluster_dir.display()
|
||||
)
|
||||
})?;
|
||||
for diagnostic in &snapshot.diagnostics {
|
||||
warn!(
|
||||
code = %diagnostic.code,
|
||||
path = %diagnostic.path,
|
||||
message = %diagnostic.message,
|
||||
"cluster startup diagnostic"
|
||||
);
|
||||
}
|
||||
let env_require_all_graphs = env_flag("OMNIGRAPH_REQUIRE_ALL_GRAPHS");
|
||||
let require_all_graphs = cli_require_all_graphs || env_require_all_graphs;
|
||||
if require_all_graphs && !snapshot.diagnostics.is_empty() {
|
||||
let details = snapshot
|
||||
.diagnostics
|
||||
.iter()
|
||||
.map(|diagnostic| {
|
||||
format!(
|
||||
"[{}] {}: {}",
|
||||
diagnostic.code, diagnostic.path, diagnostic.message
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n ");
|
||||
bail!(
|
||||
"strict cluster boot requires every applied graph to be ready; startup diagnostics:\n {details}"
|
||||
);
|
||||
}
|
||||
|
||||
// Bindings -> Cedar slots. The serving pipeline loads one bundle per
|
||||
// graph plus one server-level bundle; stacked bundles per scope are a
|
||||
|
|
@ -69,6 +104,7 @@ pub(crate) async fn load_cluster_settings(
|
|||
}
|
||||
|
||||
let mut graphs = Vec::new();
|
||||
let mut skipped_graphs = Vec::new();
|
||||
for graph in &snapshot.graphs {
|
||||
let specs: Vec<queries::RegistrySpec> = snapshot
|
||||
.queries
|
||||
|
|
@ -82,40 +118,75 @@ pub(crate) async fn load_cluster_settings(
|
|||
// spec carries only identity + source.
|
||||
})
|
||||
.collect();
|
||||
let registry = QueryRegistry::from_specs(specs).map_err(|errors| {
|
||||
let details = errors
|
||||
.iter()
|
||||
.map(|error| error.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n ");
|
||||
eyre!(
|
||||
"stored queries in the applied revision failed to parse:\n {details}\nrun `cluster refresh` then `cluster apply`, and restart"
|
||||
)
|
||||
})?;
|
||||
let registry = match QueryRegistry::from_specs(specs) {
|
||||
Ok(registry) => registry,
|
||||
Err(errors) => {
|
||||
let details = errors
|
||||
.iter()
|
||||
.map(|error| error.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n ");
|
||||
warn!(
|
||||
graph_id = %graph.graph_id,
|
||||
errors = %details,
|
||||
"graph quarantined because stored queries failed to parse"
|
||||
);
|
||||
skipped_graphs.push(format!(
|
||||
"{}: stored queries failed to parse: {details}",
|
||||
graph.graph_id
|
||||
));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let embedding = match graph
|
||||
.embedding
|
||||
.as_ref()
|
||||
.map(|profile| {
|
||||
profile.resolve().map_err(|err| {
|
||||
eyre!("embedding provider for graph '{}': {err}", graph.graph_id)
|
||||
})
|
||||
})
|
||||
.transpose()
|
||||
{
|
||||
Ok(embedding) => embedding,
|
||||
Err(err) => {
|
||||
warn!(
|
||||
graph_id = %graph.graph_id,
|
||||
error = %err,
|
||||
"graph quarantined because embedding provider configuration failed"
|
||||
);
|
||||
skipped_graphs.push(format!("{}: {err}", graph.graph_id));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
graphs.push(GraphStartupConfig {
|
||||
graph_id: graph.graph_id.clone(),
|
||||
uri: graph.root.to_string_lossy().to_string(),
|
||||
policy: graph_policies.get(&graph.graph_id).cloned(),
|
||||
embedding: graph
|
||||
.embedding
|
||||
.as_ref()
|
||||
.map(|profile| {
|
||||
profile.resolve().map_err(|err| {
|
||||
eyre!("embedding provider for graph '{}': {err}", graph.graph_id)
|
||||
})
|
||||
})
|
||||
.transpose()?,
|
||||
embedding,
|
||||
queries: registry,
|
||||
});
|
||||
}
|
||||
if graphs.is_empty() {
|
||||
let skipped = skipped_graphs.join(", ");
|
||||
bail!(
|
||||
"the cluster at '{}' has no healthy graphs to serve{}",
|
||||
cluster_dir.display(),
|
||||
if skipped.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
format!(" (quarantined: {skipped})")
|
||||
}
|
||||
);
|
||||
}
|
||||
if require_all_graphs && !skipped_graphs.is_empty() {
|
||||
bail!(
|
||||
"strict cluster boot requires every graph to build startup settings (quarantined: {})",
|
||||
skipped_graphs.join(", ")
|
||||
);
|
||||
}
|
||||
|
||||
let env_unauth = std::env::var("OMNIGRAPH_UNAUTHENTICATED")
|
||||
.ok()
|
||||
.map(|v| {
|
||||
let trimmed = v.trim();
|
||||
!trimmed.is_empty() && trimmed != "0" && !trimmed.eq_ignore_ascii_case("false")
|
||||
})
|
||||
.unwrap_or(false);
|
||||
let env_unauth = env_flag("OMNIGRAPH_UNAUTHENTICATED");
|
||||
|
||||
Ok(ServerConfig {
|
||||
mode: ServerConfigMode::Multi {
|
||||
|
|
@ -125,6 +196,7 @@ pub(crate) async fn load_cluster_settings(
|
|||
},
|
||||
bind: cli_bind.unwrap_or_else(|| "127.0.0.1:8080".to_string()),
|
||||
allow_unauthenticated: cli_allow_unauthenticated || env_unauth,
|
||||
require_all_graphs,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -136,6 +208,7 @@ pub async fn load_server_settings(
|
|||
cli_cluster: Option<&PathBuf>,
|
||||
cli_bind: Option<String>,
|
||||
cli_allow_unauthenticated: bool,
|
||||
cli_require_all_graphs: bool,
|
||||
) -> Result<ServerConfig> {
|
||||
let Some(cluster_dir) = cli_cluster else {
|
||||
bail!(
|
||||
|
|
@ -145,7 +218,23 @@ pub async fn load_server_settings(
|
|||
was removed in RFC-011."
|
||||
);
|
||||
};
|
||||
load_cluster_settings(cluster_dir, cli_bind, cli_allow_unauthenticated).await
|
||||
load_cluster_settings(
|
||||
cluster_dir,
|
||||
cli_bind,
|
||||
cli_allow_unauthenticated,
|
||||
cli_require_all_graphs,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Interpret the environment variable `name` as a boolean switch.
///
/// Returns `false` when the variable cannot be read (`std::env::var` fails
/// for an unset or non-Unicode value) and when its whitespace-trimmed value
/// is empty, `0`, or `false` in any ASCII case. Every other value is `true`.
fn env_flag(name: &str) -> bool {
    match std::env::var(name) {
        Ok(raw) => {
            let value = raw.trim();
            // Only these three spellings switch the flag off.
            let disabled =
                value.is_empty() || value == "0" || value.eq_ignore_ascii_case("false");
            !disabled
        }
        Err(_) => false,
    }
}
|
||||
|
||||
/// MR-723 server runtime state, classified from the three-state matrix
|
||||
|
|
@ -238,7 +327,9 @@ pub(crate) fn read_bearer_tokens_file(path: &str) -> Result<Vec<(String, String)
|
|||
.wrap_err_with(|| format!("failed to parse bearer tokens file at {path}"))
|
||||
}
|
||||
|
||||
pub(crate) fn validate_bearer_tokens(entries: Vec<(String, String)>) -> Result<Vec<(String, String)>> {
|
||||
pub(crate) fn validate_bearer_tokens(
|
||||
entries: Vec<(String, String)>,
|
||||
) -> Result<Vec<(String, String)>> {
|
||||
let mut seen_actors = HashSet::new();
|
||||
let mut seen_tokens = HashSet::new();
|
||||
let mut normalized = Vec::with_capacity(entries.len());
|
||||
|
|
@ -299,11 +390,18 @@ mod tests {
|
|||
/// as 404 without also masking a 401/500. Pins each outcome.
|
||||
#[test]
|
||||
fn authorize_splits_decision_from_operational_error() {
|
||||
use super::{Authz, PolicyAction, PolicyCompiler, PolicyConfig, PolicyRequest, ResolvedActor, authorize};
|
||||
use super::{
|
||||
Authz, PolicyAction, PolicyCompiler, PolicyConfig, PolicyRequest, ResolvedActor,
|
||||
authorize,
|
||||
};
|
||||
use std::sync::Arc;
|
||||
|
||||
fn req(action: PolicyAction) -> PolicyRequest {
|
||||
PolicyRequest { action, branch: None, target_branch: None }
|
||||
PolicyRequest {
|
||||
action,
|
||||
branch: None,
|
||||
target_branch: None,
|
||||
}
|
||||
}
|
||||
let actor = ResolvedActor::cluster_static(Arc::from("act-alice"));
|
||||
|
||||
|
|
@ -343,7 +441,11 @@ mod tests {
|
|||
authorize(
|
||||
Some(&actor),
|
||||
Some(&engine),
|
||||
PolicyRequest { action: PolicyAction::Read, branch: Some("main".to_string()), target_branch: None },
|
||||
PolicyRequest {
|
||||
action: PolicyAction::Read,
|
||||
branch: Some("main".to_string()),
|
||||
target_branch: None
|
||||
},
|
||||
)
|
||||
.unwrap(),
|
||||
Authz::Allowed
|
||||
|
|
@ -352,11 +454,17 @@ mod tests {
|
|||
match authorize(
|
||||
Some(&actor),
|
||||
Some(&engine),
|
||||
PolicyRequest { action: PolicyAction::Change, branch: Some("main".to_string()), target_branch: None },
|
||||
PolicyRequest {
|
||||
action: PolicyAction::Change,
|
||||
branch: Some("main".to_string()),
|
||||
target_branch: None,
|
||||
},
|
||||
)
|
||||
.unwrap()
|
||||
{
|
||||
Authz::Denied(message) => assert!(!message.is_empty(), "a deny carries its decision message"),
|
||||
Authz::Denied(message) => {
|
||||
assert!(!message.is_empty(), "a deny carries its decision message")
|
||||
}
|
||||
Authz::Allowed => panic!("change must be denied: only read is allowed"),
|
||||
}
|
||||
// Policy installed but no actor → operational failure (`Err`), NOT a
|
||||
|
|
@ -393,8 +501,7 @@ mod tests {
|
|||
};
|
||||
|
||||
// Empty registry → nothing attached, no error.
|
||||
let empty =
|
||||
super::validate_and_attach(QueryRegistry::default(), &catalog, "g").unwrap();
|
||||
let empty = super::validate_and_attach(QueryRegistry::default(), &catalog, "g").unwrap();
|
||||
assert!(empty.is_none());
|
||||
|
||||
// A query that type-checks → attached.
|
||||
|
|
@ -403,7 +510,11 @@ mod tests {
|
|||
"query find_user() { match { $u: User } return { $u.name } }",
|
||||
)])
|
||||
.unwrap();
|
||||
assert!(super::validate_and_attach(ok, &catalog, "g").unwrap().is_some());
|
||||
assert!(
|
||||
super::validate_and_attach(ok, &catalog, "g")
|
||||
.unwrap()
|
||||
.is_some()
|
||||
);
|
||||
|
||||
// A query referencing a type the schema lacks → boot refusal that
|
||||
// names both the graph label and the offending query.
|
||||
|
|
@ -416,7 +527,10 @@ mod tests {
|
|||
let msg = err.to_string();
|
||||
assert!(msg.contains("graph-x"), "labels the graph: {msg}");
|
||||
assert!(msg.contains("ghost"), "names the query: {msg}");
|
||||
assert!(msg.contains("schema check"), "mentions the schema check: {msg}");
|
||||
assert!(
|
||||
msg.contains("schema check"),
|
||||
"mentions the schema check: {msg}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -447,7 +561,7 @@ mod tests {
|
|||
async fn server_settings_require_cluster_boot_source() {
|
||||
// RFC-011 cluster-only: with no --cluster the server refuses to
|
||||
// start and names the cluster-required remedy.
|
||||
let error = super::load_server_settings(None, None, false)
|
||||
let error = super::load_server_settings(None, None, false, false)
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert!(
|
||||
|
|
@ -530,6 +644,7 @@ mod tests {
|
|||
},
|
||||
bind: "127.0.0.1:0".to_string(),
|
||||
allow_unauthenticated: false,
|
||||
require_all_graphs: false,
|
||||
};
|
||||
let result = serve(config).await;
|
||||
let err = result
|
||||
|
|
@ -582,6 +697,7 @@ mod tests {
|
|||
},
|
||||
bind: "127.0.0.1:0".to_string(),
|
||||
allow_unauthenticated: false,
|
||||
require_all_graphs: false,
|
||||
};
|
||||
let result = serve(config).await;
|
||||
let err =
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ use serde_json::Value;
|
|||
use serial_test::serial;
|
||||
use tower::ServiceExt;
|
||||
|
||||
|
||||
mod support;
|
||||
use support::*;
|
||||
|
||||
|
|
@ -414,7 +413,7 @@ async fn cluster_boot_serves_applied_state() {
|
|||
assert!(server_policy.is_none());
|
||||
|
||||
let state =
|
||||
omnigraph_server::open_multi_graph_state(graphs, Vec::new(), None, config_path)
|
||||
omnigraph_server::open_multi_graph_state(graphs, Vec::new(), None, config_path, false)
|
||||
.await
|
||||
.unwrap();
|
||||
let app = build_app(state);
|
||||
|
|
@ -424,7 +423,10 @@ async fn cluster_boot_serves_applied_state() {
|
|||
// GET /graphs refuses even in cluster mode.
|
||||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder().uri("/graphs").body(Body::empty()).unwrap(),
|
||||
Request::builder()
|
||||
.uri("/graphs")
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(status, StatusCode::FORBIDDEN, "{body}");
|
||||
|
|
@ -460,6 +462,115 @@ async fn cluster_boot_serves_applied_state() {
|
|||
assert_eq!(status, StatusCode::OK, "{body}");
|
||||
}
|
||||
|
||||
// One of the two configured graphs points at a URI that is never initialised.
// With the final `open_multi_graph_state` argument set to `true` the whole
// startup must fail; with `false` the server comes up serving only the graph
// that opened, and the broken graph is absent from the registry and the API.
#[tokio::test]
|
||||
async fn cluster_boot_quarantines_graph_open_failures() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
let schema = "\nnode Person {\n name: String @key\n}\n";
|
||||
let good_uri = temp.path().join("good.omni");
|
||||
Omnigraph::init(good_uri.to_string_lossy().as_ref(), schema)
|
||||
.await
|
||||
.unwrap();
|
||||
// Unlike `good_uri`, nothing is ever initialised at this path.
let bad_uri = temp.path().join("missing.omni");
|
||||
// Server-level policy that lets `act-admin` perform `graph_list`.
let server_policy = omnigraph_server::PolicySource::Inline(
|
||||
r#"
|
||||
version: 1
|
||||
kind: server
|
||||
groups:
|
||||
admins: [act-admin]
|
||||
rules:
|
||||
- id: admins-list-graphs
|
||||
allow:
|
||||
actors: { group: admins }
|
||||
actions: [graph_list]
|
||||
"#
|
||||
.to_string(),
|
||||
);
|
||||
let graphs = vec![
|
||||
omnigraph_server::GraphStartupConfig {
|
||||
graph_id: "broken".to_string(),
|
||||
uri: bad_uri.to_string_lossy().to_string(),
|
||||
policy: None,
|
||||
embedding: None,
|
||||
queries: stored_query_registry(&[]),
|
||||
},
|
||||
omnigraph_server::GraphStartupConfig {
|
||||
graph_id: "good".to_string(),
|
||||
uri: good_uri.to_string_lossy().to_string(),
|
||||
policy: None,
|
||||
embedding: None,
|
||||
queries: stored_query_registry(&[]),
|
||||
},
|
||||
];
|
||||
// Strict startup (final argument `true`): the failed open must abort boot.
let strict_err = match omnigraph_server::open_multi_graph_state(
|
||||
graphs.clone(),
|
||||
vec![("act-admin".to_string(), "admin-token".to_string())],
|
||||
Some(&server_policy),
|
||||
temp.path().join("cluster.yaml"),
|
||||
true,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(_) => panic!("strict startup should reject a failed graph open"),
|
||||
Err(err) => err,
|
||||
};
|
||||
assert!(
|
||||
strict_err
|
||||
.to_string()
|
||||
.contains("strict multi-graph startup requires every graph to open"),
|
||||
"{strict_err}"
|
||||
);
|
||||
// Non-strict startup (final argument `false`): boot succeeds and only the
// graph that opened is registered.
let state = omnigraph_server::open_multi_graph_state(
|
||||
graphs,
|
||||
vec![("act-admin".to_string(), "admin-token".to_string())],
|
||||
Some(&server_policy),
|
||||
temp.path().join("cluster.yaml"),
|
||||
false,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let mut ready: Vec<_> = state
|
||||
.routing()
|
||||
.registry
|
||||
.list()
|
||||
.iter()
|
||||
.map(|handle| handle.key.graph_id.as_str().to_string())
|
||||
.collect();
|
||||
ready.sort();
|
||||
assert_eq!(ready, vec!["good"]);
|
||||
let app = build_app(state);
|
||||
|
||||
// GET /graphs as the admin lists only the graph that opened.
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/graphs")
|
||||
.header("authorization", "Bearer admin-token")
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(status, StatusCode::OK, "{body}");
|
||||
assert_eq!(
|
||||
body["graphs"]
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.map(|graph| graph["graph_id"].as_str().unwrap())
|
||||
.collect::<Vec<_>>(),
|
||||
vec!["good"]
|
||||
);
|
||||
|
||||
// Routes under the graph that failed to open answer 404.
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/graphs/broken/queries")
|
||||
.header("authorization", "Bearer admin-token")
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(status, StatusCode::NOT_FOUND, "{body}");
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
#[serial]
|
||||
async fn cluster_boot_injects_embedding_provider_config() {
|
||||
|
|
@ -555,6 +666,7 @@ graphs:
|
|||
Vec::new(),
|
||||
server_policy.as_ref(),
|
||||
config_path,
|
||||
false,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
|
@ -665,7 +777,10 @@ async fn cluster_boot_wires_policy_bindings_into_cedar_slots() {
|
|||
.unwrap();
|
||||
fs::write(
|
||||
temp.path().join("cluster.policy.yaml"),
|
||||
permit_all_policy_yaml(&["default"]).replace("protected_branches: [main]\n", "protected_branches: [main]\nkind: server\n"),
|
||||
permit_all_policy_yaml(&["default"]).replace(
|
||||
"protected_branches: [main]\n",
|
||||
"protected_branches: [main]\nkind: server\n",
|
||||
),
|
||||
)
|
||||
.unwrap();
|
||||
fs::write(
|
||||
|
|
@ -719,7 +834,7 @@ graphs:
|
|||
async fn cluster_boot_refusals() {
|
||||
// RFC-011 cluster-only: with no --cluster, boot refuses with the
|
||||
// cluster-required remedy.
|
||||
let err = omnigraph_server::load_server_settings(None, None, true)
|
||||
let err = omnigraph_server::load_server_settings(None, None, true, false)
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert!(err.to_string().contains("boots from a cluster"), "{err}");
|
||||
|
|
@ -729,7 +844,12 @@ async fn cluster_boot_refusals() {
|
|||
|
||||
// Tampered catalog blob refuses boot with the remedy.
|
||||
let blob_dir = dir.join("__cluster/resources/query/knowledge/find_person");
|
||||
let blob = fs::read_dir(&blob_dir).unwrap().next().unwrap().unwrap().path();
|
||||
let blob = fs::read_dir(&blob_dir)
|
||||
.unwrap()
|
||||
.next()
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
.path();
|
||||
fs::write(&blob, "tampered").unwrap();
|
||||
let err = cluster_settings(&dir).await.unwrap_err();
|
||||
assert!(
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ use omnigraph_server::api::ReadRequest;
|
|||
use omnigraph_server::{AppState, build_app};
|
||||
use serde_json::json;
|
||||
|
||||
|
||||
mod support;
|
||||
use support::*;
|
||||
|
||||
|
|
@ -137,6 +136,7 @@ async fn server_boots_cluster_from_bare_storage_uri_and_serves_query() {
|
|||
Some(&std::path::PathBuf::from(&root)),
|
||||
None,
|
||||
true,
|
||||
false,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
|
@ -153,6 +153,7 @@ async fn server_boots_cluster_from_bare_storage_uri_and_serves_query() {
|
|||
Vec::new(),
|
||||
server_policy.as_ref(),
|
||||
config_path,
|
||||
false,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
|
@ -170,7 +171,9 @@ async fn server_boots_cluster_from_bare_storage_uri_and_serves_query() {
|
|||
.await
|
||||
.unwrap();
|
||||
assert_eq!(response.status(), StatusCode::OK);
|
||||
let bytes = axum::body::to_bytes(response.into_body(), usize::MAX).await.unwrap();
|
||||
let bytes = axum::body::to_bytes(response.into_body(), usize::MAX)
|
||||
.await
|
||||
.unwrap();
|
||||
let value: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
|
||||
assert_eq!(value["rows"][0]["p.name"], "Ada", "{value}");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -370,6 +370,47 @@ async fn list_queries_requires_invoke_query() {
|
|||
assert!(names.contains(&"find_person"), "invoker sees the exposed query: {names:?}");
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn list_queries_surfaces_query_description_and_instruction() {
|
||||
// E2e for the query-level `.gq` surface: `@description`/`@instruction` on
|
||||
// a stored query declaration are carried through to clients via the typed
|
||||
// `QueryCatalogEntry` fields over `GET /queries`. A query without them
|
||||
// omits both fields (serde `skip_serializing_if = "Option::is_none"`).
|
||||
let described = "query described($name: String) \
|
||||
@description(\"Find a person by exact name.\") \
|
||||
@instruction(\"Use for exact lookups; prefer search for fuzzy matches.\") \
|
||||
{ match { $p: Person { name: $name } } return { $p.age } }";
|
||||
let (_temp, app) = app_with_stored_queries(
|
||||
&[
|
||||
("described", described, true),
|
||||
("bare", "query bare() { match { $p: Person } return { $p.name } }", true),
|
||||
],
|
||||
&[("act-invoke", "t-invoke")],
|
||||
INVOKE_POLICY_YAML,
|
||||
)
|
||||
.await;
|
||||
let (status, body) = json_response(&app, get_request(&g("/queries"), "t-invoke")).await;
|
||||
assert_eq!(status, StatusCode::OK, "body: {body}");
|
||||
let entries = body["queries"].as_array().unwrap();
|
||||
|
||||
let described = entries.iter().find(|q| q["name"] == "described").unwrap();
|
||||
assert_eq!(
|
||||
described["description"], "Find a person by exact name.",
|
||||
"query @description surfaces over GET /queries: {described}"
|
||||
);
|
||||
assert_eq!(
|
||||
described["instruction"],
|
||||
"Use for exact lookups; prefer search for fuzzy matches.",
|
||||
"query @instruction surfaces over GET /queries: {described}"
|
||||
);
|
||||
|
||||
let bare = entries.iter().find(|q| q["name"] == "bare").unwrap();
|
||||
assert!(
|
||||
bare.get("description").is_none() && bare.get("instruction").is_none(),
|
||||
"a query without the annotations omits both fields: {bare}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn list_queries_is_empty_when_no_registry() {
|
||||
let (_temp, app) = app_for_loaded_graph_with_auth("demo-token").await;
|
||||
|
|
|
|||
|
|
@ -15,15 +15,12 @@ use omnigraph::db::{Omnigraph, ReadTarget};
|
|||
use omnigraph::error::OmniError;
|
||||
use omnigraph::loader::{LoadMode, load_jsonl};
|
||||
use omnigraph_policy::{PolicyChecker, PolicyEngine};
|
||||
use omnigraph_server::api::{
|
||||
BranchCreateRequest, BranchMergeRequest, ChangeRequest, ReadRequest,
|
||||
};
|
||||
use omnigraph_server::api::{BranchCreateRequest, BranchMergeRequest, ChangeRequest, ReadRequest};
|
||||
use omnigraph_server::queries::{QueryRegistry, RegistrySpec};
|
||||
use omnigraph_server::{AppState, build_app};
|
||||
use serde_json::{Value, json};
|
||||
use tower::ServiceExt;
|
||||
|
||||
|
||||
pub const MUTATION_QUERIES: &str = r#"
|
||||
query insert_person($name: String, $age: I32) {
|
||||
insert Person { name: $name, age: $age }
|
||||
|
|
@ -1212,6 +1209,8 @@ graphs:
|
|||
temp
|
||||
}
|
||||
|
||||
pub async fn cluster_settings(dir: &Path) -> color_eyre::eyre::Result<omnigraph_server::ServerConfig> {
|
||||
omnigraph_server::load_server_settings(Some(&dir.to_path_buf()), None, true).await
|
||||
pub async fn cluster_settings(
|
||||
dir: &Path,
|
||||
) -> color_eyre::eyre::Result<omnigraph_server::ServerConfig> {
|
||||
omnigraph_server::load_server_settings(Some(&dir.to_path_buf()), None, true, false).await
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,5 +55,6 @@ arc-swap = { workspace = true }
|
|||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.7.0" }
|
||||
tokio = { workspace = true }
|
||||
lance-namespace-impls = { workspace = true }
|
||||
lance-io = "7.0.0"
|
||||
serial_test = "3"
|
||||
proptest = "1"
|
||||
|
|
|
|||
|
|
@ -79,10 +79,14 @@ impl CommitGraph {
|
|||
|
||||
pub async fn open(root_uri: &str) -> Result<Self> {
|
||||
let root = root_uri.trim_end_matches('/');
|
||||
let dataset = Dataset::open(&graph_commits_uri(root))
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
let actor_dataset = Dataset::open(&graph_commit_actors_uri(root)).await.ok();
|
||||
let wrapper = crate::instrumentation::commit_graph_wrapper();
|
||||
let dataset =
|
||||
crate::instrumentation::open_dataset_tracked(&graph_commits_uri(root), wrapper.clone())
|
||||
.await?;
|
||||
let actor_dataset =
|
||||
crate::instrumentation::open_dataset_tracked(&graph_commit_actors_uri(root), wrapper)
|
||||
.await
|
||||
.ok();
|
||||
let actor_by_commit_id = match &actor_dataset {
|
||||
Some(dataset) => load_commit_actor_cache(dataset).await?,
|
||||
None => HashMap::new(),
|
||||
|
|
@ -101,14 +105,18 @@ impl CommitGraph {
|
|||
|
||||
pub async fn open_at_branch(root_uri: &str, branch: &str) -> Result<Self> {
|
||||
let root = root_uri.trim_end_matches('/');
|
||||
let dataset = Dataset::open(&graph_commits_uri(root))
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
let wrapper = crate::instrumentation::commit_graph_wrapper();
|
||||
let dataset =
|
||||
crate::instrumentation::open_dataset_tracked(&graph_commits_uri(root), wrapper.clone())
|
||||
.await?;
|
||||
let dataset = dataset
|
||||
.checkout_branch(branch)
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
let actor_dataset = Dataset::open(&graph_commit_actors_uri(root)).await.ok();
|
||||
let actor_dataset =
|
||||
crate::instrumentation::open_dataset_tracked(&graph_commit_actors_uri(root), wrapper)
|
||||
.await
|
||||
.ok();
|
||||
let actor_by_commit_id = match &actor_dataset {
|
||||
Some(dataset) => load_commit_actor_cache(dataset).await?,
|
||||
None => HashMap::new(),
|
||||
|
|
@ -127,9 +135,12 @@ impl CommitGraph {
|
|||
|
||||
pub async fn refresh(&mut self) -> Result<()> {
|
||||
let root = self.root_uri.clone();
|
||||
self.dataset = Dataset::open(&graph_commits_uri(&root))
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
let wrapper = crate::instrumentation::commit_graph_wrapper();
|
||||
self.dataset = crate::instrumentation::open_dataset_tracked(
|
||||
&graph_commits_uri(&root),
|
||||
wrapper.clone(),
|
||||
)
|
||||
.await?;
|
||||
if let Some(branch) = &self.active_branch {
|
||||
self.dataset = self
|
||||
.dataset
|
||||
|
|
@ -137,7 +148,10 @@ impl CommitGraph {
|
|||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
}
|
||||
self.actor_dataset = Dataset::open(&graph_commit_actors_uri(&root)).await.ok();
|
||||
self.actor_dataset =
|
||||
crate::instrumentation::open_dataset_tracked(&graph_commit_actors_uri(&root), wrapper)
|
||||
.await
|
||||
.ok();
|
||||
self.actor_by_commit_id = match &self.actor_dataset {
|
||||
Some(dataset) => load_commit_actor_cache(dataset).await?,
|
||||
None => HashMap::new(),
|
||||
|
|
|
|||
|
|
@ -10,7 +10,9 @@ use crate::storage::{StorageAdapter, join_uri, normalize_root_uri};
|
|||
|
||||
use super::commit_graph::{CommitGraph, GraphCommit};
|
||||
use super::is_internal_system_branch;
|
||||
use super::manifest::{ManifestChange, ManifestCoordinator, Snapshot, SubTableUpdate};
|
||||
use super::manifest::{
|
||||
ManifestChange, ManifestCoordinator, ManifestIncarnation, Snapshot, SubTableUpdate,
|
||||
};
|
||||
|
||||
const GRAPH_COMMITS_DIR: &str = "_graph_commits.lance";
|
||||
|
||||
|
|
@ -26,10 +28,11 @@ impl SnapshotId {
|
|||
&self.0
|
||||
}
|
||||
|
||||
pub(crate) fn synthetic(branch: Option<&str>, version: u64) -> Self {
|
||||
match branch {
|
||||
Some(branch) => Self(format!("manifest:{}:v{}", branch, version)),
|
||||
None => Self(format!("manifest:main:v{}", version)),
|
||||
pub(crate) fn synthetic(branch: Option<&str>, version: u64, e_tag: Option<&str>) -> Self {
|
||||
let branch = branch.unwrap_or("main");
|
||||
match e_tag {
|
||||
Some(e_tag) => Self(format!("manifest:{}:v{}:etag:{}", branch, version, e_tag)),
|
||||
None => Self(format!("manifest:{}:v{}", branch, version)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -166,6 +169,10 @@ impl GraphCoordinator {
|
|||
self.manifest.version()
|
||||
}
|
||||
|
||||
/// Return the incarnation reported by this coordinator's manifest.
///
/// Pure delegation to `self.manifest.incarnation()`; nothing is awaited here.
pub(crate) fn manifest_incarnation(&self) -> ManifestIncarnation {
|
||||
self.manifest.incarnation()
|
||||
}
|
||||
|
||||
pub fn snapshot(&self) -> Snapshot {
|
||||
self.manifest.snapshot()
|
||||
}
|
||||
|
|
@ -182,6 +189,19 @@ impl GraphCoordinator {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Ask the manifest for its latest incarnation, counting the call as a probe.
///
/// # Errors
///
/// Propagates the error returned by the manifest's own
/// `probe_latest_incarnation`.
pub(crate) async fn probe_latest_incarnation(&self) -> Result<ManifestIncarnation> {
|
||||
// Recorded before the probe is awaited, so a failing probe is still counted.
crate::instrumentation::record_probe();
|
||||
self.manifest.probe_latest_incarnation().await
|
||||
}
|
||||
|
||||
/// Refresh only the manifest (not the commit graph). The read path uses this
|
||||
/// on a stale same-branch probe: a read pins its snapshot by manifest version
|
||||
/// and never needs the commit graph, so a full `refresh` (which also scans
|
||||
/// the commit graph) would be wasted IO.
///
/// # Errors
///
/// Propagates the error returned by the manifest refresh.
|
||||
pub async fn refresh_manifest_only(&mut self) -> Result<()> {
|
||||
self.manifest.refresh().await
|
||||
}
|
||||
|
||||
pub async fn branch_list(&self) -> Result<Vec<String>> {
|
||||
self.manifest.list_branches().await.map(|branches| {
|
||||
branches
|
||||
|
|
@ -315,10 +335,13 @@ impl GraphCoordinator {
|
|||
None => GraphCoordinator::open(self.root_uri(), Arc::clone(&self.storage)).await?,
|
||||
};
|
||||
|
||||
Ok(other
|
||||
.head_commit_id()
|
||||
.await?
|
||||
.unwrap_or_else(|| SnapshotId::synthetic(other.current_branch(), other.version())))
|
||||
Ok(other.head_commit_id().await?.unwrap_or_else(|| {
|
||||
SnapshotId::synthetic(
|
||||
other.current_branch(),
|
||||
other.version(),
|
||||
other.manifest_incarnation().e_tag.as_deref(),
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
pub async fn resolve_target(&self, target: &ReadTarget) -> Result<ResolvedTarget> {
|
||||
|
|
@ -339,7 +362,11 @@ impl GraphCoordinator {
|
|||
}
|
||||
};
|
||||
let snapshot_id = other.head_commit_id().await?.unwrap_or_else(|| {
|
||||
SnapshotId::synthetic(other.current_branch(), other.version())
|
||||
SnapshotId::synthetic(
|
||||
other.current_branch(),
|
||||
other.version(),
|
||||
other.manifest_incarnation().e_tag.as_deref(),
|
||||
)
|
||||
});
|
||||
Ok(ResolvedTarget {
|
||||
requested: target.clone(),
|
||||
|
|
@ -509,9 +536,23 @@ impl GraphCoordinator {
|
|||
return Ok(SnapshotId::synthetic(
|
||||
current_branch.as_deref(),
|
||||
manifest_version,
|
||||
self.manifest_incarnation().e_tag.as_deref(),
|
||||
));
|
||||
};
|
||||
failpoints::maybe_fail("graph_publish.before_commit_append")?;
|
||||
// Refresh the commit-graph head from storage before selecting the
|
||||
// parent. `append_commit` parents the new commit on the IN-MEMORY head
|
||||
// (`head_commit_id`, zero storage read), but the manifest was just
|
||||
// committed against a freshly rebased pin (`commit_all` opens a fresh
|
||||
// coordinator) while THIS coordinator's cached head may be stale because
|
||||
// an external writer advanced the branch. Without this refresh a
|
||||
// same-branch write after an external commit appends off the stale head
|
||||
// and FORKS the commit DAG (the new commit and the external commit
|
||||
// sharing a parent). Refreshing makes the parent the true current head;
|
||||
// the just-committed manifest version has no commit-graph row yet, so the
|
||||
// fresh head is exactly the prior commit. (record_merge_commit is
|
||||
// unaffected — it passes explicit parents, never the cached head.)
|
||||
commit_graph.refresh().await?;
|
||||
let graph_commit_id = commit_graph
|
||||
.append_commit(current_branch.as_deref(), manifest_version, actor_id)
|
||||
.await?;
|
||||
|
|
|
|||
|
|
@ -24,20 +24,19 @@ mod recovery;
|
|||
mod state;
|
||||
|
||||
use graph::{init_manifest_graph, open_manifest_graph, snapshot_state_at};
|
||||
use layout::{manifest_uri, open_manifest_dataset, type_name_hash};
|
||||
use layout::{manifest_uri, open_manifest_dataset, table_uri_for_path, type_name_hash};
|
||||
pub(crate) use metadata::TableVersionMetadata;
|
||||
#[cfg(test)]
|
||||
use metadata::{OMNIGRAPH_ROW_COUNT_KEY, table_version_metadata_for_state};
|
||||
use namespace::open_table_at_version_from_manifest;
|
||||
pub(crate) use namespace::open_table_head_for_write;
|
||||
#[cfg(test)]
|
||||
use namespace::{branch_manifest_namespace, staged_table_namespace};
|
||||
use publisher::{GraphNamespacePublisher, ManifestBatchPublisher};
|
||||
pub(crate) use recovery::{
|
||||
RecoveryMode, RecoverySidecarHandle, SidecarKind, SidecarTablePin, SidecarTableRegistration,
|
||||
SidecarTombstone, delete_sidecar, has_schema_apply_sidecar, heal_pending_sidecars_roll_forward,
|
||||
list_sidecars, new_sidecar, recover_manifest_drift, schema_apply_serial_queue_key,
|
||||
write_sidecar,
|
||||
RecoveryMode, RecoverySidecar, RecoverySidecarHandle, SidecarKind, SidecarTablePin,
|
||||
SidecarTableRegistration, SidecarTombstone, confirm_sidecar_phase_b, delete_sidecar,
|
||||
has_schema_apply_sidecar, heal_pending_sidecars_roll_forward, list_sidecars, new_sidecar,
|
||||
recover_manifest_drift, schema_apply_serial_queue_key, write_sidecar,
|
||||
};
|
||||
pub use state::SubTableEntry;
|
||||
#[cfg(test)]
|
||||
|
|
@ -74,16 +73,51 @@ pub struct Snapshot {
|
|||
root_uri: String,
|
||||
version: u64,
|
||||
entries: HashMap<String, SubTableEntry>,
|
||||
/// Per-graph read caches (shared `Session` + held-handle cache), injected by
|
||||
/// `Omnigraph::resolved_target` for live Branch reads so table opens reuse
|
||||
/// handles (0 IO on a warm repeat) and one `Session`. `None` for write-prelude
|
||||
/// snapshots, time-travel / Snapshot-id reads, and directly-built test
|
||||
/// snapshots, which fall back to a plain open.
|
||||
read_caches: Option<Arc<crate::runtime_cache::ReadCaches>>,
|
||||
}
|
||||
|
||||
impl Snapshot {
|
||||
/// Open a sub-table dataset at its pinned version.
|
||||
/// Open a sub-table dataset at its pinned version. With read caches present
|
||||
/// (live Branch reads), reuse a held handle through the cache (0 open IO on a
|
||||
/// warm repeat) and the shared `Session`; otherwise plain-open (Fix 2).
|
||||
pub async fn open(&self, table_key: &str) -> Result<Dataset> {
|
||||
let entry = self
|
||||
.entries
|
||||
.get(table_key)
|
||||
.ok_or_else(|| OmniError::manifest(format!("no manifest entry for {}", table_key)))?;
|
||||
entry.open(&self.root_uri).await
|
||||
match &self.read_caches {
|
||||
Some(caches) => {
|
||||
let location = table_uri_for_path(
|
||||
&self.root_uri,
|
||||
&entry.table_path,
|
||||
entry.table_branch.as_deref(),
|
||||
);
|
||||
caches
|
||||
.handles
|
||||
.get_or_open(
|
||||
&entry.table_path,
|
||||
entry.table_branch.as_deref(),
|
||||
entry.table_version,
|
||||
entry.version_metadata.e_tag(),
|
||||
&location,
|
||||
Some(&caches.session),
|
||||
)
|
||||
.await
|
||||
}
|
||||
None => entry.open(&self.root_uri).await,
|
||||
}
|
||||
}
|
||||
|
||||
/// Attach per-graph read caches (shared `Session` + handle cache) so this
|
||||
/// snapshot's table opens reuse handles and the session. Set by
|
||||
/// `Omnigraph::resolved_target` for live Branch reads only.
|
||||
pub(crate) fn set_read_caches(&mut self, caches: Arc<crate::runtime_cache::ReadCaches>) {
|
||||
self.read_caches = Some(caches);
|
||||
}
|
||||
|
||||
/// Manifest version this snapshot was taken from.
|
||||
|
|
@ -101,6 +135,31 @@ impl Snapshot {
|
|||
}
|
||||
}
|
||||
|
||||
/// Identity of one committed manifest: its version number plus the e_tag and
/// timestamp that tell apart two manifests sharing the same version number.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct ManifestIncarnation {
    pub(crate) version: u64,
    pub(crate) e_tag: Option<String>,
    timestamp_nanos: Option<u128>,
}

impl ManifestIncarnation {
    /// Decide whether `self` (the freshly probed identity) is the same
    /// manifest as `held`.
    ///
    /// The versions must be equal. Beyond that the strongest signal available
    /// on BOTH sides decides: e_tags when both are present, otherwise
    /// timestamps when both are present.
    pub(crate) fn matches(&self, held: &Self) -> bool {
        if self.version != held.version {
            return false;
        }
        if let (Some(latest), Some(current)) = (self.e_tag.as_deref(), held.e_tag.as_deref()) {
            return latest == current;
        }
        match self.timestamp_nanos.zip(held.timestamp_nanos) {
            Some((latest, current)) => latest == current,
            // Some object stores can omit both e_tag and manifest timestamp
            // from the reachable API. In that narrow case the version-number
            // probe is the strongest available identity.
            None => true,
        }
    }
}
|
||||
|
||||
impl SubTableUpdate {
|
||||
pub(crate) fn to_create_table_version_request(&self) -> CreateTableVersionRequest {
|
||||
self.version_metadata.to_create_table_version_request(
|
||||
|
|
@ -132,14 +191,28 @@ pub(crate) enum ManifestChange {
|
|||
}
|
||||
|
||||
impl SubTableEntry {
|
||||
/// Open this sub-table at its pinned version directly by location (Fix 2),
|
||||
/// without the Lance namespace — which would full-scan `__manifest` twice per
|
||||
/// open (`describe_table` + `describe_table_version`). The resolved Snapshot
|
||||
/// already holds the path, version, and branch. Branches are Lance native
|
||||
/// branches, so `with_branch` resolves `{base}/tree/{branch}` from the base
|
||||
/// URI; main uses `with_version`.
|
||||
pub(crate) async fn open(&self, root_uri: &str) -> Result<Dataset> {
|
||||
open_table_at_version_from_manifest(
|
||||
root_uri,
|
||||
&self.table_key,
|
||||
self.table_branch.as_deref(),
|
||||
self.table_version,
|
||||
)
|
||||
.await
|
||||
// The branch-qualified location is the dataset that physically holds this
|
||||
// version: main at `{table_path}`, a branch at
|
||||
// `{table_path}/tree/{branch}` (Lance native-branch storage). `with_version`
|
||||
// then resolves the version within THAT dataset's `_versions` — a branch
|
||||
// version lives under `tree/{branch}/_versions`, not the base. This
|
||||
// matches the physical layout the namespace path resolved, without the
|
||||
// per-open `__manifest` scan.
|
||||
let location = table_uri_for_path(root_uri, &self.table_path, self.table_branch.as_deref());
|
||||
// Route through the instrumented data-table opener (Fix 3). With no
|
||||
// session this is exactly the Fix-2 `from_uri(location).with_version`.
|
||||
// This is the uncached fallback (a snapshot with no read caches); the
|
||||
// cached path (`Snapshot::open` → handle cache) calls the same opener on
|
||||
// a miss with the shared session, so both paths count on the per-query
|
||||
// `table_wrapper`.
|
||||
crate::instrumentation::open_table_dataset(&location, self.table_version, None).await
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -223,6 +296,7 @@ impl ManifestCoordinator {
|
|||
.into_iter()
|
||||
.map(|entry| (entry.table_key.clone(), entry))
|
||||
.collect(),
|
||||
read_caches: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -359,6 +433,48 @@ impl ManifestCoordinator {
|
|||
self.dataset.version().version
|
||||
}
|
||||
|
||||
/// Latest committed manifest version on disk (one object-store op, no row
|
||||
/// scan). The freshness probe for warm reuse: compare against `version()`
|
||||
/// (the held handle's pinned version) to decide whether to refresh.
|
||||
pub async fn probe_latest_version(&self) -> Result<u64> {
|
||||
self.dataset
|
||||
.latest_version_id()
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))
|
||||
}
|
||||
|
||||
pub(crate) fn incarnation(&self) -> ManifestIncarnation {
|
||||
ManifestIncarnation {
|
||||
version: self.version(),
|
||||
e_tag: self.dataset.manifest_location().e_tag.clone(),
|
||||
timestamp_nanos: Some(self.dataset.manifest().timestamp_nanos),
|
||||
}
|
||||
}
|
||||
|
||||
/// Latest committed manifest identity. Main cannot be deleted/recreated, so
|
||||
/// the cheap version-number probe is sufficient there. Non-main Lance
|
||||
/// branches can be deleted and recreated with the same version number, so
|
||||
/// load the latest manifest location and compare its e_tag / timestamp too.
|
||||
pub(crate) async fn probe_latest_incarnation(&self) -> Result<ManifestIncarnation> {
|
||||
if self.active_branch.is_none() {
|
||||
return Ok(ManifestIncarnation {
|
||||
version: self.probe_latest_version().await?,
|
||||
e_tag: self.dataset.manifest_location().e_tag.clone(),
|
||||
timestamp_nanos: Some(self.dataset.manifest().timestamp_nanos),
|
||||
});
|
||||
}
|
||||
let (manifest, location) = self
|
||||
.dataset
|
||||
.latest_manifest()
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
Ok(ManifestIncarnation {
|
||||
version: manifest.version,
|
||||
e_tag: location.e_tag,
|
||||
timestamp_nanos: Some(manifest.timestamp_nanos),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn active_branch(&self) -> Option<&str> {
|
||||
self.active_branch.as_deref()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,9 +20,12 @@ pub(super) fn manifest_uri(root: &str) -> String {
|
|||
}
|
||||
|
||||
pub(super) async fn open_manifest_dataset(root_uri: &str, branch: Option<&str>) -> Result<Dataset> {
|
||||
let dataset = Dataset::open(&manifest_uri(root_uri.trim_end_matches('/')))
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
let uri = manifest_uri(root_uri.trim_end_matches('/'));
|
||||
let dataset = crate::instrumentation::open_dataset_tracked(
|
||||
&uri,
|
||||
crate::instrumentation::manifest_wrapper(),
|
||||
)
|
||||
.await?;
|
||||
match branch {
|
||||
Some(branch) if branch != "main" => dataset
|
||||
.checkout_branch(branch)
|
||||
|
|
|
|||
|
|
@ -111,7 +111,6 @@ impl TableVersionMetadata {
|
|||
self.manifest_size
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn e_tag(&self) -> Option<&str> {
|
||||
self.e_tag.as_deref()
|
||||
}
|
||||
|
|
@ -138,6 +137,7 @@ impl TableVersionMetadata {
|
|||
request
|
||||
}
|
||||
|
||||
/// Test-only convenience: build the namespace `TableVersion` for `version`
/// by delegating to `to_namespace_version_with_details` with both optional
/// arguments left as `None`.
#[cfg(test)]
pub(super) fn to_namespace_version(&self, version: u64) -> TableVersion {
    self.to_namespace_version_with_details(version, None, None)
}
|
||||
|
|
|
|||
|
|
@ -16,21 +16,30 @@ use object_store::{
|
|||
|
||||
use crate::error::{OmniError, Result};
|
||||
|
||||
use super::layout::{
|
||||
namespace_internal_error, open_manifest_dataset, table_id_to_key, table_uri_for_path,
|
||||
};
|
||||
use super::metadata::{
|
||||
TableVersionMetadata, namespace_version_metadata, parse_namespace_version_request,
|
||||
};
|
||||
use super::layout::{namespace_internal_error, table_uri_for_path};
|
||||
#[cfg(test)]
|
||||
use super::layout::{open_manifest_dataset, table_id_to_key};
|
||||
use super::metadata::TableVersionMetadata;
|
||||
#[cfg(test)]
|
||||
use super::metadata::{namespace_version_metadata, parse_namespace_version_request};
|
||||
#[cfg(test)]
|
||||
use super::publisher::GraphNamespacePublisher;
|
||||
// The read namespace (BranchManifestNamespace) is test-only since Fix 2: reads
|
||||
// open sub-tables directly by location+version (SubTableEntry::open), so nothing
|
||||
// in production routes a read through the Lance namespace. The writes path uses
|
||||
// StagedTableNamespace. These items are retained to validate the namespace
|
||||
// contract in unit tests.
|
||||
#[cfg(test)]
|
||||
use super::state::{ManifestState, SubTableEntry, read_manifest_entries, read_manifest_state};
|
||||
|
||||
/// Test-only read namespace over a graph's manifest. Production reads open
/// sub-tables directly by location and version, so this type exists only to
/// validate the namespace contract in unit tests.
#[cfg(test)]
#[derive(Debug, Clone)]
struct BranchManifestNamespace {
    /// Root URI the namespace was created for.
    root_uri: String,
    /// Branch the namespace was created for, if any.
    branch: Option<String>,
}
|
||||
|
||||
#[cfg(test)]
|
||||
impl BranchManifestNamespace {
|
||||
fn new(root_uri: &str, branch: Option<&str>) -> Self {
|
||||
Self {
|
||||
|
|
@ -137,6 +146,7 @@ impl StagedTableNamespace {
|
|||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn branch_manifest_namespace(
|
||||
root_uri: &str,
|
||||
branch: Option<&str>,
|
||||
|
|
@ -175,21 +185,7 @@ async fn load_table_from_namespace(
|
|||
.map_err(|e| OmniError::Lance(e.to_string()))
|
||||
}
|
||||
|
||||
pub(crate) async fn open_table_at_version_from_manifest(
|
||||
root_uri: &str,
|
||||
table_key: &str,
|
||||
branch: Option<&str>,
|
||||
version: u64,
|
||||
) -> Result<Dataset> {
|
||||
load_table_from_namespace(
|
||||
branch_manifest_namespace(root_uri, branch),
|
||||
table_key,
|
||||
branch,
|
||||
Some(version),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[async_trait]
|
||||
impl LanceNamespace for BranchManifestNamespace {
|
||||
fn namespace_id(&self) -> String {
|
||||
|
|
|
|||
|
|
@ -24,10 +24,13 @@ use lance::Dataset;
|
|||
use lance::Error as LanceError;
|
||||
use lance::dataset::{MergeInsertBuilder, WhenMatched, WhenNotMatched};
|
||||
use lance_namespace::NamespaceError;
|
||||
#[cfg(test)]
|
||||
use lance_namespace::models::CreateTableVersionRequest;
|
||||
|
||||
use crate::error::{OmniError, Result};
|
||||
|
||||
#[cfg(test)]
|
||||
use super::SubTableUpdate;
|
||||
use super::layout::{open_manifest_dataset, tombstone_object_id, version_object_id};
|
||||
use super::metadata::parse_namespace_version_request;
|
||||
use super::migrations::migrate_internal_schema;
|
||||
|
|
@ -37,7 +40,7 @@ use super::state::{
|
|||
};
|
||||
use super::{
|
||||
ManifestChange, OBJECT_TYPE_TABLE, OBJECT_TYPE_TABLE_TOMBSTONE, OBJECT_TYPE_TABLE_VERSION,
|
||||
SubTableEntry, SubTableUpdate, TableRegistration, TableTombstone,
|
||||
SubTableEntry, TableRegistration, TableTombstone,
|
||||
};
|
||||
|
||||
/// Bound on the publisher-level retry loop that wraps Lance's row-level CAS
|
||||
|
|
@ -396,6 +399,7 @@ impl GraphNamespacePublisher {
|
|||
Ok(Arc::try_unwrap(new_dataset).unwrap_or_else(|arc| (*arc).clone()))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(super) async fn publish_requests(
|
||||
&self,
|
||||
requests: &[CreateTableVersionRequest],
|
||||
|
|
|
|||
|
|
@ -62,10 +62,26 @@ pub(crate) const RECOVERY_ACTOR: &str = "omnigraph:recovery";
|
|||
/// Subdirectory under the graph root holding sidecar files.
|
||||
pub(crate) const RECOVERY_DIR_NAME: &str = "__recovery";
|
||||
|
||||
/// Max sidecar JSON shape/semantics version this binary writes and understands.
/// The reader accepts every version `<=` this and refuses only versions ABOVE
/// it (an older binary cannot guess semantics a newer writer baked in — see
/// [`SidecarSchemaError`] and [`parse_sidecar`]). Bump this whenever a change
/// alters how an existing field is *interpreted* (not merely adds an optional
/// one), and add a fixed `*_SCHEMA_VERSION` floor like the one below so older
/// generations keep their original semantics.
///
/// v1 → v2: Phase-B confirmation. A `BranchMerge` sidecar at v2 carries
/// `confirmed_version` and is classified strictly (unconfirmed ⇒ partial ⇒ roll
/// back); at v1 it predates confirmation and keeps the loose roll-forward. The
/// reader must distinguish the two, so this is a real version bump, not an
/// additive field.
pub(crate) const SIDECAR_SCHEMA_VERSION: u32 = 2;

/// The version at which Phase-B confirmation shipped. A `BranchMerge` sidecar is
/// confirmation-aware (strict classification) iff `schema_version >=` this.
/// FIXED at 2 — NOT derived from [`SIDECAR_SCHEMA_VERSION`] — so a future bump to
/// v3+ still treats v2 sidecars as confirmation-aware.
pub(crate) const CONFIRMATION_SCHEMA_VERSION: u32 = 2;
|
||||
|
||||
/// Selects which recovery actions are allowed in a sweep.
|
||||
///
|
||||
|
|
@ -115,6 +131,54 @@ pub(crate) enum SidecarKind {
|
|||
Optimize,
|
||||
}
|
||||
|
||||
/// The recovery-classification semantics applied to a sidecar's tables.
///
/// The mode is resolved once from `(writer_kind, schema_version)` by
/// [`SidecarKind::classification_mode`]; [`classify_table`] then dispatches on
/// it rather than re-deriving the decision from a kind×version match. A new
/// writer kind or version floor therefore costs one arm in the resolver, not a
/// guard threaded through `classify_table`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ClassificationMode {
    /// Single-commit writers (`Mutation`, `Load`): exactly one `commit_staged`
    /// per table, so `lance_head == manifest_pinned + 1` is required and the
    /// pin must match.
    Strict,
    /// Writers making N ≥ 1 commits per table whose drift is
    /// content-preserving / derived state (`SchemaApply`, `EnsureIndices`,
    /// `Optimize`, and pre-confirmation `BranchMerge`): any
    /// `lance_head > manifest_pinned` rolls forward.
    Loose,
    /// Multi-commit publish of *distinct logical rows* with a recorded
    /// `confirmed_version` (`BranchMerge` at `schema_version >=
    /// CONFIRMATION_SCHEMA_VERSION`): roll forward ONLY to the confirmed
    /// version; an unconfirmed moved HEAD is a partial publish and rolls back.
    Confirmed,
}
|
||||
|
||||
impl SidecarKind {
|
||||
/// Resolve the classification mode for this writer at a given sidecar
|
||||
/// `schema_version`. Exhaustive over `SidecarKind`, so adding a variant is a
|
||||
/// compile error here until its recovery semantics are declared.
|
||||
pub(crate) fn classification_mode(self, schema_version: u32) -> ClassificationMode {
|
||||
match self {
|
||||
SidecarKind::Mutation | SidecarKind::Load => ClassificationMode::Strict,
|
||||
// BranchMerge gained two-phase confirmation at
|
||||
// `CONFIRMATION_SCHEMA_VERSION`. A sidecar written before that
|
||||
// carries no `confirmed_version` and must keep the prior loose
|
||||
// roll-forward — classifying it strictly would misread a *completed*
|
||||
// pre-upgrade merge as a partial and roll it back. (The read gate
|
||||
// already refused any version newer than this binary.)
|
||||
SidecarKind::BranchMerge => {
|
||||
if schema_version >= CONFIRMATION_SCHEMA_VERSION {
|
||||
ClassificationMode::Confirmed
|
||||
} else {
|
||||
ClassificationMode::Loose
|
||||
}
|
||||
}
|
||||
SidecarKind::SchemaApply | SidecarKind::EnsureIndices | SidecarKind::Optimize => {
|
||||
ClassificationMode::Loose
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// One table's contribution to a sidecar's intended commit. The classifier
|
||||
/// uses these to decide per-table state at recovery time.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
|
|
@ -126,8 +190,22 @@ pub(crate) struct SidecarTablePin {
|
|||
/// Manifest-pinned version at writer start (CAS expectation).
|
||||
pub expected_version: u64,
|
||||
/// Lance HEAD that the writer's `commit_staged` would produce
|
||||
/// (typically `expected_version + 1`).
|
||||
/// (typically `expected_version + 1`). For multi-commit writers this is
|
||||
/// only a *lower bound* — see `confirmed_version`.
|
||||
pub post_commit_pin: u64,
|
||||
/// Phase-B confirmation: the exact Lance HEAD this table reached once the
|
||||
/// writer's *entire* multi-commit publish for it finished, recorded by a
|
||||
/// second sidecar write immediately before the manifest publish (Phase C).
|
||||
/// `None` means Phase B did not complete (the writer crashed mid-publish),
|
||||
/// so the on-disk drift is a *partial* commit and recovery must roll the
|
||||
/// whole operation BACK rather than publish an incomplete state. Only the
|
||||
/// `BranchMerge` writer records this today (its per-table publish is
|
||||
/// append → upsert → delete, several HEAD advances that the manifest
|
||||
/// publish makes atomic); other writers leave it `None` and keep their
|
||||
/// existing loose roll-forward. Backward-compatible: absent on older
|
||||
/// sidecars.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub confirmed_version: Option<u64>,
|
||||
/// Lance branch ref this table lives on (mirrors
|
||||
/// `SubTableEntry::table_branch`). Required for the recovery sweep
|
||||
/// to open the dataset at the correct ref — `Dataset::open(path)`
|
||||
|
|
@ -218,25 +296,27 @@ pub(crate) struct RecoverySidecarHandle {
|
|||
pub(crate) sidecar_uri: String,
|
||||
}
|
||||
|
||||
/// Error returned when the sidecar's `schema_version` is NEWER than this binary
/// understands. We refuse-and-error rather than read-and-warn: an old binary
/// cannot guess what semantics a newer writer baked into a future shape. (Older
/// versions are accepted and interpreted with their original semantics — see
/// [`parse_sidecar`].) Operator action is required (typically: upgrade the
/// binary).
#[derive(Debug)]
pub(crate) struct SidecarSchemaError {
    /// URI of the sidecar that was refused.
    pub sidecar_uri: String,
    /// `schema_version` the sidecar declares.
    pub found_version: u32,
    /// Highest `schema_version` this binary is able to interpret.
    pub max_supported_version: u32,
}

impl std::fmt::Display for SidecarSchemaError {
    /// Render the operator-facing message: which sidecar, the version it
    /// declares, the maximum this binary supports, and the remediation.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "recovery sidecar at '{}' declares schema_version={}, newer than the \
             maximum this binary supports (schema_version={}); refusing to interpret \
             — upgrade omnigraph or remove the sidecar with operator review",
            self.sidecar_uri, self.found_version, self.max_supported_version,
        )
    }
}
|
||||
|
|
@ -271,6 +351,14 @@ pub(crate) enum TableClassification {
|
|||
/// previous restore attempt or an external mutation. Roll back to
|
||||
/// the manifest pin.
|
||||
UnexpectedMultistep,
|
||||
/// A confirmation-using writer (`BranchMerge`) advanced this table's HEAD
|
||||
/// (`lance_head > manifest_pinned`) but the sidecar carries no
|
||||
/// `confirmed_version` — its multi-commit publish crashed mid-flight, so
|
||||
/// the drift is a *partial* commit (e.g. an append without its sibling
|
||||
/// upsert/delete). Roll back to the manifest pin; the whole operation is
|
||||
/// re-run from scratch. Distinct from `UnexpectedMultistep` so the audit
|
||||
/// records a partial Phase B, not a foreign mutation.
|
||||
IncompletePhaseB,
|
||||
/// `lance_head < manifest_pinned`. Should be impossible: the manifest
|
||||
/// pin can only advance after a successful Lance commit. Surface
|
||||
/// loudly and abort recovery.
|
||||
|
|
@ -341,6 +429,58 @@ pub(crate) async fn write_sidecar(
|
|||
})
|
||||
}
|
||||
|
||||
/// Phase-B confirmation: stamp each pin with the exact Lance HEAD its publish
|
||||
/// reached, then re-write the sidecar in place (same object). Called once, after
|
||||
/// the writer's whole multi-commit publish completed and before the manifest
|
||||
/// publish (Phase C). Recovery then rolls forward ONLY to these confirmed
|
||||
/// versions; a sidecar still missing them is a partial Phase B that rolls back.
|
||||
///
|
||||
/// Overwriting the same object is atomic (same contract as [`write_sidecar`]):
|
||||
/// a torn rewrite is never observed, so recovery reads either the pre-confirm
|
||||
/// sidecar (→ roll back, safe) or the confirmed one (→ roll forward). A failure
|
||||
/// here leaves the pre-confirm sidecar, so the operation rolls back — correct.
|
||||
///
|
||||
/// SURVIVES the fragment-adopt work (unlike the row-level merge it currently
|
||||
/// serves — see `AdoptDelta` in `exec/merge.rs`). The recovery sidecar is the
|
||||
/// cross-table write-ahead log that makes a fast-forward-main commit
|
||||
/// all-or-nothing across N tables, which a fragment graft still needs. What
|
||||
/// narrows is the *within-table* reason for confirmation: once each table's
|
||||
/// merge is a single graft commit, the multi-step partial window shrinks to one
|
||||
/// commit, so the `BranchMerge` arm of `classify_table` could fold back into the
|
||||
/// strict single-commit path and `IncompletePhaseB` retire. Do NOT delete this
|
||||
/// with the row path — keep the sidecar; only simplify the classifier.
|
||||
pub(crate) async fn confirm_sidecar_phase_b(
|
||||
root_uri: &str,
|
||||
storage: &dyn StorageAdapter,
|
||||
sidecar: &mut RecoverySidecar,
|
||||
confirmed_versions: &HashMap<String, u64>,
|
||||
) -> Result<()> {
|
||||
// Failpoint: models a storage failure on the confirmation write — the
|
||||
// pre-confirm sidecar stays on disk, so recovery rolls the operation back.
|
||||
crate::failpoints::maybe_fail("recovery.sidecar_confirm")?;
|
||||
for pin in &mut sidecar.tables {
|
||||
// Every pinned table MUST have an achieved version. A miss means the
|
||||
// pin set and the publish `updates` diverged — fail loudly at the
|
||||
// producer rather than leave the pin unconfirmed, which recovery would
|
||||
// read as a partial Phase B and silently roll the whole (complete) merge
|
||||
// back. Today the two are kept in lockstep by construction; this guards
|
||||
// the invariant against a future edit to either filter.
|
||||
let version = confirmed_versions.get(&pin.table_key).ok_or_else(|| {
|
||||
OmniError::manifest_internal(format!(
|
||||
"confirm_sidecar_phase_b: no achieved version for pinned table '{}' \
|
||||
(pins and publish updates diverged)",
|
||||
pin.table_key
|
||||
))
|
||||
})?;
|
||||
pin.confirmed_version = Some(*version);
|
||||
}
|
||||
let uri = sidecar_uri(root_uri, &sidecar.operation_id);
|
||||
let json = serde_json::to_string_pretty(sidecar).map_err(|err| {
|
||||
OmniError::manifest_internal(format!("failed to serialize recovery sidecar: {}", err))
|
||||
})?;
|
||||
storage.write_text(&uri, &json).await
|
||||
}
|
||||
|
||||
/// Delete a sidecar after Phase C succeeded. Idempotent (safe to retry).
|
||||
pub(crate) async fn delete_sidecar(
|
||||
handle: &RecoverySidecarHandle,
|
||||
|
|
@ -408,11 +548,15 @@ pub(crate) fn parse_sidecar(sidecar_uri: &str, body: &str) -> Result<RecoverySid
|
|||
sidecar_uri, err
|
||||
))
|
||||
})?;
|
||||
if peek.schema_version != SIDECAR_SCHEMA_VERSION {
|
||||
// Accept every version we were built to understand (`<= max`); refuse only
|
||||
// versions NEWER than us. Interpreting older generations with their original
|
||||
// semantics (rather than refusing them) is what avoids billing operators to
|
||||
// drain pre-upgrade sidecars; classification then dispatches by version.
|
||||
if peek.schema_version > SIDECAR_SCHEMA_VERSION {
|
||||
return Err(SidecarSchemaError {
|
||||
sidecar_uri: sidecar_uri.to_string(),
|
||||
found_version: peek.schema_version,
|
||||
supported_version: SIDECAR_SCHEMA_VERSION,
|
||||
max_supported_version: SIDECAR_SCHEMA_VERSION,
|
||||
}
|
||||
.into());
|
||||
}
|
||||
|
|
@ -427,26 +571,38 @@ pub(crate) fn parse_sidecar(sidecar_uri: &str, body: &str) -> Result<RecoverySid
|
|||
/// Classify one table's observed state vs. the sidecar's intent.
|
||||
///
|
||||
/// `kind` adjusts the precision of the `RolledPastExpected` predicate:
|
||||
/// - **Confirmation** (`BranchMerge`): the writer's per-table publish is
|
||||
/// several HEAD advances (append → upsert → delete), so a bare
|
||||
/// `lance_head > manifest_pinned` is ambiguous — it may be a *complete*
|
||||
/// publish or a *partial* one crashed mid-sequence. The writer resolves
|
||||
/// the ambiguity by recording the exact achieved version
|
||||
/// (`confirmed_version`) only after the whole publish finished. So roll
|
||||
/// forward ONLY to that confirmed version; a missing confirmation is a
|
||||
/// partial commit (`IncompletePhaseB`) and rolls back. This is the safe
|
||||
/// form of the loose match for writers where a partial would publish an
|
||||
/// incomplete delta.
|
||||
/// - **Strict** (`Mutation`, `Load`): exactly one `commit_staged` per
|
||||
/// table, so `lance_head == manifest_pinned + 1` AND
|
||||
/// `post_commit_pin == lance_head` is required.
|
||||
/// - **Loose** (`SchemaApply`, `EnsureIndices`, `BranchMerge`,
|
||||
/// `Optimize`): the writer advances the Lance HEAD by N ≥ 1 commits
|
||||
/// per table (one per index built + one for the overwrite, etc.;
|
||||
/// merge tables run merge_insert + delete_where + index rebuilds;
|
||||
/// `Optimize` runs `compact_files`, which commits reserve-fragments +
|
||||
/// rewrite) and the exact N is hard to compute at sidecar-write time.
|
||||
/// The loose match accepts
|
||||
/// - **Loose** (`SchemaApply`, `EnsureIndices`, `Optimize`): the writer
|
||||
/// advances the Lance HEAD by N ≥ 1 commits per table (one per index
|
||||
/// built + one for the overwrite, etc.; `Optimize` runs `compact_files`,
|
||||
/// which commits reserve-fragments + rewrite) and the exact N is hard to
|
||||
/// compute at sidecar-write time. The loose match accepts
|
||||
/// any `lance_head > manifest_pinned` as `RolledPastExpected` when
|
||||
/// `pin.expected_version == manifest_pinned` (the writer's CAS
|
||||
/// target matches what the manifest currently shows). The risk this
|
||||
/// admits — an external agent advancing HEAD between sidecar write
|
||||
/// and recovery — is out of scope for the single-coordinator model.
|
||||
/// target matches what the manifest currently shows). This is safe for
|
||||
/// these writers because their drift is derived state (index coverage,
|
||||
/// compaction) the reconciler reproduces — a partial roll-forward loses
|
||||
/// no logical rows. The risk it admits — an external agent advancing HEAD
|
||||
/// between sidecar write and recovery — is out of scope for the
|
||||
/// single-coordinator model.
|
||||
pub(crate) fn classify_table(
|
||||
pin: &SidecarTablePin,
|
||||
lance_head: u64,
|
||||
manifest_pinned: u64,
|
||||
kind: SidecarKind,
|
||||
schema_version: u32,
|
||||
) -> TableClassification {
|
||||
use TableClassification::*;
|
||||
if lance_head < manifest_pinned {
|
||||
|
|
@ -457,27 +613,49 @@ pub(crate) fn classify_table(
|
|||
if lance_head == manifest_pinned {
|
||||
return NoMovement;
|
||||
}
|
||||
// lance_head > manifest_pinned
|
||||
let strict = matches!(kind, SidecarKind::Mutation | SidecarKind::Load);
|
||||
if strict {
|
||||
if lance_head == manifest_pinned + 1 {
|
||||
if pin.expected_version == manifest_pinned && pin.post_commit_pin == lance_head {
|
||||
RolledPastExpected
|
||||
} else {
|
||||
UnexpectedAtP1
|
||||
// lance_head > manifest_pinned. The "which semantics" decision is resolved
|
||||
// once from (kind, schema_version); dispatch on it.
|
||||
match kind.classification_mode(schema_version) {
|
||||
ClassificationMode::Confirmed => {
|
||||
// Two-phase confirmation: roll forward only to the exact version the
|
||||
// writer recorded after its whole multi-commit publish completed. No
|
||||
// confirmation ⇒ the publish crashed mid-sequence ⇒ partial ⇒ roll
|
||||
// back. A confirmation that doesn't match the observed HEAD means a
|
||||
// foreign writer advanced the table — don't roll a surprise forward.
|
||||
match pin.confirmed_version {
|
||||
Some(confirmed)
|
||||
if lance_head == confirmed && pin.expected_version == manifest_pinned =>
|
||||
{
|
||||
RolledPastExpected
|
||||
}
|
||||
Some(_) => UnexpectedMultistep,
|
||||
None => IncompletePhaseB,
|
||||
}
|
||||
} else {
|
||||
// lance_head > manifest_pinned + 1
|
||||
UnexpectedMultistep
|
||||
}
|
||||
} else {
|
||||
// Loose match for multi-commit writers (SchemaApply, EnsureIndices).
|
||||
if pin.expected_version == manifest_pinned {
|
||||
RolledPastExpected
|
||||
} else if lance_head == manifest_pinned + 1 {
|
||||
UnexpectedAtP1
|
||||
} else {
|
||||
UnexpectedMultistep
|
||||
ClassificationMode::Strict => {
|
||||
if lance_head == manifest_pinned + 1 {
|
||||
if pin.expected_version == manifest_pinned && pin.post_commit_pin == lance_head {
|
||||
RolledPastExpected
|
||||
} else {
|
||||
UnexpectedAtP1
|
||||
}
|
||||
} else {
|
||||
// lance_head > manifest_pinned + 1
|
||||
UnexpectedMultistep
|
||||
}
|
||||
}
|
||||
ClassificationMode::Loose => {
|
||||
// Multi-commit writers whose drift is content-preserving / derived
|
||||
// state (and pre-confirmation BranchMerge sidecars): any
|
||||
// `lance_head > manifest_pinned` rolls forward when the CAS target
|
||||
// matches what the manifest currently shows.
|
||||
if pin.expected_version == manifest_pinned {
|
||||
RolledPastExpected
|
||||
} else if lance_head == manifest_pinned + 1 {
|
||||
UnexpectedAtP1
|
||||
} else {
|
||||
UnexpectedMultistep
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -496,7 +674,7 @@ pub(crate) fn decide(classifications: &[TableClassification]) -> SidecarDecision
|
|||
}
|
||||
if classifications
|
||||
.iter()
|
||||
.any(|c| matches!(c, NoMovement | UnexpectedAtP1 | UnexpectedMultistep))
|
||||
.any(|c| matches!(c, NoMovement | UnexpectedAtP1 | UnexpectedMultistep | IncompletePhaseB))
|
||||
{
|
||||
return RollBack;
|
||||
}
|
||||
|
|
@ -830,7 +1008,12 @@ pub(crate) async fn recover_manifest_drift(
|
|||
// write-entry heal: a deferred sidecar whose branch was
|
||||
// deleted would otherwise fail every ReadWrite open.
|
||||
coordinator.refresh().await?;
|
||||
if !coordinator.all_branches().await?.iter().any(|name| name == b) {
|
||||
if !coordinator
|
||||
.all_branches()
|
||||
.await?
|
||||
.iter()
|
||||
.any(|name| name == b)
|
||||
{
|
||||
discard_orphaned_branch_sidecar(
|
||||
root_uri,
|
||||
storage.as_ref(),
|
||||
|
|
@ -886,7 +1069,13 @@ async fn process_sidecar(
|
|||
.map(|e| e.table_version)
|
||||
.unwrap_or(0);
|
||||
states.push(ClassifiedTable {
|
||||
classification: classify_table(pin, lance_head, manifest_pinned, sidecar.writer_kind),
|
||||
classification: classify_table(
|
||||
pin,
|
||||
lance_head,
|
||||
manifest_pinned,
|
||||
sidecar.writer_kind,
|
||||
sidecar.schema_version,
|
||||
),
|
||||
manifest_pinned,
|
||||
lance_head,
|
||||
});
|
||||
|
|
@ -1023,7 +1212,7 @@ async fn process_sidecar(
|
|||
Phase C did not land)"
|
||||
);
|
||||
let (new_manifest_version, published_versions) =
|
||||
roll_forward_all(root_uri, sidecar, snapshot).await?;
|
||||
roll_forward_all(root_uri, sidecar, &states, snapshot).await?;
|
||||
// `to_version` records the ACTUAL Lance HEAD published for
|
||||
// each table (not pin.post_commit_pin, which is a lower bound
|
||||
// for loose-match writers like SchemaApply / EnsureIndices /
|
||||
|
|
@ -1107,6 +1296,7 @@ async fn roll_back_sidecar(
|
|||
TableClassification::RolledPastExpected
|
||||
| TableClassification::UnexpectedAtP1
|
||||
| TableClassification::UnexpectedMultistep
|
||||
| TableClassification::IncompletePhaseB
|
||||
) {
|
||||
restore_table_to_version(
|
||||
&pin.table_path,
|
||||
|
|
@ -1114,14 +1304,17 @@ async fn roll_back_sidecar(
|
|||
state.manifest_pinned,
|
||||
)
|
||||
.await?;
|
||||
// Publish the post-restore HEAD, CAS against the current (unmoved)
|
||||
// manifest pin — the same helper roll-forward uses.
|
||||
push_table_update_at_head(
|
||||
// Publish the post-restore HEAD (the restore commit we just made),
|
||||
// CAS against the current (unmoved) manifest pin — the same helper
|
||||
// roll-forward uses. `None` target: there is no prior observation to
|
||||
// pin to; the version to publish is the HEAD the restore produced.
|
||||
push_table_update(
|
||||
root_uri,
|
||||
&pin.table_key,
|
||||
&pin.table_path,
|
||||
pin.table_branch.as_deref(),
|
||||
state.manifest_pinned,
|
||||
None,
|
||||
&mut updates,
|
||||
&mut expected,
|
||||
)
|
||||
|
|
@ -1222,6 +1415,7 @@ async fn record_audit_recovery_rollforward(
|
|||
async fn roll_forward_all(
|
||||
root_uri: &str,
|
||||
sidecar: &RecoverySidecar,
|
||||
states: &[ClassifiedTable],
|
||||
snapshot: &Snapshot,
|
||||
) -> Result<(u64, HashMap<String, u64>)> {
|
||||
let total_changes =
|
||||
|
|
@ -1231,22 +1425,25 @@ async fn roll_forward_all(
|
|||
let mut published_versions: HashMap<String, u64> =
|
||||
HashMap::with_capacity(sidecar.tables.len() + sidecar.additional_registrations.len());
|
||||
|
||||
for pin in &sidecar.tables {
|
||||
// Publish to the table's CURRENT Lance HEAD on the pin's branch (not the
|
||||
// sidecar's `post_commit_pin`, a lower bound for loose-match writers that
|
||||
// run multiple commit_staged calls per table). CAS against the pin's
|
||||
// pre-write `expected_version`.
|
||||
let head_version = push_table_update_at_head(
|
||||
for (pin, state) in sidecar.tables.iter().zip(states.iter()) {
|
||||
// Publish the version classification OBSERVED (`state.lance_head`), not a
|
||||
// fresh HEAD re-read. For a `Confirmed` pin classify already validated
|
||||
// `lance_head == confirmed_version`, so this publishes the recorded WAL
|
||||
// intent by construction; for loose/strict pins it's the multi-commit
|
||||
// HEAD classify saw. Single observation, no classify→publish TOCTOU. CAS
|
||||
// against the pin's pre-write `expected_version`.
|
||||
let published = push_table_update(
|
||||
root_uri,
|
||||
&pin.table_key,
|
||||
&pin.table_path,
|
||||
pin.table_branch.as_deref(),
|
||||
pin.expected_version,
|
||||
Some(state.lance_head),
|
||||
&mut updates,
|
||||
&mut expected,
|
||||
)
|
||||
.await?;
|
||||
published_versions.insert(pin.table_key.clone(), head_version);
|
||||
published_versions.insert(pin.table_key.clone(), published);
|
||||
}
|
||||
|
||||
// SchemaApply-only: register added tables (and renamed targets) and
|
||||
|
|
@ -1346,45 +1543,61 @@ async fn roll_forward_all(
|
|||
/// version the table was just restored to). The HEAD is read AFTER any restore
|
||||
/// in the same single-threaded sweep, so no concurrent writer can have advanced
|
||||
/// it.
|
||||
async fn push_table_update_at_head(
|
||||
/// Stage a manifest `Update` for one table.
|
||||
///
|
||||
/// `target_version` selects WHICH Lance version's state to publish:
|
||||
/// - `Some(v)` — pin the dataset at version `v` and publish it. Roll-FORWARD
|
||||
/// passes the version classification observed (and, for a `Confirmed` pin,
|
||||
/// validated to equal `confirmed_version`), so recovery publishes the version it
|
||||
/// *decided* on rather than re-reading a HEAD a concurrent writer may have
|
||||
/// advanced since classification — one observation, used for both the decision
|
||||
/// and the publish (invariant 15).
|
||||
/// - `None` — publish the dataset's current HEAD. Roll-BACK uses this: it just
|
||||
/// created the restore commit, so HEAD *is* the version to publish.
|
||||
async fn push_table_update(
|
||||
root_uri: &str,
|
||||
table_key: &str,
|
||||
table_path: &str,
|
||||
branch: Option<&str>,
|
||||
expected_version: u64,
|
||||
target_version: Option<u64>,
|
||||
updates: &mut Vec<ManifestChange>,
|
||||
expected: &mut HashMap<String, u64>,
|
||||
) -> Result<u64> {
|
||||
let head_ds = Dataset::open(table_path)
|
||||
let ds = Dataset::open(table_path)
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
let head_ds = match branch {
|
||||
Some(b) if b != "main" => head_ds
|
||||
let ds = match branch {
|
||||
Some(b) if b != "main" => ds
|
||||
.checkout_branch(b)
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?,
|
||||
_ => head_ds,
|
||||
_ => ds,
|
||||
};
|
||||
let head_version = head_ds.version().version;
|
||||
let row_count = head_ds
|
||||
let ds = match target_version {
|
||||
Some(v) => ds
|
||||
.checkout_version(v)
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?,
|
||||
None => ds,
|
||||
};
|
||||
let published_version = ds.version().version;
|
||||
let row_count = ds
|
||||
.count_rows(None)
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))? as u64;
|
||||
let table_relative_path = super::table_path_for_table_key(table_key)?;
|
||||
let version_metadata = super::metadata::TableVersionMetadata::from_dataset(
|
||||
root_uri,
|
||||
&table_relative_path,
|
||||
&head_ds,
|
||||
)?;
|
||||
let version_metadata =
|
||||
super::metadata::TableVersionMetadata::from_dataset(root_uri, &table_relative_path, &ds)?;
|
||||
updates.push(ManifestChange::Update(SubTableUpdate {
|
||||
table_key: table_key.to_string(),
|
||||
table_version: head_version,
|
||||
table_version: published_version,
|
||||
table_branch: branch.map(str::to_string),
|
||||
row_count,
|
||||
version_metadata,
|
||||
}));
|
||||
expected.insert(table_key.to_string(), expected_version);
|
||||
Ok(head_version)
|
||||
Ok(published_version)
|
||||
}
|
||||
|
||||
/// Append the audit row describing this recovery action.
|
||||
|
|
@ -1568,6 +1781,7 @@ mod tests {
|
|||
table_path: table_path.to_string(),
|
||||
expected_version: expected,
|
||||
post_commit_pin: post,
|
||||
confirmed_version: None,
|
||||
table_branch: None,
|
||||
}
|
||||
}
|
||||
|
|
@ -1592,30 +1806,39 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn parse_sidecar_refuses_unknown_schema_version() {
|
||||
let body = r#"{
|
||||
"schema_version": 99,
|
||||
"operation_id": "01H000000000000000000000XX",
|
||||
"started_at": "0",
|
||||
"branch": null,
|
||||
"actor_id": null,
|
||||
"writer_kind": "Mutation",
|
||||
"tables": []
|
||||
}"#;
|
||||
let err = parse_sidecar("file:///tmp/__recovery/x.json", body).unwrap_err();
|
||||
fn parse_sidecar_refuses_future_but_accepts_older_schema_version() {
|
||||
let body = |version: u32| {
|
||||
format!(
|
||||
r#"{{
|
||||
"schema_version": {version},
|
||||
"operation_id": "01H000000000000000000000XX",
|
||||
"started_at": "0",
|
||||
"branch": null,
|
||||
"actor_id": null,
|
||||
"writer_kind": "BranchMerge",
|
||||
"tables": []
|
||||
}}"#
|
||||
)
|
||||
};
|
||||
// A version NEWER than this binary's max → refuse (can't guess the future).
|
||||
let err = parse_sidecar("file:///tmp/__recovery/x.json", &body(99)).unwrap_err();
|
||||
let msg = err.to_string();
|
||||
assert!(
|
||||
msg.contains("schema_version=99") && msg.contains("supports only schema_version=1"),
|
||||
"expected SidecarSchemaError mentioning the version mismatch, got: {}",
|
||||
msg,
|
||||
msg.contains("schema_version=99") && msg.contains("newer than the maximum"),
|
||||
"expected a future-version refusal, got: {msg}",
|
||||
);
|
||||
// An OLDER version (pre-confirmation v1) → accept and interpret with its
|
||||
// original semantics; never refuse a version we were built to understand.
|
||||
let parsed = parse_sidecar("file:///tmp/__recovery/x.json", &body(1))
|
||||
.expect("a v1 (older) sidecar must parse, not be refused");
|
||||
assert_eq!(parsed.schema_version, 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn classify_no_movement_when_head_equals_pinned() {
|
||||
let pin = make_pin("node:Person", "irrelevant", 5, 6);
|
||||
assert_eq!(
|
||||
classify_table(&pin, 5, 5, SidecarKind::Mutation),
|
||||
classify_table(&pin, 5, 5, SidecarKind::Mutation, SIDECAR_SCHEMA_VERSION),
|
||||
TableClassification::NoMovement,
|
||||
);
|
||||
}
|
||||
|
|
@ -1624,7 +1847,7 @@ mod tests {
|
|||
fn classify_rolled_past_expected_when_sidecar_matches_strict() {
|
||||
let pin = make_pin("node:Person", "irrelevant", 5, 6);
|
||||
assert_eq!(
|
||||
classify_table(&pin, 6, 5, SidecarKind::Mutation),
|
||||
classify_table(&pin, 6, 5, SidecarKind::Mutation, SIDECAR_SCHEMA_VERSION),
|
||||
TableClassification::RolledPastExpected,
|
||||
);
|
||||
}
|
||||
|
|
@ -1634,7 +1857,7 @@ mod tests {
|
|||
// Same +1 drift but post_commit_pin says it should be 7, not 6.
|
||||
let pin = make_pin("node:Person", "irrelevant", 5, 7);
|
||||
assert_eq!(
|
||||
classify_table(&pin, 6, 5, SidecarKind::Mutation),
|
||||
classify_table(&pin, 6, 5, SidecarKind::Mutation, SIDECAR_SCHEMA_VERSION),
|
||||
TableClassification::UnexpectedAtP1,
|
||||
);
|
||||
}
|
||||
|
|
@ -1643,7 +1866,7 @@ mod tests {
|
|||
fn classify_unexpected_multistep_when_head_jumped_more_than_one_strict() {
|
||||
let pin = make_pin("node:Person", "irrelevant", 5, 6);
|
||||
assert_eq!(
|
||||
classify_table(&pin, 8, 5, SidecarKind::Mutation),
|
||||
classify_table(&pin, 8, 5, SidecarKind::Mutation, SIDECAR_SCHEMA_VERSION),
|
||||
TableClassification::UnexpectedMultistep,
|
||||
);
|
||||
}
|
||||
|
|
@ -1652,7 +1875,7 @@ mod tests {
|
|||
fn classify_invariant_violation_when_head_below_pinned() {
|
||||
let pin = make_pin("node:Person", "irrelevant", 5, 6);
|
||||
assert_eq!(
|
||||
classify_table(&pin, 3, 5, SidecarKind::Mutation),
|
||||
classify_table(&pin, 3, 5, SidecarKind::Mutation, SIDECAR_SCHEMA_VERSION),
|
||||
TableClassification::InvariantViolation { observed: 3 },
|
||||
);
|
||||
}
|
||||
|
|
@ -1668,7 +1891,7 @@ mod tests {
|
|||
// built two indices). Strict would say UnexpectedMultistep; loose
|
||||
// accepts it as RolledPastExpected.
|
||||
assert_eq!(
|
||||
classify_table(&pin, 8, 5, SidecarKind::SchemaApply),
|
||||
classify_table(&pin, 8, 5, SidecarKind::SchemaApply, SIDECAR_SCHEMA_VERSION),
|
||||
TableClassification::RolledPastExpected,
|
||||
);
|
||||
}
|
||||
|
|
@ -1677,7 +1900,7 @@ mod tests {
|
|||
fn classify_loose_match_accepts_multi_commit_drift_for_ensure_indices() {
|
||||
let pin = make_pin("node:Person", "irrelevant", 5, 6);
|
||||
assert_eq!(
|
||||
classify_table(&pin, 9, 5, SidecarKind::EnsureIndices),
|
||||
classify_table(&pin, 9, 5, SidecarKind::EnsureIndices, SIDECAR_SCHEMA_VERSION),
|
||||
TableClassification::RolledPastExpected,
|
||||
);
|
||||
}
|
||||
|
|
@ -1686,7 +1909,7 @@ mod tests {
|
|||
fn classify_loose_match_no_movement_unchanged() {
|
||||
let pin = make_pin("node:Person", "irrelevant", 5, 6);
|
||||
assert_eq!(
|
||||
classify_table(&pin, 5, 5, SidecarKind::SchemaApply),
|
||||
classify_table(&pin, 5, 5, SidecarKind::SchemaApply, SIDECAR_SCHEMA_VERSION),
|
||||
TableClassification::NoMovement,
|
||||
);
|
||||
}
|
||||
|
|
@ -1695,31 +1918,65 @@ mod tests {
|
|||
fn classify_loose_match_invariant_violation_unchanged() {
|
||||
let pin = make_pin("node:Person", "irrelevant", 5, 6);
|
||||
assert_eq!(
|
||||
classify_table(&pin, 3, 5, SidecarKind::SchemaApply),
|
||||
classify_table(&pin, 3, 5, SidecarKind::SchemaApply, SIDECAR_SCHEMA_VERSION),
|
||||
TableClassification::InvariantViolation { observed: 3 },
|
||||
);
|
||||
}
|
||||
|
||||
/// BranchMerge must be loose-matched, not strict: while the strict
|
||||
/// classifier expects exactly one `commit_staged` per table,
|
||||
/// `publish_rewritten_merge_table` runs multiple per table
|
||||
/// (merge_insert + delete_where + index rebuilds — the comment in
|
||||
/// `merge.rs` explicitly says so). Strict classification would roll
|
||||
/// back valid completed Phase B work as `UnexpectedMultistep`.
|
||||
/// BranchMerge advances each table by several commits
|
||||
/// (adopt: append + upsert + delete; three-way: merge_insert + delete +
|
||||
/// index), so a bare "HEAD moved" is ambiguous between a complete and a
|
||||
/// partial publish. At a confirmation-aware version the two-phase
|
||||
/// confirmation resolves it: roll forward ONLY to the recorded
|
||||
/// `confirmed_version`; an unconfirmed moved HEAD is a partial publish
|
||||
/// (`IncompletePhaseB` ⇒ roll back), and a confirmed version that doesn't
|
||||
/// match the observed HEAD is a foreign advance (`UnexpectedMultistep` ⇒
|
||||
/// roll back). A *pre-confirmation* (v1) sidecar carries no confirmation and
|
||||
/// must keep the original loose roll-forward — reading it as strict would
|
||||
/// roll a completed pre-upgrade merge back (silent discard).
|
||||
#[test]
|
||||
fn classify_loose_match_accepts_multi_commit_drift_for_branch_merge() {
|
||||
let pin = make_pin("node:Person", "irrelevant", 5, 6);
|
||||
fn classify_branch_merge_requires_phase_b_confirmation() {
|
||||
// Unconfirmed multi-commit drift at a confirmation-aware version →
|
||||
// partial Phase B → roll back.
|
||||
let unconfirmed = make_pin("node:Person", "irrelevant", 5, 6);
|
||||
assert_eq!(
|
||||
classify_table(&pin, 8, 5, SidecarKind::BranchMerge),
|
||||
classify_table(&unconfirmed, 8, 5, SidecarKind::BranchMerge, SIDECAR_SCHEMA_VERSION),
|
||||
TableClassification::IncompletePhaseB,
|
||||
);
|
||||
// Backward-compat: the SAME unconfirmed pin in a PRE-confirmation (v1)
|
||||
// sidecar → loose roll-forward (the regression fix — a completed
|
||||
// pre-upgrade merge must not be discarded).
|
||||
assert_eq!(
|
||||
classify_table(
|
||||
&unconfirmed,
|
||||
8,
|
||||
5,
|
||||
SidecarKind::BranchMerge,
|
||||
CONFIRMATION_SCHEMA_VERSION - 1,
|
||||
),
|
||||
TableClassification::RolledPastExpected,
|
||||
);
|
||||
// Confirmed to the observed HEAD → complete Phase B → roll forward.
|
||||
let confirmed = SidecarTablePin {
|
||||
confirmed_version: Some(8),
|
||||
..make_pin("node:Person", "irrelevant", 5, 6)
|
||||
};
|
||||
assert_eq!(
|
||||
classify_table(&confirmed, 8, 5, SidecarKind::BranchMerge, SIDECAR_SCHEMA_VERSION),
|
||||
TableClassification::RolledPastExpected,
|
||||
);
|
||||
// Confirmed, but HEAD drifted past it (foreign writer) → roll back.
|
||||
assert_eq!(
|
||||
classify_table(&confirmed, 9, 5, SidecarKind::BranchMerge, SIDECAR_SCHEMA_VERSION),
|
||||
TableClassification::UnexpectedMultistep,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn classify_loose_match_branch_merge_no_movement_unchanged() {
|
||||
let pin = make_pin("node:Person", "irrelevant", 5, 6);
|
||||
assert_eq!(
|
||||
classify_table(&pin, 5, 5, SidecarKind::BranchMerge),
|
||||
classify_table(&pin, 5, 5, SidecarKind::BranchMerge, SIDECAR_SCHEMA_VERSION),
|
||||
TableClassification::NoMovement,
|
||||
);
|
||||
}
|
||||
|
|
@ -1728,7 +1985,7 @@ mod tests {
|
|||
fn classify_loose_match_branch_merge_invariant_violation_unchanged() {
|
||||
let pin = make_pin("node:Person", "irrelevant", 5, 6);
|
||||
assert_eq!(
|
||||
classify_table(&pin, 3, 5, SidecarKind::BranchMerge),
|
||||
classify_table(&pin, 3, 5, SidecarKind::BranchMerge, SIDECAR_SCHEMA_VERSION),
|
||||
TableClassification::InvariantViolation { observed: 3 },
|
||||
);
|
||||
}
|
||||
|
|
@ -1883,6 +2140,37 @@ mod tests {
|
|||
assert!(after.is_empty());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn confirm_sidecar_phase_b_errors_when_pin_missing_from_updates() {
|
||||
// A pinned table with no achieved version in the publish `updates` must
|
||||
// be a loud producer error, NOT a silent skip that leaves the pin
|
||||
// unconfirmed (which recovery would read as a partial Phase B and roll
|
||||
// the whole complete merge back). Guards the implicit `pins ⊆ updates`
|
||||
// invariant against a future divergence between the two filters.
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let storage = ObjectStorageAdapter::local();
|
||||
let mut sidecar = new_sidecar(
|
||||
SidecarKind::BranchMerge,
|
||||
Some("main".to_string()),
|
||||
None,
|
||||
vec![make_pin("node:Person", "file:///tmp/x.lance", 5, 6)],
|
||||
);
|
||||
// The confirmed-versions map does NOT cover the pinned table.
|
||||
let confirmed: HashMap<String, u64> = HashMap::new();
|
||||
let err = confirm_sidecar_phase_b(
|
||||
dir.path().to_str().unwrap(),
|
||||
&storage,
|
||||
&mut sidecar,
|
||||
&confirmed,
|
||||
)
|
||||
.await
|
||||
.expect_err("a pinned table with no achieved version must be a loud error");
|
||||
assert!(
|
||||
err.to_string().contains("pins and publish updates diverged"),
|
||||
"expected a pin/updates divergence error, got: {err}",
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_sidecars_skips_non_json_files() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
|
|
|
|||
|
|
@ -1531,7 +1531,11 @@ async fn test_v2_to_v3_sweeps_legacy_run_branches_on_write_open() {
|
|||
.await
|
||||
.unwrap();
|
||||
let post = open_manifest_dataset(uri, None).await.unwrap();
|
||||
assert_eq!(super::migrations::read_stamp(&post), 2, "stamp rewound to v2");
|
||||
assert_eq!(
|
||||
super::migrations::read_stamp(&post),
|
||||
2,
|
||||
"stamp rewound to v2"
|
||||
);
|
||||
}
|
||||
|
||||
// A no-op publish forces the open-for-write path, which runs the migration.
|
||||
|
|
@ -1556,7 +1560,10 @@ async fn test_v2_to_v3_sweeps_legacy_run_branches_on_write_open() {
|
|||
!after.iter().any(|b| b.starts_with("__run__")),
|
||||
"legacy run branch must be swept; got {after:?}",
|
||||
);
|
||||
assert!(after.iter().any(|b| b == "feature"), "user branch must survive");
|
||||
assert!(
|
||||
after.iter().any(|b| b == "feature"),
|
||||
"user branch must survive"
|
||||
);
|
||||
assert!(after.iter().any(|b| b == "main"), "main must survive");
|
||||
|
||||
// Idempotent: a second write-open finds the stamp at current and does not
|
||||
|
|
|
|||
|
|
@ -106,6 +106,12 @@ pub struct Omnigraph {
|
|||
coordinator: Arc<tokio::sync::RwLock<GraphCoordinator>>,
|
||||
table_store: TableStore,
|
||||
runtime_cache: RuntimeCache,
|
||||
/// Per-graph read caches: one shared Lance `Session` plus the held-`Dataset`
|
||||
/// handle cache, handed to live-Branch-read snapshots (via
|
||||
/// `resolved_target`) so table opens reuse handles (0 IO on a warm repeat)
|
||||
/// and one session. Invalidated alongside `runtime_cache` on branch switch /
|
||||
/// refresh — hygiene only; version-in-key carries correctness.
|
||||
read_caches: Arc<crate::runtime_cache::ReadCaches>,
|
||||
/// Read-heavy on every query, written only by `apply_schema`. ArcSwap
|
||||
/// gives atomic pointer swap with zero-cost reads (`load()` returns a
|
||||
/// `Guard<Arc<Catalog>>`), so concurrent queries on different actors
|
||||
|
|
@ -327,6 +333,14 @@ impl Omnigraph {
|
|||
coordinator: Arc::new(tokio::sync::RwLock::new(coordinator)),
|
||||
table_store: TableStore::new(&root),
|
||||
runtime_cache: RuntimeCache::default(),
|
||||
// One shared Session per graph (LanceDB's one-session-per-connection
|
||||
// model) plus the held-handle cache, created once and reused across
|
||||
// reads. Session::default() caps are lazy (6 GiB index / 1 GiB
|
||||
// metadata); multi-graph cap/sharing is a deferred follow-up.
|
||||
read_caches: Arc::new(crate::runtime_cache::ReadCaches {
|
||||
session: Arc::new(lance::session::Session::default()),
|
||||
handles: Arc::new(crate::runtime_cache::TableHandleCache::default()),
|
||||
}),
|
||||
catalog: Arc::new(ArcSwap::from_pointee(catalog)),
|
||||
schema_source: Arc::new(ArcSwap::from_pointee(schema_source.to_string())),
|
||||
write_queue: Arc::new(crate::db::write_queue::WriteQueueManager::new()),
|
||||
|
|
@ -351,12 +365,10 @@ impl Omnigraph {
|
|||
Self::open_with_storage_and_mode(uri, storage_for_uri(uri)?, OpenMode::ReadOnly).await
|
||||
}
|
||||
|
||||
/// `open_with_storage` retained for existing callers (init/test paths).
|
||||
/// Defaults to `OpenMode::ReadWrite`.
|
||||
pub(crate) async fn open_with_storage(
|
||||
uri: &str,
|
||||
storage: Arc<dyn StorageAdapter>,
|
||||
) -> Result<Self> {
|
||||
/// Open with a caller-supplied [`StorageAdapter`]. Used by init/test paths
|
||||
/// and by embedding/test consumers that wrap storage (e.g. a counting
|
||||
/// decorator for IO-budget tests). Defaults to `OpenMode::ReadWrite`.
|
||||
pub async fn open_with_storage(uri: &str, storage: Arc<dyn StorageAdapter>) -> Result<Self> {
|
||||
Self::open_with_storage_and_mode(uri, storage, OpenMode::ReadWrite).await
|
||||
}
|
||||
|
||||
|
|
@ -428,6 +440,14 @@ impl Omnigraph {
|
|||
coordinator: Arc::new(tokio::sync::RwLock::new(coordinator)),
|
||||
table_store: TableStore::new(&root),
|
||||
runtime_cache: RuntimeCache::default(),
|
||||
// One shared Session per graph (LanceDB's one-session-per-connection
|
||||
// model) plus the held-handle cache, created once and reused across
|
||||
// reads. Session::default() caps are lazy (6 GiB index / 1 GiB
|
||||
// metadata); multi-graph cap/sharing is a deferred follow-up.
|
||||
read_caches: Arc::new(crate::runtime_cache::ReadCaches {
|
||||
session: Arc::new(lance::session::Session::default()),
|
||||
handles: Arc::new(crate::runtime_cache::TableHandleCache::default()),
|
||||
}),
|
||||
catalog: Arc::new(ArcSwap::from_pointee(catalog)),
|
||||
schema_source: Arc::new(ArcSwap::from_pointee(schema_source)),
|
||||
write_queue: Arc::new(crate::db::write_queue::WriteQueueManager::new()),
|
||||
|
|
@ -539,6 +559,12 @@ impl Omnigraph {
|
|||
}
|
||||
|
||||
pub(crate) async fn ensure_schema_state_valid(&self) -> Result<()> {
|
||||
// Full per-call validation is intentional: a long-lived handle must
|
||||
// detect external drift of the schema source, IR, OR state on its next
|
||||
// operation (see lifecycle::long_lived_handle_rejects_schema_* tests). A
|
||||
// source-only fast path would miss IR/state drift when _schema.pg is
|
||||
// unchanged, so the only safe latency win is not calling this twice per
|
||||
// query (finding A removes the redundant caller in exec/query.rs).
|
||||
validate_schema_contract(self.uri(), Arc::clone(&self.storage)).await
|
||||
}
|
||||
|
||||
|
|
@ -719,10 +745,13 @@ impl Omnigraph {
|
|||
let normalized = normalize_branch_name(branch.unwrap_or("main"))?;
|
||||
let coord = self.coordinator.read().await;
|
||||
if normalized.as_deref() == coord.current_branch() {
|
||||
let snapshot_id = coord
|
||||
.head_commit_id()
|
||||
.await?
|
||||
.unwrap_or_else(|| SnapshotId::synthetic(coord.current_branch(), coord.version()));
|
||||
let snapshot_id = coord.head_commit_id().await?.unwrap_or_else(|| {
|
||||
SnapshotId::synthetic(
|
||||
coord.current_branch(),
|
||||
coord.version(),
|
||||
coord.manifest_incarnation().e_tag.as_deref(),
|
||||
)
|
||||
});
|
||||
return Ok(ResolvedTarget {
|
||||
requested,
|
||||
branch: coord.current_branch().map(str::to_string),
|
||||
|
|
@ -785,10 +814,15 @@ impl Omnigraph {
|
|||
let branch = normalize_branch_name(branch)?;
|
||||
let next = self.open_coordinator_for_branch(branch.as_deref()).await?;
|
||||
*self.coordinator.write().await = next;
|
||||
self.runtime_cache.invalidate_all().await;
|
||||
self.invalidate_read_caches().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn invalidate_read_caches(&self) {
|
||||
self.runtime_cache.invalidate_all().await;
|
||||
self.read_caches.handles.invalidate_all().await;
|
||||
}
|
||||
|
||||
/// Re-read the handle-local coordinator state from storage AND run
|
||||
/// in-process recovery. Closes the Phase B → Phase C residual (e.g.
|
||||
/// `MutationStaging::finalize` crash mid-publish in a long-running
|
||||
|
|
@ -888,7 +922,7 @@ impl Omnigraph {
|
|||
)
|
||||
.await?;
|
||||
self.reload_schema_if_source_changed().await?;
|
||||
self.runtime_cache.invalidate_all().await;
|
||||
self.invalidate_read_caches().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
@ -920,7 +954,7 @@ impl Omnigraph {
|
|||
// write that triggered the heal validates against the stale
|
||||
// schema. Same post-heal step as `refresh`.
|
||||
self.reload_schema_if_source_changed().await?;
|
||||
self.runtime_cache.invalidate_all().await;
|
||||
self.invalidate_read_caches().await;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -956,7 +990,7 @@ impl Omnigraph {
|
|||
/// own publish path.
|
||||
pub(crate) async fn refresh_coordinator_only(&self) -> Result<()> {
|
||||
self.coordinator.write().await.refresh().await?;
|
||||
self.runtime_cache.invalidate_all().await;
|
||||
self.invalidate_read_caches().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
@ -974,11 +1008,66 @@ impl Omnigraph {
|
|||
target: impl Into<ReadTarget>,
|
||||
) -> Result<ResolvedTarget> {
|
||||
self.ensure_schema_state_valid().await?;
|
||||
self.coordinator
|
||||
.read()
|
||||
.await
|
||||
.resolve_target(&target.into())
|
||||
.await
|
||||
let target = target.into();
|
||||
let mut resolved = self.resolve_target_inner(&target).await?;
|
||||
// Attach the read caches (shared Session + held-handle cache) for live
|
||||
// Branch reads so table opens reuse handles (0 IO on a warm repeat).
|
||||
// Snapshot-id reads are deliberately NOT cached: they pin a historical
|
||||
// version `cleanup` may GC, so bypassing the cache sidesteps the
|
||||
// cleanup-vs-cached-handle edge. Writes never reach here (they use
|
||||
// `resolved_branch_target`), so they never receive a pinned handle.
|
||||
if matches!(target, ReadTarget::Branch(_)) {
|
||||
resolved
|
||||
.snapshot
|
||||
.set_read_caches(Arc::clone(&self.read_caches));
|
||||
}
|
||||
Ok(resolved)
|
||||
}
|
||||
|
||||
/// Resolve a read target to its snapshot, without attaching read caches.
|
||||
/// Same-branch reads reuse the warm coordinator, gated by a cheap version
|
||||
/// probe (invariant 6: strong consistency, never a blind warm read). Reads do
|
||||
/// not need the commit graph (the manifest version is the visibility
|
||||
/// authority, invariant 2), so the id is synthetic and no commit-graph scan
|
||||
/// happens on this path.
|
||||
async fn resolve_target_inner(&self, target: &ReadTarget) -> Result<ResolvedTarget> {
|
||||
if let ReadTarget::Branch(branch) = target {
|
||||
let normalized = normalize_branch_name(branch)?;
|
||||
{
|
||||
let coord = self.coordinator.read().await;
|
||||
if normalized.as_deref() != coord.current_branch() {
|
||||
// Different branch: cold resolve (opens that branch).
|
||||
return coord.resolve_target(target).await;
|
||||
}
|
||||
let held = coord.manifest_incarnation();
|
||||
if coord.probe_latest_incarnation().await?.matches(&held) {
|
||||
return Ok(warm_resolved_target(&coord, target));
|
||||
}
|
||||
// Stale: refresh under the write lock below.
|
||||
}
|
||||
let mut coord = self.coordinator.write().await;
|
||||
if normalized.as_deref() == coord.current_branch() {
|
||||
// Re-check after taking the write lock; another writer may have
|
||||
// refreshed (tokio RwLock has no read->write upgrade).
|
||||
let held = coord.manifest_incarnation();
|
||||
let mut refreshed = false;
|
||||
if !coord.probe_latest_incarnation().await?.matches(&held) {
|
||||
coord.refresh_manifest_only().await?;
|
||||
refreshed = true;
|
||||
}
|
||||
let resolved = warm_resolved_target(&coord, target);
|
||||
drop(coord);
|
||||
if refreshed {
|
||||
self.invalidate_read_caches().await;
|
||||
}
|
||||
return Ok(resolved);
|
||||
}
|
||||
// Branch changed while waiting for the write lock: cold resolve.
|
||||
return coord.resolve_target(target).await;
|
||||
}
|
||||
|
||||
// Snapshot target: resolve through the commit graph as before.
|
||||
self.coordinator.read().await.resolve_target(target).await
|
||||
}
|
||||
|
||||
// ─── Change detection ────────────────────────────────────────────────
|
||||
|
|
@ -1673,6 +1762,24 @@ pub(crate) fn normalize_branch_name(branch: &str) -> Result<Option<String>> {
|
|||
Ok(Some(branch.to_string()))
|
||||
}
|
||||
|
||||
/// Build a `ResolvedTarget` from the warm coordinator without opening the commit
|
||||
/// graph. The live branch snapshot is pinned by the manifest incarnation, so the
|
||||
/// id is synthetic `(branch, version, e_tag when available)`; nothing on the read
|
||||
/// path needs a real commit ULID (only `RuntimeCache` keys on the id, where
|
||||
/// synthetic is consistent).
|
||||
fn warm_resolved_target(coord: &GraphCoordinator, requested: &ReadTarget) -> ResolvedTarget {
|
||||
ResolvedTarget {
|
||||
requested: requested.clone(),
|
||||
branch: coord.current_branch().map(str::to_string),
|
||||
snapshot_id: SnapshotId::synthetic(
|
||||
coord.current_branch(),
|
||||
coord.version(),
|
||||
coord.manifest_incarnation().e_tag.as_deref(),
|
||||
),
|
||||
snapshot: coord.snapshot(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn ensure_public_branch_ref(branch: &str, operation: &str) -> Result<()> {
|
||||
if is_internal_system_branch(branch) {
|
||||
return Err(OmniError::manifest(format!(
|
||||
|
|
@ -2523,6 +2630,7 @@ edge WorksAt: Person -> Company
|
|||
db.branch_create("__run__legacy").await.unwrap();
|
||||
drop(db);
|
||||
{
|
||||
// forbidden-api-allow: test synthesizes a legacy graph by editing __manifest directly.
|
||||
let mut ds = lance::Dataset::open(&format!("{}/__manifest", uri))
|
||||
.await
|
||||
.unwrap();
|
||||
|
|
|
|||
|
|
@ -420,6 +420,9 @@ async fn optimize_one_table(
|
|||
// Lower bound — compaction commits N≥1 versions (reserve + rewrite);
|
||||
// the classifier loose-matches SidecarKind::Optimize.
|
||||
post_commit_pin: expected_version + 1,
|
||||
// Optimize uses the loose match (drift is derived state), not
|
||||
// BranchMerge's Phase-B confirmation — left None.
|
||||
confirmed_version: None,
|
||||
table_branch: None,
|
||||
}],
|
||||
);
|
||||
|
|
@ -937,6 +940,7 @@ mod tests {
|
|||
|
||||
for type_name in ["Person", "Company"] {
|
||||
let table_uri = node_table_uri(uri, type_name);
|
||||
// forbidden-api-allow: test synthesizes a branch ref directly on the Lance dataset.
|
||||
let mut ds = lance::Dataset::open(&table_uri).await.unwrap();
|
||||
let base = ds.version().version;
|
||||
ds.create_branch("feature", base, None).await.unwrap();
|
||||
|
|
|
|||
|
|
@ -362,6 +362,9 @@ where
|
|||
table_path: db.storage().dataset_uri(&entry.table_path),
|
||||
expected_version: entry.table_version,
|
||||
post_commit_pin: entry.table_version + 1,
|
||||
// SchemaApply uses the loose match, not BranchMerge's Phase-B
|
||||
// confirmation — left None.
|
||||
confirmed_version: None,
|
||||
table_branch: entry.table_branch.clone(),
|
||||
})
|
||||
})
|
||||
|
|
@ -447,8 +450,7 @@ where
|
|||
&& sidecar_registrations.is_empty()
|
||||
&& sidecar_tombstones.is_empty());
|
||||
if writes_sidecar {
|
||||
schema_apply_queue_keys
|
||||
.push(crate::db::manifest::schema_apply_serial_queue_key());
|
||||
schema_apply_queue_keys.push(crate::db::manifest::schema_apply_serial_queue_key());
|
||||
}
|
||||
let _schema_apply_queue_guards = db
|
||||
.write_queue()
|
||||
|
|
@ -530,8 +532,7 @@ where
|
|||
.await?;
|
||||
let table_path = table_path_for_table_key(target_table_key)?;
|
||||
let dataset_uri = db.storage().dataset_uri(&table_path);
|
||||
let target_ds =
|
||||
SnapshotHandle::new(TableStore::write_dataset(&dataset_uri, batch).await?);
|
||||
let target_ds = SnapshotHandle::new(TableStore::write_dataset(&dataset_uri, batch).await?);
|
||||
// Indexes on the renamed table are reconciled later (iss-848).
|
||||
let state = db.storage().table_state(&dataset_uri, &target_ds).await?;
|
||||
table_registrations.insert(target_table_key.clone(), table_path);
|
||||
|
|
@ -750,6 +751,7 @@ where
|
|||
async fn cleanup_dataset_old_versions(db: &Omnigraph, full_uri: &str) -> Result<()> {
|
||||
use chrono::Utc;
|
||||
use lance::dataset::cleanup::CleanupPolicy;
|
||||
// forbidden-api-allow: maintenance (Hard-drop version GC) opens the dataset to run cleanup_old_versions.
|
||||
let ds = lance::Dataset::open(full_uri)
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
|
|
|
|||
|
|
@ -125,6 +125,9 @@ pub(super) async fn ensure_indices_for_branch(
|
|||
table_path: full_path,
|
||||
expected_version: entry.table_version,
|
||||
post_commit_pin: entry.table_version + 1,
|
||||
// EnsureIndices uses the loose match (index coverage is derived
|
||||
// state), not BranchMerge's Phase-B confirmation — left None.
|
||||
confirmed_version: None,
|
||||
// Use active_branch (where commits actually land), NOT
|
||||
// entry.table_branch (where the table currently lives).
|
||||
// open_owned_dataset_for_branch_write forks a feature
|
||||
|
|
@ -150,6 +153,9 @@ pub(super) async fn ensure_indices_for_branch(
|
|||
table_path: full_path,
|
||||
expected_version: entry.table_version,
|
||||
post_commit_pin: entry.table_version + 1,
|
||||
// EnsureIndices uses the loose match (index coverage is derived
|
||||
// state), not BranchMerge's Phase-B confirmation — left None.
|
||||
confirmed_version: None,
|
||||
// Use active_branch (where commits actually land), NOT
|
||||
// entry.table_branch (where the table currently lives).
|
||||
// open_owned_dataset_for_branch_write forks a feature
|
||||
|
|
@ -1097,7 +1103,8 @@ async fn prepare_updates_for_commit(
|
|||
// have null embeddings) is deferred and logged inside
|
||||
// build_indices; a later ensure_indices/optimize materializes it.
|
||||
// The load/mutate/merge commit must not fail on it.
|
||||
let _pending = build_indices_on_dataset(db, &prepared_update.table_key, &mut ds).await?;
|
||||
let _pending =
|
||||
build_indices_on_dataset(db, &prepared_update.table_key, &mut ds).await?;
|
||||
let state = db.storage().table_state(&full_path, &ds).await?;
|
||||
prepared_update.table_version = state.version;
|
||||
prepared_update.row_count = state.row_count;
|
||||
|
|
@ -1350,6 +1357,7 @@ mod classify_fork_ref_tests {
|
|||
// the manifest's `feature` snapshot still places on main.
|
||||
let person = node_path(&db, "feature", "node:Person").await;
|
||||
{
|
||||
// forbidden-api-allow: test synthesizes a branch ref directly on the Lance dataset.
|
||||
let mut ds = lance::Dataset::open(&person).await.unwrap();
|
||||
let v = ds.version().version;
|
||||
ds.create_branch("feature", v, None).await.unwrap();
|
||||
|
|
@ -1362,6 +1370,7 @@ mod classify_fork_ref_tests {
|
|||
|
||||
// Orphan (ghost): a ref for a branch the manifest does not have at all.
|
||||
{
|
||||
// forbidden-api-allow: test synthesizes a branch ref directly on the Lance dataset.
|
||||
let mut ds = lance::Dataset::open(&person).await.unwrap();
|
||||
let v = ds.version().version;
|
||||
ds.create_branch("ghost", v, None).await.unwrap();
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ pub enum MergeConflictKind {
|
|||
#[derive(Debug, Error)]
|
||||
pub enum OmniError {
|
||||
#[error("{0}")]
|
||||
Compiler(#[from] omnigraph_compiler::error::NanoError),
|
||||
Compiler(#[from] omnigraph_compiler::error::CompilerError),
|
||||
#[error("storage: {0}")]
|
||||
Lance(String),
|
||||
#[error("query: {0}")]
|
||||
|
|
|
|||
|
|
@ -5,7 +5,14 @@ const MERGE_STAGE_DIR_ENV: &str = "OMNIGRAPH_MERGE_STAGING_DIR";
|
|||
|
||||
#[derive(Debug)]
|
||||
enum CandidateTableState {
|
||||
/// Adopt the source's table state via a pointer switch or a branch fork —
|
||||
/// no data HEAD advance, so nothing to pin for recovery.
|
||||
AdoptSourceState,
|
||||
/// Adopt the source's state by applying a non-empty delta onto the target's
|
||||
/// lineage (append new + upsert changed + delete removed). The delta is
|
||||
/// pre-computed at classification so this candidate can be recovery-pinned:
|
||||
/// its publish advances Lance HEAD before the manifest commit.
|
||||
AdoptWithDelta(AdoptDelta),
|
||||
RewriteMerged(StagedMergeResult),
|
||||
}
|
||||
|
||||
|
|
@ -22,6 +29,38 @@ struct StagedMergeResult {
|
|||
deleted_ids: Vec<String>,
|
||||
}
|
||||
|
||||
/// Delta for an adopted-source merge (the fast-forward / target-owns path):
|
||||
/// the new + changed rows to apply onto the target's base lineage, plus the ids
|
||||
/// removed on source. Distinct from [`StagedMergeResult`] (the three-way path),
|
||||
/// which also carries a `full_staged` table for validation — the adopt path
|
||||
/// validates against the source snapshot directly (`candidate_dataset`), so it
|
||||
/// needs no `full_staged` and never builds it.
|
||||
///
|
||||
/// TRANSITIONAL — fragment-adopt excision point. This whole row-level adopt
|
||||
/// (`AdoptDelta`, [`compute_adopt_delta`], [`publish_adopted_delta`], and the
|
||||
/// streaming append it drives) re-derives the source branch row-by-row because
|
||||
/// today's Lance offers no fragment-level branch merge. When Lance ships
|
||||
/// branch-merge/rebase ([#7263]) + UUID branch paths ([#7185]), a fast-forward
|
||||
/// merge becomes a *fragment graft* — adopt the source table version's
|
||||
/// fragments (and their already-built indexes) by reference, no rows scanned,
|
||||
/// re-appended, upserted, or deleted. At that point this struct and its two
|
||||
/// functions are removed wholesale; the merge collapses to ~one ref/metadata
|
||||
/// op per table. Keep them self-contained so that excision stays a clean delete.
|
||||
///
|
||||
/// [#7263]: https://github.com/lance-format/lance/issues/7263
|
||||
/// [#7185]: https://github.com/lance-format/lance/issues/7185
|
||||
#[derive(Debug)]
|
||||
struct AdoptDelta {
|
||||
/// New-on-source rows → `stage_append` (a streaming `Operation::Append`, no
|
||||
/// hash join). The connector's dominant case and the OOM fix: appending new
|
||||
/// rows never buffers the whole delta in a full-outer hash join.
|
||||
appends: Option<StagedTable>,
|
||||
/// Changed-on-source rows → `stage_merge_insert` (a hash join bounded to the
|
||||
/// genuinely-changed set, not the whole delta).
|
||||
upserts: Option<StagedTable>,
|
||||
deleted_ids: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct CursorRow {
|
||||
id: String,
|
||||
|
|
@ -31,24 +70,48 @@ struct CursorRow {
|
|||
row_index: usize,
|
||||
}
|
||||
|
||||
impl CursorRow {
|
||||
/// Compute this row's signature on demand. Used by the lazy adopt cursor,
|
||||
/// where `signature` is left empty; the value is identical to the eager
|
||||
/// `signature` field the three-way cursor populates.
|
||||
fn compute_signature(&self) -> Result<String> {
|
||||
row_signature(&self.batch, self.row_index)
|
||||
}
|
||||
}
|
||||
|
||||
struct OrderedTableCursor {
|
||||
stream: Option<std::pin::Pin<Box<DatasetRecordBatchStream>>>,
|
||||
dataset: Option<Dataset>,
|
||||
current_batch: Option<RecordBatch>,
|
||||
current_row: usize,
|
||||
peeked: Option<CursorRow>,
|
||||
/// When false, `next_row` leaves `CursorRow::signature` empty and callers
|
||||
/// compute it on demand via `CursorRow::compute_signature`. The adopt path
|
||||
/// uses this: new/deleted rows never need a signature comparison and would
|
||||
/// otherwise eagerly stringify their embedding for nothing.
|
||||
eager_signatures: bool,
|
||||
}
|
||||
|
||||
impl OrderedTableCursor {
|
||||
async fn from_snapshot(snapshot: &Snapshot, table_key: &str) -> Result<Self> {
|
||||
Self::open(snapshot, table_key, true).await
|
||||
}
|
||||
|
||||
/// Like `from_snapshot` but leaves row signatures uncomputed (callers use
|
||||
/// `CursorRow::compute_signature` on demand). See `eager_signatures`.
|
||||
async fn from_snapshot_lazy(snapshot: &Snapshot, table_key: &str) -> Result<Self> {
|
||||
Self::open(snapshot, table_key, false).await
|
||||
}
|
||||
|
||||
async fn open(snapshot: &Snapshot, table_key: &str, eager_signatures: bool) -> Result<Self> {
|
||||
let dataset = match snapshot.entry(table_key) {
|
||||
Some(_) => Some(snapshot.open(table_key).await?),
|
||||
None => None,
|
||||
};
|
||||
Self::from_dataset(dataset).await
|
||||
Self::from_dataset(dataset, eager_signatures).await
|
||||
}
|
||||
|
||||
async fn from_dataset(dataset: Option<Dataset>) -> Result<Self> {
|
||||
async fn from_dataset(dataset: Option<Dataset>, eager_signatures: bool) -> Result<Self> {
|
||||
let stream = if let Some(ds) = &dataset {
|
||||
Some(Box::pin(
|
||||
crate::table_store::TableStore::scan_stream_with(
|
||||
|
|
@ -71,6 +134,7 @@ impl OrderedTableCursor {
|
|||
current_batch: None,
|
||||
current_row: 0,
|
||||
peeked: None,
|
||||
eager_signatures,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -97,9 +161,14 @@ impl OrderedTableCursor {
|
|||
let dataset = self.dataset.clone().ok_or_else(|| {
|
||||
OmniError::manifest("cursor row missing source dataset".to_string())
|
||||
})?;
|
||||
let signature = if self.eager_signatures {
|
||||
row_signature(batch, row_index)?
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
return Ok(Some(CursorRow {
|
||||
id: row_id_at(batch, row_index)?,
|
||||
signature: row_signature(batch, row_index)?,
|
||||
signature,
|
||||
dataset,
|
||||
batch: batch.clone(),
|
||||
row_index,
|
||||
|
|
@ -258,20 +327,30 @@ fn sanitize_table_key(table_key: &str) -> String {
|
|||
}
|
||||
|
||||
/// Computes the delta between base and source for an adopted-source merge.
|
||||
/// Returns the changed/new rows (for merge_insert) and deleted IDs (for delete).
|
||||
async fn compute_source_delta(
|
||||
/// Returns the new + changed rows and the ids deleted on source.
|
||||
///
|
||||
/// Unchanged rows are dropped: the adopt path validates against the source
|
||||
/// snapshot directly (`candidate_dataset`), so no `full_staged` table is built
|
||||
/// — saving the O(rows) temp write that `compute_source_delta` used to produce
|
||||
/// and then discard.
|
||||
///
|
||||
/// TRANSITIONAL — removed by the fragment-adopt work (see [`AdoptDelta`]): a
|
||||
/// fragment graft adopts the source's fragments by reference, so there is no
|
||||
/// row-level delta to compute.
|
||||
async fn compute_adopt_delta(
|
||||
table_key: &str,
|
||||
catalog: &Catalog,
|
||||
base_snapshot: &Snapshot,
|
||||
source_snapshot: &Snapshot,
|
||||
) -> Result<Option<StagedMergeResult>> {
|
||||
) -> Result<Option<AdoptDelta>> {
|
||||
let schema = schema_for_table_key(catalog, table_key)?;
|
||||
let mut full_writer =
|
||||
StagedTableWriter::new(&format!("{}_adopt_full", table_key), schema.clone())?;
|
||||
let mut delta_writer = StagedTableWriter::new(&format!("{}_adopt_delta", table_key), schema)?;
|
||||
let mut append_writer =
|
||||
StagedTableWriter::new(&format!("{}_adopt_append", table_key), schema.clone())?;
|
||||
let mut upsert_writer =
|
||||
StagedTableWriter::new(&format!("{}_adopt_upsert", table_key), schema)?;
|
||||
let mut deleted_ids: Vec<String> = Vec::new();
|
||||
let mut base = OrderedTableCursor::from_snapshot(base_snapshot, table_key).await?;
|
||||
let mut source = OrderedTableCursor::from_snapshot(source_snapshot, table_key).await?;
|
||||
let mut base = OrderedTableCursor::from_snapshot_lazy(base_snapshot, table_key).await?;
|
||||
let mut source = OrderedTableCursor::from_snapshot_lazy(source_snapshot, table_key).await?;
|
||||
|
||||
let mut needs_update = false;
|
||||
|
||||
|
|
@ -297,9 +376,6 @@ async fn compute_source_delta(
|
|||
None
|
||||
};
|
||||
|
||||
let base_sig = base_row.as_ref().map(|r| r.signature.as_str());
|
||||
let source_sig = source_row.as_ref().map(|r| r.signature.as_str());
|
||||
|
||||
match (&base_row, &source_row) {
|
||||
(Some(_), None) => {
|
||||
// Deleted on source
|
||||
|
|
@ -307,20 +383,21 @@ async fn compute_source_delta(
|
|||
needs_update = true;
|
||||
}
|
||||
(None, Some(src)) => {
|
||||
// New on source
|
||||
full_writer.push_row(src).await?;
|
||||
delta_writer.push_row(src).await?;
|
||||
// New on source → append (streaming, no hash join). No signature
|
||||
// needed — a new id is absent from base by construction.
|
||||
append_writer.push_row(src).await?;
|
||||
needs_update = true;
|
||||
}
|
||||
(Some(_), Some(src)) if source_sig != base_sig => {
|
||||
// Changed on source
|
||||
full_writer.push_row(src).await?;
|
||||
delta_writer.push_row(src).await?;
|
||||
needs_update = true;
|
||||
}
|
||||
(Some(base), Some(_)) => {
|
||||
// Unchanged — write to full (for validation), skip delta
|
||||
full_writer.push_row(base).await?;
|
||||
(Some(base), Some(src)) => {
|
||||
// Present on both — compute signatures lazily (the only case
|
||||
// that needs them) to tell a changed row from an unchanged one.
|
||||
// New/deleted rows above skip the embedding stringify entirely.
|
||||
if src.compute_signature()? != base.compute_signature()? {
|
||||
// Changed on source → upsert.
|
||||
upsert_writer.push_row(src).await?;
|
||||
needs_update = true;
|
||||
}
|
||||
// else unchanged — already on the target's base lineage; drop.
|
||||
}
|
||||
(None, None) => unreachable!(),
|
||||
}
|
||||
|
|
@ -330,15 +407,20 @@ async fn compute_source_delta(
|
|||
return Ok(None);
|
||||
}
|
||||
|
||||
let delta_staged = if delta_writer.row_count > 0 {
|
||||
Some(delta_writer.finish().await?)
|
||||
let appends = if append_writer.row_count > 0 {
|
||||
Some(append_writer.finish().await?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let upserts = if upsert_writer.row_count > 0 {
|
||||
Some(upsert_writer.finish().await?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(Some(StagedMergeResult {
|
||||
full_staged: full_writer.finish().await?,
|
||||
delta_staged,
|
||||
Ok(Some(AdoptDelta {
|
||||
appends,
|
||||
upserts,
|
||||
deleted_ids,
|
||||
}))
|
||||
}
|
||||
|
|
@ -651,10 +733,12 @@ async fn candidate_dataset(
|
|||
) -> Result<Option<Dataset>> {
|
||||
if let Some(candidate) = candidates.get(table_key) {
|
||||
return match candidate {
|
||||
CandidateTableState::AdoptSourceState => match source_snapshot.entry(table_key) {
|
||||
Some(_) => Ok(Some(source_snapshot.open(table_key).await?)),
|
||||
None => Ok(None),
|
||||
},
|
||||
CandidateTableState::AdoptSourceState | CandidateTableState::AdoptWithDelta(_) => {
|
||||
match source_snapshot.entry(table_key) {
|
||||
Some(_) => Ok(Some(source_snapshot.open(table_key).await?)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
CandidateTableState::RewriteMerged(staged) => {
|
||||
Ok(Some(staged.full_staged.dataset.clone()))
|
||||
}
|
||||
|
|
@ -840,13 +924,62 @@ fn row_id_at(batch: &RecordBatch, row: usize) -> Result<String> {
|
|||
Ok(ids.value(row).to_string())
|
||||
}
|
||||
|
||||
async fn publish_adopted_source_state(
|
||||
/// Classify a table whose target state equals base (the adopt / fast-forward
|
||||
/// case). Returns [`CandidateTableState::AdoptWithDelta`] — with the delta
|
||||
/// pre-computed so it can be recovery-pinned — when the adopt applies a
|
||||
/// non-empty delta onto the target's lineage (a HEAD-advancing publish via
|
||||
/// [`publish_adopted_delta`]); otherwise [`CandidateTableState::AdoptSourceState`]
|
||||
/// (a pointer switch or fork, which does not advance the data HEAD).
|
||||
///
|
||||
/// The HEAD-advancing subcases mirror [`publish_adopted_source_state`]: source
|
||||
/// on a branch with the target either on main or owning the table. Computing the
|
||||
/// delta here (rather than inside the publish) is what closes the recovery gap —
|
||||
/// the classifier knows whether the publish will move Lance HEAD.
|
||||
async fn classify_adopt(
|
||||
target_db: &Omnigraph,
|
||||
catalog: &Catalog,
|
||||
base_snapshot: &Snapshot,
|
||||
source_snapshot: &Snapshot,
|
||||
target_snapshot: &Snapshot,
|
||||
table_key: &str,
|
||||
) -> Result<CandidateTableState> {
|
||||
let Some(source_entry) = source_snapshot.entry(table_key) else {
|
||||
return Ok(CandidateTableState::AdoptSourceState);
|
||||
};
|
||||
let target_entry = target_snapshot.entry(table_key);
|
||||
let target_active = target_db.active_branch().await;
|
||||
let advances_head = match (
|
||||
target_active.as_deref(),
|
||||
source_entry.table_branch.as_deref(),
|
||||
) {
|
||||
// Source on a branch, target on main — delta applied onto main's lineage.
|
||||
(None, Some(_)) => true,
|
||||
// Both on branches, target owns this table — delta applied onto it.
|
||||
(Some(target_branch), Some(_)) => {
|
||||
target_entry.and_then(|e| e.table_branch.as_deref()) == Some(target_branch)
|
||||
}
|
||||
// Source on main (pointer switch) or target doesn't own (fork): no advance.
|
||||
_ => false,
|
||||
};
|
||||
if !advances_head {
|
||||
return Ok(CandidateTableState::AdoptSourceState);
|
||||
}
|
||||
match compute_adopt_delta(table_key, catalog, base_snapshot, source_snapshot).await? {
|
||||
Some(delta) => Ok(CandidateTableState::AdoptWithDelta(delta)),
|
||||
None => Ok(CandidateTableState::AdoptSourceState),
|
||||
}
|
||||
}
|
||||
|
||||
/// Adopt the source's table state without applying a row delta: a pointer
|
||||
/// switch (source/target share lineage) or a branch fork. The HEAD-advancing
|
||||
/// delta case is classified [`CandidateTableState::AdoptWithDelta`] and
|
||||
/// published by [`publish_adopted_delta`], so reaching the branch-bearing arms
|
||||
/// here means the delta was empty.
|
||||
async fn publish_adopted_source_state(
|
||||
target_db: &Omnigraph,
|
||||
source_snapshot: &Snapshot,
|
||||
target_snapshot: &Snapshot,
|
||||
table_key: &str,
|
||||
) -> Result<crate::db::SubTableUpdate> {
|
||||
let source_entry = source_snapshot
|
||||
.entry(table_key)
|
||||
|
|
@ -875,44 +1008,31 @@ async fn publish_adopted_source_state(
|
|||
row_count: source_entry.row_count,
|
||||
version_metadata: source_entry.version_metadata.clone(),
|
||||
}),
|
||||
// Source on branch, target on main — apply delta to preserve version metadata
|
||||
(None, Some(_source_branch)) => {
|
||||
let delta =
|
||||
compute_source_delta(table_key, catalog, base_snapshot, source_snapshot).await?;
|
||||
match delta {
|
||||
Some(staged) => publish_rewritten_merge_table(target_db, table_key, &staged).await,
|
||||
None => Ok(crate::db::SubTableUpdate {
|
||||
table_key: table_key.to_string(),
|
||||
table_version: target_entry
|
||||
.map(|e| e.table_version)
|
||||
.unwrap_or(source_entry.table_version),
|
||||
table_branch: None,
|
||||
row_count: source_entry.row_count,
|
||||
version_metadata: target_entry
|
||||
.map(|entry| entry.version_metadata.clone())
|
||||
.unwrap_or_else(|| source_entry.version_metadata.clone()),
|
||||
}),
|
||||
}
|
||||
}
|
||||
// Source on branch, target on main, empty delta — adopt source's
|
||||
// version by a pointer switch (the non-empty case is `AdoptWithDelta`).
|
||||
(None, Some(_source_branch)) => Ok(crate::db::SubTableUpdate {
|
||||
table_key: table_key.to_string(),
|
||||
table_version: target_entry
|
||||
.map(|e| e.table_version)
|
||||
.unwrap_or(source_entry.table_version),
|
||||
table_branch: None,
|
||||
row_count: source_entry.row_count,
|
||||
version_metadata: target_entry
|
||||
.map(|entry| entry.version_metadata.clone())
|
||||
.unwrap_or_else(|| source_entry.version_metadata.clone()),
|
||||
}),
|
||||
// Both on branches
|
||||
(Some(target_branch), Some(source_branch)) => {
|
||||
if target_entry.and_then(|entry| entry.table_branch.as_deref()) == Some(target_branch) {
|
||||
// Target already owns this table — apply delta onto its lineage
|
||||
let delta =
|
||||
compute_source_delta(table_key, catalog, base_snapshot, source_snapshot)
|
||||
.await?;
|
||||
match delta {
|
||||
Some(staged) => {
|
||||
publish_rewritten_merge_table(target_db, table_key, &staged).await
|
||||
}
|
||||
None => Ok(crate::db::SubTableUpdate {
|
||||
table_key: table_key.to_string(),
|
||||
table_version: target_entry.unwrap().table_version,
|
||||
table_branch: Some(target_branch.to_string()),
|
||||
row_count: source_entry.row_count,
|
||||
version_metadata: target_entry.unwrap().version_metadata.clone(),
|
||||
}),
|
||||
}
|
||||
// Target already owns this table, empty delta — pointer switch
|
||||
// onto its own lineage (the non-empty case is `AdoptWithDelta`).
|
||||
Ok(crate::db::SubTableUpdate {
|
||||
table_key: table_key.to_string(),
|
||||
table_version: target_entry.unwrap().table_version,
|
||||
table_branch: Some(target_branch.to_string()),
|
||||
row_count: source_entry.row_count,
|
||||
version_metadata: target_entry.unwrap().version_metadata.clone(),
|
||||
})
|
||||
} else {
|
||||
// Target doesn't own this table yet — fork from source state.
|
||||
// This creates the target branch on the sub-table dataset.
|
||||
|
|
@ -1000,6 +1120,13 @@ async fn publish_rewritten_merge_table(
|
|||
}
|
||||
}
|
||||
|
||||
// Failpoint: crash after the Phase 1 merge_insert commit, before the delete.
|
||||
// Models a partial Phase B on the three-way path — the merged constructive
|
||||
// rows are on Lance HEAD but the delete has not committed and the
|
||||
// achieved-version intent has not been recorded, so recovery must roll BACK.
|
||||
// See tests/failpoints.rs::branch_merge_rewrite_partial_after_merge_rolls_back.
|
||||
crate::failpoints::maybe_fail("branch_merge.rewrite_after_merge_pre_delete")?;
|
||||
|
||||
// Phase 2: delete removed rows via deletion vectors.
|
||||
//
|
||||
// INLINE-COMMIT RESIDUAL: lance-6.0.1 does not expose a public
|
||||
|
|
@ -1023,6 +1150,14 @@ async fn publish_rewritten_merge_table(
|
|||
current_ds = new_ds;
|
||||
}
|
||||
|
||||
// Failpoint: crash after the Phase 2 delete commit, before the index build.
|
||||
// Models a partial Phase B on the three-way path — constructive rows +
|
||||
// deletes are on Lance HEAD but the achieved-version intent has not been
|
||||
// recorded, so recovery must roll BACK (the index is reconciler-owned derived
|
||||
// state, but the merge itself never reached its commit boundary). See
|
||||
// tests/failpoints.rs::branch_merge_rewrite_partial_after_delete_rolls_back.
|
||||
crate::failpoints::maybe_fail("branch_merge.rewrite_after_delete_pre_index")?;
|
||||
|
||||
// Phase 3: rebuild indices.
|
||||
//
|
||||
// `build_indices_on_dataset` uses `stage_create_btree_index` /
|
||||
|
|
@ -1054,6 +1189,160 @@ async fn publish_rewritten_merge_table(
|
|||
})
|
||||
}
|
||||
|
||||
/// Scan a staged temp table and concat its non-empty batches into the single
|
||||
/// batch that `stage_append` / `stage_merge_insert` consume. Returns `None` when
|
||||
/// the table has no rows (both staged primitives reject an empty batch).
|
||||
async fn scan_staged_combined(
|
||||
target_db: &Omnigraph,
|
||||
table: &StagedTable,
|
||||
) -> Result<Option<RecordBatch>> {
|
||||
crate::instrumentation::record_scan_staged_combined();
|
||||
let snapshot = SnapshotHandle::new(table.dataset.clone());
|
||||
let batches: Vec<RecordBatch> = target_db
|
||||
.storage()
|
||||
.scan_batches_for_rewrite(&snapshot)
|
||||
.await?
|
||||
.into_iter()
|
||||
.filter(|batch| batch.num_rows() > 0)
|
||||
.collect();
|
||||
if batches.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
let combined = if batches.len() == 1 {
|
||||
batches.into_iter().next().unwrap()
|
||||
} else {
|
||||
let schema = batches[0].schema();
|
||||
arrow_select::concat::concat_batches(&schema, &batches)
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?
|
||||
};
|
||||
Ok(Some(combined))
|
||||
}
|
||||
|
||||
/// Apply an [`AdoptDelta`] onto the target's base lineage (the fast-forward /
|
||||
/// target-owns path). Kept separate from `publish_rewritten_merge_table` (the
|
||||
/// three-way path) because the two paths diverge: commit 3 splits this Phase 1
|
||||
/// into append (new) + merge_insert (changed), and commit 6 makes its index
|
||||
/// coverage incremental — neither of which the three-way path takes.
|
||||
///
|
||||
/// `open_for_mutation(Merge)` opens the target's own table lineage (active
|
||||
/// branch is the merge target after the caller's swap), so every write lands on
|
||||
/// the target and survives source-branch deletion — GC-safe.
|
||||
///
|
||||
/// TRANSITIONAL — removed by the fragment-adopt work (see [`AdoptDelta`]): the
|
||||
/// multi-commit append → upsert → delete publish here (the source of the
|
||||
/// partial-Phase-B recovery window the sidecar confirmation guards) collapses to
|
||||
/// a single fragment-graft commit per table, so this whole function goes away.
|
||||
async fn publish_adopted_delta(
|
||||
target_db: &Omnigraph,
|
||||
table_key: &str,
|
||||
delta: &AdoptDelta,
|
||||
) -> Result<crate::db::SubTableUpdate> {
|
||||
let (ds, full_path, table_branch) = target_db
|
||||
.open_for_mutation(table_key, crate::db::MutationOpKind::Merge)
|
||||
.await?;
|
||||
let mut current_ds = ds;
|
||||
|
||||
// Phase 1a: append the NEW rows. `stage_append_stream` is a streaming
|
||||
// `Operation::Append` — no hash join — so it never buffers the delta and
|
||||
// cannot exhaust the DataFusion memory pool (the OOM fix). It streams the
|
||||
// staged rows straight into the target (Lance rolls fragments at
|
||||
// `max_rows_per_file`), so memory is bounded regardless of how many rows the
|
||||
// connector appended — never the whole set in one batch. New ids are absent
|
||||
// from base by construction (the ordered walk only classifies a row
|
||||
// `(None, Some)` when base lacks it), so they never collide on `id`. Routed
|
||||
// through the staged primitive so a failure between writing fragments and
|
||||
// committing leaves no Lance-HEAD drift. `appends` is `Some` only when the
|
||||
// staged table is non-empty (`compute_adopt_delta`).
|
||||
if let Some(append_table) = &delta.appends {
|
||||
let source = SnapshotHandle::new(append_table.dataset.clone());
|
||||
let staged = target_db
|
||||
.storage()
|
||||
.stage_append_stream(&current_ds, &source, &[])
|
||||
.await?;
|
||||
current_ds = target_db
|
||||
.storage()
|
||||
.commit_staged(current_ds, staged)
|
||||
.await?;
|
||||
}
|
||||
|
||||
// Failpoint: crash after the Phase 1a append commit, before the upsert.
|
||||
// Models a partial Phase B — appends are on Lance HEAD but the upserts/deletes
|
||||
// have not committed and the achieved-version intent has not been recorded, so
|
||||
// recovery must roll BACK (not publish the appends-only state). See
|
||||
// tests/failpoints.rs::branch_merge_adopt_partial_after_append_rolls_back.
|
||||
crate::failpoints::maybe_fail("branch_merge.adopt_after_append_pre_upsert")?;
|
||||
|
||||
// Phase 1b: upsert the CHANGED rows. The merge_insert hash join is now
|
||||
// bounded to the genuinely-changed set, not the whole delta. It runs against
|
||||
// the committed view that already includes the appends; the changed ids are
|
||||
// disjoint from the appended ids (each id is classified into exactly one of
|
||||
// new / changed / deleted / unchanged in the single ordered walk), so the
|
||||
// join never collides with an appended row.
|
||||
if let Some(upsert_table) = &delta.upserts {
|
||||
if let Some(combined) = scan_staged_combined(target_db, upsert_table).await? {
|
||||
let staged_merge = target_db
|
||||
.storage()
|
||||
.stage_merge_insert(
|
||||
current_ds.clone(),
|
||||
combined,
|
||||
vec!["id".to_string()],
|
||||
lance::dataset::WhenMatched::UpdateAll,
|
||||
lance::dataset::WhenNotMatched::InsertAll,
|
||||
)
|
||||
.await?;
|
||||
current_ds = target_db
|
||||
.storage()
|
||||
.commit_staged(current_ds, staged_merge)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
|
||||
// Failpoint: crash after the Phase 1b upsert commit, before the delete.
|
||||
// Models a partial Phase B — appends + upserts on Lance HEAD but the delete
|
||||
// has not committed and the achieved-version intent has not been recorded, so
|
||||
// recovery must roll BACK. See
|
||||
// tests/failpoints.rs::branch_merge_adopt_partial_after_upsert_rolls_back.
|
||||
crate::failpoints::maybe_fail("branch_merge.adopt_after_upsert_pre_delete")?;
|
||||
|
||||
// Phase 2: delete removed rows via deletion vectors (inline-commit residual,
|
||||
// same as the three-way path until Lance ships a public two-phase delete).
|
||||
if !delta.deleted_ids.is_empty() {
|
||||
let escaped: Vec<String> = delta
|
||||
.deleted_ids
|
||||
.iter()
|
||||
.map(|id| format!("'{}'", id.replace('\'', "''")))
|
||||
.collect();
|
||||
let filter = format!("id IN ({})", escaped.join(", "));
|
||||
let (new_ds, _) = target_db
|
||||
.storage_inline_residual()
|
||||
.delete_where(&full_path, current_ds, &filter)
|
||||
.await?;
|
||||
current_ds = new_ds;
|
||||
}
|
||||
|
||||
// Phase 4: index coverage is reconciler-owned on the adopt path. Unlike the
|
||||
// three-way `RewriteMerged` path, this does NOT build indices inline: the
|
||||
// appended/upserted rows are left uncovered (reads stay correct via
|
||||
// brute-force — indexes are derived state, invariant 7) and
|
||||
// `optimize` / `ensure_indices` folds them in. This keeps even the first
|
||||
// merge into a freshly schema-applied (unindexed) table fast — no inline IVF
|
||||
// retrain on the publish path — and is the row-level approximation of Layer
|
||||
// 2's fragment-adopt, where the source branch's already-built indices carry
|
||||
// over by reference. See docs/user/branching/merge.md.
|
||||
let final_state = target_db
|
||||
.storage()
|
||||
.table_state(&full_path, &current_ds)
|
||||
.await?;
|
||||
|
||||
Ok(crate::db::SubTableUpdate {
|
||||
table_key: table_key.to_string(),
|
||||
table_version: final_state.version,
|
||||
table_branch,
|
||||
row_count: final_state.row_count,
|
||||
version_metadata: final_state.version_metadata,
|
||||
})
|
||||
}
|
||||
|
||||
impl Omnigraph {
|
||||
pub async fn branch_merge(&self, source: &str, target: &str) -> Result<MergeOutcome> {
|
||||
self.branch_merge_as(source, target, None).await
|
||||
|
|
@ -1262,7 +1551,16 @@ impl Omnigraph {
|
|||
continue;
|
||||
}
|
||||
if same_manifest_state(base_entry, target_entry) {
|
||||
candidates.insert(table_key.clone(), CandidateTableState::AdoptSourceState);
|
||||
let candidate = classify_adopt(
|
||||
self,
|
||||
&self.catalog(),
|
||||
base_snapshot,
|
||||
source_snapshot,
|
||||
&target_snapshot,
|
||||
table_key,
|
||||
)
|
||||
.await?;
|
||||
candidates.insert(table_key.clone(), candidate);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -1290,31 +1588,24 @@ impl Omnigraph {
|
|||
validate_merge_candidates(self, source_snapshot, &target_snapshot, &candidates).await?;
|
||||
|
||||
// Recovery sidecar: protect the per-table commit_staged loop.
|
||||
// Pin only `RewriteMerged` candidates because they always
|
||||
// advance Lance HEAD through `publish_rewritten_merge_table`
|
||||
// (which runs stage_merge_insert + delete_where + index
|
||||
// rebuilds — multiple commit_staged calls per table; loose
|
||||
// classification handles the multi-step drift).
|
||||
// Pin `RewriteMerged` and `AdoptWithDelta` candidates — both advance
|
||||
// Lance HEAD before the manifest publish (RewriteMerged via
|
||||
// publish_rewritten_merge_table; AdoptWithDelta via publish_adopted_delta:
|
||||
// stage_append + stage_merge_insert + delete_where — multiple
|
||||
// commit_staged calls per table, which the loose classification handles
|
||||
// as multi-step drift).
|
||||
//
|
||||
// `AdoptSourceState` candidates are NOT pinned: their publish
|
||||
// path is `publish_adopted_source_state`, whose subcases mostly
|
||||
// don't advance Lance HEAD (pure manifest pointer switch, or
|
||||
// fork via `fork_dataset_from_entry_state` which only adds a
|
||||
// Lance branch ref). If those subcases were pinned, recovery
|
||||
// would classify them as NoMovement and the all-or-nothing
|
||||
// decision would force a rollback that destroys legitimately-
|
||||
// committed work on sibling RewriteMerged tables.
|
||||
// (`publish_adopted_source_state`) is a pure pointer switch or a fork
|
||||
// (`fork_dataset_from_entry_state` only adds a Lance branch ref), neither
|
||||
// of which advances the data HEAD. Pinning them would classify as
|
||||
// NoMovement and force an all-or-nothing rollback that destroys sibling
|
||||
// tables' committed work.
|
||||
//
|
||||
// Residual: two `AdoptSourceState` subcases (when source has a
|
||||
// table_branch AND the source delta is non-empty) internally
|
||||
// call `publish_rewritten_merge_table` and DO advance HEAD.
|
||||
// Those are not covered by this sidecar — if they fail mid-
|
||||
// commit, the residual persists until the next ReadWrite open
|
||||
// detects it via a subsequent ExpectedVersionMismatch from a
|
||||
// later writer that touches the same table. Closing this gap
|
||||
// requires pre-computing source deltas during candidate
|
||||
// classification (a structural change to `CandidateTableState`)
|
||||
// and is left as follow-up work.
|
||||
// The former gap — adopt subcases that applied a non-empty delta advanced
|
||||
// HEAD unpinned — is closed: `classify_adopt` pre-computes the delta, so a
|
||||
// HEAD-advancing adopt is `AdoptWithDelta` (pinned here) and an empty-delta
|
||||
// adopt stays `AdoptSourceState`.
|
||||
// Acquire per-(table_key, target_branch) queues for every table
|
||||
// touched by the merge plan. Sorted-order acquisition prevents
|
||||
// lock-order inversion against concurrent multi-table writers.
|
||||
|
|
@ -1334,6 +1625,7 @@ impl Omnigraph {
|
|||
candidates.get(*table_key),
|
||||
Some(CandidateTableState::RewriteMerged(_))
|
||||
| Some(CandidateTableState::AdoptSourceState)
|
||||
| Some(CandidateTableState::AdoptWithDelta(_))
|
||||
)
|
||||
})
|
||||
.map(|table_key| (table_key.clone(), active_branch_for_keys.clone()))
|
||||
|
|
@ -1347,7 +1639,9 @@ impl Omnigraph {
|
|||
};
|
||||
if !matches!(
|
||||
candidate,
|
||||
CandidateTableState::RewriteMerged(_) | CandidateTableState::AdoptSourceState
|
||||
CandidateTableState::RewriteMerged(_)
|
||||
| CandidateTableState::AdoptSourceState
|
||||
| CandidateTableState::AdoptWithDelta(_)
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -1368,7 +1662,10 @@ impl Omnigraph {
|
|||
.iter()
|
||||
.filter_map(|table_key| {
|
||||
let candidate = candidates.get(table_key)?;
|
||||
if !matches!(candidate, CandidateTableState::RewriteMerged(_)) {
|
||||
if !matches!(
|
||||
candidate,
|
||||
CandidateTableState::RewriteMerged(_) | CandidateTableState::AdoptWithDelta(_)
|
||||
) {
|
||||
return None;
|
||||
}
|
||||
let entry = target_snapshot.entry(table_key)?;
|
||||
|
|
@ -1377,6 +1674,11 @@ impl Omnigraph {
|
|||
table_path: self.storage().dataset_uri(&entry.table_path),
|
||||
expected_version: entry.table_version,
|
||||
post_commit_pin: entry.table_version + 1,
|
||||
// Stamped after the whole per-table publish completes
|
||||
// (Phase-B confirmation, just before the manifest publish).
|
||||
// Until then `None` marks an unfinished publish that
|
||||
// recovery must roll back, not roll forward.
|
||||
confirmed_version: None,
|
||||
// Use the merge target branch (where commits actually
|
||||
// land), NOT entry.table_branch (where the table
|
||||
// currently lives). publish_rewritten_merge_table calls
|
||||
|
|
@ -1393,7 +1695,14 @@ impl Omnigraph {
|
|||
})
|
||||
})
|
||||
.collect();
|
||||
let recovery_handle = if recovery_pins.is_empty() {
|
||||
// Keep the sidecar alongside its handle: after the per-table publish
|
||||
// loop completes (Phase B), we re-write it with each table's confirmed
|
||||
// version before the manifest publish, so recovery can tell a finished
|
||||
// publish (roll forward) from a partial one (roll back).
|
||||
let mut recovery: Option<(
|
||||
crate::db::manifest::RecoverySidecar,
|
||||
crate::db::manifest::RecoverySidecarHandle,
|
||||
)> = if recovery_pins.is_empty() {
|
||||
None
|
||||
} else {
|
||||
// Use the merge target branch directly, NOT a heuristic
|
||||
|
|
@ -1418,14 +1727,13 @@ impl Omnigraph {
|
|||
// this, future merges between the same pair lose
|
||||
// already-up-to-date detection and merge-base correctness.
|
||||
sidecar.merge_source_commit_id = Some(source_head_commit_id.to_string());
|
||||
Some(
|
||||
crate::db::manifest::write_sidecar(
|
||||
self.root_uri(),
|
||||
self.storage_adapter(),
|
||||
&sidecar,
|
||||
)
|
||||
.await?,
|
||||
let handle = crate::db::manifest::write_sidecar(
|
||||
self.root_uri(),
|
||||
self.storage_adapter(),
|
||||
&sidecar,
|
||||
)
|
||||
.await?;
|
||||
Some((sidecar, handle))
|
||||
};
|
||||
|
||||
let mut updates = Vec::new();
|
||||
|
|
@ -1436,15 +1744,11 @@ impl Omnigraph {
|
|||
};
|
||||
let update = match candidate_state {
|
||||
CandidateTableState::AdoptSourceState => {
|
||||
publish_adopted_source_state(
|
||||
self,
|
||||
&self.catalog(),
|
||||
base_snapshot,
|
||||
source_snapshot,
|
||||
&target_snapshot,
|
||||
table_key,
|
||||
)
|
||||
.await?
|
||||
publish_adopted_source_state(self, source_snapshot, &target_snapshot, table_key)
|
||||
.await?
|
||||
}
|
||||
CandidateTableState::AdoptWithDelta(delta) => {
|
||||
publish_adopted_delta(self, table_key, delta).await?
|
||||
}
|
||||
CandidateTableState::RewriteMerged(staged) => {
|
||||
publish_rewritten_merge_table(self, table_key, staged).await?
|
||||
|
|
@ -1456,10 +1760,33 @@ impl Omnigraph {
|
|||
updates.push(update);
|
||||
}
|
||||
|
||||
// Phase-B confirmation: every table's publish finished, so stamp the
|
||||
// sidecar with each table's exact achieved version before the manifest
|
||||
// publish. This is the commit point of the recovery WAL: a crash from
|
||||
// here on rolls FORWARD to these versions, while a crash anywhere in the
|
||||
// publish loop above left the sidecar unconfirmed and rolls BACK. The
|
||||
// `updates` carry the real per-table final versions (multiple
|
||||
// commit_staged calls per table, so not derivable from `post_commit_pin`
|
||||
// alone). A failure here leaves the unconfirmed sidecar → roll back.
|
||||
if let Some((sidecar, _)) = recovery.as_mut() {
|
||||
let confirmed_versions: std::collections::HashMap<String, u64> = updates
|
||||
.iter()
|
||||
.map(|u| (u.table_key.clone(), u.table_version))
|
||||
.collect();
|
||||
crate::db::manifest::confirm_sidecar_phase_b(
|
||||
self.root_uri(),
|
||||
self.storage_adapter(),
|
||||
sidecar,
|
||||
&confirmed_versions,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
// Failpoint: pin the per-writer Phase B → Phase C residual for
|
||||
// branch_merge. Lance HEAD has advanced on every touched table
|
||||
// (publish_*) but the manifest publish below hasn't run. Used
|
||||
// by `tests/failpoints.rs::branch_merge_phase_b_failure_recovered_on_next_open`.
|
||||
// (publish_*) AND the sidecar is confirmed, but the manifest publish
|
||||
// below hasn't run — so recovery rolls FORWARD. Used by
|
||||
// `tests/failpoints.rs::branch_merge_phase_b_failure_recovered_on_next_open`.
|
||||
crate::failpoints::maybe_fail("branch_merge.post_phase_b_pre_manifest_commit")?;
|
||||
|
||||
let manifest_version = if updates.is_empty() {
|
||||
|
|
@ -1471,7 +1798,7 @@ impl Omnigraph {
|
|||
// Recovery sidecar lifecycle: delete after manifest publish.
|
||||
// Best-effort cleanup; the merge already landed durably so
|
||||
// failing the user here is undesirable.
|
||||
if let Some(handle) = recovery_handle {
|
||||
if let Some((_, handle)) = recovery {
|
||||
if let Err(err) =
|
||||
crate::db::manifest::delete_sidecar(&handle, self.storage_adapter()).await
|
||||
{
|
||||
|
|
|
|||
|
|
@ -477,6 +477,12 @@ fn predicate_to_sql(
|
|||
}
|
||||
};
|
||||
|
||||
// #283: emit the column UNQUOTED. Lance's `Scanner::filter(&str)` (the
|
||||
// committed-scan consumer) preserves an unquoted identifier's case but
|
||||
// treats a double-quoted `"col"` as a string literal, so quoting here
|
||||
// would silently match zero committed rows. The pending-batch MemTable
|
||||
// query is instead made case-preserving by disabling DataFusion identifier
|
||||
// normalization on its `SessionContext` (see `scan_pending_batches`).
|
||||
Ok(format!("{} {} {}", column, op, value_sql))
|
||||
}
|
||||
|
||||
|
|
@ -1477,3 +1483,29 @@ fn enrich_mutation_params(params: &ParamMap) -> Result<ParamMap> {
|
|||
}
|
||||
Ok(resolved)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod predicate_sql_tests {
    use super::*;

    /// Regression test for #283.
    ///
    /// A camelCase column named in a mutation predicate has to come out of
    /// `predicate_to_sql` unquoted and with its case intact. The committed-scan
    /// consumer (Lance's `Scanner::filter(&str)`) keeps the case of an unquoted
    /// identifier but reads a double-quoted `"col"` as a string literal, which
    /// silently matches zero rows. The pending MemTable path keeps case by
    /// turning off DataFusion identifier normalization on its context rather
    /// than by quoting in the predicate string.
    #[test]
    fn predicate_to_sql_preserves_camelcase_column_unquoted() {
        let no_params = ParamMap::new();
        let camel_case_predicate = IRMutationPredicate {
            property: "repoName".to_string(),
            op: CompOp::Eq,
            value: IRExpr::Literal(Literal::String("acme".into())),
        };

        let sql = predicate_to_sql(&camel_case_predicate, &no_params, false).unwrap();

        assert_eq!(
            sql, "repoName = 'acme'",
            "column must be unquoted and case-preserved, got {sql}"
        );
    }
}
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ impl Omnigraph {
|
|||
query_name: &str,
|
||||
params: &ParamMap,
|
||||
) -> Result<QueryResult> {
|
||||
self.ensure_schema_state_valid().await?;
|
||||
// resolved_target validates the schema contract; no redundant call here.
|
||||
let resolved = self.resolved_target(target).await?;
|
||||
let catalog = self.catalog();
|
||||
|
||||
|
|
@ -80,7 +80,7 @@ impl Omnigraph {
|
|||
query_name: &str,
|
||||
params: &ParamMap,
|
||||
) -> Result<QueryResult> {
|
||||
self.ensure_schema_state_valid().await?;
|
||||
// snapshot_at_version validates the schema contract; no redundant call here.
|
||||
let snapshot = self.snapshot_at_version(version).await?;
|
||||
let catalog = self.catalog();
|
||||
|
||||
|
|
@ -2149,9 +2149,13 @@ pub(super) fn ir_expr_to_expr(
|
|||
params: &ParamMap,
|
||||
target: Option<&arrow_schema::DataType>,
|
||||
) -> Option<datafusion::prelude::Expr> {
|
||||
use datafusion::prelude::col;
|
||||
use datafusion::prelude::ident;
|
||||
match expr {
|
||||
IRExpr::PropAccess { property, .. } => Some(col(property)),
|
||||
// #283: `ident()` preserves the identifier's case. `col()` would route
|
||||
// through SQL identifier normalization and lowercase an unquoted
|
||||
// camelCase column (`repoName` → `reponame`), which then fails to
|
||||
// resolve against the case-sensitive Lance/Arrow schema.
|
||||
IRExpr::PropAccess { property, .. } => Some(ident(property)),
|
||||
IRExpr::Literal(l) => literal_to_expr_coerced(l, target),
|
||||
IRExpr::Param(name) => params
|
||||
.get(name)
|
||||
|
|
@ -2656,4 +2660,61 @@ mod literal_lowering_tests {
|
|||
"reversed-operand literal must coerce to the Int32 column type, got {expr:?}"
|
||||
);
|
||||
}
|
||||
|
||||
// Name of the left operand's column in a binary comparison `col OP lit`.
|
||||
fn binary_left_column_name(e: &Expr) -> Option<String> {
|
||||
match e {
|
||||
Expr::BinaryExpr(b) => match b.left.as_ref() {
|
||||
Expr::Column(c) => Some(c.name.clone()),
|
||||
_ => None,
|
||||
},
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
// #283: the pushed-down column reference for a camelCase property must carry
// the exact column name (`repoName`), not a SQL-normalized, lowercased one —
// `col()` lowercases unquoted identifiers.
#[test]
fn ir_filter_preserves_camelcase_column_name() {
    use arrow_schema::{DataType, Field};

    let repo_name_field = Field::new("repoName", DataType::Utf8, true);
    let schema = arrow_schema::Schema::new(vec![repo_name_field]);

    let left = IRExpr::PropAccess {
        variable: "d".into(),
        property: "repoName".into(),
    };
    let right = IRExpr::Literal(Literal::String("acme".into()));
    let filter = IRFilter {
        left,
        op: CompOp::Eq,
        right,
    };

    let expr = ir_filter_to_expr(&filter, &ParamMap::new(), Some(&schema)).unwrap();
    let left_column = binary_left_column_name(&expr);

    assert_eq!(
        left_column.as_deref(),
        Some("repoName"),
        "camelCase column must be preserved (not lowercased to `reponame`), got {expr:?}"
    );
}
|
||||
|
||||
// Index preservation: the col→ident change must leave literal coercion alone.
// A camelCase numeric column still gets its literal coerced to the column type
// (keeping the scalar BTREE eligible); the coercion path looks the column type
// up via `field_with_name`.
#[test]
fn ir_filter_coerces_literal_for_camelcase_int_column() {
    use arrow_schema::{DataType, Field};

    let item_count_field = Field::new("itemCount", DataType::Int32, true);
    let schema = arrow_schema::Schema::new(vec![item_count_field]);

    let left = IRExpr::PropAccess {
        variable: "m".into(),
        property: "itemCount".into(),
    };
    let right = IRExpr::Literal(Literal::Integer(2));
    let filter = IRFilter {
        left,
        op: CompOp::Eq,
        right,
    };

    let expr = ir_filter_to_expr(&filter, &ParamMap::new(), Some(&schema)).unwrap();
    let literal_is_int32 = binary_has_int32_literal(&expr);

    assert!(
        literal_is_int32,
        "camelCase int column must keep its coerced Int32 literal (BTREE-eligible), got {expr:?}"
    );
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -712,6 +712,9 @@ impl StagedMutation {
|
|||
table_path: entry.path.full_path.clone(),
|
||||
expected_version: entry.expected_version,
|
||||
post_commit_pin: entry.expected_version + 1,
|
||||
// Mutation/Load use strict single-commit classification, not
|
||||
// BranchMerge's Phase-B confirmation — left None.
|
||||
confirmed_version: None,
|
||||
table_branch: entry.path.table_branch.clone(),
|
||||
});
|
||||
}
|
||||
|
|
@ -738,6 +741,7 @@ impl StagedMutation {
|
|||
// can advance HEAD by more than one version (e.g.,
|
||||
// when Lance internally compacts deletion vectors).
|
||||
post_commit_pin: update.table_version,
|
||||
confirmed_version: None,
|
||||
table_branch: path.table_branch.clone(),
|
||||
});
|
||||
}
|
||||
|
|
|
|||
320
crates/omnigraph/src/instrumentation.rs
Normal file
320
crates/omnigraph/src/instrumentation.rs
Normal file
|
|
@ -0,0 +1,320 @@
|
|||
//! Read-path cost instrumentation (test seam).
|
||||
//!
|
||||
//! Two boundary instruments let cost-budget tests assert that a warm read does
|
||||
//! no redundant IO, the way LanceDB's IO-counted tests do (see
|
||||
//! `docs/dev/testing.md`, "Cost-budget tests"):
|
||||
//!
|
||||
//! - **Lance object store** — a per-query [`WrappingObjectStore`] attached to the
|
||||
//! datasets a query opens, so a test counts real `read_iops`. Delivered through
|
||||
//! a task-local ([`QueryIoProbes`]) set by the test; production leaves it unset,
|
||||
//! so the open helpers attach nothing (one unset-`Option` check per open).
|
||||
//! - **omnigraph `StorageAdapter`** — [`CountingStorageAdapter`], a decorator that
|
||||
//! counts per-method calls (the schema-contract reads on the query path).
|
||||
//!
|
||||
//! Nothing here changes runtime behavior: the wrappers only observe, and the
|
||||
//! decorator delegates every call. `IOTracker` (the concrete counter) lives in
|
||||
//! tests via the `lance-io` dev-dependency; this module stays generic over the
|
||||
//! `lance::io`-re-exported trait, so it adds no production dependency.
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use lance::Dataset;
|
||||
use lance::dataset::builder::DatasetBuilder;
|
||||
use lance::io::{ObjectStoreParams, WrappingObjectStore};
|
||||
|
||||
use crate::error::{OmniError, Result};
|
||||
use crate::storage::StorageAdapter;
|
||||
|
||||
/// Per-query IO probes, installed for a query's task via [`with_query_io_probes`].
|
||||
///
|
||||
/// Each wrapper is attached (when present) to the datasets that category opens,
|
||||
/// so a test reads `read_iops` off its own `IOTracker` handle. `probe_count`
|
||||
/// records calls to the version probe (which runs on the coordinator's already-open
|
||||
/// handle, so it is counted by invocation rather than by the per-query wrappers).
|
||||
#[derive(Clone, Default)]
|
||||
pub struct QueryIoProbes {
|
||||
pub manifest_wrapper: Option<Arc<dyn WrappingObjectStore>>,
|
||||
pub commit_graph_wrapper: Option<Arc<dyn WrappingObjectStore>>,
|
||||
/// Attached to the per-table data opens a query performs (the cache-miss
|
||||
/// path in `SubTableEntry::open`). Lets a cost test assert how many tables
|
||||
/// a query actually opened — N on a cold read, 0 on a warm repeat once the
|
||||
/// handle cache (Fix 3) serves them.
|
||||
pub table_wrapper: Option<Arc<dyn WrappingObjectStore>>,
|
||||
pub probe_count: Arc<AtomicU64>,
|
||||
}
|
||||
|
||||
tokio::task_local! {
|
||||
static QUERY_IO_PROBES: QueryIoProbes;
|
||||
}
|
||||
|
||||
/// Run `fut` with per-query IO probes installed. Test-only entry point; nothing
|
||||
/// in production sets the probes, so the accessors below return `None`/no-op.
|
||||
pub async fn with_query_io_probes<F>(probes: QueryIoProbes, fut: F) -> F::Output
|
||||
where
|
||||
F: std::future::Future,
|
||||
{
|
||||
QUERY_IO_PROBES.scope(probes, fut).await
|
||||
}
|
||||
|
||||
fn current<R>(f: impl FnOnce(&QueryIoProbes) -> R) -> Option<R> {
|
||||
QUERY_IO_PROBES.try_with(f).ok()
|
||||
}
|
||||
|
||||
pub(crate) fn manifest_wrapper() -> Option<Arc<dyn WrappingObjectStore>> {
|
||||
current(|p| p.manifest_wrapper.clone()).flatten()
|
||||
}
|
||||
|
||||
pub(crate) fn commit_graph_wrapper() -> Option<Arc<dyn WrappingObjectStore>> {
|
||||
current(|p| p.commit_graph_wrapper.clone()).flatten()
|
||||
}
|
||||
|
||||
pub(crate) fn table_wrapper() -> Option<Arc<dyn WrappingObjectStore>> {
|
||||
current(|p| p.table_wrapper.clone()).flatten()
|
||||
}
|
||||
|
||||
/// Record one version-probe invocation against the active per-query probes.
|
||||
/// No-op when no probes are installed (production).
|
||||
pub(crate) fn record_probe() {
|
||||
let _ = current(|p| p.probe_count.fetch_add(1, Ordering::Relaxed));
|
||||
}
|
||||
|
||||
/// Staged-write counters for one operation, installed on a task through
/// [`with_merge_write_probes`].
///
/// A cost-budget test uses these to assert WHICH staged-write primitive an
/// operation calls — for example that an append-only fast-forward merge sends
/// new rows through `stage_append` and performs **zero** `stage_merge_insert`
/// (the full-outer hash join). Only the publish-path primitives are counted;
/// merge-staging temp tables go through `append_or_create_batch` instead.
///
/// Every counter is a shared `Arc<AtomicU64>`, so clones of this struct observe
/// and update the same values.
#[derive(Clone, Default)]
pub struct MergeWriteProbes {
    /// Number of `stage_append` calls.
    pub stage_append_calls: Arc<AtomicU64>,
    /// Total rows passed to `stage_append`.
    pub stage_append_rows: Arc<AtomicU64>,
    /// Number of `stage_merge_insert` calls.
    pub stage_merge_insert_calls: Arc<AtomicU64>,
    /// Total rows passed to `stage_merge_insert`.
    pub stage_merge_insert_rows: Arc<AtomicU64>,
    /// Inline vector-index (IVF) builds. The fast-forward adopt path defers
    /// index coverage to the reconciler, so an adopt merge must do 0 of these.
    pub create_vector_index_calls: Arc<AtomicU64>,
    /// Times the merge materialized a staged delta into one in-memory batch
    /// (`scan_staged_combined`). The append path streams instead, so an
    /// append-only fast-forward merge must do 0 of these.
    pub scan_staged_combined_calls: Arc<AtomicU64>,
}

impl MergeWriteProbes {
    /// Read one counter with relaxed ordering.
    fn snapshot(counter: &AtomicU64) -> u64 {
        counter.load(Ordering::Relaxed)
    }

    /// Current number of recorded `stage_append` calls.
    pub fn stage_append_calls(&self) -> u64 {
        Self::snapshot(&self.stage_append_calls)
    }

    /// Current total of rows recorded for `stage_append`.
    pub fn stage_append_rows(&self) -> u64 {
        Self::snapshot(&self.stage_append_rows)
    }

    /// Current number of recorded `stage_merge_insert` calls.
    pub fn stage_merge_insert_calls(&self) -> u64 {
        Self::snapshot(&self.stage_merge_insert_calls)
    }

    /// Current total of rows recorded for `stage_merge_insert`.
    pub fn stage_merge_insert_rows(&self) -> u64 {
        Self::snapshot(&self.stage_merge_insert_rows)
    }

    /// Current number of recorded inline vector-index builds.
    pub fn create_vector_index_calls(&self) -> u64 {
        Self::snapshot(&self.create_vector_index_calls)
    }

    /// Current number of recorded `scan_staged_combined` materializations.
    pub fn scan_staged_combined_calls(&self) -> u64 {
        Self::snapshot(&self.scan_staged_combined_calls)
    }
}
|
||||
|
||||
tokio::task_local! {
|
||||
static MERGE_WRITE_PROBES: MergeWriteProbes;
|
||||
}
|
||||
|
||||
/// Run `fut` with staged-write probes installed. Test-only entry point; nothing
|
||||
/// in production sets the probes, so `record_stage_*` below are no-ops.
|
||||
pub async fn with_merge_write_probes<F>(probes: MergeWriteProbes, fut: F) -> F::Output
|
||||
where
|
||||
F: std::future::Future,
|
||||
{
|
||||
MERGE_WRITE_PROBES.scope(probes, fut).await
|
||||
}
|
||||
|
||||
/// Record one `stage_append` of `rows` rows against the active probes. No-op in
|
||||
/// production (no probes installed).
|
||||
pub(crate) fn record_stage_append(rows: u64) {
|
||||
let _ = MERGE_WRITE_PROBES.try_with(|p| {
|
||||
p.stage_append_calls.fetch_add(1, Ordering::Relaxed);
|
||||
p.stage_append_rows.fetch_add(rows, Ordering::Relaxed);
|
||||
});
|
||||
}
|
||||
|
||||
/// Record one `stage_merge_insert` of `rows` rows against the active probes.
|
||||
/// No-op in production (no probes installed).
|
||||
pub(crate) fn record_stage_merge_insert(rows: u64) {
|
||||
let _ = MERGE_WRITE_PROBES.try_with(|p| {
|
||||
p.stage_merge_insert_calls.fetch_add(1, Ordering::Relaxed);
|
||||
p.stage_merge_insert_rows.fetch_add(rows, Ordering::Relaxed);
|
||||
});
|
||||
}
|
||||
|
||||
/// Record one inline vector-index build against the active probes. No-op in
|
||||
/// production (no probes installed).
|
||||
pub(crate) fn record_create_vector_index() {
|
||||
let _ = MERGE_WRITE_PROBES.try_with(|p| {
|
||||
p.create_vector_index_calls.fetch_add(1, Ordering::Relaxed);
|
||||
});
|
||||
}
|
||||
|
||||
/// Record one `scan_staged_combined` materialization against the active probes.
|
||||
/// No-op in production (no probes installed).
|
||||
pub(crate) fn record_scan_staged_combined() {
|
||||
let _ = MERGE_WRITE_PROBES.try_with(|p| {
|
||||
p.scan_staged_combined_calls.fetch_add(1, Ordering::Relaxed);
|
||||
});
|
||||
}
|
||||
|
||||
/// Open a Lance dataset at `uri`, attaching `wrapper` (for IO counting) when
|
||||
/// present. With no wrapper this is exactly `Dataset::open(uri)`. The wrapper is
|
||||
/// set via `ObjectStoreParams` on the builder so the open itself is counted
|
||||
/// (`Dataset::with_object_store_wrappers` only wraps an already-open store).
|
||||
pub(crate) async fn open_dataset_tracked(
|
||||
uri: &str,
|
||||
wrapper: Option<Arc<dyn WrappingObjectStore>>,
|
||||
) -> Result<Dataset> {
|
||||
let result = match wrapper {
|
||||
None => Dataset::open(uri).await,
|
||||
Some(wrapper) => {
|
||||
DatasetBuilder::from_uri(uri)
|
||||
.with_store_params(ObjectStoreParams {
|
||||
object_store_wrapper: Some(wrapper),
|
||||
..Default::default()
|
||||
})
|
||||
.load()
|
||||
.await
|
||||
}
|
||||
};
|
||||
result.map_err(|e| OmniError::Lance(e.to_string()))
|
||||
}
|
||||
|
||||
/// Open a data-table dataset at `location` pinned to `version` — the cache-miss
|
||||
/// path of the data-read boundary (`SubTableEntry::open`). Attaches the shared
|
||||
/// per-graph `Session` (warms metadata/index caches across opens, LanceDB's
|
||||
/// one-session-per-connection pattern) and the per-query `table_wrapper` (for IO
|
||||
/// counting) when present. With neither, this is exactly the Fix-2
|
||||
/// `from_uri(location).with_version(version)` open.
|
||||
pub(crate) async fn open_table_dataset(
|
||||
location: &str,
|
||||
version: u64,
|
||||
session: Option<&Arc<lance::session::Session>>,
|
||||
) -> Result<Dataset> {
|
||||
let mut builder = DatasetBuilder::from_uri(location).with_version(version);
|
||||
if let Some(session) = session {
|
||||
builder = builder.with_session(session.clone());
|
||||
}
|
||||
if let Some(wrapper) = table_wrapper() {
|
||||
builder = builder.with_store_params(ObjectStoreParams {
|
||||
object_store_wrapper: Some(wrapper),
|
||||
..Default::default()
|
||||
});
|
||||
}
|
||||
builder
|
||||
.load()
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))
|
||||
}
|
||||
|
||||
/// Call counters, one per read-facing method, maintained by
/// [`CountingStorageAdapter`].
#[derive(Debug, Default)]
pub struct StorageReadCounts {
    /// Number of `read_text` calls.
    pub read_text: AtomicU64,
    /// Number of `exists` calls.
    pub exists: AtomicU64,
    /// Number of `read_text_versioned` calls.
    pub read_text_versioned: AtomicU64,
    /// Number of `list_dir` calls.
    pub list_dir: AtomicU64,
}

impl StorageReadCounts {
    /// Read one counter with relaxed ordering.
    fn snapshot(counter: &AtomicU64) -> u64 {
        counter.load(Ordering::Relaxed)
    }

    /// Current number of counted `read_text` calls.
    pub fn read_text(&self) -> u64 {
        Self::snapshot(&self.read_text)
    }

    /// Current number of counted `exists` calls.
    pub fn exists(&self) -> u64 {
        Self::snapshot(&self.exists)
    }

    /// Current number of counted `read_text_versioned` calls.
    pub fn read_text_versioned(&self) -> u64 {
        Self::snapshot(&self.read_text_versioned)
    }

    /// Current number of counted `list_dir` calls.
    pub fn list_dir(&self) -> u64 {
        Self::snapshot(&self.list_dir)
    }
}
|
||||
|
||||
/// Boundary decorator over a [`StorageAdapter`] that counts read-facing calls.
|
||||
/// Reads delegate after incrementing; writes delegate unchanged. Construct with
|
||||
/// [`CountingStorageAdapter::new`] and open an engine via
|
||||
/// `Omnigraph::open_with_storage` to count its non-Lance storage IO.
|
||||
#[derive(Debug)]
|
||||
pub struct CountingStorageAdapter {
|
||||
inner: Arc<dyn StorageAdapter>,
|
||||
counts: Arc<StorageReadCounts>,
|
||||
}
|
||||
|
||||
impl CountingStorageAdapter {
|
||||
/// Wrap `inner`, returning the adapter and a shared handle to its counts.
|
||||
pub fn new(inner: Arc<dyn StorageAdapter>) -> (Arc<dyn StorageAdapter>, Arc<StorageReadCounts>) {
|
||||
let counts = Arc::new(StorageReadCounts::default());
|
||||
let adapter: Arc<dyn StorageAdapter> = Arc::new(Self {
|
||||
inner,
|
||||
counts: Arc::clone(&counts),
|
||||
});
|
||||
(adapter, counts)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl StorageAdapter for CountingStorageAdapter {
|
||||
async fn read_text(&self, uri: &str) -> Result<String> {
|
||||
self.counts.read_text.fetch_add(1, Ordering::Relaxed);
|
||||
self.inner.read_text(uri).await
|
||||
}
|
||||
|
||||
async fn write_text(&self, uri: &str, contents: &str) -> Result<()> {
|
||||
self.inner.write_text(uri, contents).await
|
||||
}
|
||||
|
||||
async fn write_text_if_absent(&self, uri: &str, contents: &str) -> Result<bool> {
|
||||
self.inner.write_text_if_absent(uri, contents).await
|
||||
}
|
||||
|
||||
async fn exists(&self, uri: &str) -> Result<bool> {
|
||||
self.counts.exists.fetch_add(1, Ordering::Relaxed);
|
||||
self.inner.exists(uri).await
|
||||
}
|
||||
|
||||
async fn rename_text(&self, from_uri: &str, to_uri: &str) -> Result<()> {
|
||||
self.inner.rename_text(from_uri, to_uri).await
|
||||
}
|
||||
|
||||
async fn delete(&self, uri: &str) -> Result<()> {
|
||||
self.inner.delete(uri).await
|
||||
}
|
||||
|
||||
async fn list_dir(&self, dir_uri: &str) -> Result<Vec<String>> {
|
||||
self.counts.list_dir.fetch_add(1, Ordering::Relaxed);
|
||||
self.inner.list_dir(dir_uri).await
|
||||
}
|
||||
|
||||
async fn read_text_versioned(&self, uri: &str) -> Result<(String, String)> {
|
||||
self.counts.read_text_versioned.fetch_add(1, Ordering::Relaxed);
|
||||
self.inner.read_text_versioned(uri).await
|
||||
}
|
||||
|
||||
async fn write_text_if_match(
|
||||
&self,
|
||||
uri: &str,
|
||||
contents: &str,
|
||||
expected_version: &str,
|
||||
) -> Result<Option<String>> {
|
||||
self.inner
|
||||
.write_text_if_match(uri, contents, expected_version)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn delete_prefix(&self, prefix_uri: &str) -> Result<()> {
|
||||
self.inner.delete_prefix(prefix_uri).await
|
||||
}
|
||||
}
|
||||
|
|
@ -14,6 +14,7 @@ pub mod error;
|
|||
mod exec;
|
||||
pub mod failpoints;
|
||||
pub mod graph_index;
|
||||
pub mod instrumentation;
|
||||
pub mod loader;
|
||||
pub mod runtime_cache;
|
||||
pub mod storage;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,9 @@
|
|||
use std::collections::{HashMap, VecDeque};
|
||||
use std::hash::Hash;
|
||||
use std::sync::Arc;
|
||||
|
||||
use lance::Dataset;
|
||||
use lance::session::Session;
|
||||
use omnigraph_compiler::catalog::Catalog;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
|
|
@ -26,17 +29,15 @@ pub struct RuntimeCache {
|
|||
graph_indices: Mutex<GraphIndexCache>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
#[derive(Debug)]
|
||||
struct GraphIndexCache {
|
||||
entries: HashMap<GraphIndexCacheKey, Arc<GraphIndex>>,
|
||||
lru: VecDeque<GraphIndexCacheKey>,
|
||||
entries: LruMap<GraphIndexCacheKey, Arc<GraphIndex>>,
|
||||
}
|
||||
|
||||
impl RuntimeCache {
|
||||
pub async fn invalidate_all(&self) {
|
||||
let mut cache = self.graph_indices.lock().await;
|
||||
cache.entries.clear();
|
||||
cache.lru.clear();
|
||||
cache.entries.invalidate_all();
|
||||
}
|
||||
|
||||
pub async fn graph_index(
|
||||
|
|
@ -48,7 +49,6 @@ impl RuntimeCache {
|
|||
{
|
||||
let mut cache = self.graph_indices.lock().await;
|
||||
if let Some(index) = cache.entries.get(&key).cloned() {
|
||||
cache.touch(key.clone());
|
||||
return Ok(index);
|
||||
}
|
||||
}
|
||||
|
|
@ -62,7 +62,6 @@ impl RuntimeCache {
|
|||
let index = Arc::new(GraphIndex::build(&resolved.snapshot, &edge_types).await?);
|
||||
let mut cache = self.graph_indices.lock().await;
|
||||
if let Some(existing) = cache.entries.get(&key).cloned() {
|
||||
cache.touch(key);
|
||||
return Ok(existing);
|
||||
}
|
||||
cache.insert(key, Arc::clone(&index));
|
||||
|
|
@ -72,24 +71,86 @@ impl RuntimeCache {
|
|||
|
||||
impl GraphIndexCache {
|
||||
fn insert(&mut self, key: GraphIndexCacheKey, value: Arc<GraphIndex>) {
|
||||
self.entries.insert(key.clone(), value);
|
||||
self.touch(key);
|
||||
while self.entries.len() > 8 {
|
||||
let Some(oldest) = self.lru.pop_front() else {
|
||||
break;
|
||||
};
|
||||
if self.entries.remove(&oldest).is_some() {
|
||||
break;
|
||||
}
|
||||
self.entries.insert(key, value);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
fn touch(&mut self, key: GraphIndexCacheKey) {
|
||||
self.entries.touch(key);
|
||||
}
|
||||
}
|
||||
|
||||
/// Small bounded map with least-recently-used eviction.
///
/// `entries` stores the values. `lru` records key recency, ordered from
/// least-recently used (front, next eviction victim) to most-recently used
/// (back). `cap` is the maximum number of entries left in the map once an
/// `insert` returns.
#[derive(Debug)]
struct LruMap<K, V>
where
    K: Clone + Eq + Hash,
{
    entries: HashMap<K, V>,
    lru: VecDeque<K>,
    cap: usize,
}

impl<K, V> LruMap<K, V>
where
    K: Clone + Eq + Hash,
{
    /// Create an empty map that retains at most `cap` entries.
    fn new(cap: usize) -> Self {
        Self {
            entries: HashMap::new(),
            lru: VecDeque::new(),
            cap,
        }
    }

    /// Look up `key`; on a hit, mark it most-recently used and return the value.
    /// A miss returns `None` and leaves the recency order untouched.
    fn get(&mut self, key: &K) -> Option<&V> {
        // One hash lookup. `entries` and `lru` are disjoint fields, so the
        // recency queue can be updated while `value` is still borrowed.
        let value = self.entries.get(key)?;
        if self.lru.back() != Some(key) {
            self.lru.retain(|existing| existing != key);
            self.lru.push_back(key.clone());
        }
        Some(value)
    }

    /// Insert or overwrite `key`, mark it most-recently used, then evict from
    /// the least-recently-used end until at most `cap` entries remain.
    fn insert(&mut self, key: K, value: V) {
        self.entries.insert(key.clone(), value);
        self.touch(key);
        while self.entries.len() > self.cap {
            // An empty queue means nothing is left to evict.
            let Some(oldest) = self.lru.pop_front() else {
                break;
            };
            self.entries.remove(&oldest);
        }
    }

    /// Drop every entry and all recency bookkeeping.
    fn invalidate_all(&mut self) {
        self.entries.clear();
        self.lru.clear();
    }

    /// Whether `key` is currently held. Does not affect recency.
    #[cfg(test)]
    fn contains_key(&self, key: &K) -> bool {
        self.entries.contains_key(key)
    }

    /// Number of entries currently held.
    #[cfg(test)]
    fn len(&self) -> usize {
        self.entries.len()
    }

    /// Move `key` to the most-recently-used end of the recency queue.
    fn touch(&mut self, key: K) {
        // Already most-recent: the O(n) remove-and-reappend would be a no-op.
        if self.lru.back() == Some(&key) {
            return;
        }
        self.lru.retain(|existing| existing != &key);
        self.lru.push_back(key);
    }
}
|
||||
|
||||
impl Default for GraphIndexCache {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
entries: LruMap::new(8),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn graph_index_cache_key(resolved: &ResolvedTarget, catalog: &Catalog) -> GraphIndexCacheKey {
|
||||
let mut edge_tables: Vec<GraphIndexTableState> = catalog
|
||||
.edge_types
|
||||
|
|
@ -114,6 +175,114 @@ fn graph_index_cache_key(resolved: &ResolvedTarget, catalog: &Catalog) -> GraphI
|
|||
}
|
||||
}
|
||||
|
||||
/// Upper bound on the number of `Dataset` handles kept open at once.
///
/// A handle holds only Arcs (object store + manifest), never table data, so
/// keeping it is cheap. The cap bounds how many `(table, branch, version,
/// e_tag)` cells stay warm: one graph's live table set across a couple of
/// branches at the current version fits comfortably, with headroom for the
/// recently-superseded versions that writes leave behind until they age out.
const TABLE_HANDLE_CACHE_CAP: usize = 64;
|
||||
|
||||
/// Cache key for one held table handle. Two keys are equal only when the
/// path, branch, version and e_tag all match.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct TableHandleKey {
    /// Path of the table the handle was opened for.
    table_path: String,
    /// Branch the table was resolved on, if any.
    table_branch: Option<String>,
    /// Table version the handle is pinned to.
    version: u64,
    /// Object-store e_tag, when one is available for the table location.
    e_tag: Option<String>,
}
|
||||
|
||||
/// Held open-`Dataset` handles keyed by `(table_path, branch, version, e_tag)` — the
|
||||
/// version-keyed analogue of LanceDB's `DatasetConsistencyWrapper`
|
||||
/// (`rust/lancedb/src/table/dataset.rs`). A warm read reuses a held handle with
|
||||
/// zero open IO (a cheap `Dataset` clone); a miss opens once at the location with
|
||||
/// the shared `Session`. Version plus e_tag are in the key, so a write (or a
|
||||
/// delete/recreate that reuses a version number on object stores with e_tags) is
|
||||
/// simply a new key. A same-branch manifest refresh clears this cache as the
|
||||
/// fallback for e_tag-less table locations. Only read-path Data opens use this —
|
||||
/// writes open HEAD directly and never receive a pinned handle.
|
||||
#[derive(Default)]
|
||||
pub struct TableHandleCache {
|
||||
inner: Mutex<TableHandleCacheInner>,
|
||||
}
|
||||
|
||||
struct TableHandleCacheInner {
|
||||
entries: LruMap<TableHandleKey, Dataset>,
|
||||
}
|
||||
|
||||
impl TableHandleCache {
|
||||
/// Drop all held handles. Correctness never requires this (version-in-key);
|
||||
/// it is memory hygiene, called from the same hooks that clear the graph
|
||||
/// index cache (branch switch / refresh).
|
||||
pub async fn invalidate_all(&self) {
|
||||
let mut inner = self.inner.lock().await;
|
||||
inner.entries.invalidate_all();
|
||||
}
|
||||
|
||||
/// Return the dataset for `(table_path, branch, version, e_tag)`, reusing a
|
||||
/// held handle (0 open IO) or opening it once at `location` with the shared
|
||||
/// `session` on a miss.
|
||||
pub async fn get_or_open(
|
||||
&self,
|
||||
table_path: &str,
|
||||
table_branch: Option<&str>,
|
||||
version: u64,
|
||||
e_tag: Option<&str>,
|
||||
location: &str,
|
||||
session: Option<&Arc<Session>>,
|
||||
) -> Result<Dataset> {
|
||||
let key = TableHandleKey {
|
||||
table_path: table_path.to_string(),
|
||||
table_branch: table_branch.map(str::to_string),
|
||||
version,
|
||||
e_tag: e_tag.map(str::to_string),
|
||||
};
|
||||
{
|
||||
let mut inner = self.inner.lock().await;
|
||||
if let Some(ds) = inner.entries.get(&key).cloned() {
|
||||
return Ok(ds);
|
||||
}
|
||||
}
|
||||
// Miss: open without holding the lock (the open is async IO). A concurrent
|
||||
// double-miss opens twice and one wins the insert — correct (the dataset
|
||||
// at a version is immutable) and rare.
|
||||
let ds = crate::instrumentation::open_table_dataset(location, version, session).await?;
|
||||
let mut inner = self.inner.lock().await;
|
||||
if let Some(existing) = inner.entries.get(&key).cloned() {
|
||||
return Ok(existing);
|
||||
}
|
||||
inner.insert(key, ds.clone());
|
||||
Ok(ds)
|
||||
}
|
||||
}
|
||||
|
||||
impl TableHandleCacheInner {
|
||||
fn insert(&mut self, key: TableHandleKey, value: Dataset) {
|
||||
self.entries.insert(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for TableHandleCacheInner {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
entries: LruMap::new(TABLE_HANDLE_CACHE_CAP),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-graph read caches handed to a resolved `Snapshot` so its table opens reuse
|
||||
/// one shared `Session` (LanceDB's one-session-per-connection pattern) and the
|
||||
/// held-handle cache. Manual `Debug` because `lance::session::Session` is not
|
||||
/// `Debug`; this lets `Snapshot` keep its `#[derive(Debug)]`.
|
||||
pub struct ReadCaches {
|
||||
pub session: Arc<Session>,
|
||||
pub handles: Arc<TableHandleCache>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for ReadCaches {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("ReadCaches").finish_non_exhaustive()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
|
@ -156,4 +325,21 @@ mod tests {
|
|||
assert!(cache.entries.contains_key(&key(0)));
|
||||
assert!(!cache.entries.contains_key(&key(1)));
|
||||
}
|
||||
|
||||
#[test]
fn lru_map_evicts_oldest_and_touch_refreshes_order() {
    let mut lru = LruMap::new(2);
    for (key, value) in [("a", 1), ("b", 2)] {
        lru.insert(key, value);
    }

    // Reading "a" refreshes it, leaving "b" as the least recently used entry.
    assert_eq!(lru.get(&"a"), Some(&1));
    lru.insert("c", 3);

    // Over capacity: "b" is evicted, "a" and "c" survive.
    assert!(lru.contains_key(&"a"));
    assert!(!lru.contains_key(&"b"));
    assert!(lru.contains_key(&"c"));

    lru.invalidate_all();
    assert_eq!(lru.len(), 0);
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -353,6 +353,15 @@ pub trait TableStorage: sealed::Sealed + Send + Sync + Debug {
|
|||
prior_stages: &[StagedHandle],
|
||||
) -> Result<StagedHandle>;
|
||||
|
||||
/// Stage an append of every row in `source`'s table onto `snapshot`'s table. The rows are
|
||||
/// streamed, so the whole row set is never materialized in memory (see `TableStore::stage_append_stream`).
|
||||
async fn stage_append_stream(
|
||||
&self,
|
||||
snapshot: &SnapshotHandle,
|
||||
source: &SnapshotHandle,
|
||||
prior_stages: &[StagedHandle],
|
||||
) -> Result<StagedHandle>;
|
||||
|
||||
async fn stage_merge_insert(
|
||||
&self,
|
||||
snapshot: SnapshotHandle,
|
||||
|
|
@ -684,6 +693,18 @@ impl TableStorage for TableStore {
|
|||
.map(StagedHandle::new)
|
||||
}
|
||||
|
||||
async fn stage_append_stream(
|
||||
&self,
|
||||
snapshot: &SnapshotHandle,
|
||||
source: &SnapshotHandle,
|
||||
prior_stages: &[StagedHandle],
|
||||
) -> Result<StagedHandle> {
|
||||
let staged_writes = staged_handles_as_writes(prior_stages);
|
||||
TableStore::stage_append_stream(self, snapshot.dataset(), source.dataset(), &staged_writes)
|
||||
.await
|
||||
.map(StagedHandle::new)
|
||||
}
|
||||
|
||||
async fn stage_merge_insert(
|
||||
&self,
|
||||
snapshot: SnapshotHandle,
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ use arrow_array::{
|
|||
Array, ArrayRef, RecordBatch, StringArray, StructArray, UInt8Array, UInt32Array, UInt64Array,
|
||||
};
|
||||
use arrow_schema::SchemaRef;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream;
|
||||
use futures::TryStreamExt;
|
||||
use lance::Dataset;
|
||||
use lance::blob::BlobArrayBuilder;
|
||||
|
|
@ -362,6 +363,29 @@ impl TableStore {
|
|||
Ok(materialized)
|
||||
}
|
||||
|
||||
/// Streaming, blob-aware sibling of [`Self::scan_batches_for_rewrite`].
|
||||
/// Yields the dataset's rows lazily as a `SendableRecordBatchStream` so a
|
||||
/// downstream writer (`stage_append_stream`) never materializes the whole
|
||||
/// table in memory. Blob columns still need per-row rebuild, so those tables
|
||||
/// fall back to the materialized path and are re-streamed from the `Vec`
|
||||
/// (rare — only tables with a `Blob` property; bounded-memory blob streaming
|
||||
/// is a follow-up). The non-blob path is a true lazy scan.
|
||||
pub async fn scan_stream_for_rewrite(&self, ds: &Dataset) -> Result<SendableRecordBatchStream> {
|
||||
let has_blob_columns = ds.schema().fields_pre_order().any(|field| field.is_blob());
|
||||
if has_blob_columns {
|
||||
let arrow_schema: SchemaRef = Arc::new(ds.schema().into());
|
||||
let batches = self.scan_batches_for_rewrite(ds).await?;
|
||||
let reader = arrow_array::RecordBatchIterator::new(
|
||||
batches.into_iter().map(Ok),
|
||||
arrow_schema,
|
||||
);
|
||||
return Ok(lance_datafusion::utils::reader_to_stream(Box::new(reader)));
|
||||
}
|
||||
// Non-blob: a true lazy scan. `DatasetRecordBatchStream` converts to the
|
||||
// `SendableRecordBatchStream` that `execute_uncommitted_stream` consumes.
|
||||
Ok(Self::scan_stream(ds, None, None, None, false).await?.into())
|
||||
}
|
||||
|
||||
pub(crate) async fn materialize_blob_batch(
|
||||
ds: &Dataset,
|
||||
batch: RecordBatch,
|
||||
|
|
@ -919,6 +943,7 @@ impl TableStore {
|
|||
"stage_append called with empty batch".to_string(),
|
||||
));
|
||||
}
|
||||
let appended_rows = batch.num_rows() as u64;
|
||||
let params = WriteParams {
|
||||
mode: WriteMode::Append,
|
||||
allow_external_blob_outside_bases: true,
|
||||
|
|
@ -931,6 +956,9 @@ impl TableStore {
|
|||
.execute_uncommitted(vec![batch])
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
// Record only after the staging write succeeds, so a failed write does
|
||||
// not inflate the probe (matches `stage_append_stream`'s ordering).
|
||||
crate::instrumentation::record_stage_append(appended_rows);
|
||||
let mut new_fragments = match &transaction.operation {
|
||||
Operation::Append { fragments } => fragments.clone(),
|
||||
Operation::Overwrite { fragments, .. } => fragments.clone(),
|
||||
|
|
@ -971,6 +999,71 @@ impl TableStore {
|
|||
})
|
||||
}
|
||||
|
||||
/// Streaming variant of [`Self::stage_append`]: appends the rows of `source`
|
||||
/// into `ds` without materializing them in memory. It scans `source` lazily
|
||||
/// (`scan_stream_for_rewrite`) and hands the stream to Lance's
|
||||
/// `execute_uncommitted_stream`, which rolls fragments at `max_rows_per_file`
|
||||
/// — bounded memory, one Append transaction. This is the substrate-blessed
|
||||
/// bulk-append path (the same one LanceDB's `Table::add` uses). Identical
|
||||
/// fragment-id / stable-row-id staging as `stage_append`.
|
||||
///
|
||||
/// TRANSITIONAL caller — its only caller is the row-level merge append
|
||||
/// (`publish_adopted_delta`, see `AdoptDelta`), which the fragment-adopt work
|
||||
/// (Lance #7263/#7185) removes: a fragment graft re-appends no rows. This
|
||||
/// primitive and `scan_stream_for_rewrite` are then dead unless re-adopted as
|
||||
/// a general bulk-append path (the `Table::add` shape makes that plausible).
|
||||
pub async fn stage_append_stream(
|
||||
&self,
|
||||
ds: &Dataset,
|
||||
source: &Dataset,
|
||||
prior_stages: &[StagedWrite],
|
||||
) -> Result<StagedWrite> {
|
||||
let stream = self.scan_stream_for_rewrite(source).await?;
|
||||
let params = WriteParams {
|
||||
mode: WriteMode::Append,
|
||||
allow_external_blob_outside_bases: true,
|
||||
auto_cleanup: None,
|
||||
skip_auto_cleanup: true,
|
||||
..Default::default()
|
||||
};
|
||||
let transaction = InsertBuilder::new(Arc::new(ds.clone()))
|
||||
.with_params(¶ms)
|
||||
.execute_uncommitted_stream(stream)
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
let mut new_fragments = match &transaction.operation {
|
||||
Operation::Append { fragments } => fragments.clone(),
|
||||
Operation::Overwrite { fragments, .. } => fragments.clone(),
|
||||
other => {
|
||||
return Err(OmniError::manifest_internal(format!(
|
||||
"stage_append_stream: unexpected Lance operation {:?}",
|
||||
std::mem::discriminant(other)
|
||||
)));
|
||||
}
|
||||
};
|
||||
let appended_rows: u64 = new_fragments
|
||||
.iter()
|
||||
.filter_map(|f| f.physical_rows)
|
||||
.map(|r| r as u64)
|
||||
.sum();
|
||||
crate::instrumentation::record_stage_append(appended_rows);
|
||||
// Same commit-time fragment-id / row-id renumbering as `stage_append`.
|
||||
let next_id_base = ds.manifest.max_fragment_id.unwrap_or(0) as u64
|
||||
+ 1
|
||||
+ prior_stages_fragment_count(prior_stages);
|
||||
assign_fragment_ids(&mut new_fragments, next_id_base);
|
||||
if ds.manifest.uses_stable_row_ids() {
|
||||
let prior_rows = prior_stages_row_count(prior_stages)?;
|
||||
let start_row_id = ds.manifest.next_row_id + prior_rows;
|
||||
assign_row_id_meta(&mut new_fragments, start_row_id)?;
|
||||
}
|
||||
Ok(StagedWrite {
|
||||
transaction,
|
||||
new_fragments,
|
||||
removed_fragment_ids: Vec::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Stage a merge_insert (upsert): write fragment files describing the
|
||||
/// merge result, return the uncommitted transaction plus the new
|
||||
/// fragments. The transaction's `Operation::Update` carries the
|
||||
|
|
@ -1012,6 +1105,7 @@ impl TableStore {
|
|||
"stage_merge_insert called with empty batch".to_string(),
|
||||
));
|
||||
}
|
||||
let merged_rows = batch.num_rows() as u64;
|
||||
|
||||
// Precondition for the FirstSeen workaround below: every call path that
|
||||
// reaches stage_merge_insert (load, MutationStaging::finalize,
|
||||
|
|
@ -1052,6 +1146,9 @@ impl TableStore {
|
|||
.execute_uncommitted(stream)
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
// Record only after the staging write succeeds, so a failed write does
|
||||
// not inflate the probe (matches `stage_append`/`stage_append_stream`).
|
||||
crate::instrumentation::record_stage_merge_insert(merged_rows);
|
||||
// Operation::Update { removed_fragment_ids, updated_fragments, new_fragments, .. } —
|
||||
// `new_fragments` are the freshly inserted rows; `updated_fragments`
|
||||
// are rewrites of existing fragments that include both retained and
|
||||
|
|
@ -1541,8 +1638,11 @@ impl TableStore {
|
|||
ds.create_index_builder(&[column], IndexType::Vector, ¶ms)
|
||||
.replace(true)
|
||||
.await
|
||||
.map(|_| ())
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
// Record only after the index build succeeds, so a failed build does not
|
||||
// inflate the probe (matches the `stage_*` probes).
|
||||
crate::instrumentation::record_create_vector_index();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn create_empty_dataset(dataset_uri: &str, schema: &SchemaRef) -> Result<Dataset> {
|
||||
|
|
@ -1783,7 +1883,15 @@ async fn scan_pending_batches(
|
|||
filter: Option<&str>,
|
||||
) -> Result<Vec<RecordBatch>> {
|
||||
let schema = pending_schema.unwrap_or_else(|| pending_batches[0].schema());
|
||||
let ctx = datafusion::execution::context::SessionContext::new();
|
||||
// #283: disable SQL identifier normalization so an unquoted camelCase
|
||||
// column in `filter` (e.g. `repoName = 'acme'`, emitted unquoted by
|
||||
// `predicate_to_sql` because the committed Lance scan needs it unquoted)
|
||||
// is matched case-preserving against the case-sensitive MemTable schema.
|
||||
// Without this, DataFusion lowercases `repoName` → `reponame` and fails to
|
||||
// resolve. Quoted identifiers (the projection list below) are unaffected.
|
||||
let mut config = datafusion::execution::context::SessionConfig::new();
|
||||
config.options_mut().sql_parser.enable_ident_normalization = false;
|
||||
let ctx = datafusion::execution::context::SessionContext::new_with_config(config);
|
||||
let mem = datafusion::datasource::MemTable::try_new(schema, vec![pending_batches.to_vec()])
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
ctx.register_table("pending", Arc::new(mem))
|
||||
|
|
|
|||
|
|
@ -548,6 +548,174 @@ async fn branch_merge_records_single_latest_commit_with_two_parents() {
|
|||
);
|
||||
}
|
||||
|
||||
// ── P1: commit-DAG coherence on same-branch writes after an external commit ──
|
||||
//
|
||||
// `append_commit` takes a new commit's parent from the coordinator's in-memory
|
||||
// head (commit_graph head_commit, zero storage read), but `commit_all` rebases
|
||||
// the MANIFEST from a fresh coordinator. So after an external writer advances
|
||||
// the branch, a same-branch write on a non-refreshed handle commits a fresh
|
||||
// manifest version yet appends off the stale head — forking the commit DAG (the
|
||||
// new commit and the external commit share a parent). Data is unaffected (the
|
||||
// manifest is the visibility authority); only commit history is malformed.
|
||||
// P1 refreshes the commit-graph head before the append, so the parent is the
|
||||
// true current head. These two tests are RED before that fix, GREEN after.
|
||||
|
||||
/// Non-strict insert: the fork is pre-existing (commit_all rebases the manifest
|
||||
/// regardless of the stale head), independent of Fix 1.
|
||||
#[tokio::test]
|
||||
async fn same_branch_insert_after_external_commit_is_linear() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
|
||||
// Handle A: a long-lived writer whose coordinator head stays pinned at the
|
||||
// load commit (C0) — it never refreshes before its own write below.
|
||||
let mut a = init_and_load(&dir).await;
|
||||
let c0 = CommitGraph::open(uri)
|
||||
.await
|
||||
.unwrap()
|
||||
.head_commit()
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
// External writer B advances main: commit C1, parent C0.
|
||||
let mut b = Omnigraph::open(uri).await.unwrap();
|
||||
mutate_main(
|
||||
&mut b,
|
||||
MUTATION_QUERIES,
|
||||
"insert_person",
|
||||
&mixed_params(&[("$name", "ext_b")], &[("$age", 30)]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let c1 = CommitGraph::open(uri)
|
||||
.await
|
||||
.unwrap()
|
||||
.head_commit()
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
c1.parent_commit_id.as_deref(),
|
||||
Some(c0.graph_commit_id.as_str()),
|
||||
"sanity: B's commit C1 should descend from C0"
|
||||
);
|
||||
|
||||
// A writes to main WITHOUT refreshing. A's coordinator still thinks the head
|
||||
// is C0, so a pre-fix append parents the new commit on C0 instead of C1.
|
||||
mutate_main(
|
||||
&mut a,
|
||||
MUTATION_QUERIES,
|
||||
"insert_person",
|
||||
&mixed_params(&[("$name", "local_a")], &[("$age", 40)]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let commits = CommitGraph::open(uri)
|
||||
.await
|
||||
.unwrap()
|
||||
.load_commits()
|
||||
.await
|
||||
.unwrap();
|
||||
let latest = commits.iter().max_by_key(|c| c.manifest_version).unwrap();
|
||||
assert_eq!(
|
||||
latest.parent_commit_id.as_deref(),
|
||||
Some(c1.graph_commit_id.as_str()),
|
||||
"A's same-branch write after an external commit must append off the true \
|
||||
head C1, not the stale head C0 (commit-DAG fork)"
|
||||
);
|
||||
let c0_children = commits
|
||||
.iter()
|
||||
.filter(|c| c.parent_commit_id.as_deref() == Some(c0.graph_commit_id.as_str()))
|
||||
.count();
|
||||
assert_eq!(c0_children, 1, "C0 must have exactly one child; two is the fork");
|
||||
}
|
||||
|
||||
/// Strict update after a read: Fix 1's `refresh_manifest_only` makes the read
|
||||
/// freshen the read-time pin, defeating the strict 409 that used to force a
|
||||
/// coherent refresh — so the same stale-head append forks strict ops too.
|
||||
#[tokio::test]
|
||||
async fn same_branch_update_after_external_commit_and_read_is_linear() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
|
||||
// A inserts the row it will later update; this is A's own commit (Ca), so
|
||||
// A's coordinator head is Ca.
|
||||
let mut a = init_and_load(&dir).await;
|
||||
mutate_main(
|
||||
&mut a,
|
||||
MUTATION_QUERIES,
|
||||
"insert_person",
|
||||
&mixed_params(&[("$name", "target")], &[("$age", 40)]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let ca = CommitGraph::open(uri)
|
||||
.await
|
||||
.unwrap()
|
||||
.head_commit()
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
// External writer B advances main: commit Cb, parent Ca.
|
||||
let mut b = Omnigraph::open(uri).await.unwrap();
|
||||
mutate_main(
|
||||
&mut b,
|
||||
MUTATION_QUERIES,
|
||||
"insert_person",
|
||||
&mixed_params(&[("$name", "ext_b")], &[("$age", 30)]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let cb = CommitGraph::open(uri)
|
||||
.await
|
||||
.unwrap()
|
||||
.head_commit()
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert_eq!(cb.parent_commit_id.as_deref(), Some(ca.graph_commit_id.as_str()));
|
||||
|
||||
// A reads main: the stale-probe path refreshes A's MANIFEST (via
|
||||
// refresh_manifest_only) but not its commit-graph head, freshening the
|
||||
// read-time pin so the strict update below skips its 409.
|
||||
query_main(&mut a, TEST_QUERIES, "total_people", ¶ms(&[]))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Strict update, no explicit refresh: pre-fix it appends off the stale head
|
||||
// Ca instead of Cb.
|
||||
mutate_main(
|
||||
&mut a,
|
||||
MUTATION_QUERIES,
|
||||
"set_age",
|
||||
&mixed_params(&[("$name", "target")], &[("$age", 99)]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let commits = CommitGraph::open(uri)
|
||||
.await
|
||||
.unwrap()
|
||||
.load_commits()
|
||||
.await
|
||||
.unwrap();
|
||||
let latest = commits.iter().max_by_key(|c| c.manifest_version).unwrap();
|
||||
assert_eq!(
|
||||
latest.parent_commit_id.as_deref(),
|
||||
Some(cb.graph_commit_id.as_str()),
|
||||
"a strict update after an external commit and a local read must append \
|
||||
off the true head Cb, not the stale head Ca"
|
||||
);
|
||||
let ca_children = commits
|
||||
.iter()
|
||||
.filter(|c| c.parent_commit_id.as_deref() == Some(ca.graph_commit_id.as_str()))
|
||||
.count();
|
||||
assert_eq!(ca_children, 1, "Ca must have exactly one child; two is the fork");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn branch_merge_records_actor_on_latest_commit() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
|
|
|
|||
|
|
@ -8,12 +8,15 @@ use omnigraph::db::Omnigraph;
|
|||
use omnigraph::error::{ManifestErrorKind, OmniError};
|
||||
use omnigraph::failpoints::ScopedFailPoint;
|
||||
use omnigraph::loader::LoadMode;
|
||||
use serial_test::serial;
|
||||
|
||||
use helpers::recovery::{
|
||||
FollowUpMutation, RecoveryExpectation, TableExpectation, assert_post_recovery_invariants,
|
||||
branch_head_commit_id, single_sidecar_operation_id,
|
||||
};
|
||||
use helpers::{MUTATION_QUERIES, mixed_params, mutate_main, version_main};
|
||||
use helpers::{
|
||||
MUTATION_QUERIES, collect_column_strings, mixed_params, mutate_main, read_table, version_main,
|
||||
};
|
||||
|
||||
const SCHEMA_V1: &str = "node Person { name: String @key }\n";
|
||||
const SCHEMA_V2_ADDED_TYPE: &str =
|
||||
|
|
@ -3176,6 +3179,7 @@ async fn optimize_phase_b_failure_recovered_on_next_open() {
|
|||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[serial(branch_merge_phase_b)]
|
||||
async fn branch_merge_phase_b_failure_recovered_on_next_open() {
|
||||
use omnigraph::loader::{LoadMode, load_jsonl};
|
||||
|
||||
|
|
@ -3337,6 +3341,352 @@ async fn branch_merge_phase_b_failure_recovered_on_next_open() {
|
|||
drop(db);
|
||||
}
|
||||
|
||||
/// AdoptWithDelta recovery (the gap closure): a fast-forward merge — main has
|
||||
/// NOT advanced since the branch forked, so the touched table is classified
|
||||
/// `AdoptWithDelta`, not `RewriteMerged` — that fails after Phase B must still
|
||||
/// recover on the next open. Before the recovery-pin closure this drifted
|
||||
/// silently: the adopt path advanced Lance HEAD but was unpinned, so the sweep
|
||||
/// found no sidecar and the merge was lost.
|
||||
#[tokio::test]
|
||||
#[serial(branch_merge_phase_b)]
|
||||
async fn branch_merge_adopt_with_delta_phase_b_failure_recovered_on_next_open() {
|
||||
use omnigraph::loader::{LoadMode, load_jsonl};
|
||||
|
||||
let _scenario = FailScenario::setup();
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap().to_string();
|
||||
|
||||
// Seed main, branch off, mutate ONLY the branch. main stays at base, so the
|
||||
// merge is a fast-forward and Person classifies `AdoptWithDelta` (forked
|
||||
// source, target == base, non-empty delta) — NOT `RewriteMerged`.
|
||||
{
|
||||
let mut db = Omnigraph::init(&uri, helpers::TEST_SCHEMA).await.unwrap();
|
||||
load_jsonl(
|
||||
&mut db,
|
||||
r#"{"type":"Person","data":{"name":"alice","age":30}}
|
||||
"#,
|
||||
LoadMode::Append,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
db.branch_create("feature").await.unwrap();
|
||||
db.mutate(
|
||||
"feature",
|
||||
MUTATION_QUERIES,
|
||||
"insert_person",
|
||||
&mixed_params(&[("$name", "Bob")], &[("$age", 40)]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
// main intentionally NOT mutated → fast-forward → AdoptWithDelta.
|
||||
}
|
||||
|
||||
let pre_failure_version = {
|
||||
let db = Omnigraph::open(&uri).await.unwrap();
|
||||
version_main(&db).await.unwrap()
|
||||
};
|
||||
|
||||
// Fail after the per-table publish loop, before commit_manifest_updates.
|
||||
{
|
||||
let db = Omnigraph::open(&uri).await.unwrap();
|
||||
let _failpoint =
|
||||
ScopedFailPoint::new("branch_merge.post_phase_b_pre_manifest_commit", "return");
|
||||
let err = db.branch_merge("feature", "main").await.unwrap_err();
|
||||
assert!(
|
||||
err.to_string().contains(
|
||||
"injected failpoint triggered: branch_merge.post_phase_b_pre_manifest_commit"
|
||||
),
|
||||
"unexpected error: {err}"
|
||||
);
|
||||
|
||||
// The gap closure: an AdoptWithDelta merge must persist a sidecar.
|
||||
let recovery_dir = dir.path().join("__recovery");
|
||||
let sidecars: Vec<_> = std::fs::read_dir(&recovery_dir)
|
||||
.unwrap()
|
||||
.filter_map(|e| e.ok())
|
||||
.collect();
|
||||
assert_eq!(
|
||||
sidecars.len(),
|
||||
1,
|
||||
"AdoptWithDelta merge must persist exactly one recovery sidecar (the closed gap)"
|
||||
);
|
||||
}
|
||||
|
||||
// Reopen → the recovery sweep rolls the AdoptWithDelta merge forward.
|
||||
let db = Omnigraph::open(&uri).await.unwrap();
|
||||
let recovery_dir = dir.path().join("__recovery");
|
||||
if recovery_dir.exists() {
|
||||
let remaining: Vec<_> = std::fs::read_dir(&recovery_dir)
|
||||
.unwrap()
|
||||
.filter_map(|e| e.ok())
|
||||
.collect();
|
||||
assert!(
|
||||
remaining.is_empty(),
|
||||
"sidecar must be deleted post-recovery; remaining: {remaining:?}"
|
||||
);
|
||||
}
|
||||
|
||||
let post_recovery_version = version_main(&db).await.unwrap();
|
||||
assert!(
|
||||
post_recovery_version > pre_failure_version,
|
||||
"manifest must advance post-recovery; pre={pre_failure_version} post={post_recovery_version}"
|
||||
);
|
||||
let names = collect_column_strings(&read_table(&db, "node:Person").await, "name");
|
||||
assert!(
|
||||
names.contains(&"Bob".to_string()),
|
||||
"recovered AdoptWithDelta merge must include Bob; have {names:?}"
|
||||
);
|
||||
drop(db);
|
||||
}
|
||||
|
||||
/// Selects the branch-merge publish path that a partial-Phase-B test drives.
enum MergeScenario {
    /// main stays at base, so the touched table is `AdoptWithDelta`
    /// (`publish_adopted_delta`: append → upsert → delete).
    Adopt,
    /// main advances past base, so the touched table is `RewriteMerged`
    /// (`publish_rewritten_merge_table`: merge_insert → delete → index).
    Rewrite,
}
|
||||
|
||||
async fn sorted_person_names(db: &Omnigraph) -> Vec<String> {
|
||||
let mut names = collect_column_strings(&read_table(db, "node:Person").await, "name");
|
||||
names.sort();
|
||||
names
|
||||
}
|
||||
|
||||
/// THE recovery-atomicity regression gate. A branch merge whose per-table publish
|
||||
/// is a multi-commit sequence (append → upsert → delete, or merge_insert → delete
|
||||
/// → index) advances Lance HEAD step by step before the manifest publish. If the
|
||||
/// process dies *mid*-sequence — after some commits but before the achieved-version
|
||||
/// intent is recorded — recovery must roll the whole merge **back**, not publish
|
||||
/// the partial and record the merge as complete.
|
||||
///
|
||||
/// The delta is deliberately MIXED — a fresh id (`bob`, append), a modified base id
|
||||
/// (`carol`, upsert) and a removed base id (`dave`, delete) — so every partial
|
||||
/// window leaves real work undone. Proof of rollback: after recovery the target is
|
||||
/// back at its base name-set, and a *re-run* of the merge re-applies the full delta
|
||||
/// (the partial was not silently recorded as "already merged").
|
||||
///
|
||||
/// RED before the fix: the loose `BranchMerge` classification rolls any
|
||||
/// `lance_head > manifest_pinned` forward, so the partial is published (e.g. `bob`
|
||||
/// present, `dave` kept) and the merge recorded — the first assert (back at base)
|
||||
/// fails. GREEN after: `achieved_version == None` → `IncompletePhaseB` → roll back.
|
||||
async fn assert_partial_merge_rolls_back(scenario: MergeScenario, failpoint: &str) {
|
||||
use omnigraph::loader::load_jsonl;
|
||||
|
||||
let _scenario = FailScenario::setup();
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap().to_string();
|
||||
|
||||
// Seed main {alice, carol, dave}; on `feature` add bob (append), bump carol
|
||||
// (upsert), remove dave (delete). For Rewrite, also move main past base so the
|
||||
// table classifies RewriteMerged instead of a fast-forward AdoptWithDelta.
|
||||
{
|
||||
let mut db = Omnigraph::init(&uri, helpers::TEST_SCHEMA).await.unwrap();
|
||||
load_jsonl(
|
||||
&mut db,
|
||||
"{\"type\":\"Person\",\"data\":{\"name\":\"alice\",\"age\":30}}\n\
|
||||
{\"type\":\"Person\",\"data\":{\"name\":\"carol\",\"age\":50}}\n\
|
||||
{\"type\":\"Person\",\"data\":{\"name\":\"dave\",\"age\":60}}\n",
|
||||
LoadMode::Append,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
db.branch_create("feature").await.unwrap();
|
||||
db.mutate(
|
||||
"feature",
|
||||
MUTATION_QUERIES,
|
||||
"insert_person",
|
||||
&mixed_params(&[("$name", "bob")], &[("$age", 40)]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
db.mutate(
|
||||
"feature",
|
||||
MUTATION_QUERIES,
|
||||
"set_age",
|
||||
&mixed_params(&[("$name", "carol")], &[("$age", 55)]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
db.mutate(
|
||||
"feature",
|
||||
MUTATION_QUERIES,
|
||||
"remove_person",
|
||||
&mixed_params(&[("$name", "dave")], &[]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
if matches!(scenario, MergeScenario::Rewrite) {
|
||||
db.mutate(
|
||||
"main",
|
||||
MUTATION_QUERIES,
|
||||
"set_age",
|
||||
&mixed_params(&[("$name", "alice")], &[("$age", 35)]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
// Crash mid-Phase-B at the injected window.
|
||||
{
|
||||
let db = Omnigraph::open(&uri).await.unwrap();
|
||||
let _fp = ScopedFailPoint::new(failpoint, "return");
|
||||
let err = db.branch_merge("feature", "main").await.unwrap_err();
|
||||
assert!(
|
||||
err.to_string().contains(failpoint),
|
||||
"expected the injected failpoint {failpoint}, got: {err}"
|
||||
);
|
||||
}
|
||||
|
||||
// Reopen → the open-time sweep must ROLL BACK to base (the merge never reached
|
||||
// its commit boundary), and a re-run must then apply the FULL delta.
|
||||
{
|
||||
let db = Omnigraph::open(&uri).await.unwrap();
|
||||
assert_eq!(
|
||||
sorted_person_names(&db).await,
|
||||
vec!["alice", "carol", "dave"],
|
||||
"partial Phase B at {failpoint} must roll back to base \
|
||||
(no bob, dave kept, carol's upsert reverted); the merge must NOT be recorded",
|
||||
);
|
||||
db.branch_merge("feature", "main").await.unwrap();
|
||||
assert_eq!(
|
||||
sorted_person_names(&db).await,
|
||||
vec!["alice", "bob", "carol"],
|
||||
"re-merge after rollback must re-apply the full delta \
|
||||
(bob added, dave removed) — proof the partial was not silently recorded",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[serial(branch_merge_phase_b)]
|
||||
async fn branch_merge_adopt_partial_after_append_rolls_back() {
|
||||
assert_partial_merge_rolls_back(
|
||||
MergeScenario::Adopt,
|
||||
"branch_merge.adopt_after_append_pre_upsert",
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[serial(branch_merge_phase_b)]
|
||||
async fn branch_merge_adopt_partial_after_upsert_rolls_back() {
|
||||
assert_partial_merge_rolls_back(
|
||||
MergeScenario::Adopt,
|
||||
"branch_merge.adopt_after_upsert_pre_delete",
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[serial(branch_merge_phase_b)]
|
||||
async fn branch_merge_rewrite_partial_after_merge_rolls_back() {
|
||||
assert_partial_merge_rolls_back(
|
||||
MergeScenario::Rewrite,
|
||||
"branch_merge.rewrite_after_merge_pre_delete",
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[serial(branch_merge_phase_b)]
|
||||
async fn branch_merge_rewrite_partial_after_delete_rolls_back() {
|
||||
assert_partial_merge_rolls_back(
|
||||
MergeScenario::Rewrite,
|
||||
"branch_merge.rewrite_after_delete_pre_index",
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
/// Backward-compat: a `BranchMerge` sidecar written by a *pre-confirmation*
|
||||
/// binary (schema_version 1, no `confirmed_version`) must NOT be misread as a
|
||||
/// partial Phase B and rolled back. A pre-upgrade crash in the Phase-B→C gap can
|
||||
/// leave such a sidecar over a *completed* merge; rolling it back would silently
|
||||
/// discard a finished merge with no operator signal — the regression greptile /
|
||||
/// Cursor flagged.
|
||||
///
|
||||
/// We synthesize the pre-upgrade sidecar realistically: crash after Phase B (a
|
||||
/// real sidecar + advanced Lance HEAD), then downgrade the on-disk JSON to the
|
||||
/// v1 shape (`schema_version` = 1, strip every pin's `confirmed_version`) before
|
||||
/// reopening — exactly what an old binary would have left.
|
||||
///
|
||||
/// RED before the versioning fix: a v1 sidecar with no `confirmed_version`
|
||||
/// classifies `IncompletePhaseB` → rolls back → `bob` is discarded. GREEN after:
|
||||
/// the version-aware classifier reads v1 as the old loose generation → rolls
|
||||
/// forward → `bob` preserved.
|
||||
#[tokio::test]
|
||||
#[serial(branch_merge_phase_b)]
|
||||
async fn pre_upgrade_v1_branch_merge_sidecar_rolls_forward_not_back() {
|
||||
use omnigraph::loader::{LoadMode, load_jsonl};
|
||||
|
||||
let _scenario = FailScenario::setup();
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap().to_string();
|
||||
|
||||
// main {alice}; feature adds bob → a fast-forward AdoptWithDelta merge, which
|
||||
// writes a recovery sidecar.
|
||||
{
|
||||
let mut db = Omnigraph::init(&uri, helpers::TEST_SCHEMA).await.unwrap();
|
||||
load_jsonl(
|
||||
&mut db,
|
||||
"{\"type\":\"Person\",\"data\":{\"name\":\"alice\",\"age\":30}}\n",
|
||||
LoadMode::Append,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
db.branch_create("feature").await.unwrap();
|
||||
db.mutate(
|
||||
"feature",
|
||||
MUTATION_QUERIES,
|
||||
"insert_person",
|
||||
&mixed_params(&[("$name", "bob")], &[("$age", 40)]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
// Crash after Phase B (Lance HEAD advanced, manifest not published) → a real
|
||||
// sidecar lands on disk.
|
||||
{
|
||||
let db = Omnigraph::open(&uri).await.unwrap();
|
||||
let _fp = ScopedFailPoint::new("branch_merge.post_phase_b_pre_manifest_commit", "return");
|
||||
db.branch_merge("feature", "main").await.unwrap_err();
|
||||
}
|
||||
|
||||
// Downgrade the sidecar to the pre-confirmation v1 shape an old binary writes.
|
||||
{
|
||||
let recovery_dir = std::path::Path::new(&uri).join("__recovery");
|
||||
let path = std::fs::read_dir(&recovery_dir)
|
||||
.unwrap()
|
||||
.filter_map(Result::ok)
|
||||
.map(|e| e.path())
|
||||
.find(|p| p.extension().is_some_and(|x| x == "json"))
|
||||
.expect("a recovery sidecar must exist after the post-Phase-B crash");
|
||||
let mut v: serde_json::Value =
|
||||
serde_json::from_str(&std::fs::read_to_string(&path).unwrap()).unwrap();
|
||||
v["schema_version"] = serde_json::json!(1);
|
||||
for table in v["tables"].as_array_mut().unwrap() {
|
||||
table.as_object_mut().unwrap().remove("confirmed_version");
|
||||
}
|
||||
std::fs::write(&path, serde_json::to_string_pretty(&v).unwrap()).unwrap();
|
||||
}
|
||||
|
||||
// Reopen → the pre-upgrade completed merge must roll FORWARD (bob kept), not
|
||||
// be silently discarded.
|
||||
{
|
||||
let db = Omnigraph::open(&uri).await.unwrap();
|
||||
assert_eq!(
|
||||
sorted_person_names(&db).await,
|
||||
vec!["alice", "bob"],
|
||||
"a pre-confirmation (v1) BranchMerge sidecar over a completed merge must roll \
|
||||
forward, not be misread as a partial and rolled back",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Branch-axis variant of the branch_merge recovery test: target is a
|
||||
/// non-main branch. Catches the branch-specific commit-graph head bug
|
||||
/// (D2) — without `CommitGraph::open_at_branch`, the recovery sweep
|
||||
|
|
@ -3344,6 +3694,7 @@ async fn branch_merge_phase_b_failure_recovered_on_next_open() {
|
|||
/// target, and future merges between the same pair would lose
|
||||
/// already-up-to-date detection.
|
||||
#[tokio::test]
|
||||
#[serial(branch_merge_phase_b)]
|
||||
async fn branch_merge_phase_b_failure_recovered_on_non_main_target() {
|
||||
use omnigraph::loader::{LoadMode, load_jsonl};
|
||||
|
||||
|
|
@ -3468,6 +3819,7 @@ async fn branch_merge_phase_b_failure_recovered_on_non_main_target() {
|
|||
/// keeps RewriteMerged tables on active_branch), the contract assertion
|
||||
/// catches a regression that reverts to `entry.table_branch.clone()`.
|
||||
#[tokio::test]
|
||||
#[serial(branch_merge_phase_b)]
|
||||
async fn branch_merge_sidecar_pins_table_branch_to_active_branch() {
|
||||
use omnigraph::loader::{LoadMode, load_jsonl};
|
||||
|
||||
|
|
|
|||
|
|
@ -71,6 +71,14 @@ const FORBIDDEN_PATTERNS: &[&str] = &[
|
|||
"Dataset::drop_columns",
|
||||
"Dataset::truncate_table",
|
||||
"Dataset::restore",
|
||||
// Raw dataset OPENS — all reads must route through `Snapshot::open` (the
|
||||
// held-handle cache + shared Session, Fix 3). Only the instrumented opener
|
||||
// (`instrumentation.rs`) and the storage/manifest layers (allow-listed below)
|
||||
// open datasets directly; forbidding these in the read/exec layer keeps a
|
||||
// future read from silently bypassing the cache.
|
||||
"Dataset::open",
|
||||
"DatasetBuilder::from_uri",
|
||||
"DatasetBuilder::from_namespace",
|
||||
// Lance-specific method names that don't clash with our `TableStore`
|
||||
// wrappers (we use `merge_insert_batch{,es}`, `add_columns_to_*`,
|
||||
// etc. — never the bare Lance names). Engine code that writes
|
||||
|
|
@ -106,6 +114,7 @@ const ALLOW_LIST_FILES: &[&str] = &[
|
|||
"commit_graph.rs", // Maintains `_graph_commits.lance` system table.
|
||||
"graph_coordinator.rs", // Drives the manifest publisher / branch coordinator.
|
||||
"recovery_audit.rs", // Maintains `_graph_commit_recoveries.lance` (recovery audit trail).
|
||||
"instrumentation.rs", // The instrumented dataset opener (open_dataset_tracked / open_table_dataset).
|
||||
];
|
||||
|
||||
/// Directories exempt from the guard. Files under these paths may use
|
||||
|
|
|
|||
|
|
@ -166,6 +166,21 @@ pub async fn mutate_branch(
|
|||
db.mutate(branch, query_source, query_name, params).await
|
||||
}
|
||||
|
||||
/// Advance the manifest version `n` times (one commit per insert), building
|
||||
/// deep commit history for cost-budget tests (history depth, not row count).
|
||||
pub async fn commit_many(db: &mut Omnigraph, n: usize) {
|
||||
for i in 0..n {
|
||||
mutate_main(
|
||||
db,
|
||||
MUTATION_QUERIES,
|
||||
"insert_person",
|
||||
&mixed_params(&[("$name", &format!("commit_many_{i}"))], &[("$age", 30)]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn snapshot_main(db: &Omnigraph) -> Result<Snapshot> {
|
||||
db.snapshot_of(ReadTarget::branch("main")).await
|
||||
}
|
||||
|
|
|
|||
|
|
@ -304,3 +304,108 @@ async fn init_with_force_recovers_from_orphan_schema_files() {
|
|||
"force-recovered graph must have full schema state written"
|
||||
);
|
||||
}
|
||||
|
||||
/// E2e for the schema-level `.pg` surface: `@description` (node / edge /
|
||||
/// property) and `@instruction` (node / edge only) parse, validate, and
|
||||
/// persist verbatim into the on-disk `_schema.ir.json` through `Omnigraph::init`
|
||||
/// — the contract that surfaces them in catalog metadata for tooling.
|
||||
#[tokio::test]
|
||||
async fn schema_annotations_persist_into_ir_json_on_init() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
|
||||
let schema = r#"
|
||||
node Task @description("Tracked work item") @instruction("Prefer querying by slug") {
|
||||
slug: String @key @description("Stable external identifier")
|
||||
}
|
||||
|
||||
edge DependsOn: Task -> Task @description("Hard dependency") @instruction("Use only for blockers")
|
||||
"#;
|
||||
|
||||
Omnigraph::init(uri, schema).await.unwrap();
|
||||
|
||||
let ir_json = fs::read_to_string(dir.path().join("_schema.ir.json")).unwrap();
|
||||
let ir: serde_json::Value = serde_json::from_str(&ir_json).unwrap();
|
||||
|
||||
// Helper: collect the {name -> value} map of annotations that carry a
|
||||
// string value. Value-less annotations (e.g. `@key`, which also desugars
|
||||
// to a constraint) are skipped — they aren't what this test asserts.
|
||||
let anns = |v: &serde_json::Value| -> std::collections::BTreeMap<String, String> {
|
||||
v["annotations"]
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.filter_map(|a| {
|
||||
Some((
|
||||
a["name"].as_str()?.to_string(),
|
||||
a["value"].as_str()?.to_string(),
|
||||
))
|
||||
})
|
||||
.collect()
|
||||
};
|
||||
|
||||
let node = ir["nodes"]
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.find(|n| n["name"] == "Task")
|
||||
.unwrap();
|
||||
let node_anns = anns(node);
|
||||
assert_eq!(node_anns.get("description").map(String::as_str), Some("Tracked work item"));
|
||||
assert_eq!(
|
||||
node_anns.get("instruction").map(String::as_str),
|
||||
Some("Prefer querying by slug"),
|
||||
"node @instruction persists into _schema.ir.json"
|
||||
);
|
||||
|
||||
let prop = node["properties"]
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.find(|p| p["name"] == "slug")
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
anns(prop).get("description").map(String::as_str),
|
||||
Some("Stable external identifier"),
|
||||
"property @description persists into _schema.ir.json"
|
||||
);
|
||||
|
||||
let edge = ir["edges"]
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.find(|e| e["name"] == "DependsOn")
|
||||
.unwrap();
|
||||
let edge_anns = anns(edge);
|
||||
assert_eq!(edge_anns.get("description").map(String::as_str), Some("Hard dependency"));
|
||||
assert_eq!(edge_anns.get("instruction").map(String::as_str), Some("Use only for blockers"));
|
||||
}
|
||||
|
||||
/// `@instruction` is rejected on a property at compile time, so init aborts
|
||||
/// before any graph state is written (mirrors the parser-level rejection from
|
||||
/// the full engine boundary).
|
||||
#[tokio::test]
|
||||
async fn init_rejects_instruction_on_property() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
|
||||
let schema = r#"
|
||||
node Task {
|
||||
slug: String @key @instruction("bad")
|
||||
}
|
||||
"#;
|
||||
|
||||
// `Omnigraph` is not `Debug`, so match rather than `unwrap_err`.
|
||||
let err = match Omnigraph::init(uri, schema).await {
|
||||
Ok(_) => panic!("property-level @instruction must abort init"),
|
||||
Err(err) => err,
|
||||
};
|
||||
assert!(
|
||||
err.to_string().contains("@instruction is only supported on node and edge types"),
|
||||
"property-level @instruction must abort init: {err}"
|
||||
);
|
||||
assert!(
|
||||
!dir.path().join("_schema.ir.json").exists(),
|
||||
"rejected init must not persist a schema IR"
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -145,3 +145,29 @@ query seen_eq() { match { $m: Metric { seen: datetime("2024-06-01T12:00:00Z") }
|
|||
assert_eq!(sorted_metric_names(&mut db, q, "born_eq").await, vec!["m1"]);
|
||||
assert_eq!(sorted_metric_names(&mut db, q, "seen_eq").await, vec!["m1"]);
|
||||
}
|
||||
|
||||
// #283: a property-match on a camelCase `@index` field must execute, not fail
|
||||
// with "No field named reponame" at the Lance scan. Exercises the pushdown arm
|
||||
// (inline binding `Doc { repoName: $r }`) end-to-end.
|
||||
const CC_SCHEMA: &str = r#"
|
||||
node Doc {
|
||||
slug: String @key
|
||||
repoName: String @index
|
||||
}
|
||||
"#;
|
||||
const CC_DATA: &str = r#"{"type":"Doc","data":{"slug":"d1","repoName":"acme"}}
|
||||
{"type":"Doc","data":{"slug":"d2","repoName":"globex"}}"#;
|
||||
|
||||
#[tokio::test]
|
||||
async fn camelcase_property_filter_executes() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let mut db = Omnigraph::init(uri, CC_SCHEMA).await.unwrap();
|
||||
load_jsonl(&mut db, CC_DATA, LoadMode::Overwrite).await.unwrap();
|
||||
|
||||
let q = r#"query by_repo($r: String) { match { $d: Doc { repoName: $r } } return { $d.slug } }"#;
|
||||
let r = query_main(&mut db, q, "by_repo", ¶ms(&[("$r", "acme")]))
|
||||
.await
|
||||
.expect("camelCase property filter must execute, not fail at the Lance scan");
|
||||
assert_eq!(r.num_rows(), 1, "expected exactly the d1 row for repoName=acme");
|
||||
}
|
||||
|
|
|
|||
213
crates/omnigraph/tests/merge_fast_forward.rs
Normal file
213
crates/omnigraph/tests/merge_fast_forward.rs
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
//! Fast-forward branch-merge cost + correctness.
|
||||
//!
|
||||
//! The data-path fix routes *new* rows of an adopted-source merge through
|
||||
//! `stage_append` (a streaming `Operation::Append`) instead of lumping new +
|
||||
//! changed rows into one `stage_merge_insert` (a full-outer hash join that
|
||||
//! buffers the whole delta and exhausts the DataFusion memory pool on
|
||||
//! embedding-bearing tables).
|
||||
//!
|
||||
//! The regression gate here is *structural*, not a brittle size threshold: it
|
||||
//! asserts WHICH staged-write primitive the merge invokes, via the task-local
|
||||
//! write probes in `omnigraph::instrumentation`. That is deterministic and
|
||||
//! machine-independent — it cannot flake on a bigger memory pool.
|
||||
|
||||
// Wrapping `branch_merge` in `with_merge_write_probes` (a task-local scope)
|
||||
// nests the already-deep merge future one layer deeper, overflowing rustc's
|
||||
// default 128 layout-query depth. Bump it for this test crate.
|
||||
#![recursion_limit = "512"]
|
||||
|
||||
mod helpers;
|
||||
|
||||
use omnigraph::db::{MergeOutcome, Omnigraph};
|
||||
use omnigraph::instrumentation::{MergeWriteProbes, with_merge_write_probes};
|
||||
|
||||
use helpers::*;
|
||||
|
||||
/// Insert `n` brand-new persons (fresh ids) onto `branch`, forking the Person
|
||||
/// table onto it. All rows are "new on source" — none collide with base ids.
|
||||
async fn append_new_persons(db: &mut Omnigraph, branch: &str, n: usize) {
|
||||
for i in 0..n {
|
||||
mutate_branch(
|
||||
db,
|
||||
branch,
|
||||
MUTATION_QUERIES,
|
||||
"insert_person",
|
||||
&mixed_params(&[("$name", &format!("ff_new_{i}"))], &[("$age", 30)]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
/// THE cost-budget gate. An append-only fast-forward merge must append the new
|
||||
/// rows and run **zero** `stage_merge_insert` (the full-outer hash join that is
|
||||
/// the OOM). RED today (new + changed are lumped into one `stage_merge_insert`);
|
||||
/// GREEN once the adopt path splits new→`stage_append`, changed→`stage_merge_insert`.
|
||||
#[tokio::test]
|
||||
async fn append_only_fast_forward_merge_does_no_merge_insert() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let main = init_and_load(&dir).await;
|
||||
main.branch_create("feature").await.unwrap();
|
||||
|
||||
let mut feature = Omnigraph::open(uri).await.unwrap();
|
||||
append_new_persons(&mut feature, "feature", 5).await;
|
||||
|
||||
let probes = MergeWriteProbes::default();
|
||||
let outcome =
|
||||
with_merge_write_probes(probes.clone(), main.branch_merge("feature", "main"))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(outcome, MergeOutcome::FastForward);
|
||||
|
||||
assert_eq!(
|
||||
probes.stage_merge_insert_calls(),
|
||||
0,
|
||||
"append-only fast-forward merge must do 0 stage_merge_insert (the OOM hash join); did {}",
|
||||
probes.stage_merge_insert_calls(),
|
||||
);
|
||||
assert!(
|
||||
probes.stage_append_calls() >= 1,
|
||||
"append-only fast-forward merge must append the new rows via stage_append; did {}",
|
||||
probes.stage_append_calls(),
|
||||
);
|
||||
assert_eq!(
|
||||
probes.scan_staged_combined_calls(),
|
||||
0,
|
||||
"append-only merge must stream the append (stage_append_stream), not materialize the \
|
||||
whole delta into one batch via scan_staged_combined; did {}",
|
||||
probes.scan_staged_combined_calls(),
|
||||
);
|
||||
}
|
||||
|
||||
/// Functional correctness: a fast-forward merge of an append-only branch leaves
|
||||
/// main equal to the source branch. Independent of the cost-budget gate.
|
||||
#[tokio::test]
|
||||
async fn fast_forward_merge_yields_source_state() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let main = init_and_load(&dir).await;
|
||||
let base_count = count_rows(&main, "node:Person").await;
|
||||
|
||||
main.branch_create("feature").await.unwrap();
|
||||
let mut feature = Omnigraph::open(uri).await.unwrap();
|
||||
append_new_persons(&mut feature, "feature", 5).await;
|
||||
let source_count = count_rows_branch(&feature, "feature", "node:Person").await;
|
||||
assert_eq!(source_count, base_count + 5);
|
||||
|
||||
let outcome = main.branch_merge("feature", "main").await.unwrap();
|
||||
assert_eq!(outcome, MergeOutcome::FastForward);
|
||||
|
||||
// main now equals source: the 5 new persons are present, the base rows kept.
|
||||
assert_eq!(count_rows(&main, "node:Person").await, source_count);
|
||||
let names = collect_column_strings(&read_table(&main, "node:Person").await, "name");
|
||||
for i in 0..5 {
|
||||
assert!(
|
||||
names.contains(&format!("ff_new_{i}")),
|
||||
"merged main missing new person ff_new_{i}; have {names:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const VEC_SCHEMA: &str = "node Chunk {\n slug: String @key\n embedding: Vector(8) @index\n}\n";
|
||||
|
||||
/// Commit 6 behavior: the fast-forward adopt path does NOT build indices inline
|
||||
/// — index coverage is reconciler-owned (`optimize`/`ensure_indices`). A merge
|
||||
/// into a freshly-initialized (unindexed) vector table must perform **0** inline
|
||||
/// vector-index (IVF) builds; reads stay correct via brute-force until
|
||||
/// `optimize` covers the new rows. RED before the change (the publish path built
|
||||
/// the IVF inline); GREEN after.
|
||||
#[tokio::test]
|
||||
async fn fast_forward_merge_defers_vector_index_to_reconciler() {
|
||||
use omnigraph::loader::LoadMode;
|
||||
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
// Empty Chunk table → no vector index at init (KMeans can't train on 0 rows).
|
||||
let main = Omnigraph::init(uri, VEC_SCHEMA).await.unwrap();
|
||||
main.branch_create("feature").await.unwrap();
|
||||
|
||||
// Load embedding-bearing chunks onto the branch. The branch builds its own
|
||||
// index here (outside the probe scope) — irrelevant to the merge's cost.
|
||||
let mut rows = String::new();
|
||||
for i in 0..24 {
|
||||
let v: Vec<String> = (0..8).map(|j| format!("{}.0", (i + j) % 5)).collect();
|
||||
rows.push_str(&format!(
|
||||
"{{\"type\":\"Chunk\",\"data\":{{\"slug\":\"c{i}\",\"embedding\":[{}]}}}}\n",
|
||||
v.join(",")
|
||||
));
|
||||
}
|
||||
let feature = Omnigraph::open(uri).await.unwrap();
|
||||
feature.load("feature", &rows, LoadMode::Merge).await.unwrap();
|
||||
|
||||
// Merge, counting inline vector-index builds the publish path performs.
|
||||
let probes = MergeWriteProbes::default();
|
||||
let outcome = with_merge_write_probes(probes.clone(), main.branch_merge("feature", "main"))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(outcome, MergeOutcome::FastForward);
|
||||
|
||||
assert_eq!(
|
||||
probes.create_vector_index_calls(),
|
||||
0,
|
||||
"fast-forward adopt merge must defer vector-index coverage to the reconciler \
|
||||
(0 inline IVF builds); did {}",
|
||||
probes.create_vector_index_calls(),
|
||||
);
|
||||
// Correctness: the rows landed on main (reads brute-force until optimize).
|
||||
assert_eq!(count_rows(&main, "node:Chunk").await, 24);
|
||||
}
|
||||
|
||||
const BLOB_SCHEMA: &str = "node Document {\n title: String @key\n content: Blob?\n note: String?\n}\n";
|
||||
const BLOB_INSERT: &str = r#"
|
||||
query insert_doc($title: String, $content: Blob, $note: String) {
|
||||
insert Document { title: $title, content: $content, note: $note }
|
||||
}
|
||||
"#;
|
||||
|
||||
/// A fast-forward merge of a branch with a Blob column exercises the blob
|
||||
/// fallback in `scan_stream_for_rewrite` (materialize → re-stream) through the
|
||||
/// streaming append. main is NOT mutated, so Document is `AdoptWithDelta` (the
|
||||
/// adopt/append path), not `RewriteMerged`. The blob bytes must survive the
|
||||
/// materialize → stream → append round-trip.
|
||||
#[tokio::test]
|
||||
async fn fast_forward_merge_streams_blob_columns() {
|
||||
use omnigraph::loader::{LoadMode, load_jsonl};
|
||||
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let mut main = Omnigraph::init(uri, BLOB_SCHEMA).await.unwrap();
|
||||
load_jsonl(
|
||||
&mut main,
|
||||
"{\"type\":\"Document\",\"data\":{\"title\":\"seed\",\"content\":\"base64:U2VlZA==\",\"note\":\"base\"}}",
|
||||
LoadMode::Overwrite,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
main.branch_create("feature").await.unwrap();
|
||||
|
||||
// Only the branch is mutated → fast-forward → adopt/append path.
|
||||
let mut feature = Omnigraph::open(uri).await.unwrap();
|
||||
mutate_branch(
|
||||
&mut feature,
|
||||
"feature",
|
||||
BLOB_INSERT,
|
||||
"insert_doc",
|
||||
¶ms(&[
|
||||
("$title", "readme"),
|
||||
("$content", "base64:SGVsbG8="),
|
||||
("$note", "branch"),
|
||||
]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let outcome = main.branch_merge("feature", "main").await.unwrap();
|
||||
assert_eq!(outcome, MergeOutcome::FastForward);
|
||||
|
||||
// The appended blob row's bytes survive the streaming append; the base row stays intact.
|
||||
let readme = main.read_blob("Document", "readme", "content").await.unwrap();
|
||||
assert_eq!(&readme.read().await.unwrap()[..], b"Hello");
|
||||
let seed = main.read_blob("Document", "seed", "content").await.unwrap();
|
||||
assert_eq!(&seed.read().await.unwrap()[..], b"Seed");
|
||||
}
|
||||
|
|
@ -104,8 +104,10 @@ async fn recovery_refuses_unknown_schema_version_on_open() {
|
|||
let _db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
||||
drop(_db);
|
||||
|
||||
// A sidecar from a hypothetical future writer; the older binary must
|
||||
// refuse to interpret it (resolved-decisions §3 in the design doc).
|
||||
// A sidecar from a hypothetical future writer (version NEWER than this
|
||||
// binary's max); the reader must refuse to interpret it — it cannot guess
|
||||
// semantics a newer writer baked in. (Older versions are accepted and
|
||||
// interpreted with their original semantics; see `parse_sidecar`.)
|
||||
let sidecar_json = r#"{
|
||||
"schema_version": 99,
|
||||
"operation_id": "01H000000000000000000000ZZ",
|
||||
|
|
@ -120,11 +122,11 @@ async fn recovery_refuses_unknown_schema_version_on_open() {
|
|||
let err = Omnigraph::open(uri)
|
||||
.await
|
||||
.err()
|
||||
.expect("expected open to fail because of unknown sidecar schema_version");
|
||||
.expect("expected open to fail because of a future sidecar schema_version");
|
||||
let msg = err.to_string();
|
||||
assert!(
|
||||
msg.contains("schema_version=99") && msg.contains("supports only schema_version=1"),
|
||||
"expected SidecarSchemaError mentioning the version mismatch, got: {}",
|
||||
msg.contains("schema_version=99") && msg.contains("newer than the maximum"),
|
||||
"expected a future-version refusal, got: {}",
|
||||
msg,
|
||||
);
|
||||
// Sidecar must still be on disk — we don't auto-delete unparseable files.
|
||||
|
|
|
|||
833
crates/omnigraph/tests/warm_read_cost.rs
Normal file
833
crates/omnigraph/tests/warm_read_cost.rs
Normal file
|
|
@ -0,0 +1,833 @@
|
|||
//! Cost-budget tests for the warm read path (Fix 1): a warm same-branch read
|
||||
//! must perform no manifest or commit-graph opens, measured with Lance's
|
||||
//! `IOTracker` at the object-store boundary (the LanceDB IO-counted-test
|
||||
//! pattern; see docs/dev/testing.md). Guards invariant 15 (read cost bounded by
|
||||
//! work, not history) for snapshot resolution, and invariant 6 (a warm reader
|
||||
//! still observes external commits).
|
||||
|
||||
mod helpers;
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
|
||||
use arrow_array::{Array, StringArray};
|
||||
use lance::io::WrappingObjectStore;
|
||||
use lance_io::utils::tracking_store::IOTracker;
|
||||
use omnigraph::db::{Omnigraph, ReadTarget};
|
||||
use omnigraph::instrumentation::{QueryIoProbes, with_query_io_probes};
|
||||
use omnigraph_compiler::result::QueryResult;
|
||||
|
||||
use helpers::{
|
||||
MUTATION_QUERIES, TEST_QUERIES, commit_many, count_rows, init_and_load, mixed_params,
|
||||
mutate_branch, mutate_main, params,
|
||||
};
|
||||
|
||||
/// IO probes plus the tracker handles to read `read_iops` after the query.
|
||||
/// Returns `(probes, manifest, commit_graph, table, probe_count)` — `table`
|
||||
/// counts per-table data opens (the cache-miss path), so a cost test can assert
|
||||
/// N opens on a cold read and 0 on a warm repeat (Fix 3).
|
||||
fn probes() -> (
|
||||
QueryIoProbes,
|
||||
IOTracker,
|
||||
IOTracker,
|
||||
IOTracker,
|
||||
Arc<AtomicU64>,
|
||||
) {
|
||||
let manifest = IOTracker::default();
|
||||
let commit_graph = IOTracker::default();
|
||||
let table = IOTracker::default();
|
||||
let probe_count = Arc::new(AtomicU64::new(0));
|
||||
let probes = QueryIoProbes {
|
||||
manifest_wrapper: Some(Arc::new(manifest.clone()) as Arc<dyn WrappingObjectStore>),
|
||||
commit_graph_wrapper: Some(Arc::new(commit_graph.clone()) as Arc<dyn WrappingObjectStore>),
|
||||
table_wrapper: Some(Arc::new(table.clone()) as Arc<dyn WrappingObjectStore>),
|
||||
probe_count: Arc::clone(&probe_count),
|
||||
};
|
||||
(probes, manifest, commit_graph, table, probe_count)
|
||||
}
|
||||
|
||||
fn first_column_strings(result: &QueryResult) -> Vec<String> {
|
||||
if result.num_rows() == 0 {
|
||||
return Vec::new();
|
||||
}
|
||||
let batch = result.concat_batches().unwrap();
|
||||
let values = batch
|
||||
.column(0)
|
||||
.as_any()
|
||||
.downcast_ref::<StringArray>()
|
||||
.unwrap();
|
||||
let mut out = (0..values.len())
|
||||
.filter(|&row| !values.is_null(row))
|
||||
.map(|row| values.value(row).to_string())
|
||||
.collect::<Vec<_>>();
|
||||
out.sort();
|
||||
out
|
||||
}
|
||||
|
||||
/// A warm same-branch read must not re-open or scan `__manifest`, and must not
|
||||
/// open the commit graph, even at commit-history depth. The only manifest IO is
|
||||
/// the version probe (counted by invocation). Fails before Fix 1, where the read
|
||||
/// path re-opens a fresh coordinator and scans both internal tables.
|
||||
#[tokio::test]
|
||||
async fn warm_same_branch_read_does_no_resolution_opens() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let mut db = init_and_load(&dir).await;
|
||||
// Deep history: warm-read resolution cost must be flat in commit count.
|
||||
commit_many(&mut db, 20).await;
|
||||
|
||||
let (probes_in, manifest, commit_graph, _table, probe_count) = probes();
|
||||
with_query_io_probes(
|
||||
probes_in,
|
||||
db.query(
|
||||
ReadTarget::branch("main"),
|
||||
TEST_QUERIES,
|
||||
"total_people",
|
||||
¶ms(&[]),
|
||||
),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// A warm same-branch read opens nothing from the internal tables, even at
|
||||
// commit-history depth. Fix 1 reuses the coordinator (no re-open: 0
|
||||
// commit-graph opens, exactly 1 cheap version probe). Fix 2 opens the touched
|
||||
// data table by location+version instead of via the namespace, so the
|
||||
// per-table __manifest scan is gone too. Pre-fix, each of these is a deep scan
|
||||
// of an internal table that grows with commit count.
|
||||
assert_eq!(
|
||||
manifest.stats().read_iops,
|
||||
0,
|
||||
"warm same-branch read must not scan __manifest (resolution or per-table)"
|
||||
);
|
||||
assert_eq!(
|
||||
commit_graph.stats().read_iops,
|
||||
0,
|
||||
"warm same-branch read must not open the commit graph (no coordinator re-open)"
|
||||
);
|
||||
assert_eq!(
|
||||
probe_count.load(Ordering::Relaxed),
|
||||
1,
|
||||
"warm same-branch read performs exactly one version probe"
|
||||
);
|
||||
}
|
||||
|
||||
/// A multi-table query (a traversal touching Person, WorksAt, and Company) scans
|
||||
/// `__manifest` zero times. Fix 2 opens every touched table by location+version,
|
||||
/// so manifest IO no longer scales with the number of tables — pre-Fix-2 each
|
||||
/// table cost two full `__manifest` scans (`describe_table` +
|
||||
/// `describe_table_version`), which is the "2 tables = 2×" multi-table tax.
|
||||
#[tokio::test]
|
||||
async fn multi_table_query_does_no_manifest_scans() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let db = init_and_load(&dir).await;
|
||||
|
||||
let (probes_in, manifest, _commit_graph, _table, _probe) = probes();
|
||||
with_query_io_probes(
|
||||
probes_in,
|
||||
db.query(
|
||||
ReadTarget::branch("main"),
|
||||
TEST_QUERIES,
|
||||
"age_stats",
|
||||
¶ms(&[]),
|
||||
),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
manifest.stats().read_iops,
|
||||
0,
|
||||
"a multi-table read must not scan __manifest once per touched table"
|
||||
);
|
||||
}
|
||||
|
||||
/// A warm reader must observe a commit made through another handle (invariant 6,
|
||||
/// strong consistency): the version probe detects the advance and refreshes.
|
||||
/// Passes before and after Fix 1 (today's cold re-read is always fresh); a
|
||||
/// regression guard so the warm-reuse fast path never serves a stale read.
|
||||
#[tokio::test]
|
||||
async fn external_commit_observed_by_warm_reader() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let mut writer = init_and_load(&dir).await;
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let reader = Omnigraph::open(uri).await.unwrap();
|
||||
|
||||
let before = count_rows(&reader, "node:Person").await;
|
||||
|
||||
// External commit through a separate handle.
|
||||
mutate_main(
|
||||
&mut writer,
|
||||
MUTATION_QUERIES,
|
||||
"insert_person",
|
||||
&mixed_params(&[("$name", "ext_new_person")], &[("$age", 41)]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let after = count_rows(&reader, "node:Person").await;
|
||||
assert_eq!(
|
||||
after,
|
||||
before + 1,
|
||||
"warm reader must observe an external commit"
|
||||
);
|
||||
}
|
||||
|
||||
// ── Finding A: drop the redundant per-query schema validation ─────────────────
//
// Every query runs `ensure_schema_state_valid`. It ran TWICE per query (once in
// query()/run_query_at, once again in resolved_target/snapshot_at_version), each
// reading 3 contract files + 2 existence probes (~10 storage ops). Finding A
// removes the redundant caller, so validation runs once. (A cheaper source-only
// probe was rejected: the codebase requires per-call detection of IR/state drift
// on long-lived handles -- lifecycle::long_lived_handle_rejects_schema_ir_drift
// -- which a source-only compare would miss.) Measured at the StorageAdapter
// boundary with the counting decorator.
|
||||
|
||||
/// A warm query validates the schema contract exactly once (3 reads + 2 exists),
|
||||
/// not twice. Fails before finding A, where query() and resolved_target each
|
||||
/// validate (6 read_text + 4 exists).
|
||||
#[tokio::test]
|
||||
async fn warm_query_validates_schema_contract_once() {
|
||||
use omnigraph::instrumentation::CountingStorageAdapter;
|
||||
use omnigraph::storage::storage_for_uri;
|
||||
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
// Init through the standard path, then re-open behind a counting adapter to
|
||||
// measure the per-query schema-contract storage reads (delta around the
|
||||
// query excludes open-time reads).
|
||||
let _ = init_and_load(&dir).await;
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let (adapter, counts) = CountingStorageAdapter::new(storage_for_uri(uri).unwrap());
|
||||
let db = Omnigraph::open_with_storage(uri, adapter).await.unwrap();
|
||||
|
||||
let before_read_text = counts.read_text();
|
||||
let before_exists = counts.exists();
|
||||
db.query(
|
||||
ReadTarget::branch("main"),
|
||||
TEST_QUERIES,
|
||||
"total_people",
|
||||
¶ms(&[]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
counts.read_text() - before_read_text,
|
||||
3,
|
||||
"warm query should validate the schema contract once (3 reads), not twice"
|
||||
);
|
||||
assert_eq!(
|
||||
counts.exists() - before_exists,
|
||||
2,
|
||||
"warm query should probe contract-file existence once (2 probes), not twice"
|
||||
);
|
||||
}
|
||||
|
||||
/// The cheap source-compare must still detect that the on-disk schema source has
|
||||
/// drifted from the validated contract and fail the read, rather than serving the
|
||||
/// stale-but-cached schema. Passes before and after finding A (regression guard
|
||||
/// for the documented weaker per-query guard).
|
||||
#[tokio::test]
|
||||
async fn schema_source_drift_is_caught_on_read() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let _writer = init_and_load(&dir).await;
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let reader = Omnigraph::open(uri).await.unwrap();
|
||||
|
||||
// Drift the on-disk schema source behind the reader's back.
|
||||
std::fs::write(
|
||||
dir.path().join("_schema.pg"),
|
||||
"this is not a valid schema {{{",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let result = reader
|
||||
.query(
|
||||
ReadTarget::branch("main"),
|
||||
TEST_QUERIES,
|
||||
"total_people",
|
||||
¶ms(&[]),
|
||||
)
|
||||
.await;
|
||||
assert!(
|
||||
result.is_err(),
|
||||
"a query must fail when the on-disk schema source has drifted from the validated contract"
|
||||
);
|
||||
}
|
||||
|
||||
// ── Morphological-matrix coverage: branch-warm + stale-refresh cells ──────────
|
||||
|
||||
/// A WARM read on a non-main branch (handle synced to that branch) also scans
|
||||
/// `__manifest` zero times. Exercises Fix 2's branch-owned-table open
|
||||
/// (`{table_path}/tree/{branch}` + with_version) on Fix 1's warm path — the cell
|
||||
/// that regressed when the open used `with_branch` against the base.
|
||||
#[tokio::test]
|
||||
async fn warm_branch_read_does_no_manifest_scans() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let db = init_and_load(&dir).await;
|
||||
db.branch_create("feature").await.unwrap();
|
||||
// Write to the branch so its tables are branch-owned (under tree/feature).
|
||||
db.mutate(
|
||||
"feature",
|
||||
MUTATION_QUERIES,
|
||||
"insert_person",
|
||||
&mixed_params(&[("$name", "Eve")], &[("$age", 22)]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
// Bind the handle's coordinator to the branch so reads of it take the warm path.
|
||||
db.sync_branch("feature").await.unwrap();
|
||||
|
||||
let (probes_in, manifest, commit_graph, _table, probe_count) = probes();
|
||||
with_query_io_probes(
|
||||
probes_in,
|
||||
db.query(
|
||||
ReadTarget::branch("feature"),
|
||||
TEST_QUERIES,
|
||||
"total_people",
|
||||
¶ms(&[]),
|
||||
),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
manifest.stats().read_iops,
|
||||
0,
|
||||
"warm branch read must not scan __manifest (branch-owned table opened by location)"
|
||||
);
|
||||
assert_eq!(
|
||||
commit_graph.stats().read_iops,
|
||||
0,
|
||||
"warm branch read must not open the commit graph"
|
||||
);
|
||||
assert_eq!(
|
||||
probe_count.load(Ordering::Relaxed),
|
||||
1,
|
||||
"warm branch read performs exactly one version probe"
|
||||
);
|
||||
}
|
||||
|
||||
/// A non-main branch can be deleted and recreated at the same Lance version
|
||||
/// number. Warm branch freshness therefore needs the manifest incarnation, not
|
||||
/// just `version()`, or a reader pinned to the old incarnation can serve stale
|
||||
/// rows from the deleted branch. This is the correctness guard for Phase 6A.
|
||||
#[tokio::test]
|
||||
async fn warm_read_on_recreated_branch_observes_new_incarnation() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let mut writer = init_and_load(&dir).await;
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let reader = Omnigraph::open(uri).await.unwrap();
|
||||
|
||||
writer.branch_create("feature").await.unwrap();
|
||||
mutate_branch(
|
||||
&mut writer,
|
||||
"feature",
|
||||
MUTATION_QUERIES,
|
||||
"insert_person",
|
||||
&mixed_params(&[("$name", "Eve")], &[("$age", 22)]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
reader.sync_branch("feature").await.unwrap();
|
||||
let old_feature = reader
|
||||
.query(
|
||||
ReadTarget::branch("feature"),
|
||||
TEST_QUERIES,
|
||||
"get_person",
|
||||
¶ms(&[("$name", "Eve")]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
old_feature.num_rows(),
|
||||
1,
|
||||
"test setup: old feature branch must contain Eve"
|
||||
);
|
||||
let old_version = reader
|
||||
.version_of(ReadTarget::branch("feature"))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
writer.branch_delete("feature").await.unwrap();
|
||||
mutate_main(
|
||||
&mut writer,
|
||||
MUTATION_QUERIES,
|
||||
"insert_person",
|
||||
&mixed_params(&[("$name", "MainOnly")], &[("$age", 44)]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
writer.branch_create("feature").await.unwrap();
|
||||
let new_version = writer
|
||||
.version_of(ReadTarget::branch("feature"))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
new_version, old_version,
|
||||
"test setup must exercise branch incarnation reuse at one Lance version"
|
||||
);
|
||||
|
||||
let (probes_in, manifest, commit_graph, _table, probe_count) = probes();
|
||||
let new_feature = with_query_io_probes(
|
||||
probes_in,
|
||||
reader.query(
|
||||
ReadTarget::branch("feature"),
|
||||
TEST_QUERIES,
|
||||
"get_person",
|
||||
¶ms(&[("$name", "MainOnly")]),
|
||||
),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
new_feature.num_rows(),
|
||||
1,
|
||||
"warm reader must refresh to the recreated branch incarnation"
|
||||
);
|
||||
assert!(
|
||||
manifest.stats().read_iops > 0,
|
||||
"recreated branch must re-read the manifest after the incarnation probe"
|
||||
);
|
||||
assert_eq!(
|
||||
commit_graph.stats().read_iops,
|
||||
0,
|
||||
"same-branch incarnation refresh must be manifest-only"
|
||||
);
|
||||
assert_eq!(
|
||||
probe_count.load(Ordering::Relaxed),
|
||||
2,
|
||||
"stale same-branch read probes once under the read lock and once under the write lock"
|
||||
);
|
||||
}
|
||||
|
||||
/// Recreated non-main branches can reuse the same branch-owned table version.
|
||||
/// This forces the held table-handle cache to distinguish incarnations by the
|
||||
/// per-table Lance manifest e_tag, not just `(table_path, branch, version)`.
|
||||
#[tokio::test]
|
||||
async fn recreated_branch_owned_table_handle_uses_table_etag() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let mut writer = init_and_load(&dir).await;
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let reader = Omnigraph::open(uri).await.unwrap();
|
||||
|
||||
writer.branch_create("feature").await.unwrap();
|
||||
mutate_branch(
|
||||
&mut writer,
|
||||
"feature",
|
||||
MUTATION_QUERIES,
|
||||
"insert_person",
|
||||
&mixed_params(&[("$name", "OldOnly")], &[("$age", 31)]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
reader.sync_branch("feature").await.unwrap();
|
||||
let old_person = reader
|
||||
.query(
|
||||
ReadTarget::branch("feature"),
|
||||
TEST_QUERIES,
|
||||
"get_person",
|
||||
¶ms(&[("$name", "OldOnly")]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(old_person.num_rows(), 1);
|
||||
let old_entry = reader
|
||||
.snapshot_of(ReadTarget::branch("feature"))
|
||||
.await
|
||||
.unwrap()
|
||||
.entry("node:Person")
|
||||
.unwrap()
|
||||
.clone();
|
||||
assert_eq!(old_entry.table_branch.as_deref(), Some("feature"));
|
||||
|
||||
writer.branch_delete("feature").await.unwrap();
|
||||
writer.branch_create("feature").await.unwrap();
|
||||
mutate_branch(
|
||||
&mut writer,
|
||||
"feature",
|
||||
MUTATION_QUERIES,
|
||||
"insert_person",
|
||||
&mixed_params(&[("$name", "NewOnly")], &[("$age", 32)]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let new_entry = writer
|
||||
.snapshot_of(ReadTarget::branch("feature"))
|
||||
.await
|
||||
.unwrap()
|
||||
.entry("node:Person")
|
||||
.unwrap()
|
||||
.clone();
|
||||
assert_eq!(new_entry.table_path, old_entry.table_path);
|
||||
assert_eq!(new_entry.table_branch, old_entry.table_branch);
|
||||
assert_eq!(
|
||||
new_entry.table_version, old_entry.table_version,
|
||||
"test setup must force table handle identity to differ only by e_tag"
|
||||
);
|
||||
|
||||
let (probes_in, manifest, commit_graph, table, probe_count) = probes();
|
||||
let new_person = with_query_io_probes(
|
||||
probes_in,
|
||||
reader.query(
|
||||
ReadTarget::branch("feature"),
|
||||
TEST_QUERIES,
|
||||
"get_person",
|
||||
¶ms(&[("$name", "NewOnly")]),
|
||||
),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
new_person.num_rows(),
|
||||
1,
|
||||
"warm reader must open the recreated branch-owned table incarnation"
|
||||
);
|
||||
assert!(
|
||||
table.stats().read_iops > 0,
|
||||
"table e_tag must force a held-handle cache miss for the recreated table"
|
||||
);
|
||||
assert!(
|
||||
manifest.stats().read_iops > 0,
|
||||
"recreated branch must refresh the manifest"
|
||||
);
|
||||
assert_eq!(
|
||||
commit_graph.stats().read_iops,
|
||||
0,
|
||||
"same-branch table-incarnation refresh must be manifest-only"
|
||||
);
|
||||
assert_eq!(
|
||||
probe_count.load(Ordering::Relaxed),
|
||||
2,
|
||||
"stale same-branch read probes once under each lock"
|
||||
);
|
||||
|
||||
let stale_old_person = reader
|
||||
.query(
|
||||
ReadTarget::branch("feature"),
|
||||
TEST_QUERIES,
|
||||
"get_person",
|
||||
¶ms(&[("$name", "OldOnly")]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
stale_old_person.num_rows(),
|
||||
0,
|
||||
"old branch-owned table contents must not leak after branch recreation"
|
||||
);
|
||||
}
|
||||
|
||||
/// The graph-index cache is keyed by synthetic snapshot id plus edge-table
|
||||
/// state. A recreated branch can reuse the same edge table `(branch, version)`,
|
||||
/// so the synthetic snapshot id must carry the manifest incarnation or traversal
|
||||
/// can reuse stale topology.
|
||||
#[tokio::test]
|
||||
async fn recreated_branch_traversal_uses_graph_index_incarnation() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let mut writer = init_and_load(&dir).await;
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let reader = Omnigraph::open(uri).await.unwrap();
|
||||
|
||||
writer.branch_create("feature").await.unwrap();
|
||||
mutate_branch(
|
||||
&mut writer,
|
||||
"feature",
|
||||
MUTATION_QUERIES,
|
||||
"insert_person_and_friend",
|
||||
&mixed_params(
|
||||
&[("$name", "OldWalker"), ("$friend", "Alice")],
|
||||
&[("$age", 41)],
|
||||
),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
reader.sync_branch("feature").await.unwrap();
|
||||
let old_friends = reader
|
||||
.query(
|
||||
ReadTarget::branch("feature"),
|
||||
TEST_QUERIES,
|
||||
"friends_of",
|
||||
¶ms(&[("$name", "OldWalker")]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(first_column_strings(&old_friends), vec!["Alice"]);
|
||||
let old_edge_entry = reader
|
||||
.snapshot_of(ReadTarget::branch("feature"))
|
||||
.await
|
||||
.unwrap()
|
||||
.entry("edge:Knows")
|
||||
.unwrap()
|
||||
.clone();
|
||||
assert_eq!(old_edge_entry.table_branch.as_deref(), Some("feature"));
|
||||
|
||||
writer.branch_delete("feature").await.unwrap();
|
||||
writer.branch_create("feature").await.unwrap();
|
||||
mutate_branch(
|
||||
&mut writer,
|
||||
"feature",
|
||||
MUTATION_QUERIES,
|
||||
"insert_person_and_friend",
|
||||
&mixed_params(
|
||||
&[("$name", "NewWalker"), ("$friend", "Bob")],
|
||||
&[("$age", 42)],
|
||||
),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let new_edge_entry = writer
|
||||
.snapshot_of(ReadTarget::branch("feature"))
|
||||
.await
|
||||
.unwrap()
|
||||
.entry("edge:Knows")
|
||||
.unwrap()
|
||||
.clone();
|
||||
assert_eq!(new_edge_entry.table_path, old_edge_entry.table_path);
|
||||
assert_eq!(new_edge_entry.table_branch, old_edge_entry.table_branch);
|
||||
assert_eq!(
|
||||
new_edge_entry.table_version, old_edge_entry.table_version,
|
||||
"test setup must force graph-index identity to differ only by snapshot incarnation"
|
||||
);
|
||||
|
||||
let (probes_in, manifest, commit_graph, _table, probe_count) = probes();
|
||||
let new_friends = with_query_io_probes(
|
||||
probes_in,
|
||||
reader.query(
|
||||
ReadTarget::branch("feature"),
|
||||
TEST_QUERIES,
|
||||
"friends_of",
|
||||
¶ms(&[("$name", "NewWalker")]),
|
||||
),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
first_column_strings(&new_friends),
|
||||
vec!["Bob"],
|
||||
"traversal must use the recreated branch's topology, not stale cached graph index"
|
||||
);
|
||||
assert!(
|
||||
manifest.stats().read_iops > 0,
|
||||
"recreated branch traversal must refresh the manifest"
|
||||
);
|
||||
assert_eq!(
|
||||
commit_graph.stats().read_iops,
|
||||
0,
|
||||
"same-branch traversal incarnation refresh must be manifest-only"
|
||||
);
|
||||
assert_eq!(
|
||||
probe_count.load(Ordering::Relaxed),
|
||||
2,
|
||||
"stale same-branch read probes once under each lock"
|
||||
);
|
||||
|
||||
let stale_old_friends = reader
|
||||
.query(
|
||||
ReadTarget::branch("feature"),
|
||||
TEST_QUERIES,
|
||||
"friends_of",
|
||||
¶ms(&[("$name", "OldWalker")]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
first_column_strings(&stale_old_friends),
|
||||
Vec::<String>::new(),
|
||||
"old branch topology must not leak after branch recreation"
|
||||
);
|
||||
}
|
||||
|
||||
/// When an external writer advances the manifest, the reader's next query takes
|
||||
/// the STALE path: it re-reads the manifest (read_iops > 0) but never scans the
|
||||
/// commit graph (`refresh_manifest_only`), unlike a full coordinator refresh.
|
||||
/// Pins Fix 1's manifest-only refresh.
|
||||
#[tokio::test]
|
||||
async fn stale_read_refreshes_manifest_only() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let mut writer = init_and_load(&dir).await;
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let reader = Omnigraph::open(uri).await.unwrap();
|
||||
// Establish the reader's warm coordinator.
|
||||
reader
|
||||
.query(
|
||||
ReadTarget::branch("main"),
|
||||
TEST_QUERIES,
|
||||
"total_people",
|
||||
¶ms(&[]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// External commit advances the on-disk manifest behind the reader.
|
||||
mutate_main(
|
||||
&mut writer,
|
||||
MUTATION_QUERIES,
|
||||
"insert_person",
|
||||
&mixed_params(&[("$name", "Frank")], &[("$age", 33)]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let (probes_in, manifest, commit_graph, _table, probe_count) = probes();
|
||||
with_query_io_probes(
|
||||
probes_in,
|
||||
reader.query(
|
||||
ReadTarget::branch("main"),
|
||||
TEST_QUERIES,
|
||||
"total_people",
|
||||
¶ms(&[]),
|
||||
),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert!(
|
||||
manifest.stats().read_iops > 0,
|
||||
"stale read must re-read the manifest"
|
||||
);
|
||||
assert_eq!(
|
||||
commit_graph.stats().read_iops,
|
||||
0,
|
||||
"stale refresh must be manifest-only (no commit-graph scan)"
|
||||
);
|
||||
assert_eq!(
|
||||
probe_count.load(Ordering::Relaxed),
|
||||
2,
|
||||
"stale same-branch read probes once under the read lock and once under the write lock"
|
||||
);
|
||||
}
|
||||
|
||||
// ── Fix 3: held-handle cache — warm repeat reads stop re-opening tables ────────
//
// After Fix 1+2 a warm same-branch read still re-opened every touched table per
// query (the "never warms up" residual). Fix 3 holds the open `Dataset` per
// `(table, branch, version, e_tag)` (the version-keyed analogue of LanceDB's
// `DatasetConsistencyWrapper`) and shares one `Session` per graph, so a second
// identical warm read reuses the handle with zero table opens.
|
||||
|
||||
/// Headline: a second identical warm same-branch read does ZERO table opens
|
||||
/// (the cold first read opens; the warm repeat serves from the held-handle
|
||||
/// cache). Fails before Fix 3, where every read re-opens the table.
|
||||
#[tokio::test]
|
||||
async fn repeat_warm_read_reuses_table_handles() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let mut db = init_and_load(&dir).await;
|
||||
// Deep history: the win must hold regardless of commit count.
|
||||
commit_many(&mut db, 10).await;
|
||||
|
||||
// Cold first read: opens the touched table.
|
||||
let (p1, _m1, _c1, table1, _pr1) = probes();
|
||||
with_query_io_probes(
|
||||
p1,
|
||||
db.query(
|
||||
ReadTarget::branch("main"),
|
||||
TEST_QUERIES,
|
||||
"total_people",
|
||||
¶ms(&[]),
|
||||
),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(
|
||||
table1.stats().read_iops > 0,
|
||||
"the cold first read must open the table"
|
||||
);
|
||||
|
||||
// Warm repeat: the held handle is reused, so no open happens through this
|
||||
// query's table wrapper.
|
||||
let (p2, manifest2, commit_graph2, table2, probe2) = probes();
|
||||
with_query_io_probes(
|
||||
p2,
|
||||
db.query(
|
||||
ReadTarget::branch("main"),
|
||||
TEST_QUERIES,
|
||||
"total_people",
|
||||
¶ms(&[]),
|
||||
),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
table2.stats().read_iops,
|
||||
0,
|
||||
"a warm repeat read must reuse the held handle (0 table opens)"
|
||||
);
|
||||
assert_eq!(
|
||||
manifest2.stats().read_iops,
|
||||
0,
|
||||
"warm repeat read: 0 manifest opens"
|
||||
);
|
||||
assert_eq!(
|
||||
commit_graph2.stats().read_iops,
|
||||
0,
|
||||
"warm repeat read: 0 commit-graph opens"
|
||||
);
|
||||
assert_eq!(
|
||||
probe2.load(Ordering::Relaxed),
|
||||
1,
|
||||
"warm repeat read: exactly one version probe"
|
||||
);
|
||||
}
|
||||
|
||||
/// A write advances the table's version, so the next read misses the
|
||||
/// version-keyed cache and re-opens — never serving a stale handle (invariant 6
|
||||
/// for the cached path). Passes with or without the cache; a correctness guard
|
||||
/// that the cache cannot serve pre-write data.
|
||||
#[tokio::test]
|
||||
async fn write_invalidates_table_cache_for_changed_table() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let mut db = init_and_load(&dir).await;
|
||||
|
||||
let before = count_rows(&db, "node:Person").await;
|
||||
|
||||
// Warm the cache for Person.
|
||||
db.query(
|
||||
ReadTarget::branch("main"),
|
||||
TEST_QUERIES,
|
||||
"total_people",
|
||||
¶ms(&[]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Write Person: its version advances, so the cached (table, branch, version)
|
||||
// key is now superseded.
|
||||
mutate_main(
|
||||
&mut db,
|
||||
MUTATION_QUERIES,
|
||||
"insert_person",
|
||||
&mixed_params(&[("$name", "cache_miss_one")], &[("$age", 50)]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// The next read re-opens Person at the new version (cache miss).
|
||||
let (p, _m, _c, table, _pr) = probes();
|
||||
with_query_io_probes(
|
||||
p,
|
||||
db.query(
|
||||
ReadTarget::branch("main"),
|
||||
TEST_QUERIES,
|
||||
"total_people",
|
||||
¶ms(&[]),
|
||||
),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(
|
||||
table.stats().read_iops > 0,
|
||||
"a read after a write to the table must re-open it (version-keyed miss)"
|
||||
);
|
||||
|
||||
let after = count_rows(&db, "node:Person").await;
|
||||
assert_eq!(
|
||||
after,
|
||||
before + 1,
|
||||
"the post-write read observes the new row (no stale handle served)"
|
||||
);
|
||||
}
|
||||
|
|
@ -1646,3 +1646,70 @@ async fn branch_cascade_delete_forks_node_and_edges_under_held_queues() {
|
|||
"main must be untouched by the branch delete"
|
||||
);
|
||||
}
|
||||
|
||||
// #283: a mutation predicate (`where camelField = ...`) on a camelCase column
// must execute, not fail at the Lance scan with "No field named ...". Covers
// both `update` (committed scan via scan_with_pending) and `delete`
// (delete_where), which share the same emitted SQL filter string.

/// Schema fixture: a single `Doc` node type whose `repoName` column is
/// camelCase, which is the column the #283 tests filter on.
const CC_SCHEMA: &str = r#"
node Doc {
slug: String @key
repoName: String @index
status: String?
}
"#;

/// JSONL fixture: two `Doc` rows (`d1`/`acme`, `d2`/`globex`), one per line.
const CC_DATA: &str = r#"{"type":"Doc","data":{"slug":"d1","repoName":"acme","status":"open"}}
{"type":"Doc","data":{"slug":"d2","repoName":"globex","status":"open"}}"#;
|
||||
|
||||
#[tokio::test]
|
||||
async fn camelcase_mutation_predicate_updates_and_deletes() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let mut db = Omnigraph::init(uri, CC_SCHEMA).await.unwrap();
|
||||
load_jsonl(&mut db, CC_DATA, LoadMode::Overwrite).await.unwrap();
|
||||
|
||||
let m = r#"
|
||||
query set_status($repo: String, $st: String) { update Doc set { status: $st } where repoName = $repo }
|
||||
query del($repo: String) { delete Doc where repoName = $repo }
|
||||
"#;
|
||||
|
||||
let upd = db
|
||||
.mutate("main", m, "set_status", ¶ms(&[("$repo", "acme"), ("$st", "closed")]))
|
||||
.await
|
||||
.expect("update with a camelCase predicate must execute");
|
||||
assert_eq!(upd.affected_nodes, 1, "exactly the acme Doc should update");
|
||||
|
||||
let del = db
|
||||
.mutate("main", m, "del", ¶ms(&[("$repo", "globex")]))
|
||||
.await
|
||||
.expect("delete with a camelCase predicate must execute");
|
||||
assert_eq!(del.affected_nodes, 1, "exactly the globex Doc should delete");
|
||||
|
||||
assert_eq!(count_rows(&db, "node:Doc").await, 1, "one Doc (acme) should remain");
|
||||
}
|
||||
|
||||
// #283 (pending side): a chained mutation whose 2nd op filters a camelCase
|
||||
// column must read op-1's staged rows through the pending DataFusion `MemTable`
|
||||
// (`SELECT … WHERE {filter}` via ctx.sql), which lowercases unquoted idents.
|
||||
// This is the path the single update/delete above does NOT exercise.
|
||||
#[tokio::test]
|
||||
async fn camelcase_chained_mutation_reads_pending_by_camelcase() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let mut db = Omnigraph::init(uri, CC_SCHEMA).await.unwrap();
|
||||
load_jsonl(&mut db, CC_DATA, LoadMode::Overwrite).await.unwrap();
|
||||
|
||||
// op-1 stages a status change to the acme Doc; op-2 re-filters the same
|
||||
// camelCase column, so it must match op-1's pending row.
|
||||
let m = r#"
|
||||
query chain($repo: String) {
|
||||
update Doc set { status: "stage1" } where repoName = $repo
|
||||
update Doc set { status: "stage2" } where repoName = $repo
|
||||
}
|
||||
"#;
|
||||
let r = db
|
||||
.mutate("main", m, "chain", ¶ms(&[("$repo", "acme")]))
|
||||
.await
|
||||
.expect("chained camelCase mutation must read the pending row, not fail at the MemTable SELECT");
|
||||
assert_eq!(r.affected_nodes, 2, "both ops should touch the acme Doc (read-your-writes)");
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue