mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-24 02:38:06 +02:00
Parallel per-type load writes + omnigraph optimize/cleanup CLI (#46)
* Parallel per-type load writes + omnigraph optimize/cleanup CLI
## MR-677.3 — parallel per-type load writes
The load path already groups records into one RecordBatch per type and
makes one Lance commit per table (loader::mod.rs:249-..), but those
commits ran sequentially. Wrap node and edge write loops in
`futures::stream::buffered(N)` against a new helper
`write_batches_concurrently`. Concurrency tunable via
`OMNIGRAPH_LOAD_CONCURRENCY` (default 8).
## MR-676 — `omnigraph optimize` and `omnigraph cleanup`
New CLI subcommands that walk every node + edge table in the repo:
- `omnigraph optimize <uri>` — runs Lance `compact_files` on each
table to merge small fragments into fewer larger ones.
- `omnigraph cleanup <uri> --keep N | --older-than 7d --confirm` —
runs Lance `cleanup_old_versions` to prune historical manifests +
unique fragments. Requires `--confirm` because it's destructive.
Supports both count-based and time-based retention (or both AND'd
together). Time uses chrono `DateTime<Utc>` (added as a workspace
dep, default-features off).
Both commands run their per-table loops in parallel (8-way bounded,
`OMNIGRAPH_MAINTENANCE_CONCURRENCY` env override). Smoke-tested
against the 114-table prod graph: optimize went 7m15s sequential
→ 1m28s parallel. cleanup --keep 1 removed 137 historical versions
across 114 tables in 1m57s without disrupting `/healthz` or query
responses.
Public API on `Omnigraph`:
pub async fn optimize(&mut self) -> Result<Vec<TableOptimizeStats>>
pub async fn cleanup(&mut self, opts: CleanupPolicyOptions)
-> Result<Vec<TableCleanupStats>>
All 10 existing loader tests still pass.
Closes MR-676.
Partially addresses MR-677: this covers MR-677.3 (parallel by type).
MR-677.1 applies to the `omnigraph embed` path rather than load, since load
doesn't call Gemini directly, and MR-677.2 was already in place.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
* chore: regenerate openapi.json
---------
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
parent
628bc2e607
commit
74eb5a5380
12 changed files with 498 additions and 31 deletions
|
|
@ -214,6 +214,39 @@ enum Command {
|
|||
#[command(subcommand)]
|
||||
command: PolicyCommand,
|
||||
},
|
||||
/// Compact small Lance fragments in every table of the repo
|
||||
Optimize {
|
||||
/// Repo URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
/// Remove old Lance versions from every table of the repo (destructive)
|
||||
Cleanup {
|
||||
/// Repo URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
/// Number of recent versions to keep per table. Either `--keep` or
|
||||
/// `--older-than` (or both) must be set.
|
||||
#[arg(long)]
|
||||
keep: Option<u32>,
|
||||
/// Only remove versions older than this duration. Accepts an integer with
/// a unit suffix `s`, `m`, `h`, `d` or `w` (e.g. `7d`, `24h`, `90m`); a bare
/// integer means seconds. At least one of --keep / --older-than.
|
||||
#[arg(long)]
|
||||
older_than: Option<String>,
|
||||
/// Required to actually run; without it, only the retention policy is printed and nothing is removed
|
||||
#[arg(long)]
|
||||
confirm: bool,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Subcommand)]
|
||||
|
|
@ -795,6 +828,31 @@ fn resolve_uri(
|
|||
config.resolve_target_uri(cli_uri, cli_target, config.cli_graph_name())
|
||||
}
|
||||
|
||||
/// Parse a Go-style compact duration: `7d`, `24h`, `30m`, `90s`, or a plain
|
||||
/// integer as seconds. Used by the `cleanup --older-than` flag.
|
||||
fn parse_duration_arg(s: &str) -> Result<std::time::Duration> {
|
||||
let s = s.trim();
|
||||
if s.is_empty() {
|
||||
bail!("duration is empty");
|
||||
}
|
||||
let (num_part, unit) = match s.char_indices().rev().find(|(_, c)| c.is_ascii_alphabetic()) {
|
||||
Some((i, _)) => (&s[..i + 1 - s[i..].chars().next().unwrap().len_utf8()], &s[i..]),
|
||||
None => (s, ""),
|
||||
};
|
||||
let n: u64 = num_part
|
||||
.parse()
|
||||
.map_err(|e| color_eyre::eyre::eyre!("invalid duration '{}': {}", s, e))?;
|
||||
let secs = match unit {
|
||||
"" | "s" => n,
|
||||
"m" => n * 60,
|
||||
"h" => n * 60 * 60,
|
||||
"d" => n * 60 * 60 * 24,
|
||||
"w" => n * 60 * 60 * 24 * 7,
|
||||
_ => bail!("unknown duration unit '{}'. Supported: s, m, h, d, w", unit),
|
||||
};
|
||||
Ok(std::time::Duration::from_secs(secs))
|
||||
}
|
||||
|
||||
fn resolve_local_uri(
|
||||
config: &OmnigraphConfig,
|
||||
cli_uri: Option<String>,
|
||||
|
|
@ -2465,6 +2523,111 @@ async fn main() -> Result<()> {
|
|||
print_policy_explain(&decision, &request);
|
||||
}
|
||||
},
|
||||
Command::Optimize {
|
||||
uri,
|
||||
target,
|
||||
config,
|
||||
json,
|
||||
} => {
|
||||
let config = load_cli_config(config.as_ref())?;
|
||||
let uri = resolve_uri(&config, uri, target.as_deref())?;
|
||||
let mut db = Omnigraph::open(&uri).await?;
|
||||
let stats = db.optimize().await?;
|
||||
if json {
|
||||
let value = serde_json::json!({
|
||||
"uri": uri,
|
||||
"tables": stats.iter().map(|s| serde_json::json!({
|
||||
"table_key": s.table_key,
|
||||
"fragments_removed": s.fragments_removed,
|
||||
"fragments_added": s.fragments_added,
|
||||
"committed": s.committed,
|
||||
})).collect::<Vec<_>>(),
|
||||
});
|
||||
print_json(&value)?;
|
||||
} else {
|
||||
println!("optimize {} — {} tables", uri, stats.len());
|
||||
for s in &stats {
|
||||
if s.committed {
|
||||
println!(
|
||||
" {:<40} frags {} → {} ✓",
|
||||
s.table_key,
|
||||
s.fragments_removed + s.fragments_added - s.fragments_added,
|
||||
s.fragments_added
|
||||
);
|
||||
} else {
|
||||
println!(" {:<40} no-op", s.table_key);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Command::Cleanup {
|
||||
uri,
|
||||
target,
|
||||
config,
|
||||
keep,
|
||||
older_than,
|
||||
confirm,
|
||||
json,
|
||||
} => {
|
||||
let config = load_cli_config(config.as_ref())?;
|
||||
let uri = resolve_uri(&config, uri, target.as_deref())?;
|
||||
|
||||
let older_than_dur = older_than
|
||||
.as_deref()
|
||||
.map(parse_duration_arg)
|
||||
.transpose()?;
|
||||
|
||||
if keep.is_none() && older_than_dur.is_none() {
|
||||
bail!("cleanup requires at least one of --keep or --older-than");
|
||||
}
|
||||
|
||||
let policy_desc = match (keep, older_than_dur) {
|
||||
(Some(k), Some(d)) => format!("keep {} versions, remove anything older than {:?}", k, d),
|
||||
(Some(k), None) => format!("keep {} versions", k),
|
||||
(None, Some(d)) => format!("remove anything older than {:?}", d),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
if !confirm {
|
||||
eprintln!(
|
||||
"cleanup is destructive — rerun with --confirm. Policy for {}: {}",
|
||||
uri, policy_desc
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let options = omnigraph::db::CleanupPolicyOptions {
|
||||
keep_versions: keep,
|
||||
older_than: older_than_dur,
|
||||
};
|
||||
|
||||
let mut db = Omnigraph::open(&uri).await?;
|
||||
let stats = db.cleanup(options).await?;
|
||||
if json {
|
||||
let value = serde_json::json!({
|
||||
"uri": uri,
|
||||
"keep_versions": keep,
|
||||
"older_than_secs": older_than_dur.map(|d| d.as_secs()),
|
||||
"tables": stats.iter().map(|s| serde_json::json!({
|
||||
"table_key": s.table_key,
|
||||
"bytes_removed": s.bytes_removed,
|
||||
"old_versions_removed": s.old_versions_removed,
|
||||
})).collect::<Vec<_>>(),
|
||||
});
|
||||
print_json(&value)?;
|
||||
} else {
|
||||
let total_bytes: u64 = stats.iter().map(|s| s.bytes_removed).sum();
|
||||
let total_versions: u64 = stats.iter().map(|s| s.old_versions_removed).sum();
|
||||
println!(
|
||||
"cleanup {} ({}) — removed {} versions ({} bytes) across {} tables",
|
||||
uri,
|
||||
policy_desc,
|
||||
total_versions,
|
||||
total_bytes,
|
||||
stats.len()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue