mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-12 01:45:14 +02:00
* Parallel per-type load writes + omnigraph optimize/cleanup CLI
## MR-677.3 — parallel per-type load writes
The load path already groups records into one RecordBatch per type and
makes one Lance commit per table (loader::mod.rs:249-..), but those
commits ran sequentially. Wrap node and edge write loops in
`futures::stream::buffered(N)` against a new helper
`write_batches_concurrently`. Concurrency tunable via
`OMNIGRAPH_LOAD_CONCURRENCY` (default 8).
## MR-676 — `omnigraph optimize` and `omnigraph cleanup`
New CLI subcommands that walk every node + edge table in the repo:
- `omnigraph optimize <uri>` — runs Lance `compact_files` on each
table to merge small fragments into fewer larger ones.
- `omnigraph cleanup <uri> --keep N | --older-than 7d --confirm` —
runs Lance `cleanup_old_versions` to prune historical manifests +
unique fragments. Requires `--confirm` because it's destructive.
Supports both count-based and time-based retention (or both AND'd
together). Time uses chrono `DateTime<Utc>` (added as a workspace
dep, default-features off).
Both commands run their per-table loops in parallel (8-way bounded,
`OMNIGRAPH_MAINTENANCE_CONCURRENCY` env override). Smoke-tested
against the 114-table prod graph: optimize went 7m15s sequential
→ 1m28s parallel. cleanup --keep 1 removed 137 historical versions
across 114 tables in 1m57s without disrupting `/healthz` or query
responses.
Public API on `Omnigraph`:
pub async fn optimize(&mut self) -> Result<Vec<TableOptimizeStats>>
pub async fn cleanup(&mut self, opts: CleanupPolicyOptions)
-> Result<Vec<TableCleanupStats>>
All 10 existing loader tests still pass.
Closes MR-676.
Partially addresses MR-677: this change covers MR-677.3 (parallel
per-type writes). MR-677.1 applies to the `omnigraph embed` path rather
than load, since load doesn't call Gemini directly, and MR-677.2 was
already in place.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
* chore: regenerate openapi.json
---------
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
47 lines
1.6 KiB
TOML
47 lines
1.6 KiB
TOML
# Crate metadata for the Omnigraph HTTP server.
[package]
name = "omnigraph-server"
version = "0.3.1"
edition = "2024"
description = "HTTP server for the Omnigraph graph database."
license = "MIT"
repository = "https://github.com/ModernRelay/omnigraph"
homepage = "https://github.com/ModernRelay/omnigraph"
documentation = "https://docs.rs/omnigraph-server"

# The single binary target; its entry point is src/main.rs.
[[bin]]
name = "omnigraph-server"
path = "src/main.rs"

[features]
# No optional functionality is compiled in unless requested explicitly.
default = []
# Enables the AWS Secrets Manager bearer-token source. Off by default — on-prem
# and local-dev builds don't pay the AWS SDK compile cost.
aws = ["dep:aws-config", "dep:aws-sdk-secretsmanager"]

[dependencies]
# Workspace-local crates. `omnigraph` is the in-code name for the
# `omnigraph-engine` package.
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.3.1" }
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.3.1" }
# Third-party crates whose versions are inherited from the workspace manifest.
async-trait = { workspace = true }
axum = { workspace = true }
cedar-policy = { workspace = true }
clap = { workspace = true }
color-eyre = { workspace = true }
futures = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
serde_yaml = { workspace = true }
sha2 = { workspace = true }
subtle = { workspace = true }
tokio = { workspace = true }
tower-http = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true }
utoipa = { workspace = true }
# Optional AWS SDK crates; only built when the `aws` feature is enabled.
aws-config = { version = "1", optional = true, default-features = false, features = ["rustls", "rt-tokio", "credentials-process", "sso"] }
aws-sdk-secretsmanager = { version = "1", optional = true, default-features = false, features = ["rustls", "rt-tokio"] }

# Crates needed only when building tests, examples, and benchmarks.
[dev-dependencies]
lance-index = { workspace = true }
# Pinned here directly rather than inherited from the workspace.
serial_test = "3"
tempfile = { workspace = true }
tower = { workspace = true }