mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
198 lines
7.7 KiB
Rust
198 lines
7.7 KiB
Rust
//! Index-mode DB corruption recovery regression.
|
|
//!
|
|
//! Nyx's indexed scan path stores per-project state in a SQLite file. If
|
|
//! that file is truncated or filled with garbage (crashed scanner, disk
|
|
//! failure, user stomping on the state dir) the scanner must surface a
|
|
//! clear error instead of panicking, hanging, or producing nonsense
|
|
//! findings. These tests exercise both classes of corruption:
|
|
//!
|
|
//! 1. Truncation to zero bytes: SQLite treats a zero-length file as a
//!    fresh empty DB. We expect the indexer to bootstrap the schema and
//!    carry on.
//! 2. Arbitrary garbage in the header: SQLite rejects this with
//!    `SQLITE_NOTADB` during pragma/schema execution. We expect the
//!    indexer to return a structured error, not a panic.
|
|
//!
|
|
//! A later change may add an auto-rebuild path gated by `--rebuild-db`;
|
|
//! if so, the garbage-header test should flip to assert success with a
|
|
//! diagnostic note. For now we pin current behaviour.
|
|
|
|
use nyx_scanner::commands::index::build_index;
|
|
use nyx_scanner::commands::scan::{Diag, scan_with_index_parallel};
|
|
use nyx_scanner::database::index::Indexer;
|
|
use nyx_scanner::errors::NyxError;
|
|
use nyx_scanner::utils::config::{AnalysisMode, Config};
|
|
use std::io::Write;
|
|
use std::path::Path;
|
|
use std::sync::Arc;
|
|
|
|
fn test_cfg() -> Config {
|
|
let mut cfg = Config::default();
|
|
cfg.scanner.mode = AnalysisMode::Full;
|
|
cfg.scanner.read_vcsignore = false;
|
|
cfg.scanner.require_git_to_read_vcsignore = false;
|
|
cfg.performance.worker_threads = Some(1);
|
|
cfg.performance.batch_size = 8;
|
|
cfg.performance.channel_multiplier = 1;
|
|
cfg
|
|
}
|
|
|
|
/// Write a single-file JavaScript project (`cmdi.js`) into `root`.
///
/// The file is a small Express app whose `/x` handler passes
/// `req.query.cmd` straight to `child_process.exec`.
///
/// # Panics
///
/// Panics if the file cannot be written (for example when `root` does not
/// exist).
fn seed_project(root: &Path) {
    // The sink is spelled as the qualified `child_process.exec` on purpose:
    // under the post-fix label rules the bare `exec` flat sink was removed
    // because it suffix-matched any `<recv>.exec` (e.g. Dockerode's
    // `container.exec`). The qualified form is the canonical Node.js stdlib
    // path and stays a flat sink, so this seed still yields a taint finding.
    const CMDI_JS: &[u8] = b"const child_process = require('child_process');\n\
        const express = require('express');\n\
        const app = express();\n\
        app.get('/x', (req, res) => { child_process.exec(req.query.cmd); res.send('ok'); });\n";

    let target = root.join("cmdi.js");
    std::fs::write(target, CMDI_JS).unwrap();
}
|
|
|
|
/// Build a fresh index against a project tempdir and return `(project_name,
|
|
/// db_path, project_root_keep_alive, db_dir_keep_alive)`.
|
|
fn build_indexed_project() -> (
|
|
String,
|
|
std::path::PathBuf,
|
|
tempfile::TempDir,
|
|
tempfile::TempDir,
|
|
) {
|
|
let project = tempfile::tempdir().unwrap();
|
|
seed_project(project.path());
|
|
|
|
let db_dir = tempfile::tempdir().unwrap();
|
|
let db_path = db_dir.path().join("corrupt.sqlite");
|
|
build_index("corrupt", project.path(), &db_path, &test_cfg(), false)
|
|
.expect("initial build_index should succeed on clean tree");
|
|
|
|
// Sanity check: running an indexed scan produces diags.
|
|
let pool = Indexer::init(&db_path).expect("init pool against clean DB");
|
|
let diags: Vec<Diag> = scan_with_index_parallel(
|
|
"corrupt",
|
|
Arc::clone(&pool),
|
|
&test_cfg(),
|
|
false,
|
|
project.path(),
|
|
)
|
|
.expect("clean indexed scan should succeed");
|
|
assert!(
|
|
!diags.is_empty(),
|
|
"sanity: indexed scan on seeded project should produce findings",
|
|
);
|
|
|
|
// Drop the pool so we can overwrite the DB file on platforms where
|
|
// open handles block replacement (mainly Windows, but SQLite's WAL
|
|
// also wants us closed before we scribble on it).
|
|
drop(pool);
|
|
|
|
("corrupt".to_string(), db_path, project, db_dir)
|
|
}
|
|
|
|
/// Replace the contents of `path` with exactly `n` copies of the byte `fill`.
///
/// The file is opened for writing with truncation, so any previous content
/// is discarded before the fill bytes are written. Mimics a partial write /
/// header smash.
///
/// # Panics
///
/// Panics if `path` cannot be opened for writing (the file is not created
/// when missing) or if the write fails.
fn clobber_header(path: &Path, fill: u8, n: usize) {
    let mut options = std::fs::OpenOptions::new();
    options.write(true).truncate(true);

    let mut handle = options.open(path).expect("open db for clobber");
    let payload = vec![fill; n];
    handle.write_all(&payload).expect("write clobber bytes");
}
|
|
|
|
/// Zero-length DB file: `Indexer::init` must re-bootstrap and scanning must
/// still work.
///
/// SQLite treats a zero-byte file as a new empty database, so init is
/// expected to succeed and a following indexed scan is expected to emit
/// findings.
#[test]
fn zero_truncated_db_rebuilds_on_init() {
    let (project_name, db_path, project, _db_dir) = build_indexed_project();

    // Opening with `truncate(true)` empties the file; the handle itself is
    // not needed afterwards.
    let truncated = std::fs::OpenOptions::new()
        .write(true)
        .truncate(true)
        .open(&db_path)
        .expect("truncate db to zero");
    drop(truncated);

    let len_after = std::fs::metadata(&db_path).unwrap().len();
    assert_eq!(
        len_after, 0,
        "expected db to be zero-length after truncation",
    );

    // SQLite sees the empty file as a fresh DB and `Indexer::init` runs its
    // CREATE TABLE statements against it, so init should succeed.
    let pool = Indexer::init(&db_path)
        .expect("Indexer::init should bootstrap a schema into an empty file");

    // No prior state survives the truncation, so this scan effectively acts
    // like a cold rebuild. `build_index` is deliberately not called again:
    // the point is to confirm the raw init path is resilient.
    let cfg = test_cfg();
    let findings = scan_with_index_parallel(
        &project_name,
        Arc::clone(&pool),
        &cfg,
        false,
        project.path(),
    )
    .expect("indexed scan after zero-truncation should succeed");

    // Scan-side resilience: cached summaries are gone, but the filesystem
    // pass runs on a clean SQLite and findings still emit.
    assert!(
        !findings.is_empty(),
        "indexed scan after rebuild should still emit findings",
    );
}
|
|
|
|
/// Garbage header: `Indexer::init` must return a structured error.
///
/// This is the "actual corruption" case. SQLite rejects the file with
/// `SQLITE_NOTADB` the first time a pragma or SQL statement is executed,
/// which is expected to surface as `NyxError::Sql(_)`. `NyxError::Io(_)` and
/// `NyxError::Pool(_)` are tolerated as alternative classifications. Any
/// other error variant, or a successful init, fails the test.
#[test]
fn garbage_header_db_returns_structured_error() {
    let (_project_name, db_path, _project, _db_dir) = build_indexed_project();

    // 100 bytes of `0xFF` can never match SQLite's header magic
    // "SQLite format 3\0".
    clobber_header(&db_path, 0xFF, 100);

    // Init must fail loudly: no panic inside the indexer and no successful
    // init against the garbage file.
    let err = match Indexer::init(&db_path) {
        Ok(_) => panic!(
            "Indexer::init should not succeed against a garbage-header file at {}",
            db_path.display(),
        ),
        Err(err) => err,
    };

    match err {
        NyxError::Sql(sql_err) => {
            let msg = sql_err.to_string();
            assert!(
                !msg.is_empty(),
                "SQLite error should carry a diagnostic message",
            );
        }
        // Acceptable: some platforms classify the corrupt file as an IO
        // error at open time, and r2d2 may wrap the init failure in a pool
        // error.
        NyxError::Io(_) | NyxError::Pool(_) => {}
        other => {
            panic!("expected NyxError::Sql / Io / Pool on corrupt header, got {other:?}",);
        }
    }
}
|
|
|
|
// NOTE: A mid-file corruption test (garbage at bytes 100..200, preserving
|
|
// SQLite magic) is still omitted. `Indexer::init` short-circuits on
|
|
// header-magic mismatch (see `preflight_header`), so the corrupt-on-arrival
|
|
// shapes users actually hit return in microseconds. Mid-page damage that
|
|
// preserves the magic header still falls into SQLite's slow corruption
|
|
// detection path (150-200s), which is too long for CI wall-clock budgets;
|
|
// detecting that shape would require running `PRAGMA quick_check` with an
|
|
// interrupt callback, which is out of scope here.
|