Feat/configurable sanitizers and js precision (#32)

* chore: Exclude CLAUDE.md from Cargo.toml

* feat: Add configurable analysis rules and CLI commands for custom sanitizers and terminators

* feat: Enhance resource management and analysis efficiency

- Implemented parallel summary merging in `scan_filesystem` using rayon for improved performance.
- Introduced `GlobalSummaries::merge()` for efficient merging of summaries.
- Optimized file reading and hashing to eliminate redundant I/O operations.
- Added `should_scan_with_hash()` and `upsert_file_with_hash()` methods to streamline file processing.
- Enhanced taint analysis with in-place mutations to reduce memory allocations.
- Updated resource acquisition patterns to exclude false positives for `freopen` and wrapper functions.

* feat: Implement severity downgrade for findings in non-production paths and add source kind inference

* feat: Update versioning information in SECURITY.md for new stable line

* feat: Update categories in Cargo.toml to include parser-implementations and text-processing

* feat: Update dependencies in Cargo.lock for improved compatibility and performance

* feat: Update dependencies in Cargo.lock and Cargo.toml for improved compatibility
This commit is contained in:
Eli Peter 2026-02-25 04:02:11 -05:00 committed by GitHub
parent f96a89e7c1
commit 19b578c5c4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
37 changed files with 3775 additions and 432 deletions

View file

@ -68,9 +68,13 @@ pub mod index {
impl Indexer {
pub fn init(database_path: &Path) -> NyxResult<Arc<Pool<SqliteConnectionManager>>> {
let _span = tracing::info_span!("db_init", path = %database_path.display()).entered();
// NO_MUTEX is safe because r2d2 ensures each pooled connection
// is only ever used by one thread at a time. Combined with WAL
// mode this allows concurrent readers + a single writer without
// the global serialization that FULL_MUTEX causes.
let flags = OpenFlags::SQLITE_OPEN_READ_WRITE
| OpenFlags::SQLITE_OPEN_CREATE
| OpenFlags::SQLITE_OPEN_FULL_MUTEX;
| OpenFlags::SQLITE_OPEN_NO_MUTEX;
let manager = SqliteConnectionManager::file(database_path).with_flags(flags);
let pool = Arc::new(Pool::new(manager)?);
@ -132,10 +136,13 @@ pub mod index {
}
/// Return true when the file *content* or *mtime* changed since the last scan.
///
/// Short-circuits on mtime: if the stored mtime matches the
/// filesystem mtime, the file is assumed unchanged (skip hash).
#[allow(dead_code)] // used in tests and by should_scan_with_hash callers may fall back
pub fn should_scan(&self, path: &Path) -> NyxResult<bool> {
let meta = fs::metadata(path)?;
let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
let digest = Self::digest_file(path)?;
let row: Option<(Vec<u8>, i64)> = self
.conn
@ -147,18 +154,56 @@ pub mod index {
.optional()?;
Ok(match row {
Some((stored_hash, stored_mtime)) => stored_hash != digest || stored_mtime != mtime,
Some((stored_hash, stored_mtime)) => {
if stored_mtime != mtime {
// mtime changed — must re-scan
true
} else {
// mtime matches — compare hash only if cheap
// (the caller already read the file and can use
// should_scan_with_hash instead for full accuracy)
let digest = Self::digest_file(path)?;
stored_hash != digest
}
}
None => true,
})
}
/// Like [`should_scan`] but accepts a pre-computed hash to avoid
/// redundant file reads.
pub fn should_scan_with_hash(&self, path: &Path, hash: &[u8]) -> NyxResult<bool> {
let row: Option<Vec<u8>> = self
.conn
.query_row(
"SELECT hash FROM files WHERE project = ?1 AND path = ?2",
params![self.project, path.to_string_lossy()],
|r| r.get(0),
)
.optional()?;
Ok(match row {
Some(stored_hash) => stored_hash != hash,
None => true,
})
}
/// Insert or update the `files` row and return its id.
pub fn upsert_file(&self, path: &Path) -> NyxResult<i64> {
let bytes = fs::read(path)?;
let hash = Self::digest_bytes(&bytes);
self.upsert_file_with_hash(path, &hash)
}
/// Insert or update the `files` row using a pre-computed hash.
/// Avoids redundant file reads when the caller already has the hash.
pub fn upsert_file_with_hash(&self, path: &Path, hash: &[u8]) -> NyxResult<i64> {
let meta = fs::metadata(path)?;
let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
let scanned_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;
let digest = Self::digest_file(path)?;
let path_str = path.to_string_lossy();
// Use a single statement: upsert then query the id.
self.c().execute(
"INSERT INTO files (project, path, hash, mtime, scanned_at)
VALUES (?1, ?2, ?3, ?4, ?5)
@ -166,18 +211,12 @@ pub mod index {
SET hash = excluded.hash,
mtime = excluded.mtime,
scanned_at = excluded.scanned_at",
params![
self.project,
path.to_string_lossy(),
digest,
mtime,
scanned_at
],
params![self.project, path_str, hash, mtime, scanned_at],
)?;
let id: i64 = self.c().query_row(
"SELECT id FROM files WHERE project = ?1 AND path = ?2",
params![self.project, path.to_string_lossy()],
params![self.project, path_str],
|r| r.get(0),
)?;
Ok(id)
@ -287,24 +326,38 @@ pub mod index {
}
/// Load every function summary for this project.
///
/// Reads all JSON strings from SQLite in one pass, then
/// deserializes them in parallel with rayon for large result sets.
pub fn load_all_summaries(&self) -> NyxResult<Vec<crate::summary::FuncSummary>> {
let mut stmt = self
.c()
.prepare("SELECT summary FROM function_summaries WHERE project = ?1")?;
let iter = stmt.query_map([&self.project], |row| {
let json: String = row.get(0)?;
Ok(json)
})?;
let jsons: Vec<String> = stmt
.query_map([&self.project], |row| row.get::<_, String>(0))?
.filter_map(Result::ok)
.collect();
let mut out = Vec::new();
for row in iter {
let json = row?;
let s: crate::summary::FuncSummary = serde_json::from_str(&json)
.map_err(|e| rusqlite::Error::ToSqlConversionFailure(Box::new(e)))?;
out.push(s);
// Parallel JSON deserialization for large sets
if jsons.len() > 256 {
use rayon::prelude::*;
let results: Vec<_> = jsons
.par_iter()
.filter_map(|json| {
serde_json::from_str::<crate::summary::FuncSummary>(json).ok()
})
.collect();
Ok(results)
} else {
let mut out = Vec::with_capacity(jsons.len());
for json in &jsons {
if let Ok(s) = serde_json::from_str::<crate::summary::FuncSummary>(json) {
out.push(s);
}
}
Ok(out)
}
Ok(out)
}
/// gets files from the database
@ -351,12 +404,20 @@ pub mod index {
// -------------------------------------------------------------------------
// Helpers
// -------------------------------------------------------------------------
#[allow(dead_code)] // used by should_scan() and tests
fn digest_file(path: &Path) -> NyxResult<Vec<u8>> {
let mut hasher = blake3::Hasher::new();
let mut file = fs::File::open(path)?;
std::io::copy(&mut file, &mut hasher)?;
Ok(hasher.finalize().as_bytes().to_vec())
}
/// Hash already-read bytes without re-reading from disk.
pub fn digest_bytes(bytes: &[u8]) -> Vec<u8> {
let mut hasher = blake3::Hasher::new();
hasher.update(bytes);
hasher.finalize().as_bytes().to_vec()
}
}
}