Feat/full cfg (#30)

* feat: Enhance control flow analysis with function summaries and taint analysis

* feat: Update taint analysis to utilize function summaries for enhanced tracking

* Refactor `walk.rs` batch processing and override handling:

- Renamed `Batcher` to `BatchSender` for clarity.
- Added `BatchSender::new` constructor for cleaner initialization.
- Simplified batch size management in `BatchSender`.
- Extracted `build_overrides` function for reusable override construction.
- Improved error handling and validation in override building.
- Enhanced performance with directory and file type filtering in `walk`.

* Improve logging and streamline directory walk process:

- Added detailed `tracing` logs for debugging batch flushes, override construction, and walk initialization/completion.
- Optimized and simplified `filter_entry` logic for directory and file type filters.
- Improved metadata checks and max file size enforcement during the scan.

* Refactor and optimize taint tracking, label rules, and directory walk process:

- Replaced `DefaultHasher` with `blake3::Hasher` for improved taint hashing.
- Enhanced sorting and hashing logic in `taint.rs` for consistency and efficiency.
- Removed unused `set_hash` function and redundant imports across files.
- Improved batch sender logic in `walk.rs`, renaming key components for clarity.
- Unified `spawn_senders` and `spawn_file_walker` with thread handling and channel tuple return.
- Expanded label rules with additional matchers for sources, sanitizers, and sinks.
- Deprecated `dump_cfg` and specific logging utilities in `cfg.rs` for code cleanup.

* fix: fixed let chains error in walk.rs

* fix: updated dependencies

* fix: updated dependencies

* chore: Remove standard error in scan.rs

* feat: Introduce function summaries for enhanced taint and control flow analysis

* feat: Enhance taint analysis with interop support and function summaries

* feat: Add configuration analysis module and enhance matcher rules

* feat: Add arity column to function_summaries and handle schema migration

* fix: fixed clippy &PathBuf warnings

* chore: Update dependencies and versioning in Cargo files

* docs: Update README to enhance clarity and detail on features and analysis modes

* chore: Update CHANGELOG for version 0.2.0 with new features, changes, and fixes

* docs: Update SECURITY.md to clarify version support status

---------

Co-authored-by: elipeter <eli.peter@es.fcm.travel>
This commit is contained in:
Eli Peter 2026-02-24 23:44:07 -05:00 committed by GitHub
parent 8cbbec7d90
commit f96a89e7c1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
87 changed files with 11505 additions and 1099 deletions

View file

@ -1,6 +1,6 @@
pub mod index {
use crate::commands::scan::Diag;
use crate::errors::NyxResult;
use crate::errors::{NyxError, NyxResult};
use crate::patterns::Severity;
use r2d2::{Pool, PooledConnection};
use r2d2_sqlite::SqliteConnectionManager;
@ -34,12 +34,18 @@ pub mod index {
col INTEGER NOT NULL,
PRIMARY KEY (file_id, rule_id, line, col));
CREATE TABLE IF NOT EXISTS function_summaries (hash TEXT PRIMARY KEY,
CREATE TABLE IF NOT EXISTS function_summaries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL,
file_path TEXT NOT NULL,
file_hash BLOB NOT NULL,
name TEXT NOT NULL,
arity INTEGER NOT NULL DEFAULT -1,
lang TEXT NOT NULL,
summary TEXT NOT NULL,
updated_at INTEGER NOT NULL);
updated_at INTEGER NOT NULL,
UNIQUE(project, file_path, name, arity)
);
"#;
// TODO: add a per-project cleanup for each table, to be run on clean.
@ -61,6 +67,7 @@ pub mod index {
impl Indexer {
pub fn init(database_path: &Path) -> NyxResult<Arc<Pool<SqliteConnectionManager>>> {
let _span = tracing::info_span!("db_init", path = %database_path.display()).entered();
let flags = OpenFlags::SQLITE_OPEN_READ_WRITE
| OpenFlags::SQLITE_OPEN_CREATE
| OpenFlags::SQLITE_OPEN_FULL_MUTEX;
@ -70,7 +77,43 @@ pub mod index {
{
let conn = pool.get()?;
conn.pragma_update(None, "journal_mode", "WAL")?;
conn.pragma_update(None, "synchronous", "NORMAL")?;
conn.pragma_update(None, "cache_size", "-8000")?; // 8 MB
conn.pragma_update(None, "temp_store", "MEMORY")?;
conn.pragma_update(None, "mmap_size", "268435456")?; // 256 MB
conn.execute_batch(SCHEMA)?;
// Migrate: if the function_summaries table has the old schema
// (missing `arity` column), drop and recreate it.
let has_arity: bool = conn
.prepare("PRAGMA table_info(function_summaries)")
.and_then(|mut s| {
let cols: Vec<String> = s
.query_map([], |r| r.get::<_, String>(1))?
.filter_map(Result::ok)
.collect();
Ok(cols.iter().any(|c| c == "arity"))
})
.unwrap_or(true);
if !has_arity {
tracing::info!("migrating function_summaries: adding arity column");
conn.execute_batch("DROP TABLE IF EXISTS function_summaries;")?;
conn.execute_batch(
"CREATE TABLE IF NOT EXISTS function_summaries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL,
file_path TEXT NOT NULL,
file_hash BLOB NOT NULL,
name TEXT NOT NULL,
arity INTEGER NOT NULL DEFAULT -1,
lang TEXT NOT NULL,
summary TEXT NOT NULL,
updated_at INTEGER NOT NULL,
UNIQUE(project, file_path, name, arity)
);",
)?;
}
}
Ok(pool)
}
@ -196,49 +239,73 @@ pub mod index {
Ok(issue_iter.filter_map(Result::ok).collect())
}
// pub fn upsert_summary(
// &mut self,
// project: &str,
// path: &Path,
// hash: &str,
// s: &crate::summary::FuncSummary,
// ) -> NyxResult<()> {
// let conn = self.c();
// let now = chrono::Utc::now().timestamp_millis(); // i64
//
// conn.execute(
// "INSERT INTO function_summaries (hash, project, name, lang, summary, updated_at)
// VALUES (?1, ?2, ?3, ?4, ?5, ?6)
// ON CONFLICT(hash) DO UPDATE SET summary = excluded.summary,
// updated_at = excluded.updated_at",
// (
// hash,
// project,
// &s.name,
// path.extension().and_then(|e| e.to_str()).unwrap_or_default(),
// serde_json::to_string(s).unwrap(), //TODO REPLACE UNWRAP
// now,
// ),
// )?;
// Ok(())
// }
//
// pub fn load_all_summaries(&self, project: &str) -> NyxResult<Vec<crate::summary::FuncSummary<'static>>> {
// let mut stmt = self
// .c()
// .prepare("SELECT summary FROM function_summaries WHERE project = ?1")?;
//
// let iter = stmt.query_map([project], |row| {
// let json: String = row.get(0)?;
// Ok(serde_json::from_str::<crate::summary::FuncSummary>(json.as_str()).unwrap()) // TODO: REPLACE UNWRAP
// })?;
//
// Ok(iter
// .collect::<Result<Vec<_>, _>>()?
// .into_iter()
// .map(|s| unsafe { std::mem::transmute::<_, crate::summary::FuncSummary<'static>>(s) })
// .collect())
// }
/// Swap out the stored function summaries of one file inside a single
/// transaction.
///
/// Every row matching this indexer's project and `file_path` is deleted,
/// then one row is inserted per entry of `summaries`. The transaction is
/// committed only after all of those statements have succeeded, so a
/// reparsed file never leaves a mix of old and new rows committed.
///
/// # Arguments
/// * `file_path` - path of the file; stored using its lossy string form.
/// * `file_hash` - bytes written to the `file_hash` column of each new row.
/// * `summaries` - summaries to store; each is serialised to JSON for the
///   `summary` column, and its `param_count` is written to `arity`.
///
/// # Errors
/// Fails if the transaction or any statement fails, if the system clock
/// reads earlier than the Unix epoch, or if a summary cannot be serialised
/// (reported as `NyxError::Msg`).
pub fn replace_summaries_for_file(
    &mut self,
    file_path: &Path,
    file_hash: &[u8],
    summaries: &[crate::summary::FuncSummary],
) -> NyxResult<()> {
    let txn = self.conn.transaction()?;
    let file_key = file_path.to_string_lossy();
    // Whole seconds since the Unix epoch; shared by every row written below.
    let stamp = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;

    // Clear out whatever was recorded for this file before re-inserting.
    txn.execute(
        "DELETE FROM function_summaries WHERE project = ?1 AND file_path = ?2",
        params![self.project, file_key],
    )?;

    // Inner scope: the prepared statement must be gone before the commit.
    {
        let mut insert = txn.prepare(
            "INSERT OR REPLACE INTO function_summaries\n\
             (project, file_path, file_hash, name, arity, lang, summary, updated_at)\n\
             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
        )?;

        for summary in summaries {
            let encoded = serde_json::to_string(summary)
                .map_err(|e| NyxError::Msg(format!("summary serialise: {e}")))?;
            insert.execute(params![
                self.project,
                file_key,
                file_hash,
                summary.name,
                summary.param_count as i64,
                summary.lang,
                encoded,
                stamp
            ])?;
        }
    }

    txn.commit()?;
    Ok(())
}
/// Load every function summary for this project.
pub fn load_all_summaries(&self) -> NyxResult<Vec<crate::summary::FuncSummary>> {
let mut stmt = self
.c()
.prepare("SELECT summary FROM function_summaries WHERE project = ?1")?;
let iter = stmt.query_map([&self.project], |row| {
let json: String = row.get(0)?;
Ok(json)
})?;
let mut out = Vec::new();
for row in iter {
let json = row?;
let s: crate::summary::FuncSummary = serde_json::from_str(&json)
.map_err(|e| rusqlite::Error::ToSqlConversionFailure(Box::new(e)))?;
out.push(s);
}
Ok(out)
}
/// gets files from the database
pub fn get_files(&self, project: &str) -> NyxResult<Vec<PathBuf>> {