mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-27 20:29:39 +02:00
Feat/full cfg (#30)
* feat: Enhance control flow analysis with function summaries and taint analysis * feat: Update taint analysis to utilize function summaries for enhanced tracking * Refactor `walk.rs` batch processing and override handling: - Renamed `Batcher` to `BatchSender` for clarity. - Added `BatchSender::new` constructor for cleaner initialization. - Simplified batch size management in `BatchSender`. - Extracted `build_overrides` function for reusable override construction. - Improved error handling and validation in override building. - Enhanced performance with directory and file type filtering in `walk`. * Improve logging and streamline directory walk process: - Added detailed `tracing` logs for debugging batch flushes, override construction, and walk initialization/completion. - Optimized and simplified `filter_entry` logic for directory and file type filters. - Improved metadata checks and max file size enforcement during the scan. * Refactor and optimize taint tracking, label rules, and directory walk process: - Replaced `DefaultHasher` with `blake3::Hasher` for improved taint hashing. - Enhanced sorting and hashing logic in `taint.rs` for consistency and efficiency. - Removed unused `set_hash` function and redundant imports across files. - Improved batch sender logic in `walk.rs`, renaming key components for clarity. - Unified `spawn_senders` and `spawn_file_walker` with thread handling and channel tuple return. - Expanded label rules with additional matchers for sources, sanitizers, and sinks. - Deprecated `dump_cfg` and specific logging utilities in `cfg.rs` for code cleanup. 
* fix: fixed let chains error in walk.rs * fix: updated dependencies * fix: updated dependencies * chore: Remove standard error in scan.rs * feat: Introduce function summaries for enhanced taint and control flow analysis * feat: Enhance taint analysis with interop support and function summaries * feat: Add configuration analysis module and enhance matcher rules * feat: Add arity column to function_summaries and handle schema migration * fix: fixed clippy &PathBuf warnings * chore: Update dependencies and versioning in Cargo files * docs: Update README to enhance clarity and detail on features and analysis modes * chore: Update CHANGELOG for version 0.2.0 with new features, changes, and fixes * docs: Update SECURITY.md to clarify version support status --------- Co-authored-by: elipeter <eli.peter@es.fcm.travel>
This commit is contained in:
parent
8cbbec7d90
commit
f96a89e7c1
87 changed files with 11505 additions and 1099 deletions
159
src/database.rs
159
src/database.rs
|
|
@ -1,6 +1,6 @@
|
|||
pub mod index {
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::errors::NyxResult;
|
||||
use crate::errors::{NyxError, NyxResult};
|
||||
use crate::patterns::Severity;
|
||||
use r2d2::{Pool, PooledConnection};
|
||||
use r2d2_sqlite::SqliteConnectionManager;
|
||||
|
|
@ -34,12 +34,18 @@ pub mod index {
|
|||
col INTEGER NOT NULL,
|
||||
PRIMARY KEY (file_id, rule_id, line, col));
|
||||
|
||||
CREATE TABLE IF NOT EXISTS function_summaries (hash TEXT PRIMARY KEY,
|
||||
CREATE TABLE IF NOT EXISTS function_summaries (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
project TEXT NOT NULL,
|
||||
file_path TEXT NOT NULL,
|
||||
file_hash BLOB NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
arity INTEGER NOT NULL DEFAULT -1,
|
||||
lang TEXT NOT NULL,
|
||||
summary TEXT NOT NULL,
|
||||
updated_at INTEGER NOT NULL);
|
||||
updated_at INTEGER NOT NULL,
|
||||
UNIQUE(project, file_path, name, arity)
|
||||
);
|
||||
"#;
|
||||
|
||||
// TODO: add per-project cleanup for each table, to be run on clean.
|
||||
|
|
@ -61,6 +67,7 @@ pub mod index {
|
|||
|
||||
impl Indexer {
|
||||
pub fn init(database_path: &Path) -> NyxResult<Arc<Pool<SqliteConnectionManager>>> {
|
||||
let _span = tracing::info_span!("db_init", path = %database_path.display()).entered();
|
||||
let flags = OpenFlags::SQLITE_OPEN_READ_WRITE
|
||||
| OpenFlags::SQLITE_OPEN_CREATE
|
||||
| OpenFlags::SQLITE_OPEN_FULL_MUTEX;
|
||||
|
|
@ -70,7 +77,43 @@ pub mod index {
|
|||
{
|
||||
let conn = pool.get()?;
|
||||
conn.pragma_update(None, "journal_mode", "WAL")?;
|
||||
conn.pragma_update(None, "synchronous", "NORMAL")?;
|
||||
conn.pragma_update(None, "cache_size", "-8000")?; // 8 MB
|
||||
conn.pragma_update(None, "temp_store", "MEMORY")?;
|
||||
conn.pragma_update(None, "mmap_size", "268435456")?; // 256 MB
|
||||
conn.execute_batch(SCHEMA)?;
|
||||
|
||||
// Migrate: if the function_summaries table has the old schema
|
||||
// (missing `arity` column), drop and recreate it.
|
||||
let has_arity: bool = conn
|
||||
.prepare("PRAGMA table_info(function_summaries)")
|
||||
.and_then(|mut s| {
|
||||
let cols: Vec<String> = s
|
||||
.query_map([], |r| r.get::<_, String>(1))?
|
||||
.filter_map(Result::ok)
|
||||
.collect();
|
||||
Ok(cols.iter().any(|c| c == "arity"))
|
||||
})
|
||||
.unwrap_or(true);
|
||||
|
||||
if !has_arity {
|
||||
tracing::info!("migrating function_summaries: adding arity column");
|
||||
conn.execute_batch("DROP TABLE IF EXISTS function_summaries;")?;
|
||||
conn.execute_batch(
|
||||
"CREATE TABLE IF NOT EXISTS function_summaries (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
project TEXT NOT NULL,
|
||||
file_path TEXT NOT NULL,
|
||||
file_hash BLOB NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
arity INTEGER NOT NULL DEFAULT -1,
|
||||
lang TEXT NOT NULL,
|
||||
summary TEXT NOT NULL,
|
||||
updated_at INTEGER NOT NULL,
|
||||
UNIQUE(project, file_path, name, arity)
|
||||
);",
|
||||
)?;
|
||||
}
|
||||
}
|
||||
Ok(pool)
|
||||
}
|
||||
|
|
@ -196,49 +239,73 @@ pub mod index {
|
|||
Ok(issue_iter.filter_map(Result::ok).collect())
|
||||
}
|
||||
|
||||
// pub fn upsert_summary(
|
||||
// &mut self,
|
||||
// project: &str,
|
||||
// path: &Path,
|
||||
// hash: &str,
|
||||
// s: &crate::summary::FuncSummary,
|
||||
// ) -> NyxResult<()> {
|
||||
// let conn = self.c();
|
||||
// let now = chrono::Utc::now().timestamp_millis(); // i64
|
||||
//
|
||||
// conn.execute(
|
||||
// "INSERT INTO function_summaries (hash, project, name, lang, summary, updated_at)
|
||||
// VALUES (?1, ?2, ?3, ?4, ?5, ?6)
|
||||
// ON CONFLICT(hash) DO UPDATE SET summary = excluded.summary,
|
||||
// updated_at = excluded.updated_at",
|
||||
// (
|
||||
// hash,
|
||||
// project,
|
||||
// &s.name,
|
||||
// path.extension().and_then(|e| e.to_str()).unwrap_or_default(),
|
||||
// serde_json::to_string(s).unwrap(), //TODO REPLACE UNWRAP
|
||||
// now,
|
||||
// ),
|
||||
// )?;
|
||||
// Ok(())
|
||||
// }
|
||||
//
|
||||
// pub fn load_all_summaries(&self, project: &str) -> NyxResult<Vec<crate::summary::FuncSummary<'static>>> {
|
||||
// let mut stmt = self
|
||||
// .c()
|
||||
// .prepare("SELECT summary FROM function_summaries WHERE project = ?1")?;
|
||||
//
|
||||
// let iter = stmt.query_map([project], |row| {
|
||||
// let json: String = row.get(0)?;
|
||||
// Ok(serde_json::from_str::<crate::summary::FuncSummary>(json.as_str()).unwrap()) // TODO: REPLACE UNWRAP
|
||||
// })?;
|
||||
//
|
||||
// Ok(iter
|
||||
// .collect::<Result<Vec<_>, _>>()?
|
||||
// .into_iter()
|
||||
// .map(|s| unsafe { std::mem::transmute::<_, crate::summary::FuncSummary<'static>>(s) })
|
||||
// .collect())
|
||||
// }
|
||||
/// Atomically replace all function summaries for a single file.
|
||||
///
|
||||
/// Deletes every existing summary row for `(project, file_path)` then
|
||||
/// inserts the new set. This keeps the table in sync when a file is
|
||||
/// re‑parsed and its functions change.
|
||||
pub fn replace_summaries_for_file(
|
||||
&mut self,
|
||||
file_path: &Path,
|
||||
file_hash: &[u8],
|
||||
summaries: &[crate::summary::FuncSummary],
|
||||
) -> NyxResult<()> {
|
||||
let tx = self.conn.transaction()?;
|
||||
let path_str = file_path.to_string_lossy();
|
||||
let now = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;
|
||||
|
||||
tx.execute(
|
||||
"DELETE FROM function_summaries WHERE project = ?1 AND file_path = ?2",
|
||||
params![self.project, path_str],
|
||||
)?;
|
||||
|
||||
{
|
||||
let mut stmt = tx.prepare(
|
||||
"INSERT OR REPLACE INTO function_summaries
|
||||
(project, file_path, file_hash, name, arity, lang, summary, updated_at)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
|
||||
)?;
|
||||
|
||||
for s in summaries {
|
||||
let json = serde_json::to_string(s)
|
||||
.map_err(|e| NyxError::Msg(format!("summary serialise: {e}")))?;
|
||||
stmt.execute(params![
|
||||
self.project,
|
||||
path_str,
|
||||
file_hash,
|
||||
s.name,
|
||||
s.param_count as i64,
|
||||
s.lang,
|
||||
json,
|
||||
now
|
||||
])?;
|
||||
}
|
||||
}
|
||||
|
||||
tx.commit()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Load every function summary for this project.
|
||||
pub fn load_all_summaries(&self) -> NyxResult<Vec<crate::summary::FuncSummary>> {
|
||||
let mut stmt = self
|
||||
.c()
|
||||
.prepare("SELECT summary FROM function_summaries WHERE project = ?1")?;
|
||||
|
||||
let iter = stmt.query_map([&self.project], |row| {
|
||||
let json: String = row.get(0)?;
|
||||
Ok(json)
|
||||
})?;
|
||||
|
||||
let mut out = Vec::new();
|
||||
for row in iter {
|
||||
let json = row?;
|
||||
let s: crate::summary::FuncSummary = serde_json::from_str(&json)
|
||||
.map_err(|e| rusqlite::Error::ToSqlConversionFailure(Box::new(e)))?;
|
||||
out.push(s);
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
/// gets files from the database
|
||||
pub fn get_files(&self, project: &str) -> NyxResult<Vec<PathBuf>> {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue