Feat/full cfg (#30)

* feat: Enhance control flow analysis with function summaries and taint analysis * feat: Update taint analysis to utilize function summaries for enhanced tracking * Refactor `walk.rs` batch processing and override handling: - Renamed `Batcher` to `BatchSender` for clarity. - Added `BatchSender::new` constructor for cleaner initialization. - Simplified batch size management in `BatchSender`. - Extracted `build_overrides` function for reusable override construction. - Improved error handling and validation in override building. - Enhanced performance with directory and file type filtering in `walk`. * Improve logging and streamline directory walk process: - Added detailed `tracing` logs for debugging batch flushes, override construction, and walk initialization/completion. - Optimized and simplified `filter_entry` logic for directory and file type filters. - Improved metadata checks and max file size enforcement during the scan. * Refactor and optimize taint tracking, label rules, and directory walk process: - Replaced `DefaultHasher` with `blake3::Hasher` for improved taint hashing. - Enhanced sorting and hashing logic in `taint.rs` for consistency and efficiency. - Removed unused `set_hash` function and redundant imports across files. - Improved batch sender logic in `walk.rs`, renaming key components for clarity. - Unified `spawn_senders` and `spawn_file_walker` with thread handling and channel tuple return. - Expanded label rules with additional matchers for sources, sanitizers, and sinks. - Deprecated `dump_cfg` and specific logging utilities in `cfg.rs` for code cleanup. 
* fix: fixed let chains error in walk.rs * fix: updated dependencies * fix: updated dependencies * chore: Remove standard error in scan.rs * feat: Introduce function summaries for enhanced taint and control flow analysis * feat: Enhance taint analysis with interop support and function summaries * feat: Add configuration analysis module and enhance matcher rules * feat: Add arity column to function_summaries and handle schema migration * fix: fixed clippy &PathBuf warnings * chore: Update dependencies and versioning in Cargo files * docs: Update README to enhance clarity and detail on features and analysis modes * chore: Update CHANGELOG for version 0.2.0 with new features, changes, and fixes * docs: Update SECURITY.md to clarify version support status --------- Co-authored-by: elipeter <eli.peter@es.fcm.travel>
2026-06-27 20:29:39 +02:00 · 2026-02-24 23:44:07 -05:00 · 2026-02-24 23:44:07 -05:00 · f96a89e7c1
commit f96a89e7c1
parent 8cbbec7d90
87 changed files with 11505 additions and 1099 deletions
--- a/src/database.rs
+++ b/src/database.rs
@ -1,6 +1,6 @@
 pub mod index {
    use crate::commands::scan::Diag;
-    use crate::errors::NyxResult;
+    use crate::errors::{NyxError, NyxResult};
    use crate::patterns::Severity;
    use r2d2::{Pool, PooledConnection};
    use r2d2_sqlite::SqliteConnectionManager;
@ -34,12 +34,18 @@ pub mod index {
            col INTEGER NOT NULL,
            PRIMARY KEY (file_id, rule_id, line, col));

-        CREATE TABLE IF NOT EXISTS function_summaries (hash TEXT PRIMARY KEY,
+        CREATE TABLE IF NOT EXISTS function_summaries (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
            project TEXT NOT NULL,
+            file_path TEXT NOT NULL,
+            file_hash BLOB NOT NULL,
            name TEXT NOT NULL,
+            arity INTEGER NOT NULL DEFAULT -1,
            lang TEXT NOT NULL,
            summary TEXT NOT NULL,
-            updated_at INTEGER NOT NULL);
+            updated_at INTEGER NOT NULL,
+            UNIQUE(project, file_path, name, arity)
+        );
    "#;

    // TODO: ADD CLEANS FOR EACH TABLE BASED ON PROJECT WHICH RUNS ON CLEAN
@ -61,6 +67,7 @@ pub mod index {

    impl Indexer {
        pub fn init(database_path: &Path) -> NyxResult<Arc<Pool<SqliteConnectionManager>>> {
+            let _span = tracing::info_span!("db_init", path = %database_path.display()).entered();
            let flags = OpenFlags::SQLITE_OPEN_READ_WRITE
                | OpenFlags::SQLITE_OPEN_CREATE
                | OpenFlags::SQLITE_OPEN_FULL_MUTEX;
@ -70,7 +77,43 @@ pub mod index {
            {
                let conn = pool.get()?;
                conn.pragma_update(None, "journal_mode", "WAL")?;
+                conn.pragma_update(None, "synchronous", "NORMAL")?;
+                conn.pragma_update(None, "cache_size", "-8000")?; // 8 MB
+                conn.pragma_update(None, "temp_store", "MEMORY")?;
+                conn.pragma_update(None, "mmap_size", "268435456")?; // 256 MB
                conn.execute_batch(SCHEMA)?;
+
+                // Migrate: if the function_summaries table has the old schema
+                // (missing `arity` column), drop and recreate it.
+                let has_arity: bool = conn
+                    .prepare("PRAGMA table_info(function_summaries)")
+                    .and_then(|mut s| {
+                        let cols: Vec<String> = s
+                            .query_map([], |r| r.get::<_, String>(1))?
+                            .filter_map(Result::ok)
+                            .collect();
+                        Ok(cols.iter().any(|c| c == "arity"))
+                    })
+                    .unwrap_or(true);
+
+                if !has_arity {
+                    tracing::info!("migrating function_summaries: adding arity column");
+                    conn.execute_batch("DROP TABLE IF EXISTS function_summaries;")?;
+                    conn.execute_batch(
+                        "CREATE TABLE IF NOT EXISTS function_summaries (
+                            id INTEGER PRIMARY KEY AUTOINCREMENT,
+                            project TEXT NOT NULL,
+                            file_path TEXT NOT NULL,
+                            file_hash BLOB NOT NULL,
+                            name TEXT NOT NULL,
+                            arity INTEGER NOT NULL DEFAULT -1,
+                            lang TEXT NOT NULL,
+                            summary TEXT NOT NULL,
+                            updated_at INTEGER NOT NULL,
+                            UNIQUE(project, file_path, name, arity)
+                        );",
+                    )?;
+                }
            }
            Ok(pool)
        }
@ -196,49 +239,73 @@ pub mod index {
            Ok(issue_iter.filter_map(Result::ok).collect())
        }

-        // pub fn upsert_summary(
-        //     &mut self,
-        //     project: &str,
-        //     path: &Path,
-        //     hash: &str,
-        //     s: &crate::summary::FuncSummary,
-        // ) -> NyxResult<()> {
-        //     let conn = self.c();
-        //     let now  = chrono::Utc::now().timestamp_millis(); // i64
-        //
-        //     conn.execute(
-        //         "INSERT INTO function_summaries (hash, project, name, lang, summary, updated_at)
-        //              VALUES (?1, ?2, ?3, ?4, ?5, ?6)
-        //              ON CONFLICT(hash) DO UPDATE SET summary = excluded.summary,
-        //                                              updated_at = excluded.updated_at",
-        //         (
-        //             hash,
-        //             project,
-        //             &s.name,
-        //             path.extension().and_then(|e| e.to_str()).unwrap_or_default(),
-        //             serde_json::to_string(s).unwrap(), //TODO REPLACE UNWRAP
-        //             now,
-        //         ),
-        //     )?;
-        //     Ok(())
-        // }
-        //
-        // pub fn load_all_summaries(&self, project: &str) -> NyxResult<Vec<crate::summary::FuncSummary<'static>>> {
-        //     let mut stmt = self
-        //         .c()
-        //         .prepare("SELECT summary FROM function_summaries WHERE project = ?1")?;
-        //
-        //     let iter = stmt.query_map([project], |row| {
-        //         let json: String = row.get(0)?;
-        //         Ok(serde_json::from_str::<crate::summary::FuncSummary>(json.as_str()).unwrap()) // TODO: REPLACE UNWRAP
-        //     })?;
-        //
-        //     Ok(iter
-        //         .collect::<Result<Vec<_>, _>>()?
-        //         .into_iter()
-        //         .map(|s| unsafe { std::mem::transmute::<_, crate::summary::FuncSummary<'static>>(s) })
-        //         .collect())
-        // }
+        /// Swap out the stored function summaries of one file inside a single
+        /// transaction.
+        ///
+        /// Every row matching `(self.project, file_path)` is deleted first; then
+        /// one row per entry of `summaries` is written, carrying `file_hash`, the
+        /// summary serialised as JSON, and the current Unix time in seconds.
+        /// Rows are written with `INSERT OR REPLACE`, so two entries sharing the
+        /// same name and parameter count end up as a single row (the later one).
+        ///
+        /// # Errors
+        /// Returns an error if the transaction cannot be opened or committed, if
+        /// any statement fails, if the system clock reads earlier than the Unix
+        /// epoch, or if a summary cannot be serialised to JSON.
+        pub fn replace_summaries_for_file(
+            &mut self,
+            file_path: &Path,
+            file_hash: &[u8],
+            summaries: &[crate::summary::FuncSummary],
+        ) -> NyxResult<()> {
+            const DELETE_FILE_ROWS: &str =
+                "DELETE FROM function_summaries WHERE project = ?1 AND file_path = ?2";
+            const INSERT_ROW: &str = "INSERT OR REPLACE INTO function_summaries
+                        (project, file_path, file_hash, name, arity, lang, summary, updated_at)
+                     VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)";
+
+            let txn = self.conn.transaction()?;
+            let file_key = file_path.to_string_lossy();
+            let updated_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;
+
+            // Clear whatever was recorded for this file before re-inserting.
+            txn.execute(DELETE_FILE_ROWS, params![self.project, file_key])?;
+
+            // One prepared statement is reused for every summary of the file.
+            let mut insert_row = txn.prepare(INSERT_ROW)?;
+            for summary in summaries {
+                let arity = summary.param_count as i64;
+                let payload = serde_json::to_string(summary)
+                    .map_err(|e| NyxError::Msg(format!("summary serialise: {e}")))?;
+                insert_row.execute(params![
+                    self.project,
+                    file_key,
+                    file_hash,
+                    summary.name,
+                    arity,
+                    summary.lang,
+                    payload,
+                    updated_at
+                ])?;
+            }
+            // The statement borrows the transaction; release it before committing.
+            drop(insert_row);
+
+            txn.commit()?;
+            Ok(())
+        }
+
+        /// Load every function summary stored for this project.
+        ///
+        /// Selects the `summary` column of each `function_summaries` row whose
+        /// `project` equals `self.project` and decodes it from JSON.
+        ///
+        /// # Errors
+        /// Returns the first SQLite error hit while preparing or stepping the
+        /// query.  A stored value that does not decode is reported as
+        /// `rusqlite::Error::FromSqlConversionFailure` for column 0.
+        pub fn load_all_summaries(&self) -> NyxResult<Vec<crate::summary::FuncSummary>> {
+            let mut stmt = self
+                .c()
+                .prepare("SELECT summary FROM function_summaries WHERE project = ?1")?;
+
+            let rows = stmt.query_map([&self.project], |row| {
+                let json: String = row.get(0)?;
+                // Decoding happens on the read path, so this is a from-SQL
+                // conversion failure rather than a to-SQL one.
+                serde_json::from_str::<crate::summary::FuncSummary>(&json).map_err(|e| {
+                    rusqlite::Error::FromSqlConversionFailure(
+                        0,
+                        rusqlite::types::Type::Text,
+                        Box::new(e),
+                    )
+                })
+            })?;
+
+            // Short-circuit on the first failing row.
+            let summaries = rows.collect::<Result<Vec<_>, _>>()?;
+            Ok(summaries)
+        }

        /// gets files from the database
        pub fn get_files(&self, project: &str) -> NyxResult<Vec<PathBuf>> {