nyx/src/database.rs

pub mod index {
    use crate::commands::scan::Diag;
    use crate::errors::{NyxError, NyxResult};
    use crate::patterns::Severity;
    use r2d2::{Pool, PooledConnection};
    use r2d2_sqlite::SqliteConnectionManager;
    use rusqlite::{Connection, OpenFlags, OptionalExtension, params};
    use std::fs;
    use std::ops::Deref;
    use std::path::{Path, PathBuf};
    use std::str::FromStr;
    use std::sync::Arc;
    use std::time::{SystemTime, UNIX_EPOCH};

    /// DB schema (foreign‑keys enabled).
    const SCHEMA: &str = r#"
        PRAGMA foreign_keys = ON;

        CREATE TABLE IF NOT EXISTS files (id INTEGER PRIMARY KEY AUTOINCREMENT,
            project TEXT NOT NULL,
            path TEXT NOT NULL,
            hash BLOB NOT NULL,
            mtime INTEGER NOT NULL,
            scanned_at INTEGER NOT NULL,
            UNIQUE(project, path)
        );

        CREATE TABLE IF NOT EXISTS issues (file_id INTEGER NOT NULL
                              REFERENCES files(id)
                              ON DELETE CASCADE,
            rule_id TEXT NOT NULL,
            severity TEXT NOT NULL,
            line INTEGER NOT NULL,
            col INTEGER NOT NULL,
            PRIMARY KEY (file_id, rule_id, line, col));

        CREATE TABLE IF NOT EXISTS function_summaries (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            project TEXT NOT NULL,
            file_path TEXT NOT NULL,
            file_hash BLOB NOT NULL,
            name TEXT NOT NULL,
            arity INTEGER NOT NULL DEFAULT -1,
            lang TEXT NOT NULL,
            summary TEXT NOT NULL,
            updated_at INTEGER NOT NULL,
            UNIQUE(project, file_path, name, arity)
        );
    "#;

    // TODO: ADD CLEANS FOR EACH TABLE BASED ON PROJECT WHICH RUNS ON CLEAN
    // TODO: ADD DROP AND GIVE A CLI PARAMETER FOR DROP

    /// A single issue row, ready for insertion.
    #[derive(Debug, Clone)]
    pub struct IssueRow<'a> {
        pub rule_id: &'a str,
        pub severity: &'a str,
        pub line: i64,
        pub col: i64,
    }

    pub struct Indexer {
        conn: PooledConnection<SqliteConnectionManager>,
        project: String,
    }

    impl Indexer {
        pub fn init(database_path: &Path) -> NyxResult<Arc<Pool<SqliteConnectionManager>>> {
            let _span = tracing::info_span!("db_init", path = %database_path.display()).entered();
            let flags = OpenFlags::SQLITE_OPEN_READ_WRITE
                | OpenFlags::SQLITE_OPEN_CREATE
                | OpenFlags::SQLITE_OPEN_FULL_MUTEX;
            let manager = SqliteConnectionManager::file(database_path).with_flags(flags);
            let pool = Arc::new(Pool::new(manager)?);

            {
                let conn = pool.get()?;
                conn.pragma_update(None, "journal_mode", "WAL")?;
                conn.pragma_update(None, "synchronous", "NORMAL")?;
                conn.pragma_update(None, "cache_size", "-8000")?; // 8 MB
                conn.pragma_update(None, "temp_store", "MEMORY")?;
                conn.pragma_update(None, "mmap_size", "268435456")?; // 256 MB
                conn.execute_batch(SCHEMA)?;

                // Migrate: if the function_summaries table has the old schema
                // (missing `arity` column), drop and recreate it.
                let has_arity: bool = conn
                    .prepare("PRAGMA table_info(function_summaries)")
                    .and_then(|mut s| {
                        let cols: Vec<String> = s
                            .query_map([], |r| r.get::<_, String>(1))?
                            .filter_map(Result::ok)
                            .collect();
                        Ok(cols.iter().any(|c| c == "arity"))
                    })
                    .unwrap_or(true);

                if !has_arity {
                    tracing::info!("migrating function_summaries: adding arity column");
                    conn.execute_batch("DROP TABLE IF EXISTS function_summaries;")?;
                    conn.execute_batch(
                        "CREATE TABLE IF NOT EXISTS function_summaries (
                            id INTEGER PRIMARY KEY AUTOINCREMENT,
                            project TEXT NOT NULL,
                            file_path TEXT NOT NULL,
                            file_hash BLOB NOT NULL,
                            name TEXT NOT NULL,
                            arity INTEGER NOT NULL DEFAULT -1,
                            lang TEXT NOT NULL,
                            summary TEXT NOT NULL,
                            updated_at INTEGER NOT NULL,
                            UNIQUE(project, file_path, name, arity)
                        );",
                    )?;
                }
            }
            Ok(pool)
        }

        pub fn from_pool(project: &str, pool: &Pool<SqliteConnectionManager>) -> NyxResult<Self> {
            let conn = pool.get()?;
            Ok(Self {
                conn,
                project: project.to_owned(),
            })
        }

        // helper so code below can treat PooledConnection like &Connection
        fn c(&self) -> &Connection {
            self.conn.deref()
        }

        /// Return true when the file *content* or *mtime* changed since the last scan.
        pub fn should_scan(&self, path: &Path) -> NyxResult<bool> {
            let meta = fs::metadata(path)?;
            let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
            let digest = Self::digest_file(path)?;

            let row: Option<(Vec<u8>, i64)> = self
                .conn
                .query_row(
                    "SELECT hash, mtime FROM files WHERE project = ?1 AND path = ?2",
                    params![self.project, path.to_string_lossy()],
                    |r| Ok((r.get(0)?, r.get(1)?)),
                )
                .optional()?;

            Ok(match row {
                Some((stored_hash, stored_mtime)) => stored_hash != digest || stored_mtime != mtime,
                None => true,
            })
        }

        /// Insert or update the `files` row and return its id.
        pub fn upsert_file(&self, path: &Path) -> NyxResult<i64> {
            let meta = fs::metadata(path)?;
            let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
            let scanned_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;
            let digest = Self::digest_file(path)?;

            self.c().execute(
                "INSERT INTO files (project, path, hash, mtime, scanned_at)
                 VALUES (?1, ?2, ?3, ?4, ?5)
                 ON CONFLICT(project,path) DO UPDATE
                 SET hash = excluded.hash,
                     mtime = excluded.mtime,
                     scanned_at = excluded.scanned_at",
                params![
                    self.project,
                    path.to_string_lossy(),
                    digest,
                    mtime,
                    scanned_at
                ],
            )?;

            let id: i64 = self.c().query_row(
                "SELECT id FROM files WHERE project = ?1 AND path = ?2",
                params![self.project, path.to_string_lossy()],
                |r| r.get(0),
            )?;
            Ok(id)
        }

        /// Replace all issues for `file_id` with the supplied set.
        pub fn replace_issues<'a>(
            &mut self,
            file_id: i64,
            issues: impl IntoIterator<Item = IssueRow<'a>>,
        ) -> NyxResult<()> {
            let tx = self.conn.transaction()?;
            tx.execute("DELETE FROM issues WHERE file_id = ?", params![file_id])?;

            {
                let mut stmt = tx.prepare(
                    "INSERT INTO issues (file_id, rule_id, severity, line, col)
                     VALUES (?1, ?2, ?3, ?4, ?5)",
                )?;
                for iss in issues {
                    stmt.execute(params![
                        file_id,
                        iss.rule_id,
                        iss.severity,
                        iss.line,
                        iss.col
                    ])?;
                }
            }
            tx.commit()?;
            Ok(())
        }

        /// Gets the issues for a specific file so we don't have to rescan
        pub fn get_issues_from_file(&self, path: &Path) -> NyxResult<Vec<Diag>> {
            let file_id: i64 = self.c().query_row(
                "SELECT id FROM files WHERE project = ?1 AND path = ?2",
                params![self.project, path.to_string_lossy()],
                |r| r.get(0),
            )?;

            let mut stmt = self.c().prepare(
                "SELECT rule_id, severity, line, col
         FROM issues
         WHERE file_id = ?1",
            )?;

            let issue_iter = stmt.query_map([file_id], |row| {
                let sev_str: String = row.get(1)?;
                Ok(Diag {
                    path: path.to_string_lossy().to_string(),
                    id: row.get::<_, String>(0)?, // rule_id
                    line: row.get::<_, i64>(2)? as usize,
                    col: row.get::<_, i64>(3)? as usize,
                    severity: Severity::from_str(&sev_str).unwrap(),
                })
            })?;

            Ok(issue_iter.filter_map(Result::ok).collect())
        }

        /// Atomically replace all function summaries for a single file.
        ///
        /// Deletes every existing summary row for `(project, file_path)` then
        /// inserts the new set.  This keeps the table in sync when a file is
        /// re‑parsed and its functions change.
        pub fn replace_summaries_for_file(
            &mut self,
            file_path: &Path,
            file_hash: &[u8],
            summaries: &[crate::summary::FuncSummary],
        ) -> NyxResult<()> {
            let tx = self.conn.transaction()?;
            let path_str = file_path.to_string_lossy();
            let now = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;

            tx.execute(
                "DELETE FROM function_summaries WHERE project = ?1 AND file_path = ?2",
                params![self.project, path_str],
            )?;

            {
                let mut stmt = tx.prepare(
                    "INSERT OR REPLACE INTO function_summaries
                        (project, file_path, file_hash, name, arity, lang, summary, updated_at)
                     VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
                )?;

                for s in summaries {
                    let json = serde_json::to_string(s)
                        .map_err(|e| NyxError::Msg(format!("summary serialise: {e}")))?;
                    stmt.execute(params![
                        self.project,
                        path_str,
                        file_hash,
                        s.name,
                        s.param_count as i64,
                        s.lang,
                        json,
                        now
                    ])?;
                }
            }

            tx.commit()?;
            Ok(())
        }

        /// Load every function summary for this project.
        pub fn load_all_summaries(&self) -> NyxResult<Vec<crate::summary::FuncSummary>> {
            let mut stmt = self
                .c()
                .prepare("SELECT summary FROM function_summaries WHERE project = ?1")?;

            let iter = stmt.query_map([&self.project], |row| {
                let json: String = row.get(0)?;
                Ok(json)
            })?;

            let mut out = Vec::new();
            for row in iter {
                let json = row?;
                let s: crate::summary::FuncSummary = serde_json::from_str(&json)
                    .map_err(|e| rusqlite::Error::ToSqlConversionFailure(Box::new(e)))?;
                out.push(s);
            }
            Ok(out)
        }

        /// gets files from the database
        pub fn get_files(&self, project: &str) -> NyxResult<Vec<PathBuf>> {
            let mut stmt = self.c().prepare(
                "SELECT path
         FROM files
         WHERE project = ?1",
            )?;

            let file_iter = stmt.query_map([project], |row| row.get::<_, String>(0))?;

            Ok(file_iter
                .map(|p| p.map(PathBuf::from))
                .collect::<Result<_, _>>()?)
        }

        // -------------------------------------------------------------------------
        // Maintenance utilities
        // -------------------------------------------------------------------------
        pub fn clear(&self) -> NyxResult<()> {
            self.c().execute_batch(
                r#"
        PRAGMA foreign_keys = OFF;

        DROP TABLE IF EXISTS issues;
        DROP TABLE IF EXISTS files;
        DROP TABLE IF EXISTS function_summaries;

        PRAGMA foreign_keys = ON;
        VACUUM;
        "#,
            )?;

            self.c().execute_batch(SCHEMA)?;
            Ok(())
        }

        pub fn vacuum(&self) -> NyxResult<()> {
            self.c().execute("VACUUM;", [])?;
            Ok(())
        }

        // -------------------------------------------------------------------------
        // Helpers
        // -------------------------------------------------------------------------
        fn digest_file(path: &Path) -> NyxResult<Vec<u8>> {
            let mut hasher = blake3::Hasher::new();
            let mut file = fs::File::open(path)?;
            std::io::copy(&mut file, &mut hasher)?;
            Ok(hasher.finalize().as_bytes().to_vec())
        }
    }
}

#[test]
fn indexer_should_scan_and_upsert_logic() {
    let td = tempfile::tempdir().unwrap();
    let db = td.path().join("nyx.sqlite");
    let file = td.path().join("sample.rs");
    std::fs::write(&file, "fn main() {}").unwrap();

    let pool = index::Indexer::init(&db).unwrap();
    let idx = index::Indexer::from_pool("proj", &pool).unwrap();

    // first time: nothing in DB → must scan
    assert!(idx.should_scan(&file).unwrap());

    // after upsert: no changes → should *not* scan
    idx.upsert_file(&file).unwrap();
    assert!(!idx.should_scan(&file).unwrap());

    // modify contents
    std::thread::sleep(std::time::Duration::from_millis(25)); // ensure mtime tick
    std::fs::write(&file, "fn main() { /* changed */ }").unwrap();
    assert!(idx.should_scan(&file).unwrap());
}

#[test]
fn replace_issues_and_query_back() {
    let td = tempfile::tempdir().unwrap();
    let db = td.path().join("nyx.sqlite");
    let file = td.path().join("code.go");
    std::fs::write(&file, "package main").unwrap();

    let pool = index::Indexer::init(&db).unwrap();
    let mut idx = index::Indexer::from_pool("proj", &pool).unwrap();
    let fid = idx.upsert_file(&file).unwrap();

    let issues = [
        index::IssueRow {
            rule_id: "X1",
            severity: "High",
            line: 3,
            col: 7,
        },
        index::IssueRow {
            rule_id: "X2",
            severity: "Low",
            line: 4,
            col: 1,
        },
    ];
    idx.replace_issues(fid, issues.clone()).unwrap();

    let stored = idx.get_issues_from_file(&file).unwrap();
    assert_eq!(stored.len(), 2);
    assert!(
        stored
            .iter()
            .any(|d| d.id == "X1" && d.severity == crate::patterns::Severity::High)
    );
    assert!(
        stored
            .iter()
            .any(|d| d.id == "X2" && d.severity == crate::patterns::Severity::Low)
    );
}

#[test]
fn clear_and_vacuum_reset_tables() {
    let td = tempfile::tempdir().unwrap();
    let db = td.path().join("nyx.sqlite");
    let f = td.path().join("f.rs");
    std::fs::write(&f, "//").unwrap();

    let pool = index::Indexer::init(&db).unwrap();
    let idx = index::Indexer::from_pool("proj", &pool).unwrap();
    idx.upsert_file(&f).unwrap();

    assert!(!idx.get_files("proj").unwrap().is_empty());
    idx.clear().unwrap();
    idx.vacuum().unwrap();
    assert!(idx.get_files("proj").unwrap().is_empty());
}
-												- Remove unused `filetypes.rs` and `walk.rs` modules
- Introduce `index.rs` for file indexing using SQLite
- Expand configuration options in `config.rs`, including `excluded_files`
- Update dependencies in `Cargo.toml` to include SQLite, hashing, and regex libraries

											
										
										
											2025-06-16 23:47:50 +02:00
+								pub mod index {
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								    use crate::commands::scan::Diag;
-												Feat/full cfg (#30)

* feat: Enhance control flow analysis with function summaries and taint analysis

* feat: Update taint analysis to utilize function summaries for enhanced tracking

* Refactor `walk.rs` batch processing and override handling:

- Renamed `Batcher` to `BatchSender` for clarity.
- Added `BatchSender::new` constructor for cleaner initialization.
- Simplified batch size management in `BatchSender`.
- Extracted `build_overrides` function for reusable override construction.
- Improved error handling and validation in override building.
- Enhanced performance with directory and file type filtering in `walk`.

* Improve logging and streamline directory walk process:

- Added detailed `tracing` logs for debugging batch flushes, override construction, and walk initialization/completion.
- Optimized and simplified `filter_entry` logic for directory and file type filters.
- Improved metadata checks and max file size enforcement during the scan.

* Refactor and optimize taint tracking, label rules, and directory walk process:

- Replaced `DefaultHasher` with `blake3::Hasher` for improved taint hashing.
- Enhanced sorting and hashing logic in `taint.rs` for consistency and efficiency.
- Removed unused `set_hash` function and redundant imports across files.
- Improved batch sender logic in `walk.rs`, renaming key components for clarity.
- Unified `spawn_senders` and `spawn_file_walker` with thread handling and channel tuple return.
- Expanded label rules with additional matchers for sources, sanitizers, and sinks.
- Deprecated `dump_cfg` and specific logging utilities in `cfg.rs` for code cleanup.

* fix: fixed let chains error in walk.rs

* fix: updated dependencies

* fix: updated dependencies

* chore: Remove standard error in scan.rs

* feat: Introduce function summaries for enhanced taint and control flow analysis

* feat: Enhance taint analysis with interop support and function summaries

* feat: Add configuration analysis module and enhance matcher rules

* feat: Add arity column to function_summaries and handle schema migration

* fix: fixed clippy &PathBuf warnings

* chore: Update dependencies and versioning in Cargo files

* docs: Update README to enhance clarity and detail on features and analysis modes

* chore: Update CHANGELOG for version 0.2.0 with new features, changes, and fixes

* docs: Update SECURITY.md to clarify version support status

---------

Co-authored-by: elipeter <eli.peter@es.fcm.travel>
											
										
										
											2026-02-24 23:44:07 -05:00
+								    use crate::errors::{NyxError, NyxResult};
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								    use crate::patterns::Severity;
 								    use r2d2::{Pool, PooledConnection};
 								    use r2d2_sqlite::SqliteConnectionManager;
 								    use rusqlite::{Connection, OpenFlags, OptionalExtension, params};
 								    use std::fs;
 								    use std::ops::Deref;
 								    use std::path::{Path, PathBuf};
 								    use std::str::FromStr;
 								    use std::sync::Arc;
 								    use std::time::{SystemTime, UNIX_EPOCH};
 								    /// DB schema (foreign‑keys enabled).
 								    const SCHEMA: &str = r#"
-												Refactor database schema and scanning process:
- Introduced `issues` table for detailed vulnerability storage.
- Enhanced `files` table with project scoping and unique constraints.
- Replaced `OutputFormat` enum with `String` for flexibility.
- Added support for formatted console output of scan results.
- Integrated file and issue updating logic for incremental scans.
- Optimized scanning by leveraging database-stored issues.

											
										
										
											2025-06-17 16:46:45 +02:00
+								        PRAGMA foreign_keys = ON;
-												Added experimental control flow analysis and syntax classification for rust lang (#22)

* Introduce control flow graph (CFG) support:

- Added `cfg.rs` with CFG generation and analysis utilities.
- Integrated `petgraph` library for graph-based computations.
- Updated `ast.rs` to utilize CFG for function analysis.
- Modified `Cargo.toml` and `Cargo.lock` to include new dependencies.
- Improved static analysis with taint tracking through CFG paths.

* feat: enhance control flow analysis with taint tracking and node labeling

* feat: improve control flow graph with enhanced node handling and new tests

* Remove unnecessary reference marker in `byte_offset_to_point` comment.

* Remove unnecessary reference marker in `byte_offset_to_point` comment.

* Refactor `ast.rs` for performance and clarity; enhance `cfg.rs` with recursive CFG generation and improved classification logic for AST analysis.

* Refactor CFG and taint tracking logic:

- Enhanced `cfg.rs` with inline helper function `text_of` for cleaner UTF-8 handling in AST nodes.
- Expanded `labels.rs` rules with detailed `Sources`, `Sanitizers`, and `Sinks` for improved classification.
- Refined `push_node` to handle method call expressions with object-function pairing.
- Simplified code handling in trivia skipping and debug-only logic.

* Enhance `cfg.rs` with `first_call_ident` helper and improve identifier extraction logic in `push_node`.

* Add targeted CFG taint-tracking tests to enhance analysis coverage.

* Enhance CFG generation with loop expression handling and improve taint tracking logic. Add new sanitization example in `examples/sanitize/example.rs`.

* Update README with installation instructions for Cargo and GitHub releases.

* Expand taint-tracking with precise `def-use` computation and enhance `labels.rs` for detailed classification. Extend `examples/sanitize` with realistic scenarios demonstrating new rules.

* Refactor `labels.rs`:

- Removed redundant `LabelRule` entries for cleaner rule definitions.
- Adjusted matching logic to prioritize suffix and prefix matches effectively.

* Refactor `labels.rs`:

- Removed redundant `LabelRule` entries for cleaner rule definitions.
- Adjusted matching logic to prioritize suffix and prefix matches effectively.

* Add test for taint tracking with multiple sources in `cfg.rs`.

* Add `function_summaries` table and implement summary upsert/load methods. Refactor to handle summary storage and retrieval efficiently, with placeholder clean/drop logic.

* refactor: split `labels.rs` into modular structure with language-specific files

* refactor: split `labels.rs` into modular structure with language-specific files

* refactor: clean up SQL table definitions in `database.rs` for better readability

* refactor: simplify CFG structure by removing lifetime parameters and enhancing taint metadata handling

* refactor: update TODO comments in `cfg.rs` to clarify future enhancements for cap labels and function details

* refactor: remove redundant header from README.md for improved clarity

* feat: add PHF-based syntax classifiers and Kind enum for efficient syntax mapping across languages

* feat: introduce analysis modes for enhanced scanner configuration and diagnostics

* feat: define Kind enum for syntax classification in control flow analysis

* feat: bump version to 0.2.0-alpha and update CHANGELOG for new features and fixes

* refactor: clean up imports and formatting in AST and CFG modules for improved readability

* refactor: simplify function signatures and improve code readability in CFG and module files

* fix: correct rayon_thread_stack_size comment to reflect actual value of 8 MiB

* refactor: update string formatting in clean and project modules for consistency

* refactor: fix indentation in clean.rs for improved readability

---------

Co-authored-by: elipeter <eli.peter@es.fcm.travel>
											
										
										
											2025-06-28 17:36:14 +02:00
+								        CREATE TABLE IF NOT EXISTS files (id INTEGER PRIMARY KEY AUTOINCREMENT,
 								            project TEXT NOT NULL,
 								            path TEXT NOT NULL,
 								            hash BLOB NOT NULL,
 								            mtime INTEGER NOT NULL,
-												Refactor database schema and scanning process:
- Introduced `issues` table for detailed vulnerability storage.
- Enhanced `files` table with project scoping and unique constraints.
- Replaced `OutputFormat` enum with `String` for flexibility.
- Added support for formatted console output of scan results.
- Integrated file and issue updating logic for incremental scans.
- Optimized scanning by leveraging database-stored issues.

											
										
										
											2025-06-17 16:46:45 +02:00
+								            scanned_at INTEGER NOT NULL,
 								            UNIQUE(project, path)
 								        );
-												Added experimental control flow analysis and syntax classification for rust lang (#22)

* Introduce control flow graph (CFG) support:

- Added `cfg.rs` with CFG generation and analysis utilities.
- Integrated `petgraph` library for graph-based computations.
- Updated `ast.rs` to utilize CFG for function analysis.
- Modified `Cargo.toml` and `Cargo.lock` to include new dependencies.
- Improved static analysis with taint tracking through CFG paths.

* feat: enhance control flow analysis with taint tracking and node labeling

* feat: improve control flow graph with enhanced node handling and new tests

* Remove unnecessary reference marker in `byte_offset_to_point` comment.

* Remove unnecessary reference marker in `byte_offset_to_point` comment.

* Refactor `ast.rs` for performance and clarity; enhance `cfg.rs` with recursive CFG generation and improved classification logic for AST analysis.

* Refactor CFG and taint tracking logic:

- Enhanced `cfg.rs` with inline helper function `text_of` for cleaner UTF-8 handling in AST nodes.
- Expanded `labels.rs` rules with detailed `Sources`, `Sanitizers`, and `Sinks` for improved classification.
- Refined `push_node` to handle method call expressions with object-function pairing.
- Simplified code handling in trivia skipping and debug-only logic.

* Enhance `cfg.rs` with `first_call_ident` helper and improve identifier extraction logic in `push_node`.

* Add targeted CFG taint-tracking tests to enhance analysis coverage.

* Enhance CFG generation with loop expression handling and improve taint tracking logic. Add new sanitization example in `examples/sanitize/example.rs`.

* Update README with installation instructions for Cargo and GitHub releases.

* Expand taint-tracking with precise `def-use` computation and enhance `labels.rs` for detailed classification. Extend `examples/sanitize` with realistic scenarios demonstrating new rules.

* Refactor `labels.rs`:

- Removed redundant `LabelRule` entries for cleaner rule definitions.
- Adjusted matching logic to prioritize suffix and prefix matches effectively.

* Refactor `labels.rs`:

- Removed redundant `LabelRule` entries for cleaner rule definitions.
- Adjusted matching logic to prioritize suffix and prefix matches effectively.

* Add test for taint tracking with multiple sources in `cfg.rs`.

* Add `function_summaries` table and implement summary upsert/load methods. Refactor to handle summary storage and retrieval efficiently, with placeholder clean/drop logic.

* refactor: split `labels.rs` into modular structure with language-specific files

* refactor: split `labels.rs` into modular structure with language-specific files

* refactor: clean up SQL table definitions in `database.rs` for better readability

* refactor: simplify CFG structure by removing lifetime parameters and enhancing taint metadata handling

* refactor: update TODO comments in `cfg.rs` to clarify future enhancements for cap labels and function details

* refactor: remove redundant header from README.md for improved clarity

* feat: add PHF-based syntax classifiers and Kind enum for efficient syntax mapping across languages

* feat: introduce analysis modes for enhanced scanner configuration and diagnostics

* feat: define Kind enum for syntax classification in control flow analysis

* feat: bump version to 0.2.0-alpha and update CHANGELOG for new features and fixes

* refactor: clean up imports and formatting in AST and CFG modules for improved readability

* refactor: simplify function signatures and improve code readability in CFG and module files

* fix: correct rayon_thread_stack_size comment to reflect actual value of 8 MiB

* refactor: update string formatting in clean and project modules for consistency

* refactor: fix indentation in clean.rs for improved readability

---------

Co-authored-by: elipeter <eli.peter@es.fcm.travel>
											
										
										
											2025-06-28 17:36:14 +02:00
+								        CREATE TABLE IF NOT EXISTS issues (file_id INTEGER NOT NULL
-												Refactor database schema and scanning process:
- Introduced `issues` table for detailed vulnerability storage.
- Enhanced `files` table with project scoping and unique constraints.
- Replaced `OutputFormat` enum with `String` for flexibility.
- Added support for formatted console output of scan results.
- Integrated file and issue updating logic for incremental scans.
- Optimized scanning by leveraging database-stored issues.

											
										
										
											2025-06-17 16:46:45 +02:00
+								                              REFERENCES files(id)
 								                              ON DELETE CASCADE,
-												Added experimental control flow analysis and syntax classification for rust lang (#22)

* Introduce control flow graph (CFG) support:

- Added `cfg.rs` with CFG generation and analysis utilities.
- Integrated `petgraph` library for graph-based computations.
- Updated `ast.rs` to utilize CFG for function analysis.
- Modified `Cargo.toml` and `Cargo.lock` to include new dependencies.
- Improved static analysis with taint tracking through CFG paths.

* feat: enhance control flow analysis with taint tracking and node labeling

* feat: improve control flow graph with enhanced node handling and new tests

* Remove unnecessary reference marker in `byte_offset_to_point` comment.

* Remove unnecessary reference marker in `byte_offset_to_point` comment.

* Refactor `ast.rs` for performance and clarity; enhance `cfg.rs` with recursive CFG generation and improved classification logic for AST analysis.

* Refactor CFG and taint tracking logic:

- Enhanced `cfg.rs` with inline helper function `text_of` for cleaner UTF-8 handling in AST nodes.
- Expanded `labels.rs` rules with detailed `Sources`, `Sanitizers`, and `Sinks` for improved classification.
- Refined `push_node` to handle method call expressions with object-function pairing.
- Simplified code handling in trivia skipping and debug-only logic.

* Enhance `cfg.rs` with `first_call_ident` helper and improve identifier extraction logic in `push_node`.

* Add targeted CFG taint-tracking tests to enhance analysis coverage.

* Enhance CFG generation with loop expression handling and improve taint tracking logic. Add new sanitization example in `examples/sanitize/example.rs`.

* Update README with installation instructions for Cargo and GitHub releases.

* Expand taint-tracking with precise `def-use` computation and enhance `labels.rs` for detailed classification. Extend `examples/sanitize` with realistic scenarios demonstrating new rules.

* Refactor `labels.rs`:

- Removed redundant `LabelRule` entries for cleaner rule definitions.
- Adjusted matching logic to prioritize suffix and prefix matches effectively.

* Refactor `labels.rs`:

- Removed redundant `LabelRule` entries for cleaner rule definitions.
- Adjusted matching logic to prioritize suffix and prefix matches effectively.

* Add test for taint tracking with multiple sources in `cfg.rs`.

* Add `function_summaries` table and implement summary upsert/load methods. Refactor to handle summary storage and retrieval efficiently, with placeholder clean/drop logic.

* refactor: split `labels.rs` into modular structure with language-specific files

* refactor: split `labels.rs` into modular structure with language-specific files

* refactor: clean up SQL table definitions in `database.rs` for better readability

* refactor: simplify CFG structure by removing lifetime parameters and enhancing taint metadata handling

* refactor: update TODO comments in `cfg.rs` to clarify future enhancements for cap labels and function details

* refactor: remove redundant header from README.md for improved clarity

* feat: add PHF-based syntax classifiers and Kind enum for efficient syntax mapping across languages

* feat: introduce analysis modes for enhanced scanner configuration and diagnostics

* feat: define Kind enum for syntax classification in control flow analysis

* feat: bump version to 0.2.0-alpha and update CHANGELOG for new features and fixes

* refactor: clean up imports and formatting in AST and CFG modules for improved readability

* refactor: simplify function signatures and improve code readability in CFG and module files

* fix: correct rayon_thread_stack_size comment to reflect actual value of 8 MiB

* refactor: update string formatting in clean and project modules for consistency

* refactor: fix indentation in clean.rs for improved readability

---------

Co-authored-by: elipeter <eli.peter@es.fcm.travel>
											
										
										
											2025-06-28 17:36:14 +02:00
+								            rule_id TEXT NOT NULL,
 								            severity TEXT NOT NULL,
 								            line INTEGER NOT NULL,
 								            col INTEGER NOT NULL,
 								            PRIMARY KEY (file_id, rule_id, line, col));
-												Feat/full cfg (#30)

* feat: Enhance control flow analysis with function summaries and taint analysis

* feat: Update taint analysis to utilize function summaries for enhanced tracking

* Refactor `walk.rs` batch processing and override handling:

- Renamed `Batcher` to `BatchSender` for clarity.
- Added `BatchSender::new` constructor for cleaner initialization.
- Simplified batch size management in `BatchSender`.
- Extracted `build_overrides` function for reusable override construction.
- Improved error handling and validation in override building.
- Enhanced performance with directory and file type filtering in `walk`.

* Improve logging and streamline directory walk process:

- Added detailed `tracing` logs for debugging batch flushes, override construction, and walk initialization/completion.
- Optimized and simplified `filter_entry` logic for directory and file type filters.
- Improved metadata checks and max file size enforcement during the scan.

* Refactor and optimize taint tracking, label rules, and directory walk process:

- Replaced `DefaultHasher` with `blake3::Hasher` for improved taint hashing.
- Enhanced sorting and hashing logic in `taint.rs` for consistency and efficiency.
- Removed unused `set_hash` function and redundant imports across files.
- Improved batch sender logic in `walk.rs`, renaming key components for clarity.
- Unified `spawn_senders` and `spawn_file_walker` with thread handling and channel tuple return.
- Expanded label rules with additional matchers for sources, sanitizers, and sinks.
- Deprecated `dump_cfg` and specific logging utilities in `cfg.rs` for code cleanup.

* fix: fixed let chains error in walk.rs

* fix: updated dependencies

* fix: updated dependencies

* chore: Remove standard error in scan.rs

* feat: Introduce function summaries for enhanced taint and control flow analysis

* feat: Enhance taint analysis with interop support and function summaries

* feat: Add configuration analysis module and enhance matcher rules

* feat: Add arity column to function_summaries and handle schema migration

* fix: fixed clippy &PathBuf warnings

* chore: Update dependencies and versioning in Cargo files

* docs: Update README to enhance clarity and detail on features and analysis modes

* chore: Update CHANGELOG for version 0.2.0 with new features, changes, and fixes

* docs: Update SECURITY.md to clarify version support status

---------

Co-authored-by: elipeter <eli.peter@es.fcm.travel>
											
										
										
											2026-02-24 23:44:07 -05:00
+								        CREATE TABLE IF NOT EXISTS function_summaries (
 								            id INTEGER PRIMARY KEY AUTOINCREMENT,
-												Added experimental control flow analysis and syntax classification for rust lang (#22)

* Introduce control flow graph (CFG) support:

- Added `cfg.rs` with CFG generation and analysis utilities.
- Integrated `petgraph` library for graph-based computations.
- Updated `ast.rs` to utilize CFG for function analysis.
- Modified `Cargo.toml` and `Cargo.lock` to include new dependencies.
- Improved static analysis with taint tracking through CFG paths.

* feat: enhance control flow analysis with taint tracking and node labeling

* feat: improve control flow graph with enhanced node handling and new tests

* Remove unnecessary reference marker in `byte_offset_to_point` comment.

* Remove unnecessary reference marker in `byte_offset_to_point` comment.

* Refactor `ast.rs` for performance and clarity; enhance `cfg.rs` with recursive CFG generation and improved classification logic for AST analysis.

* Refactor CFG and taint tracking logic:

- Enhanced `cfg.rs` with inline helper function `text_of` for cleaner UTF-8 handling in AST nodes.
- Expanded `labels.rs` rules with detailed `Sources`, `Sanitizers`, and `Sinks` for improved classification.
- Refined `push_node` to handle method call expressions with object-function pairing.
- Simplified code handling in trivia skipping and debug-only logic.

* Enhance `cfg.rs` with `first_call_ident` helper and improve identifier extraction logic in `push_node`.

* Add targeted CFG taint-tracking tests to enhance analysis coverage.

* Enhance CFG generation with loop expression handling and improve taint tracking logic. Add new sanitization example in `examples/sanitize/example.rs`.

* Update README with installation instructions for Cargo and GitHub releases.

* Expand taint-tracking with precise `def-use` computation and enhance `labels.rs` for detailed classification. Extend `examples/sanitize` with realistic scenarios demonstrating new rules.

* Refactor `labels.rs`:

- Removed redundant `LabelRule` entries for cleaner rule definitions.
- Adjusted matching logic to prioritize suffix and prefix matches effectively.

* Refactor `labels.rs`:

- Removed redundant `LabelRule` entries for cleaner rule definitions.
- Adjusted matching logic to prioritize suffix and prefix matches effectively.

* Add test for taint tracking with multiple sources in `cfg.rs`.

* Add `function_summaries` table and implement summary upsert/load methods. Refactor to handle summary storage and retrieval efficiently, with placeholder clean/drop logic.

* refactor: split `labels.rs` into modular structure with language-specific files

* refactor: split `labels.rs` into modular structure with language-specific files

* refactor: clean up SQL table definitions in `database.rs` for better readability

* refactor: simplify CFG structure by removing lifetime parameters and enhancing taint metadata handling

* refactor: update TODO comments in `cfg.rs` to clarify future enhancements for cap labels and function details

* refactor: remove redundant header from README.md for improved clarity

* feat: add PHF-based syntax classifiers and Kind enum for efficient syntax mapping across languages

* feat: introduce analysis modes for enhanced scanner configuration and diagnostics

* feat: define Kind enum for syntax classification in control flow analysis

* feat: bump version to 0.2.0-alpha and update CHANGELOG for new features and fixes

* refactor: clean up imports and formatting in AST and CFG modules for improved readability

* refactor: simplify function signatures and improve code readability in CFG and module files

* fix: correct rayon_thread_stack_size comment to reflect actual value of 8 MiB

* refactor: update string formatting in clean and project modules for consistency

* refactor: fix indentation in clean.rs for improved readability

---------

Co-authored-by: elipeter <eli.peter@es.fcm.travel>
											
										
										
											2025-06-28 17:36:14 +02:00
+								            project TEXT NOT NULL,
-												Feat/full cfg (#30)

* feat: Enhance control flow analysis with function summaries and taint analysis

* feat: Update taint analysis to utilize function summaries for enhanced tracking

* Refactor `walk.rs` batch processing and override handling:

- Renamed `Batcher` to `BatchSender` for clarity.
- Added `BatchSender::new` constructor for cleaner initialization.
- Simplified batch size management in `BatchSender`.
- Extracted `build_overrides` function for reusable override construction.
- Improved error handling and validation in override building.
- Enhanced performance with directory and file type filtering in `walk`.

* Improve logging and streamline directory walk process:

- Added detailed `tracing` logs for debugging batch flushes, override construction, and walk initialization/completion.
- Optimized and simplified `filter_entry` logic for directory and file type filters.
- Improved metadata checks and max file size enforcement during the scan.

* Refactor and optimize taint tracking, label rules, and directory walk process:

- Replaced `DefaultHasher` with `blake3::Hasher` for improved taint hashing.
- Enhanced sorting and hashing logic in `taint.rs` for consistency and efficiency.
- Removed unused `set_hash` function and redundant imports across files.
- Improved batch sender logic in `walk.rs`, renaming key components for clarity.
- Unified `spawn_senders` and `spawn_file_walker` with thread handling and channel tuple return.
- Expanded label rules with additional matchers for sources, sanitizers, and sinks.
- Deprecated `dump_cfg` and specific logging utilities in `cfg.rs` for code cleanup.

* fix: fixed let chains error in walk.rs

* fix: updated dependencies

* fix: updated dependencies

* chore: Remove standard error in scan.rs

* feat: Introduce function summaries for enhanced taint and control flow analysis

* feat: Enhance taint analysis with interop support and function summaries

* feat: Add configuration analysis module and enhance matcher rules

* feat: Add arity column to function_summaries and handle schema migration

* fix: fixed clippy &PathBuf warnings

* chore: Update dependencies and versioning in Cargo files

* docs: Update README to enhance clarity and detail on features and analysis modes

* chore: Update CHANGELOG for version 0.2.0 with new features, changes, and fixes

* docs: Update SECURITY.md to clarify version support status

---------

Co-authored-by: elipeter <eli.peter@es.fcm.travel>
											
										
										
											2026-02-24 23:44:07 -05:00
+								            file_path TEXT NOT NULL,
 								            file_hash BLOB NOT NULL,
-												Added experimental control flow analysis and syntax classification for rust lang (#22)

* Introduce control flow graph (CFG) support:

- Added `cfg.rs` with CFG generation and analysis utilities.
- Integrated `petgraph` library for graph-based computations.
- Updated `ast.rs` to utilize CFG for function analysis.
- Modified `Cargo.toml` and `Cargo.lock` to include new dependencies.
- Improved static analysis with taint tracking through CFG paths.

* feat: enhance control flow analysis with taint tracking and node labeling

* feat: improve control flow graph with enhanced node handling and new tests

* Remove unnecessary reference marker in `byte_offset_to_point` comment.

* Remove unnecessary reference marker in `byte_offset_to_point` comment.

* Refactor `ast.rs` for performance and clarity; enhance `cfg.rs` with recursive CFG generation and improved classification logic for AST analysis.

* Refactor CFG and taint tracking logic:

- Enhanced `cfg.rs` with inline helper function `text_of` for cleaner UTF-8 handling in AST nodes.
- Expanded `labels.rs` rules with detailed `Sources`, `Sanitizers`, and `Sinks` for improved classification.
- Refined `push_node` to handle method call expressions with object-function pairing.
- Simplified code handling in trivia skipping and debug-only logic.

* Enhance `cfg.rs` with `first_call_ident` helper and improve identifier extraction logic in `push_node`.

* Add targeted CFG taint-tracking tests to enhance analysis coverage.

* Enhance CFG generation with loop expression handling and improve taint tracking logic. Add new sanitization example in `examples/sanitize/example.rs`.

* Update README with installation instructions for Cargo and GitHub releases.

* Expand taint-tracking with precise `def-use` computation and enhance `labels.rs` for detailed classification. Extend `examples/sanitize` with realistic scenarios demonstrating new rules.

* Refactor `labels.rs`:

- Removed redundant `LabelRule` entries for cleaner rule definitions.
- Adjusted matching logic to prioritize suffix and prefix matches effectively.

* Refactor `labels.rs`:

- Removed redundant `LabelRule` entries for cleaner rule definitions.
- Adjusted matching logic to prioritize suffix and prefix matches effectively.

* Add test for taint tracking with multiple sources in `cfg.rs`.

* Add `function_summaries` table and implement summary upsert/load methods. Refactor to handle summary storage and retrieval efficiently, with placeholder clean/drop logic.

* refactor: split `labels.rs` into modular structure with language-specific files

* refactor: split `labels.rs` into modular structure with language-specific files

* refactor: clean up SQL table definitions in `database.rs` for better readability

* refactor: simplify CFG structure by removing lifetime parameters and enhancing taint metadata handling

* refactor: update TODO comments in `cfg.rs` to clarify future enhancements for cap labels and function details

* refactor: remove redundant header from README.md for improved clarity

* feat: add PHF-based syntax classifiers and Kind enum for efficient syntax mapping across languages

* feat: introduce analysis modes for enhanced scanner configuration and diagnostics

* feat: define Kind enum for syntax classification in control flow analysis

* feat: bump version to 0.2.0-alpha and update CHANGELOG for new features and fixes

* refactor: clean up imports and formatting in AST and CFG modules for improved readability

* refactor: simplify function signatures and improve code readability in CFG and module files

* fix: correct rayon_thread_stack_size comment to reflect actual value of 8 MiB

* refactor: update string formatting in clean and project modules for consistency

* refactor: fix indentation in clean.rs for improved readability

---------

Co-authored-by: elipeter <eli.peter@es.fcm.travel>
											
										
										
											2025-06-28 17:36:14 +02:00
+								            name TEXT NOT NULL,
-												Feat/full cfg (#30)

* feat: Enhance control flow analysis with function summaries and taint analysis

* feat: Update taint analysis to utilize function summaries for enhanced tracking

* Refactor `walk.rs` batch processing and override handling:

- Renamed `Batcher` to `BatchSender` for clarity.
- Added `BatchSender::new` constructor for cleaner initialization.
- Simplified batch size management in `BatchSender`.
- Extracted `build_overrides` function for reusable override construction.
- Improved error handling and validation in override building.
- Enhanced performance with directory and file type filtering in `walk`.

* Improve logging and streamline directory walk process:

- Added detailed `tracing` logs for debugging batch flushes, override construction, and walk initialization/completion.
- Optimized and simplified `filter_entry` logic for directory and file type filters.
- Improved metadata checks and max file size enforcement during the scan.

* Refactor and optimize taint tracking, label rules, and directory walk process:

- Replaced `DefaultHasher` with `blake3::Hasher` for improved taint hashing.
- Enhanced sorting and hashing logic in `taint.rs` for consistency and efficiency.
- Removed unused `set_hash` function and redundant imports across files.
- Improved batch sender logic in `walk.rs`, renaming key components for clarity.
- Unified `spawn_senders` and `spawn_file_walker` with thread handling and channel tuple return.
- Expanded label rules with additional matchers for sources, sanitizers, and sinks.
- Deprecated `dump_cfg` and specific logging utilities in `cfg.rs` for code cleanup.

* fix: fixed let chains error in walk.rs

* fix: updated dependencies

* fix: updated dependencies

* chore: Remove standard error in scan.rs

* feat: Introduce function summaries for enhanced taint and control flow analysis

* feat: Enhance taint analysis with interop support and function summaries

* feat: Add configuration analysis module and enhance matcher rules

* feat: Add arity column to function_summaries and handle schema migration

* fix: fixed clippy &PathBuf warnings

* chore: Update dependencies and versioning in Cargo files

* docs: Update README to enhance clarity and detail on features and analysis modes

* chore: Update CHANGELOG for version 0.2.0 with new features, changes, and fixes

* docs: Update SECURITY.md to clarify version support status

---------

Co-authored-by: elipeter <eli.peter@es.fcm.travel>
											
										
										
											2026-02-24 23:44:07 -05:00
+								            arity INTEGER NOT NULL DEFAULT -1,
-												Added experimental control flow analysis and syntax classification for rust lang (#22)

* Introduce control flow graph (CFG) support:

- Added `cfg.rs` with CFG generation and analysis utilities.
- Integrated `petgraph` library for graph-based computations.
- Updated `ast.rs` to utilize CFG for function analysis.
- Modified `Cargo.toml` and `Cargo.lock` to include new dependencies.
- Improved static analysis with taint tracking through CFG paths.

* feat: enhance control flow analysis with taint tracking and node labeling

* feat: improve control flow graph with enhanced node handling and new tests

* Remove unnecessary reference marker in `byte_offset_to_point` comment.

* Remove unnecessary reference marker in `byte_offset_to_point` comment.

* Refactor `ast.rs` for performance and clarity; enhance `cfg.rs` with recursive CFG generation and improved classification logic for AST analysis.

* Refactor CFG and taint tracking logic:

- Enhanced `cfg.rs` with inline helper function `text_of` for cleaner UTF-8 handling in AST nodes.
- Expanded `labels.rs` rules with detailed `Sources`, `Sanitizers`, and `Sinks` for improved classification.
- Refined `push_node` to handle method call expressions with object-function pairing.
- Simplified code handling in trivia skipping and debug-only logic.

* Enhance `cfg.rs` with `first_call_ident` helper and improve identifier extraction logic in `push_node`.

* Add targeted CFG taint-tracking tests to enhance analysis coverage.

* Enhance CFG generation with loop expression handling and improve taint tracking logic. Add new sanitization example in `examples/sanitize/example.rs`.

* Update README with installation instructions for Cargo and GitHub releases.

* Expand taint-tracking with precise `def-use` computation and enhance `labels.rs` for detailed classification. Extend `examples/sanitize` with realistic scenarios demonstrating new rules.

* Refactor `labels.rs`:

- Removed redundant `LabelRule` entries for cleaner rule definitions.
- Adjusted matching logic to prioritize suffix and prefix matches effectively.

* Refactor `labels.rs`:

- Removed redundant `LabelRule` entries for cleaner rule definitions.
- Adjusted matching logic to prioritize suffix and prefix matches effectively.

* Add test for taint tracking with multiple sources in `cfg.rs`.

* Add `function_summaries` table and implement summary upsert/load methods. Refactor to handle summary storage and retrieval efficiently, with placeholder clean/drop logic.

* refactor: split `labels.rs` into modular structure with language-specific files

* refactor: split `labels.rs` into modular structure with language-specific files

* refactor: clean up SQL table definitions in `database.rs` for better readability

* refactor: simplify CFG structure by removing lifetime parameters and enhancing taint metadata handling

* refactor: update TODO comments in `cfg.rs` to clarify future enhancements for cap labels and function details

* refactor: remove redundant header from README.md for improved clarity

* feat: add PHF-based syntax classifiers and Kind enum for efficient syntax mapping across languages

* feat: introduce analysis modes for enhanced scanner configuration and diagnostics

* feat: define Kind enum for syntax classification in control flow analysis

* feat: bump version to 0.2.0-alpha and update CHANGELOG for new features and fixes

* refactor: clean up imports and formatting in AST and CFG modules for improved readability

* refactor: simplify function signatures and improve code readability in CFG and module files

* fix: correct rayon_thread_stack_size comment to reflect actual value of 8 MiB

* refactor: update string formatting in clean and project modules for consistency

* refactor: fix indentation in clean.rs for improved readability

---------

Co-authored-by: elipeter <eli.peter@es.fcm.travel>
											
										
										
											2025-06-28 17:36:14 +02:00
+								            lang TEXT NOT NULL,
 								            summary TEXT NOT NULL,
-												Feat/full cfg (#30)

* feat: Enhance control flow analysis with function summaries and taint analysis

* feat: Update taint analysis to utilize function summaries for enhanced tracking

* Refactor `walk.rs` batch processing and override handling:

- Renamed `Batcher` to `BatchSender` for clarity.
- Added `BatchSender::new` constructor for cleaner initialization.
- Simplified batch size management in `BatchSender`.
- Extracted `build_overrides` function for reusable override construction.
- Improved error handling and validation in override building.
- Enhanced performance with directory and file type filtering in `walk`.

* Improve logging and streamline directory walk process:

- Added detailed `tracing` logs for debugging batch flushes, override construction, and walk initialization/completion.
- Optimized and simplified `filter_entry` logic for directory and file type filters.
- Improved metadata checks and max file size enforcement during the scan.

* Refactor and optimize taint tracking, label rules, and directory walk process:

- Replaced `DefaultHasher` with `blake3::Hasher` for improved taint hashing.
- Enhanced sorting and hashing logic in `taint.rs` for consistency and efficiency.
- Removed unused `set_hash` function and redundant imports across files.
- Improved batch sender logic in `walk.rs`, renaming key components for clarity.
- Unified `spawn_senders` and `spawn_file_walker` with thread handling and channel tuple return.
- Expanded label rules with additional matchers for sources, sanitizers, and sinks.
- Deprecated `dump_cfg` and specific logging utilities in `cfg.rs` for code cleanup.

* fix: fixed let chains error in walk.rs

* fix: updated dependencies

* fix: updated dependencies

* chore: Remove standard error in scan.rs

* feat: Introduce function summaries for enhanced taint and control flow analysis

* feat: Enhance taint analysis with interop support and function summaries

* feat: Add configuration analysis module and enhance matcher rules

* feat: Add arity column to function_summaries and handle schema migration

* fix: fixed clippy &PathBuf warnings

* chore: Update dependencies and versioning in Cargo files

* docs: Update README to enhance clarity and detail on features and analysis modes

* chore: Update CHANGELOG for version 0.2.0 with new features, changes, and fixes

* docs: Update SECURITY.md to clarify version support status

---------

Co-authored-by: elipeter <eli.peter@es.fcm.travel>
											
										
										
											2026-02-24 23:44:07 -05:00
+								            updated_at INTEGER NOT NULL,
 								            UNIQUE(project, file_path, name, arity)
 								        );
-												Refactor database schema and scanning process:
- Introduced `issues` table for detailed vulnerability storage.
- Enhanced `files` table with project scoping and unique constraints.
- Replaced `OutputFormat` enum with `String` for flexibility.
- Added support for formatted console output of scan results.
- Integrated file and issue updating logic for incremental scans.
- Optimized scanning by leveraging database-stored issues.

											
										
										
											2025-06-17 16:46:45 +02:00
+								    "#;
-												Added experimental control flow analysis and syntax classification for rust lang (#22)

* Introduce control flow graph (CFG) support:

- Added `cfg.rs` with CFG generation and analysis utilities.
- Integrated `petgraph` library for graph-based computations.
- Updated `ast.rs` to utilize CFG for function analysis.
- Modified `Cargo.toml` and `Cargo.lock` to include new dependencies.
- Improved static analysis with taint tracking through CFG paths.

* feat: enhance control flow analysis with taint tracking and node labeling

* feat: improve control flow graph with enhanced node handling and new tests

* Remove unnecessary reference marker in `byte_offset_to_point` comment.

* Remove unnecessary reference marker in `byte_offset_to_point` comment.

* Refactor `ast.rs` for performance and clarity; enhance `cfg.rs` with recursive CFG generation and improved classification logic for AST analysis.

* Refactor CFG and taint tracking logic:

- Enhanced `cfg.rs` with inline helper function `text_of` for cleaner UTF-8 handling in AST nodes.
- Expanded `labels.rs` rules with detailed `Sources`, `Sanitizers`, and `Sinks` for improved classification.
- Refined `push_node` to handle method call expressions with object-function pairing.
- Simplified code handling in trivia skipping and debug-only logic.

* Enhance `cfg.rs` with `first_call_ident` helper and improve identifier extraction logic in `push_node`.

* Add targeted CFG taint-tracking tests to enhance analysis coverage.

* Enhance CFG generation with loop expression handling and improve taint tracking logic. Add new sanitization example in `examples/sanitize/example.rs`.

* Update README with installation instructions for Cargo and GitHub releases.

* Expand taint-tracking with precise `def-use` computation and enhance `labels.rs` for detailed classification. Extend `examples/sanitize` with realistic scenarios demonstrating new rules.

* Refactor `labels.rs`:

- Removed redundant `LabelRule` entries for cleaner rule definitions.
- Adjusted matching logic to prioritize suffix and prefix matches effectively.

* Refactor `labels.rs`:

- Removed redundant `LabelRule` entries for cleaner rule definitions.
- Adjusted matching logic to prioritize suffix and prefix matches effectively.

* Add test for taint tracking with multiple sources in `cfg.rs`.

* Add `function_summaries` table and implement summary upsert/load methods. Refactor to handle summary storage and retrieval efficiently, with placeholder clean/drop logic.

* refactor: split `labels.rs` into modular structure with language-specific files

* refactor: split `labels.rs` into modular structure with language-specific files

* refactor: clean up SQL table definitions in `database.rs` for better readability

* refactor: simplify CFG structure by removing lifetime parameters and enhancing taint metadata handling

* refactor: update TODO comments in `cfg.rs` to clarify future enhancements for cap labels and function details

* refactor: remove redundant header from README.md for improved clarity

* feat: add PHF-based syntax classifiers and Kind enum for efficient syntax mapping across languages

* feat: introduce analysis modes for enhanced scanner configuration and diagnostics

* feat: define Kind enum for syntax classification in control flow analysis

* feat: bump version to 0.2.0-alpha and update CHANGELOG for new features and fixes

* refactor: clean up imports and formatting in AST and CFG modules for improved readability

* refactor: simplify function signatures and improve code readability in CFG and module files

* fix: correct rayon_thread_stack_size comment to reflect actual value of 8 MiB

* refactor: update string formatting in clean and project modules for consistency

* refactor: fix indentation in clean.rs for improved readability

---------

Co-authored-by: elipeter <eli.peter@es.fcm.travel>
											
										
										
											2025-06-28 17:36:14 +02:00
+								    // TODO: ADD CLEANS FOR EACH TABLE BASED ON PROJECT WHICH RUNS ON CLEAN
 								    // TODO: ADD DROP AND GIVE A CLI PARAMETER FOR DROP
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								    /// A single issue row, ready for insertion.
 								    #[derive(Debug, Clone)]
 								    pub struct IssueRow<'a> {
 								        pub rule_id: &'a str,
 								        pub severity: &'a str,
 								        pub line: i64,
 								        pub col: i64,
-												Refactor database connection handling with connection pooling and parallel processing

- Introduced `r2d2` connection pooling for SQLite in `database.rs`.
- Updated `Indexer` to use pooled connections for improved concurrency.
- Replaced sequential processing with `rayon` for parallel file scanning.
- Added a `clear` method to `Indexer` for reindexing support.
- Enhanced database initialization with `init` and `from_pool` methods.
- Updated `Cargo.toml` and `Cargo.lock` to include `r2d2`, `r2d2_sqlite`, and new dependencies.

											
										
										
											2025-06-17 20:45:33 +02:00
+								    }
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								    pub struct Indexer {
 								        conn: PooledConnection<SqliteConnectionManager>,
 								        project: String,
-												- Remove unused `filetypes.rs` and `walk.rs` modules
- Introduce `index.rs` for file indexing using SQLite
- Expand configuration options in `config.rs`, including `excluded_files`
- Update dependencies in `Cargo.toml` to include SQLite, hashing, and regex libraries

											
										
										
											2025-06-16 23:47:50 +02:00
+								    }
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								    impl Indexer {
 								        pub fn init(database_path: &Path) -> NyxResult<Arc<Pool<SqliteConnectionManager>>> {
-												Feat/full cfg (#30)

* feat: Enhance control flow analysis with function summaries and taint analysis

* feat: Update taint analysis to utilize function summaries for enhanced tracking

* Refactor `walk.rs` batch processing and override handling:

- Renamed `Batcher` to `BatchSender` for clarity.
- Added `BatchSender::new` constructor for cleaner initialization.
- Simplified batch size management in `BatchSender`.
- Extracted `build_overrides` function for reusable override construction.
- Improved error handling and validation in override building.
- Enhanced performance with directory and file type filtering in `walk`.

* Improve logging and streamline directory walk process:

- Added detailed `tracing` logs for debugging batch flushes, override construction, and walk initialization/completion.
- Optimized and simplified `filter_entry` logic for directory and file type filters.
- Improved metadata checks and max file size enforcement during the scan.

* Refactor and optimize taint tracking, label rules, and directory walk process:

- Replaced `DefaultHasher` with `blake3::Hasher` for improved taint hashing.
- Enhanced sorting and hashing logic in `taint.rs` for consistency and efficiency.
- Removed unused `set_hash` function and redundant imports across files.
- Improved batch sender logic in `walk.rs`, renaming key components for clarity.
- Unified `spawn_senders` and `spawn_file_walker` with thread handling and channel tuple return.
- Expanded label rules with additional matchers for sources, sanitizers, and sinks.
- Deprecated `dump_cfg` and specific logging utilities in `cfg.rs` for code cleanup.

* fix: fixed let chains error in walk.rs

* fix: updated dependencies

* fix: updated dependencies

* chore: Remove standard error in scan.rs

* feat: Introduce function summaries for enhanced taint and control flow analysis

* feat: Enhance taint analysis with interop support and function summaries

* feat: Add configuration analysis module and enhance matcher rules

* feat: Add arity column to function_summaries and handle schema migration

* fix: fixed clippy &PathBuf warnings

* chore: Update dependencies and versioning in Cargo files

* docs: Update README to enhance clarity and detail on features and analysis modes

* chore: Update CHANGELOG for version 0.2.0 with new features, changes, and fixes

* docs: Update SECURITY.md to clarify version support status

---------

Co-authored-by: elipeter <eli.peter@es.fcm.travel>
											
										
										
											2026-02-24 23:44:07 -05:00
+								            let _span = tracing::info_span!("db_init", path = %database_path.display()).entered();
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								            let flags = OpenFlags::SQLITE_OPEN_READ_WRITE
 								                | OpenFlags::SQLITE_OPEN_CREATE
 								                | OpenFlags::SQLITE_OPEN_FULL_MUTEX;
 								            let manager = SqliteConnectionManager::file(database_path).with_flags(flags);
 								            let pool = Arc::new(Pool::new(manager)?);
 								            {
 								                let conn = pool.get()?;
 								                conn.pragma_update(None, "journal_mode", "WAL")?;
-												Feat/full cfg (#30)

* feat: Enhance control flow analysis with function summaries and taint analysis

* feat: Update taint analysis to utilize function summaries for enhanced tracking

* Refactor `walk.rs` batch processing and override handling:

- Renamed `Batcher` to `BatchSender` for clarity.
- Added `BatchSender::new` constructor for cleaner initialization.
- Simplified batch size management in `BatchSender`.
- Extracted `build_overrides` function for reusable override construction.
- Improved error handling and validation in override building.
- Enhanced performance with directory and file type filtering in `walk`.

* Improve logging and streamline directory walk process:

- Added detailed `tracing` logs for debugging batch flushes, override construction, and walk initialization/completion.
- Optimized and simplified `filter_entry` logic for directory and file type filters.
- Improved metadata checks and max file size enforcement during the scan.

* Refactor and optimize taint tracking, label rules, and directory walk process:

- Replaced `DefaultHasher` with `blake3::Hasher` for improved taint hashing.
- Enhanced sorting and hashing logic in `taint.rs` for consistency and efficiency.
- Removed unused `set_hash` function and redundant imports across files.
- Improved batch sender logic in `walk.rs`, renaming key components for clarity.
- Unified `spawn_senders` and `spawn_file_walker` with thread handling and channel tuple return.
- Expanded label rules with additional matchers for sources, sanitizers, and sinks.
- Deprecated `dump_cfg` and specific logging utilities in `cfg.rs` for code cleanup.

* fix: fixed let chains error in walk.rs

* fix: updated dependencies

* fix: updated dependencies

* chore: Remove standard error in scan.rs

* feat: Introduce function summaries for enhanced taint and control flow analysis

* feat: Enhance taint analysis with interop support and function summaries

* feat: Add configuration analysis module and enhance matcher rules

* feat: Add arity column to function_summaries and handle schema migration

* fix: fixed clippy &PathBuf warnings

* chore: Update dependencies and versioning in Cargo files

* docs: Update README to enhance clarity and detail on features and analysis modes

* chore: Update CHANGELOG for version 0.2.0 with new features, changes, and fixes

* docs: Update SECURITY.md to clarify version support status

---------

Co-authored-by: elipeter <eli.peter@es.fcm.travel>
											
										
										
											2026-02-24 23:44:07 -05:00
+								                conn.pragma_update(None, "synchronous", "NORMAL")?;
 								                conn.pragma_update(None, "cache_size", "-8000")?; // 8 MB
 								                conn.pragma_update(None, "temp_store", "MEMORY")?;
 								                conn.pragma_update(None, "mmap_size", "268435456")?; // 256 MB
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								                conn.execute_batch(SCHEMA)?;
-												Feat/full cfg (#30)

* feat: Enhance control flow analysis with function summaries and taint analysis

* feat: Update taint analysis to utilize function summaries for enhanced tracking

* Refactor `walk.rs` batch processing and override handling:

- Renamed `Batcher` to `BatchSender` for clarity.
- Added `BatchSender::new` constructor for cleaner initialization.
- Simplified batch size management in `BatchSender`.
- Extracted `build_overrides` function for reusable override construction.
- Improved error handling and validation in override building.
- Enhanced performance with directory and file type filtering in `walk`.

* Improve logging and streamline directory walk process:

- Added detailed `tracing` logs for debugging batch flushes, override construction, and walk initialization/completion.
- Optimized and simplified `filter_entry` logic for directory and file type filters.
- Improved metadata checks and max file size enforcement during the scan.

* Refactor and optimize taint tracking, label rules, and directory walk process:

- Replaced `DefaultHasher` with `blake3::Hasher` for improved taint hashing.
- Enhanced sorting and hashing logic in `taint.rs` for consistency and efficiency.
- Removed unused `set_hash` function and redundant imports across files.
- Improved batch sender logic in `walk.rs`, renaming key components for clarity.
- Unified `spawn_senders` and `spawn_file_walker` with thread handling and channel tuple return.
- Expanded label rules with additional matchers for sources, sanitizers, and sinks.
- Deprecated `dump_cfg` and specific logging utilities in `cfg.rs` for code cleanup.

* fix: fixed let chains error in walk.rs

* fix: updated dependencies

* fix: updated dependencies

* chore: Remove standard error in scan.rs

* feat: Introduce function summaries for enhanced taint and control flow analysis

* feat: Enhance taint analysis with interop support and function summaries

* feat: Add configuration analysis module and enhance matcher rules

* feat: Add arity column to function_summaries and handle schema migration

* fix: fixed clippy &PathBuf warnings

* chore: Update dependencies and versioning in Cargo files

* docs: Update README to enhance clarity and detail on features and analysis modes

* chore: Update CHANGELOG for version 0.2.0 with new features, changes, and fixes

* docs: Update SECURITY.md to clarify version support status

---------

Co-authored-by: elipeter <eli.peter@es.fcm.travel>
											
										
										
											2026-02-24 23:44:07 -05:00
 								                // Migrate: if the function_summaries table has the old schema
 								                // (missing `arity` column), drop and recreate it.
 								                let has_arity: bool = conn
 								                    .prepare("PRAGMA table_info(function_summaries)")
 								                    .and_then(|mut s| {
 								                        let cols: Vec<String> = s
 								                            .query_map([], |r| r.get::<_, String>(1))?
 								                            .filter_map(Result::ok)
 								                            .collect();
 								                        Ok(cols.iter().any(|c| c == "arity"))
 								                    })
 								                    .unwrap_or(true);
 								                if !has_arity {
 								                    tracing::info!("migrating function_summaries: adding arity column");
 								                    conn.execute_batch("DROP TABLE IF EXISTS function_summaries;")?;
 								                    conn.execute_batch(
 								                        "CREATE TABLE IF NOT EXISTS function_summaries (
 								                            id INTEGER PRIMARY KEY AUTOINCREMENT,
 								                            project TEXT NOT NULL,
 								                            file_path TEXT NOT NULL,
 								                            file_hash BLOB NOT NULL,
 								                            name TEXT NOT NULL,
 								                            arity INTEGER NOT NULL DEFAULT -1,
 								                            lang TEXT NOT NULL,
 								                            summary TEXT NOT NULL,
 								                            updated_at INTEGER NOT NULL,
 								                            UNIQUE(project, file_path, name, arity)
 								                        );",
 								                    )?;
 								                }
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								            }
 								            Ok(pool)
 								        }
 								        pub fn from_pool(project: &str, pool: &Pool<SqliteConnectionManager>) -> NyxResult<Self> {
 								            let conn = pool.get()?;
 								            Ok(Self {
 								                conn,
 								                project: project.to_owned(),
 								            })
 								        }
-												- Remove unused `filetypes.rs` and `walk.rs` modules
- Introduce `index.rs` for file indexing using SQLite
- Expand configuration options in `config.rs`, including `excluded_files`
- Update dependencies in `Cargo.toml` to include SQLite, hashing, and regex libraries

											
										
										
											2025-06-16 23:47:50 +02:00
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								        // helper so code below can treat PooledConnection like &Connection
 								        fn c(&self) -> &Connection {
 								            self.conn.deref()
 								        }
 								        /// Return true when the file *content* or *mtime* changed since the last scan.
 								        pub fn should_scan(&self, path: &Path) -> NyxResult<bool> {
 								            let meta = fs::metadata(path)?;
 								            let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
 								            let digest = Self::digest_file(path)?;
 								            let row: Option<(Vec<u8>, i64)> = self
 								                .conn
 								                .query_row(
 								                    "SELECT hash, mtime FROM files WHERE project = ?1 AND path = ?2",
 								                    params![self.project, path.to_string_lossy()],
 								                    |r| Ok((r.get(0)?, r.get(1)?)),
 								                )
 								                .optional()?;
 								            Ok(match row {
 								                Some((stored_hash, stored_mtime)) => stored_hash != digest || stored_mtime != mtime,
 								                None => true,
 								            })
 								        }
-												- Remove unused `filetypes.rs` and `walk.rs` modules
- Introduce `index.rs` for file indexing using SQLite
- Expand configuration options in `config.rs`, including `excluded_files`
- Update dependencies in `Cargo.toml` to include SQLite, hashing, and regex libraries

											
										
										
											2025-06-16 23:47:50 +02:00
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								        /// Insert or update the `files` row and return its id.
 								        pub fn upsert_file(&self, path: &Path) -> NyxResult<i64> {
 								            let meta = fs::metadata(path)?;
 								            let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
 								            let scanned_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;
 								            let digest = Self::digest_file(path)?;
 								            self.c().execute(
 								                "INSERT INTO files (project, path, hash, mtime, scanned_at)
-												Refactor database schema and scanning process:
- Introduced `issues` table for detailed vulnerability storage.
- Enhanced `files` table with project scoping and unique constraints.
- Replaced `OutputFormat` enum with `String` for flexibility.
- Added support for formatted console output of scan results.
- Integrated file and issue updating logic for incremental scans.
- Optimized scanning by leveraging database-stored issues.

											
										
										
											2025-06-17 16:46:45 +02:00
+								                 VALUES (?1, ?2, ?3, ?4, ?5)
 								                 ON CONFLICT(project,path) DO UPDATE
 								                 SET hash = excluded.hash,
 								                     mtime = excluded.mtime,
 								                     scanned_at = excluded.scanned_at",
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								                params![
 								                    self.project,
 								                    path.to_string_lossy(),
 								                    digest,
 								                    mtime,
 								                    scanned_at
 								                ],
 								            )?;
 								            let id: i64 = self.c().query_row(
 								                "SELECT id FROM files WHERE project = ?1 AND path = ?2",
 								                params![self.project, path.to_string_lossy()],
 								                |r| r.get(0),
 								            )?;
 								            Ok(id)
 								        }
-												Refactor database schema and scanning process:
- Introduced `issues` table for detailed vulnerability storage.
- Enhanced `files` table with project scoping and unique constraints.
- Replaced `OutputFormat` enum with `String` for flexibility.
- Added support for formatted console output of scan results.
- Integrated file and issue updating logic for incremental scans.
- Optimized scanning by leveraging database-stored issues.

											
										
										
											2025-06-17 16:46:45 +02:00
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								        /// Replace all issues for `file_id` with the supplied set.
 								        pub fn replace_issues<'a>(
 								            &mut self,
 								            file_id: i64,
 								            issues: impl IntoIterator<Item = IssueRow<'a>>,
 								        ) -> NyxResult<()> {
 								            let tx = self.conn.transaction()?;
 								            tx.execute("DELETE FROM issues WHERE file_id = ?", params![file_id])?;
 								            {
 								                let mut stmt = tx.prepare(
 								                    "INSERT INTO issues (file_id, rule_id, severity, line, col)
-												Refactor database schema and scanning process:
- Introduced `issues` table for detailed vulnerability storage.
- Enhanced `files` table with project scoping and unique constraints.
- Replaced `OutputFormat` enum with `String` for flexibility.
- Added support for formatted console output of scan results.
- Integrated file and issue updating logic for incremental scans.
- Optimized scanning by leveraging database-stored issues.

											
										
										
											2025-06-17 16:46:45 +02:00
+								                     VALUES (?1, ?2, ?3, ?4, ?5)",
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								                )?;
 								                for iss in issues {
 								                    stmt.execute(params![
 								                        file_id,
 								                        iss.rule_id,
 								                        iss.severity,
 								                        iss.line,
 								                        iss.col
 								                    ])?;
 								                }
 								            }
 								            tx.commit()?;
 								            Ok(())
-												Refactor database schema and scanning process:
- Introduced `issues` table for detailed vulnerability storage.
- Enhanced `files` table with project scoping and unique constraints.
- Replaced `OutputFormat` enum with `String` for flexibility.
- Added support for formatted console output of scan results.
- Integrated file and issue updating logic for incremental scans.
- Optimized scanning by leveraging database-stored issues.

											
										
										
											2025-06-17 16:46:45 +02:00
+								        }
-												Add unit tests for `Indexer` and update dependencies

- Introduced comprehensive tests for `Indexer` to validate file scan behavior.
- Added `tempfile` and `filetime` as dependencies for testing.
- Removed unused dependencies (`anyhow`, `nix`, `regex`).

											
										
										
											2025-06-17 00:02:32 +02:00
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								        /// Gets the issues for a specific file so we don't have to rescan
 								        pub fn get_issues_from_file(&self, path: &Path) -> NyxResult<Vec<Diag>> {
 								            let file_id: i64 = self.c().query_row(
 								                "SELECT id FROM files WHERE project = ?1 AND path = ?2",
 								                params![self.project, path.to_string_lossy()],
 								                |r| r.get(0),
 								            )?;
 								            let mut stmt = self.c().prepare(
 								                "SELECT rule_id, severity, line, col
-												Refactor database schema and scanning process:
- Introduced `issues` table for detailed vulnerability storage.
- Enhanced `files` table with project scoping and unique constraints.
- Replaced `OutputFormat` enum with `String` for flexibility.
- Added support for formatted console output of scan results.
- Integrated file and issue updating logic for incremental scans.
- Optimized scanning by leveraging database-stored issues.

											
										
										
											2025-06-17 16:46:45 +02:00
+								         FROM issues
 								         WHERE file_id = ?1",
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								            )?;
 								            let issue_iter = stmt.query_map([file_id], |row| {
 								                let sev_str: String = row.get(1)?;
 								                Ok(Diag {
 								                    path: path.to_string_lossy().to_string(),
 								                    id: row.get::<_, String>(0)?, // rule_id
 								                    line: row.get::<_, i64>(2)? as usize,
 								                    col: row.get::<_, i64>(3)? as usize,
 								                    severity: Severity::from_str(&sev_str).unwrap(),
 								                })
 								            })?;
 								            Ok(issue_iter.filter_map(Result::ok).collect())
 								        }
-												Feat/full cfg (#30)

* feat: Enhance control flow analysis with function summaries and taint analysis

* feat: Update taint analysis to utilize function summaries for enhanced tracking

* Refactor `walk.rs` batch processing and override handling:

- Renamed `Batcher` to `BatchSender` for clarity.
- Added `BatchSender::new` constructor for cleaner initialization.
- Simplified batch size management in `BatchSender`.
- Extracted `build_overrides` function for reusable override construction.
- Improved error handling and validation in override building.
- Enhanced performance with directory and file type filtering in `walk`.

* Improve logging and streamline directory walk process:

- Added detailed `tracing` logs for debugging batch flushes, override construction, and walk initialization/completion.
- Optimized and simplified `filter_entry` logic for directory and file type filters.
- Improved metadata checks and max file size enforcement during the scan.

* Refactor and optimize taint tracking, label rules, and directory walk process:

- Replaced `DefaultHasher` with `blake3::Hasher` for improved taint hashing.
- Enhanced sorting and hashing logic in `taint.rs` for consistency and efficiency.
- Removed unused `set_hash` function and redundant imports across files.
- Improved batch sender logic in `walk.rs`, renaming key components for clarity.
- Unified `spawn_senders` and `spawn_file_walker` with thread handling and channel tuple return.
- Expanded label rules with additional matchers for sources, sanitizers, and sinks.
- Deprecated `dump_cfg` and specific logging utilities in `cfg.rs` for code cleanup.

* fix: fixed let chains error in walk.rs

* fix: updated dependencies

* fix: updated dependencies

* chore: Remove standard error in scan.rs

* feat: Introduce function summaries for enhanced taint and control flow analysis

* feat: Enhance taint analysis with interop support and function summaries

* feat: Add configuration analysis module and enhance matcher rules

* feat: Add arity column to function_summaries and handle schema migration

* fix: fixed clippy &PathBuf warnings

* chore: Update dependencies and versioning in Cargo files

* docs: Update README to enhance clarity and detail on features and analysis modes

* chore: Update CHANGELOG for version 0.2.0 with new features, changes, and fixes

* docs: Update SECURITY.md to clarify version support status

---------

Co-authored-by: elipeter <eli.peter@es.fcm.travel>
											
										
										
											2026-02-24 23:44:07 -05:00
+								        /// Atomically replace all function summaries for a single file.
 								        ///
 								        /// Deletes every existing summary row for `(project, file_path)` then
 								        /// inserts the new set.  This keeps the table in sync when a file is
 								        /// re‑parsed and its functions change.
 								        pub fn replace_summaries_for_file(
 								            &mut self,
 								            file_path: &Path,
 								            file_hash: &[u8],
 								            summaries: &[crate::summary::FuncSummary],
 								        ) -> NyxResult<()> {
 								            let tx = self.conn.transaction()?;
 								            let path_str = file_path.to_string_lossy();
 								            let now = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;
 								            tx.execute(
 								                "DELETE FROM function_summaries WHERE project = ?1 AND file_path = ?2",
 								                params![self.project, path_str],
 								            )?;
 								            {
 								                let mut stmt = tx.prepare(
 								                    "INSERT OR REPLACE INTO function_summaries
 								                        (project, file_path, file_hash, name, arity, lang, summary, updated_at)
 								                     VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
 								                )?;
 								                for s in summaries {
 								                    let json = serde_json::to_string(s)
 								                        .map_err(|e| NyxError::Msg(format!("summary serialise: {e}")))?;
 								                    stmt.execute(params![
 								                        self.project,
 								                        path_str,
 								                        file_hash,
 								                        s.name,
 								                        s.param_count as i64,
 								                        s.lang,
 								                        json,
 								                        now
 								                    ])?;
 								                }
 								            }
 								            tx.commit()?;
 								            Ok(())
 								        }
 								        /// Load every function summary for this project.
 								        pub fn load_all_summaries(&self) -> NyxResult<Vec<crate::summary::FuncSummary>> {
 								            let mut stmt = self
 								                .c()
 								                .prepare("SELECT summary FROM function_summaries WHERE project = ?1")?;
 								            let iter = stmt.query_map([&self.project], |row| {
 								                let json: String = row.get(0)?;
 								                Ok(json)
 								            })?;
 								            let mut out = Vec::new();
 								            for row in iter {
 								                let json = row?;
 								                let s: crate::summary::FuncSummary = serde_json::from_str(&json)
 								                    .map_err(|e| rusqlite::Error::ToSqlConversionFailure(Box::new(e)))?;
 								                out.push(s);
 								            }
 								            Ok(out)
 								        }
-												Added experimental control flow analysis and syntax classification for rust lang (#22)

* Introduce control flow graph (CFG) support:

- Added `cfg.rs` with CFG generation and analysis utilities.
- Integrated `petgraph` library for graph-based computations.
- Updated `ast.rs` to utilize CFG for function analysis.
- Modified `Cargo.toml` and `Cargo.lock` to include new dependencies.
- Improved static analysis with taint tracking through CFG paths.

* feat: enhance control flow analysis with taint tracking and node labeling

* feat: improve control flow graph with enhanced node handling and new tests

* Remove unnecessary reference marker in `byte_offset_to_point` comment.

* Remove unnecessary reference marker in `byte_offset_to_point` comment.

* Refactor `ast.rs` for performance and clarity; enhance `cfg.rs` with recursive CFG generation and improved classification logic for AST analysis.

* Refactor CFG and taint tracking logic:

- Enhanced `cfg.rs` with inline helper function `text_of` for cleaner UTF-8 handling in AST nodes.
- Expanded `labels.rs` rules with detailed `Sources`, `Sanitizers`, and `Sinks` for improved classification.
- Refined `push_node` to handle method call expressions with object-function pairing.
- Simplified code handling in trivia skipping and debug-only logic.

* Enhance `cfg.rs` with `first_call_ident` helper and improve identifier extraction logic in `push_node`.

* Add targeted CFG taint-tracking tests to enhance analysis coverage.

* Enhance CFG generation with loop expression handling and improve taint tracking logic. Add new sanitization example in `examples/sanitize/example.rs`.

* Update README with installation instructions for Cargo and GitHub releases.

* Expand taint-tracking with precise `def-use` computation and enhance `labels.rs` for detailed classification. Extend `examples/sanitize` with realistic scenarios demonstrating new rules.

* Refactor `labels.rs`:

- Removed redundant `LabelRule` entries for cleaner rule definitions.
- Adjusted matching logic to prioritize suffix and prefix matches effectively.

* Refactor `labels.rs`:

- Removed redundant `LabelRule` entries for cleaner rule definitions.
- Adjusted matching logic to prioritize suffix and prefix matches effectively.

* Add test for taint tracking with multiple sources in `cfg.rs`.

* Add `function_summaries` table and implement summary upsert/load methods. Refactor to handle summary storage and retrieval efficiently, with placeholder clean/drop logic.

* refactor: split `labels.rs` into modular structure with language-specific files

* refactor: split `labels.rs` into modular structure with language-specific files

* refactor: clean up SQL table definitions in `database.rs` for better readability

* refactor: simplify CFG structure by removing lifetime parameters and enhancing taint metadata handling

* refactor: update TODO comments in `cfg.rs` to clarify future enhancements for cap labels and function details

* refactor: remove redundant header from README.md for improved clarity

* feat: add PHF-based syntax classifiers and Kind enum for efficient syntax mapping across languages

* feat: introduce analysis modes for enhanced scanner configuration and diagnostics

* feat: define Kind enum for syntax classification in control flow analysis

* feat: bump version to 0.2.0-alpha and update CHANGELOG for new features and fixes

* refactor: clean up imports and formatting in AST and CFG modules for improved readability

* refactor: simplify function signatures and improve code readability in CFG and module files

* fix: correct rayon_thread_stack_size comment to reflect actual value of 8 MiB

* refactor: update string formatting in clean and project modules for consistency

* refactor: fix indentation in clean.rs for improved readability

---------

Co-authored-by: elipeter <eli.peter@es.fcm.travel>
											
										
										
											2025-06-28 17:36:14 +02:00
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								        /// gets files from the database
 								        pub fn get_files(&self, project: &str) -> NyxResult<Vec<PathBuf>> {
 								            let mut stmt = self.c().prepare(
 								                "SELECT path
-												Refactor database schema and scanning process:
- Introduced `issues` table for detailed vulnerability storage.
- Enhanced `files` table with project scoping and unique constraints.
- Replaced `OutputFormat` enum with `String` for flexibility.
- Added support for formatted console output of scan results.
- Integrated file and issue updating logic for incremental scans.
- Optimized scanning by leveraging database-stored issues.

											
										
										
											2025-06-17 16:46:45 +02:00
+								         FROM files
 								         WHERE project = ?1",
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								            )?;
-												Add unit tests for `Indexer` and update dependencies

- Introduced comprehensive tests for `Indexer` to validate file scan behavior.
- Added `tempfile` and `filetime` as dependencies for testing.
- Removed unused dependencies (`anyhow`, `nix`, `regex`).

											
										
										
											2025-06-17 00:02:32 +02:00
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								            let file_iter = stmt.query_map([project], |row| row.get::<_, String>(0))?;
 								            Ok(file_iter
 								                .map(|p| p.map(PathBuf::from))
 								                .collect::<Result<_, _>>()?)
 								        }
-												Add unit tests for `Indexer` and update dependencies

- Introduced comprehensive tests for `Indexer` to validate file scan behavior.
- Added `tempfile` and `filetime` as dependencies for testing.
- Removed unused dependencies (`anyhow`, `nix`, `regex`).

											
										
										
											2025-06-17 00:02:32 +02:00
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								        // -------------------------------------------------------------------------
 								        // Maintenance utilities
 								        // -------------------------------------------------------------------------
 								        pub fn clear(&self) -> NyxResult<()> {
 								            self.c().execute_batch(
 								                r#"
-												Refactor database connection handling with connection pooling and parallel processing

- Introduced `r2d2` connection pooling for SQLite in `database.rs`.
- Updated `Indexer` to use pooled connections for improved concurrency.
- Replaced sequential processing with `rayon` for parallel file scanning.
- Added a `clear` method to `Indexer` for reindexing support.
- Enhanced database initialization with `init` and `from_pool` methods.
- Updated `Cargo.toml` and `Cargo.lock` to include `r2d2`, `r2d2_sqlite`, and new dependencies.

											
										
										
											2025-06-17 20:45:33 +02:00
+								        PRAGMA foreign_keys = OFF;
 								        DROP TABLE IF EXISTS issues;
 								        DROP TABLE IF EXISTS files;
-												Added experimental control flow analysis and syntax classification for rust lang (#22)

* Introduce control flow graph (CFG) support:

- Added `cfg.rs` with CFG generation and analysis utilities.
- Integrated `petgraph` library for graph-based computations.
- Updated `ast.rs` to utilize CFG for function analysis.
- Modified `Cargo.toml` and `Cargo.lock` to include new dependencies.
- Improved static analysis with taint tracking through CFG paths.

* feat: enhance control flow analysis with taint tracking and node labeling

* feat: improve control flow graph with enhanced node handling and new tests

* Remove unnecessary reference marker in `byte_offset_to_point` comment.

* Remove unnecessary reference marker in `byte_offset_to_point` comment.

* Refactor `ast.rs` for performance and clarity; enhance `cfg.rs` with recursive CFG generation and improved classification logic for AST analysis.

* Refactor CFG and taint tracking logic:

- Enhanced `cfg.rs` with inline helper function `text_of` for cleaner UTF-8 handling in AST nodes.
- Expanded `labels.rs` rules with detailed `Sources`, `Sanitizers`, and `Sinks` for improved classification.
- Refined `push_node` to handle method call expressions with object-function pairing.
- Simplified code handling in trivia skipping and debug-only logic.

* Enhance `cfg.rs` with `first_call_ident` helper and improve identifier extraction logic in `push_node`.

* Add targeted CFG taint-tracking tests to enhance analysis coverage.

* Enhance CFG generation with loop expression handling and improve taint tracking logic. Add new sanitization example in `examples/sanitize/example.rs`.

* Update README with installation instructions for Cargo and GitHub releases.

* Expand taint-tracking with precise `def-use` computation and enhance `labels.rs` for detailed classification. Extend `examples/sanitize` with realistic scenarios demonstrating new rules.

* Refactor `labels.rs`:

- Removed redundant `LabelRule` entries for cleaner rule definitions.
- Adjusted matching logic to prioritize suffix and prefix matches effectively.

* Refactor `labels.rs`:

- Removed redundant `LabelRule` entries for cleaner rule definitions.
- Adjusted matching logic to prioritize suffix and prefix matches effectively.

* Add test for taint tracking with multiple sources in `cfg.rs`.

* Add `function_summaries` table and implement summary upsert/load methods. Refactor to handle summary storage and retrieval efficiently, with placeholder clean/drop logic.

* refactor: split `labels.rs` into modular structure with language-specific files

* refactor: split `labels.rs` into modular structure with language-specific files

* refactor: clean up SQL table definitions in `database.rs` for better readability

* refactor: simplify CFG structure by removing lifetime parameters and enhancing taint metadata handling

* refactor: update TODO comments in `cfg.rs` to clarify future enhancements for cap labels and function details

* refactor: remove redundant header from README.md for improved clarity

* feat: add PHF-based syntax classifiers and Kind enum for efficient syntax mapping across languages

* feat: introduce analysis modes for enhanced scanner configuration and diagnostics

* feat: define Kind enum for syntax classification in control flow analysis

* feat: bump version to 0.2.0-alpha and update CHANGELOG for new features and fixes

* refactor: clean up imports and formatting in AST and CFG modules for improved readability

* refactor: simplify function signatures and improve code readability in CFG and module files

* fix: correct rayon_thread_stack_size comment to reflect actual value of 8 MiB

* refactor: update string formatting in clean and project modules for consistency

* refactor: fix indentation in clean.rs for improved readability

---------

Co-authored-by: elipeter <eli.peter@es.fcm.travel>
											
										
										
											2025-06-28 17:36:14 +02:00
+								        DROP TABLE IF EXISTS function_summaries;
-												Refactor database connection handling with connection pooling and parallel processing

- Introduced `r2d2` connection pooling for SQLite in `database.rs`.
- Updated `Indexer` to use pooled connections for improved concurrency.
- Replaced sequential processing with `rayon` for parallel file scanning.
- Added a `clear` method to `Indexer` for reindexing support.
- Enhanced database initialization with `init` and `from_pool` methods.
- Updated `Cargo.toml` and `Cargo.lock` to include `r2d2`, `r2d2_sqlite`, and new dependencies.

											
										
										
											2025-06-17 20:45:33 +02:00
 								        PRAGMA foreign_keys = ON;
 								        VACUUM;
 								        "#,
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								            )?;
-												Add `vacuum` method to `Indexer` for database maintenance

- Added a `vacuum` method in `database.rs` to optimize database file size and performance.
- Integrated `vacuum` calls into `scan.rs` and `index.rs` to ensure regular maintenance during operations.

											
										
										
											2025-06-17 21:00:24 +02:00
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								            self.c().execute_batch(SCHEMA)?;
 								            Ok(())
 								        }
-												Refactor database connection handling with connection pooling and parallel processing

- Introduced `r2d2` connection pooling for SQLite in `database.rs`.
- Updated `Indexer` to use pooled connections for improved concurrency.
- Replaced sequential processing with `rayon` for parallel file scanning.
- Added a `clear` method to `Indexer` for reindexing support.
- Enhanced database initialization with `init` and `from_pool` methods.
- Updated `Cargo.toml` and `Cargo.lock` to include `r2d2`, `r2d2_sqlite`, and new dependencies.

											
										
										
											2025-06-17 20:45:33 +02:00
-												Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.

											
										
										
											2025-06-24 20:27:06 +02:00
+								        pub fn vacuum(&self) -> NyxResult<()> {
 								            self.c().execute("VACUUM;", [])?;
 								            Ok(())
 								        }
 								        // -------------------------------------------------------------------------
 								        // Helpers
 								        // -------------------------------------------------------------------------
 								        fn digest_file(path: &Path) -> NyxResult<Vec<u8>> {
 								            let mut hasher = blake3::Hasher::new();
 								            let mut file = fs::File::open(path)?;
 								            std::io::copy(&mut file, &mut hasher)?;
 								            Ok(hasher.finalize().as_bytes().to_vec())
 								        }
-												Refactor database schema and scanning process:
- Introduced `issues` table for detailed vulnerability storage.
- Enhanced `files` table with project scoping and unique constraints.
- Replaced `OutputFormat` enum with `String` for flexibility.
- Added support for formatted console output of scan results.
- Integrated file and issue updating logic for incremental scans.
- Optimized scanning by leveraging database-stored issues.

											
										
										
											2025-06-17 16:46:45 +02:00
+								    }
-												Add unit tests for `Indexer` and update dependencies

- Introduced comprehensive tests for `Indexer` to validate file scan behavior.
- Added `tempfile` and `filetime` as dependencies for testing.
- Removed unused dependencies (`anyhow`, `nix`, `regex`).

											
										
										
											2025-06-17 00:02:32 +02:00
+								}
-												test: Add unit tests for file handling and configuration merging (#7)

* test: Add unit tests for file handling and configuration merging

* test: Update IO error conversion test to use new error creation method
											
										
										
											2025-06-24 23:38:32 +02:00
 								#[test]
 								fn indexer_should_scan_and_upsert_logic() {
 								    let td = tempfile::tempdir().unwrap();
 								    let db = td.path().join("nyx.sqlite");
 								    let file = td.path().join("sample.rs");
 								    std::fs::write(&file, "fn main() {}").unwrap();
 								    let pool = index::Indexer::init(&db).unwrap();
 								    let idx = index::Indexer::from_pool("proj", &pool).unwrap();
 								    // first time: nothing in DB → must scan
 								    assert!(idx.should_scan(&file).unwrap());
 								    // after upsert: no changes → should *not* scan
 								    idx.upsert_file(&file).unwrap();
 								    assert!(!idx.should_scan(&file).unwrap());
 								    // modify contents
 								    std::thread::sleep(std::time::Duration::from_millis(25)); // ensure mtime tick
 								    std::fs::write(&file, "fn main() { /* changed */ }").unwrap();
 								    assert!(idx.should_scan(&file).unwrap());
 								}
 								#[test]
 								fn replace_issues_and_query_back() {
 								    let td = tempfile::tempdir().unwrap();
 								    let db = td.path().join("nyx.sqlite");
 								    let file = td.path().join("code.go");
 								    std::fs::write(&file, "package main").unwrap();
 								    let pool = index::Indexer::init(&db).unwrap();
 								    let mut idx = index::Indexer::from_pool("proj", &pool).unwrap();
 								    let fid = idx.upsert_file(&file).unwrap();
 								    let issues = [
 								        index::IssueRow {
 								            rule_id: "X1",
 								            severity: "High",
 								            line: 3,
 								            col: 7,
 								        },
 								        index::IssueRow {
 								            rule_id: "X2",
 								            severity: "Low",
 								            line: 4,
 								            col: 1,
 								        },
 								    ];
 								    idx.replace_issues(fid, issues.clone()).unwrap();
 								    let stored = idx.get_issues_from_file(&file).unwrap();
 								    assert_eq!(stored.len(), 2);
 								    assert!(
 								        stored
 								            .iter()
 								            .any(|d| d.id == "X1" && d.severity == crate::patterns::Severity::High)
 								    );
 								    assert!(
 								        stored
 								            .iter()
 								            .any(|d| d.id == "X2" && d.severity == crate::patterns::Severity::Low)
 								    );
 								}
 								#[test]
 								fn clear_and_vacuum_reset_tables() {
 								    let td = tempfile::tempdir().unwrap();
 								    let db = td.path().join("nyx.sqlite");
 								    let f = td.path().join("f.rs");
 								    std::fs::write(&f, "//").unwrap();
 								    let pool = index::Indexer::init(&db).unwrap();
 								    let idx = index::Indexer::from_pool("proj", &pool).unwrap();
 								    idx.upsert_file(&f).unwrap();
 								    assert!(!idx.get_files("proj").unwrap().is_empty());
 								    idx.clear().unwrap();
 								    idx.vacuum().unwrap();
 								    assert!(idx.get_files("proj").unwrap().is_empty());
 								}