From 0eecf886f28d872fb67e473921586ea42dc85ca5 Mon Sep 17 00:00:00 2001 From: elipeter Date: Tue, 17 Jun 2025 16:46:45 +0200 Subject: [PATCH] Refactor database schema and scanning process: - Introduced `issues` table for detailed vulnerability storage. - Enhanced `files` table with project scoping and unique constraints. - Replaced `OutputFormat` enum with `String` for flexibility. - Added support for formatted console output of scan results. - Integrated file and issue updating logic for incremental scans. - Optimized scanning by leveraging database-stored issues. --- Cargo.lock | 138 +++++------------------ Cargo.toml | 7 +- src/cli.rs | 12 +- src/commands/scan.rs | 224 ++++++++++++++++++++----------------- src/database.rs | 258 ++++++++++++++++++++++--------------------- src/patterns/mod.rs | 14 +++ src/utils/config.rs | 6 +- 7 files changed, 302 insertions(+), 357 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d6408db2..2b931d41 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -163,6 +163,19 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "console" +version = "0.15.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "unicode-width", + "windows-sys", +] + [[package]] name = "constant_time_eq" version = "0.3.1" @@ -233,22 +246,18 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "equivalent" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" -[[package]] -name = "errno" -version = "0.3.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" -dependencies = [ - "libc", - "windows-sys", -] - [[package]] name = "fallible-iterator" version = "0.3.0" @@ -261,24 +270,6 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" -[[package]] -name = "fastrand" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" - -[[package]] -name = "filetime" -version = "0.2.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35c0522e981e68cbfa8c3f978441a5f34b30b96e146b33cd3359176b50fe8586" -dependencies = [ - "cfg-if", - "libc", - "libredox", - "windows-sys", -] - [[package]] name = "foldhash" version = "0.1.5" @@ -293,19 +284,7 @@ checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", "libc", - "wasi 0.11.1+wasi-snapshot-preview1", -] - -[[package]] -name = "getrandom" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" -dependencies = [ - "cfg-if", - "libc", - "r-efi", - "wasi 0.14.2+wasi-0.2.4", + "wasi", ] [[package]] @@ -409,7 +388,6 @@ checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ "bitflags", "libc", - "redox_syscall", ] [[package]] @@ -422,12 +400,6 @@ dependencies = [ "vcpkg", ] -[[package]] -name = "linux-raw-sys" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" - [[package]] name = "log" version = "0.4.27" @@ -481,15 +453,14 @@ version = "0.1.0" dependencies = [ "blake3", "clap", + "console", "crossbeam-channel", "directories", - "filetime", "ignore", "num_cpus", "once_cell", "rusqlite", "serde", - "tempfile", "toml", "tracing", "tracing-subscriber", @@ -565,28 +536,13 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "r-efi" -version = "5.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" - -[[package]] -name = "redox_syscall" -version = "0.5.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6" -dependencies = [ - "bitflags", -] - [[package]] name = "redox_users" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b" dependencies = [ - "getrandom 0.2.16", + "getrandom", "libredox", "thiserror", ] @@ -649,19 +605,6 @@ dependencies = [ "smallvec", ] -[[package]] -name = "rustix" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" -dependencies = [ - "bitflags", - "errno", - "libc", - "linux-raw-sys", - "windows-sys", -] - [[package]] name = "ryu" version = "1.0.20" @@ -763,19 +706,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "tempfile" -version = "3.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" -dependencies = [ - "fastrand", - "getrandom 0.3.3", - "once_cell", - "rustix", - "windows-sys", -] - [[package]] name = "thiserror" version = "2.0.12" @@ -1068,6 +998,12 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-width" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" + [[package]] name = "utf8parse" version = "0.2.2" @@ -1102,15 +1038,6 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" -[[package]] -name = "wasi" -version = "0.14.2+wasi-0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" -dependencies = [ - "wit-bindgen-rt", -] - [[package]] name = "winapi" version = "0.3.9" @@ -1223,12 +1150,3 @@ checksum = "74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd" dependencies = [ "memchr", ] - -[[package]] -name = "wit-bindgen-rt" -version = "0.39.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" -dependencies = [ - "bitflags", -] diff --git a/Cargo.toml b/Cargo.toml index 6e53f2c6..812ab7c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,9 +3,6 @@ name = "nyx" version = "0.1.0" edition = "2024" -[dev-dependencies] -tempfile = "3" - [dependencies] directories = "6.0.0" clap = { version = "4.5.40", features = ["derive"] } @@ -14,9 +11,7 @@ toml = "0.8.23" tracing-subscriber = { version = "0.3.19", features = ["env-filter", "json", "ansi","time"] } tracing = "0.1.41" num_cpus = "1.17.0" - rusqlite = "0.36.0" - ignore = "0.4.23" tree-sitter = "0.25.6" tree-sitter-rust = "0.24.0" @@ -30,5 +25,5 @@ tree-sitter-php = "0.23.11" tree-sitter-python = "0.23.6" crossbeam-channel = "0.5.15" blake3 = "1.8.2" -filetime = "0.2.25" once_cell = "1.21.3" +console = "0.15.11" diff --git a/src/cli.rs b/src/cli.rs index 2f1da4ec..9b03018b 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -26,8 +26,8 @@ pub enum Commands { rebuild_index: bool, /// Output format - #[arg(short, long, value_enum, default_value = "table")] - format: OutputFormat, + #[arg(short, long, value_enum, default_value = "")] + format: String, /// Show only high severity issues #[arg(long)] @@ -78,11 +78,3 @@ pub enum IndexAction { path: String, }, } - -#[derive(clap::ValueEnum, Clone, Debug)] -pub enum OutputFormat { - Table, - Json, - Csv, - Sarif, -} \ No newline at end of file diff --git a/src/commands/scan.rs b/src/commands/scan.rs index 7b8f04dd..a9c7d3b9 100644 --- a/src/commands/scan.rs +++ b/src/commands/scan.rs @@ -1,18 +1,30 @@ -use crate::cli::OutputFormat; use crate::utils::project::get_project_info; +use console::style; use std::path::Path; -use crate::utils::config::Config; -use tree_sitter::{Language, Parser, QueryCursor, StreamingIterator}; -use crate::database::index::Indexer; + +use crate::database::index::{IssueRow, Indexer}; use crate::patterns::Severity; +use crate::utils::config::Config; use crate::utils::query_cache; use crate::walk::spawn_senders; +use tree_sitter::{Language, Parser, QueryCursor, StreamingIterator}; + +#[derive(Debug)] +pub struct Diag { + pub(crate) path: String, + pub(crate) line: usize, + pub(crate) col: usize, + pub(crate) severity: Severity, + pub(crate) id: String, +} + +/// Entry point called by the CLI. pub fn handle( path: &str, no_index: bool, rebuild_index: bool, - format: OutputFormat, + format: String, high_only: bool, database_dir: &Path, config: &Config, @@ -20,73 +32,111 @@ pub fn handle( let scan_path = Path::new(path).canonicalize()?; let (project_name, db_path) = get_project_info(&scan_path, database_dir)?; - tracing::debug!("Config: {:?}", config); - tracing::debug!("Scanning project: {}", project_name); - tracing::debug!("Scan path: {}", scan_path.display()); + let mut indexer = Indexer::new(&project_name, &db_path)?; + let diags: Vec; + if no_index { - tracing::debug!("Scanning without index..."); - scan_filesystem(&scan_path, config)?; + diags = scan_filesystem(&scan_path, config)?; } else { if rebuild_index || !db_path.exists() { - tracing::debug!("Building/updating index..."); crate::commands::index::build_index(&scan_path, &db_path)?; } - - tracing::debug!("Using index: {}", db_path.display()); - scan_with_index(&scan_path, &db_path, config)?; + diags = scan_with_index(&project_name, &db_path, config, &mut indexer)?; } - tracing::debug!("Output format: {:?}", format); - if high_only { - tracing::debug!("Filtering: High severity only"); - } - - Ok(()) -} - -fn scan_filesystem(root: &Path, cfg: &Config) -> Result<(), Box> { - let rx = spawn_senders(root, cfg); - - for batch in rx.iter().flatten() { - tracing::debug!("Scanning file: {}", batch.display()); - scan_single_file(&batch, cfg)?; // <-- your actual scanner - } - Ok(()) -} -fn scan_with_index(root: &Path, db_path: &Path, cfg: &Config) -> Result<(), Box> { - let indexer = Indexer::new(db_path) - .map_err(|e| format!("opening index {}: {e}", db_path.display()))?; - - let rx = spawn_senders(root, cfg); - - for batch in rx.iter().flatten() { - let scan = indexer.should_scan(&batch)?; - tracing::debug!("Should scan: {}, file: {}", scan, batch.display()); - if scan { - tracing::debug!("Scanning file: {}", batch.display()); - scan_single_file(&batch, cfg)?; // your scanner - indexer.record_scan(&batch)?; + if format == "console" || format == "" && config.output.default_format == "console" { + for d in &diags { + if high_only && d.severity != Severity::High { + continue; + } + let sev_str = match d.severity { + Severity::High => style("HIGH").red().bold(), + Severity::Medium => style("MEDIUM").yellow().bold(), + Severity::Low => style("LOW").cyan().bold(), + }; + println!( + "{}:{}:{} [{}] {}", + style(d.path.clone()).blue().underlined(), + d.line, + d.col, + sev_str, + style(&d.id).bold(), + ); } } Ok(()) } -fn scan_single_file( +// -------------------------------------------------------------------------------------------- +// Scanning helpers +// -------------------------------------------------------------------------------------------- + +fn scan_filesystem( + root: &Path, + cfg: &Config, +) -> Result, Box> { + let rx = spawn_senders(root, cfg); + let mut issues: Vec = Vec::new(); + for batch in rx.iter().flatten() { + issues.append(&mut run_rules_on_file(&batch, cfg)?); + } + Ok(issues) +} + +fn scan_with_index( + project: &str, + _db_path: &Path, + cfg: &Config, + indexer: &mut Indexer, +) -> Result, Box> { + let files = indexer.get_files(project).unwrap_or_default(); + let mut issues: Vec = Vec::new(); + for file in files { + if indexer.should_scan(&file)? { + let mut diags = run_rules_on_file(&file, cfg)?; + let file_id = indexer.upsert_file(&file)?; + + let issue_rows: Vec = diags + .iter() + .map(|d| IssueRow { + rule_id: d.id.as_ref(), + severity: match d.severity { + Severity::High => "HIGH", + Severity::Medium => "MEDIUM", + Severity::Low => "LOW", + }, + line: d.line as i64, + col: d.col as i64, + }) + .collect(); + + indexer.replace_issues(file_id, issue_rows)?; + issues.append(&mut diags); + continue; + } + issues.append(&mut indexer.get_issues_from_file(&file)?); + } + Ok(issues) +} + +// -------------------------------------------------------------------------------------------- +// Tree‑sitter‑based rule runner – returns a Vec +// -------------------------------------------------------------------------------------------- +fn run_rules_on_file( path: &Path, - cfg: &Config, // assume cfg.high_only: bool -) -> Result<(), Box> { + cfg: &Config, +) -> Result, Box> { let source = std::fs::read_to_string(path)?; let mut parser = Parser::new(); - let ext = path - .extension() - .and_then(|s| s.to_str()) - .unwrap_or_default() - .to_ascii_lowercase(); - - // Pick the right tree-sitter language *and* pre-compiled queries - let (ts_lang, lang_key): (Language, &'static str) = match ext.as_str() { + let lang_key = match path + .extension() + .and_then(|s| s.to_str()) + .unwrap_or_default() + .to_ascii_lowercase() + .as_str() + { "rs" => (Language::from(tree_sitter_rust::LANGUAGE), "rust"), "c" => (Language::from(tree_sitter_c::LANGUAGE), "c"), "cpp" | "c++" => (Language::from(tree_sitter_cpp::LANGUAGE), "cpp"), @@ -96,69 +146,35 @@ fn scan_single_file( "py" => (Language::from(tree_sitter_python::LANGUAGE), "python"), "ts" | "tsx" => (Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT), "typescript"), "js" => (Language::from(tree_sitter_javascript::LANGUAGE), "javascript"), - _ => return Ok(()), + _ => return Ok(Vec::new()), }; + let (ts_lang, lang_name) = lang_key; parser.set_language(&ts_lang)?; + let tree = parser.parse(&source, None).ok_or("tree‑sitter failed")?; + let root = tree.root_node(); - let tree = parser.parse(&source, None).ok_or("tree-sitter failed")?; - let root = tree.root_node(); - - // ----- run vulnerability patterns ----- - let compiled = query_cache::for_lang(lang_key, ts_lang); + let compiled = query_cache::for_lang(lang_name, ts_lang); let mut cursor = QueryCursor::new(); + let mut out = Vec::new(); for cq in &compiled { if cfg.scanner.min_severity > cq.meta.severity { - continue; + continue; } - let mut matches = cursor.matches(&cq.query, root, source.as_bytes()); - while let Some(m) = matches.next() { - // capture 0 is the one tagged @vuln for cap in m.captures.iter().filter(|c| c.index == 0) { let point = cap.node.start_position(); - let line = point.row; - let col = point.column; - - match cq.meta.severity { - Severity::High => { - tracing::error!( - file = %path.display(), - line = line + 1, - column = col + 1, - id = cq.meta.id, - sev = ?Severity::High, - "pattern matched" - ); - }, - Severity::Medium => { - tracing::warn!( - file = %path.display(), - line = line + 1, - column = col + 1, - id = cq.meta.id, - sev = ?Severity::Medium, - "pattern matched" - ); - } - Severity::Low => { - tracing::info!( - file = %path.display(), - line = line + 1, - column = col + 1, - id = cq.meta.id, - sev = ?Severity::Low, - "pattern matched" - ); - } - } - - + out.push(Diag { + path: path.to_string_lossy().to_string(), + line: point.row + 1, + col: point.column + 1, + severity: cq.meta.severity, + id: String::from(cq.meta.id), + }); } } } - - Ok(()) + Ok(out) } \ No newline at end of file diff --git a/src/database.rs b/src/database.rs index 2aa519e3..6bcea29c 100644 --- a/src/database.rs +++ b/src/database.rs @@ -1,164 +1,174 @@ pub mod index { - use blake3::Hasher; use rusqlite::{params, Connection, OptionalExtension}; use std::fs; - use std::path::Path; + use std::path::{Path, PathBuf}; + use std::str::FromStr; use std::time::{SystemTime, UNIX_EPOCH}; + use crate::commands::scan::Diag; + use crate::patterns::Severity; - /// Schema: stores digest, file modification time (secs since epoch) and - /// last time we *fully* scanned the file. + /// DB schema (foreign‑keys enabled). const SCHEMA: &str = r#" - CREATE TABLE IF NOT EXISTS files ( - path TEXT PRIMARY KEY, - hash BLOB NOT NULL, - mtime INTEGER NOT NULL, - scanned_at INTEGER NOT NULL - );"#; + PRAGMA foreign_keys = ON; - pub(crate) struct Indexer { + CREATE TABLE IF NOT EXISTS files ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project TEXT NOT NULL, + path TEXT NOT NULL, + hash BLOB NOT NULL, + mtime INTEGER NOT NULL, + scanned_at INTEGER NOT NULL, + UNIQUE(project, path) + ); + + CREATE TABLE IF NOT EXISTS issues ( + file_id INTEGER NOT NULL + REFERENCES files(id) + ON DELETE CASCADE, + rule_id TEXT NOT NULL, + severity TEXT NOT NULL, + line INTEGER NOT NULL, + col INTEGER NOT NULL, + PRIMARY KEY (file_id, rule_id, line, col) + ); + "#; + + /// A single issue row, ready for insertion. + #[derive(Debug, Clone)] + pub struct IssueRow<'a> { + pub rule_id: &'a str, + pub severity: &'a str, + pub line: i64, + pub col: i64, + } + + pub struct Indexer { conn: Connection, + project: String, } impl Indexer { - pub fn new(database_path: &Path) -> Result> { + /// Open (or create) the DB at `database_path` for the given project name. + pub fn new(project: &str, database_path: &Path) -> Result> { let conn = Connection::open(database_path)?; conn.execute_batch(SCHEMA)?; - Ok(Self { conn }) + Ok(Self { conn, project: project.to_owned() }) } - /// Returns `true` if the caller should analyze the file, i.e., we have - /// never seen it or something changed (mtime or content hash). + /// Return true when the file *content* or *mtime* changed since the last scan. pub fn should_scan(&self, path: &Path) -> Result> { let meta = fs::metadata(path)?; let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64; - let digest = Self::digest_file(path)?; let row: Option<(Vec, i64)> = self - .conn - .query_row( - "SELECT hash, mtime FROM files WHERE path = ?1", - params![path.to_string_lossy()], - |r| Ok((r.get(0)?, r.get(1)?)), - ) - .optional()?; + .conn + .query_row( + "SELECT hash, mtime FROM files WHERE project = ?1 AND path = ?2", + params![self.project, path.to_string_lossy()], + |r| Ok((r.get(0)?, r.get(1)?)), + ) + .optional()?; - match row { - Some((stored_hash, stored_mtime)) => { - Ok(stored_hash != digest || stored_mtime != mtime) - } - None => Ok(true), - } + Ok(match row { + Some((stored_hash, stored_mtime)) => stored_hash != digest || stored_mtime != mtime, + None => true, + }) } - /// Persist a fresh scan result. - pub fn record_scan(&self, path: &Path) -> Result<(), Box> { + /// Insert or update the `files` row and return its id. + pub fn upsert_file(&self, path: &Path) -> Result> { let meta = fs::metadata(path)?; let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64; - let scanned_at = SystemTime::now() - .duration_since(UNIX_EPOCH)? - .as_secs() as i64; + let scanned_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64; let digest = Self::digest_file(path)?; self.conn.execute( - "REPLACE INTO files (path, hash, mtime, scanned_at) VALUES (?1, ?2, ?3, ?4)", - params![path.to_string_lossy(), digest, mtime, scanned_at], + "INSERT INTO files (project, path, hash, mtime, scanned_at) + VALUES (?1, ?2, ?3, ?4, ?5) + ON CONFLICT(project,path) DO UPDATE + SET hash = excluded.hash, + mtime = excluded.mtime, + scanned_at = excluded.scanned_at", + params![self.project, path.to_string_lossy(), digest, mtime, scanned_at], )?; + + let id: i64 = self.conn.query_row( + "SELECT id FROM files WHERE project = ?1 AND path = ?2", + params![self.project, path.to_string_lossy()], + |r| r.get(0), + )?; + Ok(id) + } + + /// Replace all issues for `file_id` with the supplied set. + pub fn replace_issues<'a>(&mut self, file_id: i64, issues: impl IntoIterator>) + -> Result<(), Box> { + let tx = self.conn.transaction()?; + tx.execute("DELETE FROM issues WHERE file_id = ?", params![file_id])?; + + { + let mut stmt = tx.prepare( + "INSERT INTO issues (file_id, rule_id, severity, line, col) + VALUES (?1, ?2, ?3, ?4, ?5)", + )?; + for iss in issues { + stmt.execute(params![file_id, iss.rule_id, iss.severity, iss.line, iss.col])?; + } + } + tx.commit()?; Ok(()) } + /// Gets the issues for a specific file so we don't have to rescan + pub fn get_issues_from_file( + &self, + path: &Path, + ) -> Result, Box> { + let file_id: i64 = self.conn.query_row( + "SELECT id FROM files WHERE project = ?1 AND path = ?2", + params![self.project, path.to_string_lossy()], + |r| r.get(0), + )?; + + let mut stmt = self.conn.prepare( + "SELECT rule_id, severity, line, col + FROM issues + WHERE file_id = ?1", + )?; + + let issue_iter = stmt.query_map([file_id], |row| { + let sev_str: String = row.get(1)?; + Ok(Diag { + path: path.to_string_lossy().to_string(), + id: row.get::<_, String>(0)?, // rule_id + line: row.get::<_, i64>(2)? as usize, + col: row.get::<_, i64>(3)? as usize, + severity: Severity::from_str(&sev_str).unwrap(), + }) + })?; + + Ok(issue_iter.filter_map(Result::ok).collect()) + } + + /// gets files from the database + pub fn get_files(&self, project: &str) -> Result, Box> { + let mut stmt = self.conn.prepare( + "SELECT path + FROM files + WHERE project = ?1", + )?; + + let file_iter = stmt.query_map([project], |row| row.get::<_, String>(0))?; + + Ok(file_iter.map(|p| p.map(PathBuf::from)).collect::>()?) + } + fn digest_file(path: &Path) -> Result, Box> { - let mut hasher = Hasher::new(); + let mut hasher = blake3::Hasher::new(); let mut file = fs::File::open(path)?; std::io::copy(&mut file, &mut hasher)?; Ok(hasher.finalize().as_bytes().to_vec()) } } } - -#[cfg(test)] -mod tests { - use crate::database::index::Indexer; - use std::error::Error; - use std::io::Write; - use tempfile::tempdir; - - /// Returns a freshly‑initialised `Indexer` backed by an *in‑memory* SQLite - /// database. Using `:memory:` sidesteps file‑system lifetime issues that can - /// occur when the temporary database file is deleted while a connection is - /// still open. - fn new_indexer() -> Indexer { - Indexer::new(std::path::Path::new(":memory:")) - .expect("create in‑memory Indexer") - } - - #[test] - fn new_file_is_flagged_for_scan() -> Result<(), Box> { - let indexer = new_indexer(); - - let dir = tempdir()?; - let file_path = dir.path().join("hello.txt"); - std::fs::write(&file_path, b"hello world")?; - - // File has never been seen ⇒ should be scanned. - assert!(indexer.should_scan(&file_path)?); - Ok(()) - } - - #[test] - fn unchanged_file_is_not_flagged_again() -> Result<(), Box> { - let indexer = new_indexer(); - let dir = tempdir()?; - let file_path = dir.path().join("foo.txt"); - std::fs::write(&file_path, b"abc123")?; - - // First pass – record the scan result. - indexer.record_scan(&file_path)?; - - // Nothing changed – should_scan must return false. - assert!(!indexer.should_scan(&file_path)?); - Ok(()) - } - - #[test] - fn modified_content_triggers_rescan() -> Result<(), Box> { - let indexer = new_indexer(); - let dir = tempdir()?; - let file_path = dir.path().join("bar.txt"); - std::fs::write(&file_path, b"first")?; - indexer.record_scan(&file_path)?; - - // Append data to change the hash. - let mut file = std::fs::OpenOptions::new() - .append(true) - .open(&file_path)?; - writeln!(file, "second line")?; - - assert!(indexer.should_scan(&file_path)?); - Ok(()) - } - - #[test] - fn modified_mtime_alone_triggers_rescan() -> Result<(), Box> { - // Compile this test only when the optional `filetime` feature is enabled. - { - use std::time::{Duration, SystemTime}; - use filetime::FileTime; - - let indexer = new_indexer(); - let dir = tempdir()?; - let file_path = dir.path().join("baz.txt"); - std::fs::write(&file_path, b"unchanged content")?; - indexer.record_scan(&file_path)?; - - // Bump the modification time without touching the contents. - let now_plus = SystemTime::now() + Duration::from_secs(5); - let new_mtime = FileTime::from_system_time(now_plus); - filetime::set_file_mtime(&file_path, new_mtime)?; - - assert!(indexer.should_scan(&file_path)?); - } - - Ok(()) - } -} diff --git a/src/patterns/mod.rs b/src/patterns/mod.rs index 5c2c532a..45370cb5 100644 --- a/src/patterns/mod.rs +++ b/src/patterns/mod.rs @@ -9,6 +9,7 @@ mod php; mod python; use std::collections::HashMap; +use std::str::FromStr; use serde::{Deserialize, Serialize}; use once_cell::sync::Lazy; @@ -33,6 +34,19 @@ pub struct Pattern { pub severity: Severity, } +impl FromStr for Severity { // TODO: FIX + type Err = (); + + fn from_str(input: &str) -> Result { + match input.to_lowercase().as_str() { + "medium" => Ok(Severity::Medium), + "high" => Ok(Severity::High), + _ => Ok(Severity::Low), + } + } +} + + /// Global, lazily-initialised registry: lang-name → pattern slice static REGISTRY: Lazy> = Lazy::new(|| { let mut m = HashMap::new(); diff --git a/src/utils/config.rs b/src/utils/config.rs index dcf4fcb3..d90971d0 100644 --- a/src/utils/config.rs +++ b/src/utils/config.rs @@ -97,13 +97,13 @@ impl Default for DatabaseConfig { #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(default)] pub struct OutputConfig { - /// The default output format. TODO: IMPLEMENT + /// The default output format. TODO: IMPLEMENT others pub default_format: String, /// Whether to show progress or not. TODO: IMPLEMENT pub show_progress: bool, - /// Whether to colorize output or not. TODO: IMPLEMENT + /// Whether to colorize output or not. TODO: IMPLEMENT changing to non colored pub color_output: bool, /// The maximum number of results to show. TODO: IMPLEMENT @@ -113,7 +113,7 @@ pub struct OutputConfig { impl Default for OutputConfig { fn default() -> Self { Self { - default_format: "table".into(), + default_format: "console".into(), show_progress: true, color_output: true, max_results: None,