From 14a549ac3948c3e4487127262633e4e901c2949e Mon Sep 17 00:00:00 2001 From: elipeter Date: Tue, 24 Jun 2025 20:27:06 +0200 Subject: [PATCH] Refactor codebase for consistent indentation and formatting - Standardized spacing and indentation across multiple modules for improved readability. - Reorganized `patterns` and `utils` imports for consistency. - Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting. - Enhanced readability in AST patterns for better clarity and maintainability. --- src/ast.rs | 122 ++++++------- src/commands/clean.rs | 36 ++-- src/commands/index.rs | 105 +++++++---- src/commands/list.rs | 30 ++-- src/commands/mod.rs | 36 ++-- src/commands/scan.rs | 138 ++++++++------- src/database.rs | 345 +++++++++++++++++++------------------ src/errors.rs | 64 +++---- src/main.rs | 35 ++-- src/patterns/c.rs | 72 ++++---- src/patterns/cpp.rs | 72 ++++---- src/patterns/go.rs | 60 +++---- src/patterns/java.rs | 72 ++++---- src/patterns/javascript.rs | 168 +++++++++--------- src/patterns/mod.rs | 147 ++++++++-------- src/patterns/php.rs | 72 ++++---- src/patterns/python.rs | 36 ++-- src/patterns/ruby.rs | 136 +++++++-------- src/patterns/rust.rs | 180 +++++++++---------- src/patterns/typescript.rs | 206 +++++++++++----------- src/utils/config.rs | 183 +++++++++++--------- src/utils/ext.rs | 25 ++- src/utils/mod.rs | 8 +- src/utils/project.rs | 48 +++--- src/utils/query_cache.rs | 48 +++--- src/walk.rs | 91 +++++----- 26 files changed, 1314 insertions(+), 1221 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 3bb85bf7..3502c62a 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1,71 +1,75 @@ +use crate::commands::scan::Diag; +use crate::errors::{NyxError, NyxResult}; +use crate::utils::ext::lowercase_ext; +use crate::utils::{Config, query_cache}; use std::cell::RefCell; use std::path::Path; use tree_sitter::{Language, QueryCursor, StreamingIterator}; -use crate::commands::scan::Diag; -use crate::errors::{NyxResult, NyxError}; -use crate::utils::{query_cache, Config}; -use crate::utils::ext::lowercase_ext; thread_local! { static PARSER: RefCell = RefCell::new(tree_sitter::Parser::new()); } -pub(crate) fn run_rules_on_file( - path: &Path, - cfg: &Config, -) -> NyxResult> { - tracing::debug!("Running rules on: {}", path.display()); - let bytes = std::fs::read(path)?; +pub(crate) fn run_rules_on_file(path: &Path, cfg: &Config) -> NyxResult> { + tracing::debug!("Running rules on: {}", path.display()); + let bytes = std::fs::read(path)?; - // Fast binary-file guard (skip if >1% NULs) - if bytes.iter().filter(|b| **b == 0).count() * 100 / bytes.len().max(1) > 1 { - return Ok(vec![]); - } - - let (ts_lang, lang_slug) = match lowercase_ext(path) { - Some("rs") => (Language::from(tree_sitter_rust::LANGUAGE), "rust"), - Some("c") => (Language::from(tree_sitter_c::LANGUAGE), "c"), - Some("cpp") => (Language::from(tree_sitter_cpp::LANGUAGE), "cpp"), - Some("java")=> (Language::from(tree_sitter_java::LANGUAGE), "java"), - Some("go") => (Language::from(tree_sitter_go::LANGUAGE), "go"), - Some("php") => (Language::from(tree_sitter_php::LANGUAGE_PHP), "php"), - Some("py") => (Language::from(tree_sitter_python::LANGUAGE), "python"), - Some("ts") => (Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT), "typescript"), - Some("js") => (Language::from(tree_sitter_javascript::LANGUAGE), "javascript"), - Some("rb") => (Language::from(tree_sitter_ruby::LANGUAGE), "ruby"), - _ => return Ok(vec![]), - }; - - let _tree = PARSER.with(|cell| { - let mut parser = cell.borrow_mut(); - parser.set_language(&ts_lang)?; - parser.parse(&*bytes, None) - .ok_or_else(|| NyxError::Other("tree-sitter failed".into())) - })?; - - let root = _tree.root_node(); - - let compiled = query_cache::for_lang(lang_slug, ts_lang); - let mut cursor = QueryCursor::new(); - let mut out = Vec::new(); - - for cq in compiled.iter() { - if cfg.scanner.min_severity <= cq.meta.severity { - continue; + // Fast binary-file guard (skip if >1% NULs) + if bytes.iter().filter(|b| **b == 0).count() * 100 / bytes.len().max(1) > 1 { + return Ok(vec![]); } - let mut matches = cursor.matches(&cq.query, root, &*bytes); - while let Some(m) = matches.next() { - if let Some(cap) = m.captures.iter().find(|c| c.index == 0) { - let point = cap.node.start_position(); - out.push(Diag { - path: path.to_string_lossy().into_owned(), - line: point.row + 1, - col: point.column + 1, - severity: cq.meta.severity, - id: cq.meta.id.to_owned(), - }); - } + + let (ts_lang, lang_slug) = match lowercase_ext(path) { + Some("rs") => (Language::from(tree_sitter_rust::LANGUAGE), "rust"), + Some("c") => (Language::from(tree_sitter_c::LANGUAGE), "c"), + Some("cpp") => (Language::from(tree_sitter_cpp::LANGUAGE), "cpp"), + Some("java") => (Language::from(tree_sitter_java::LANGUAGE), "java"), + Some("go") => (Language::from(tree_sitter_go::LANGUAGE), "go"), + Some("php") => (Language::from(tree_sitter_php::LANGUAGE_PHP), "php"), + Some("py") => (Language::from(tree_sitter_python::LANGUAGE), "python"), + Some("ts") => ( + Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT), + "typescript", + ), + Some("js") => ( + Language::from(tree_sitter_javascript::LANGUAGE), + "javascript", + ), + Some("rb") => (Language::from(tree_sitter_ruby::LANGUAGE), "ruby"), + _ => return Ok(vec![]), + }; + + let _tree = PARSER.with(|cell| { + let mut parser = cell.borrow_mut(); + parser.set_language(&ts_lang)?; + parser + .parse(&*bytes, None) + .ok_or_else(|| NyxError::Other("tree-sitter failed".into())) + })?; + + let root = _tree.root_node(); + + let compiled = query_cache::for_lang(lang_slug, ts_lang); + let mut cursor = QueryCursor::new(); + let mut out = Vec::new(); + + for cq in compiled.iter() { + if cfg.scanner.min_severity <= cq.meta.severity { + continue; + } + let mut matches = cursor.matches(&cq.query, root, &*bytes); + while let Some(m) = matches.next() { + if let Some(cap) = m.captures.iter().find(|c| c.index == 0) { + let point = cap.node.start_position(); + out.push(Diag { + path: path.to_string_lossy().into_owned(), + line: point.row + 1, + col: point.column + 1, + severity: cq.meta.severity, + id: cq.meta.id.to_owned(), + }); + } + } } -} - Ok(out) + Ok(out) } diff --git a/src/commands/clean.rs b/src/commands/clean.rs index 0c0b9c95..336de2f0 100644 --- a/src/commands/clean.rs +++ b/src/commands/clean.rs @@ -1,13 +1,9 @@ -use std::{env, fs}; -use console::style; use crate::errors::NyxResult; use crate::utils::get_project_info; +use console::style; +use std::{env, fs}; -pub fn handle( - project: Option, - all: bool, - config_dir: &std::path::Path, -) -> NyxResult<()> { +pub fn handle(project: Option, all: bool, config_dir: &std::path::Path) -> NyxResult<()> { if all { println!("{}", style("Cleaning all indexes...").cyan().bold()); if config_dir.exists() { @@ -19,9 +15,17 @@ pub fn handle( let db_path = config_dir.join(format!("{}.sqlite", proj_name)); if db_path.exists() { fs::remove_file(&db_path)?; - println!("{} {}", style("✔ Cleaned index for").green(), style(&proj_name).white().bold()); + println!( + "{} {}", + style("✔ Cleaned index for").green(), + style(&proj_name).white().bold() + ); } else { - println!("{} {}", style("✖ No index found for").red(), style(&proj_name).white().bold()); + println!( + "{} {}", + style("✖ No index found for").red(), + style(&proj_name).white().bold() + ); } } else { let current_dir = env::current_dir()?; @@ -29,11 +33,19 @@ pub fn handle( if db_path.exists() { fs::remove_file(&db_path)?; - println!("{} {}", style("✔ Cleaned index for").green(), style(&project_name).white().bold()); + println!( + "{} {}", + style("✔ Cleaned index for").green(), + style(&project_name).white().bold() + ); } else { - println!("{} {}", style("✖ No index found for current project").red(), style(&project_name).white().bold()); + println!( + "{} {}", + style("✖ No index found for current project").red(), + style(&project_name).white().bold() + ); } } std::process::exit(0); -} \ No newline at end of file +} diff --git a/src/commands/index.rs b/src/commands/index.rs index 96da2ef6..16cd4142 100644 --- a/src/commands/index.rs +++ b/src/commands/index.rs @@ -1,16 +1,16 @@ -use std::fs; -use std::process::exit; -use bytesize::ByteSize; -use chrono::{DateTime, Local}; -use console::style; use crate::cli::IndexAction; use crate::database::index::{Indexer, IssueRow}; +use crate::errors::NyxResult; use crate::patterns::Severity; use crate::utils::Config; use crate::utils::project::get_project_info; use crate::walk::spawn_senders; +use bytesize::ByteSize; +use chrono::{DateTime, Local}; +use console::style; use rayon::prelude::*; -use crate::errors::NyxResult; +use std::fs; +use std::process::exit; pub fn handle( action: IndexAction, @@ -24,10 +24,18 @@ pub fn handle( if force || !db_path.exists() { build_index(&project_name, &build_path, &db_path, config)?; - println!("✔ {} {}", style("Index built:" ).green(), style(db_path.display()).white().bold()); + println!( + "✔ {} {}", + style("Index built:").green(), + style(db_path.display()).white().bold() + ); exit(0); } else { - println!("{} {}", style("↩ Index already exists").yellow(), style("(use --force to rebuild)").dim()); + println!( + "{} {}", + style("↩ Index already exists").yellow(), + style("(use --force to rebuild)").dim() + ); exit(0); } } @@ -36,18 +44,34 @@ pub fn handle( let (project_name, db_path) = get_project_info(&status_path, database_dir)?; println!("{}", style("Project status").blue().bold().underlined()); - println!(" {:14} {}", style("Project"), style(&project_name).white().bold()); - println!(" {:14} {}", style("Index path"), style(db_path.display()).underlined()); - println!(" {:14} {}", style("Exists"), style(db_path.exists()).bold()); + println!( + " {:14} {}", + style("Project"), + style(&project_name).white().bold() + ); + println!( + " {:14} {}", + style("Index path"), + style(db_path.display()).underlined() + ); + println!( + " {:14} {}", + style("Exists"), + style(db_path.exists()).bold() + ); if db_path.exists() { let meta = fs::metadata(&db_path)?; let size = ByteSize::b(meta.len()); let mtime: DateTime = meta.modified()?.into(); - println!(" {:14} {}", style("Size"), size); - println!(" {:14} {}", style("Modified"), mtime.format("%Y-%m-%d %H:%M:%S")); + println!(" {:14} {}", style("Size"), size); + println!( + " {:14} {}", + style("Modified"), + mtime.format("%Y-%m-%d %H:%M:%S") + ); } - + exit(0); } } @@ -61,7 +85,7 @@ pub fn build_index( ) -> NyxResult<()> { tracing::debug!("Building index for: {}", project_name); fs::File::create(db_path)?; - + let pool = Indexer::init(db_path)?; { let idx = Indexer::from_pool(project_name, &pool)?; @@ -69,34 +93,39 @@ pub fn build_index( } tracing::debug!("Cleaned index for: {}", project_name); - + let rx = spawn_senders(project_path, config); let paths: Vec<_> = rx.into_iter().flatten().collect(); - - paths.into_par_iter().try_for_each(|path| -> Result<(), Box> { - let issues = crate::commands::scan::run_rules_on_file(&path, config)?; - let mut idx = Indexer::from_pool(project_name, &pool)?; - let file_id = idx.upsert_file(&path)?; - let rows: Vec = issues.iter().map(|d| IssueRow { - rule_id: d.id.as_ref(), - severity: match d.severity { - Severity::High => "HIGH", - Severity::Medium => "MEDIUM", - Severity::Low => "LOW", - }, - line: d.line as i64, - col: d.col as i64, - }).collect(); - - idx.replace_issues(file_id, rows)?; - Ok(()) - })?; - + paths.into_par_iter().try_for_each( + |path| -> Result<(), Box> { + let issues = crate::commands::scan::run_rules_on_file(&path, config)?; + let mut idx = Indexer::from_pool(project_name, &pool)?; + let file_id = idx.upsert_file(&path)?; + + let rows: Vec = issues + .iter() + .map(|d| IssueRow { + rule_id: d.id.as_ref(), + severity: match d.severity { + Severity::High => "HIGH", + Severity::Medium => "MEDIUM", + Severity::Low => "LOW", + }, + line: d.line as i64, + col: d.col as i64, + }) + .collect(); + + idx.replace_issues(file_id, rows)?; + Ok(()) + }, + )?; + { let idx = Indexer::from_pool(project_name, &pool)?; idx.vacuum()?; } - + Ok(()) -} \ No newline at end of file +} diff --git a/src/commands/list.rs b/src/commands/list.rs index 5359b05b..21031896 100644 --- a/src/commands/list.rs +++ b/src/commands/list.rs @@ -1,13 +1,10 @@ -use std::fs; +use crate::errors::NyxResult; use bytesize::ByteSize; use chrono::{DateTime, Local}; use console::style; -use crate::errors::NyxResult; +use std::fs; -pub fn handle( - verbose: bool, - database_dir: &std::path::Path, -) -> NyxResult<()> { +pub fn handle(verbose: bool, database_dir: &std::path::Path) -> NyxResult<()> { println!("{}", style("Indexed projects").blue().bold().underlined()); if !database_dir.exists() { @@ -21,18 +18,29 @@ pub fn handle( continue; } - let name = path.file_stem().and_then(|s| s.to_str()).unwrap_or("unknown"); + let name = path + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or("unknown"); println!(" {}", style(name).white().bold()); if verbose { let meta = fs::metadata(&path)?; let size = ByteSize::b(meta.len()); let mtime: DateTime = meta.modified()?.into(); - println!(" {:10} {}", style("Path"), style(path.display()).underlined()); - println!(" {:10} {}", style("Size"), size); - println!(" {:10} {}", style("Modified"), mtime.format("%Y-%m-%d %H:%M:%S")); + println!( + " {:10} {}", + style("Path"), + style(path.display()).underlined() + ); + println!(" {:10} {}", style("Size"), size); + println!( + " {:10} {}", + style("Modified"), + mtime.format("%Y-%m-%d %H:%M:%S") + ); } } std::process::exit(0); -} \ No newline at end of file +} diff --git a/src/commands/mod.rs b/src/commands/mod.rs index 30a765fd..eb5ef7a4 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -1,33 +1,35 @@ -pub mod scan; +pub mod clean; pub mod index; pub mod list; -pub mod clean; +pub mod scan; use crate::cli::Commands; -use std::path::Path; use crate::errors::NyxResult; use crate::patterns::Severity; use crate::utils::config::Config; +use std::path::Path; pub fn handle_command( command: Commands, database_dir: &Path, - config: &mut Config + config: &mut Config, ) -> NyxResult<()> { match command { - Commands::Scan { path, no_index, rebuild_index, format, high_only } => { - if high_only { config.scanner.min_severity = Severity::High }; - + Commands::Scan { + path, + no_index, + rebuild_index, + format, + high_only, + } => { + if high_only { + config.scanner.min_severity = Severity::High + }; + scan::handle(&path, no_index, rebuild_index, format, database_dir, config) } - Commands::Index { action } => { - index::handle(action, database_dir, config) - } - Commands::List { verbose } => { - list::handle(verbose, database_dir) - } - Commands::Clean { project, all } => { - clean::handle(project, all, database_dir) - } + Commands::Index { action } => index::handle(action, database_dir, config), + Commands::List { verbose } => list::handle(verbose, database_dir), + Commands::Clean { project, all } => clean::handle(project, all, database_dir), } -} \ No newline at end of file +} diff --git a/src/commands/scan.rs b/src/commands/scan.rs index bd1c8da4..9c2ff0bb 100644 --- a/src/commands/scan.rs +++ b/src/commands/scan.rs @@ -1,18 +1,18 @@ -use crate::utils::project::get_project_info; -use console::style; -use std::path::Path; -use std::sync::{Arc, Mutex}; -use r2d2::Pool; -use r2d2_sqlite::SqliteConnectionManager; -use crate::database::index::{IssueRow, Indexer}; +pub(crate) use crate::ast::run_rules_on_file; +use crate::database::index::{Indexer, IssueRow}; +use crate::errors::NyxResult; use crate::patterns::Severity; use crate::utils::config::Config; +use crate::utils::project::get_project_info; use crate::walk::spawn_senders; +use console::style; +use dashmap::DashMap; +use r2d2::Pool; +use r2d2_sqlite::SqliteConnectionManager; use rayon::prelude::*; use std::collections::BTreeMap; -use dashmap::DashMap; -use crate::errors::NyxResult; -pub(crate) use crate::ast::run_rules_on_file; +use std::path::Path; +use std::sync::{Arc, Mutex}; type DynError = Box; @@ -37,14 +37,18 @@ pub fn handle( let scan_path = Path::new(path).canonicalize()?; let (project_name, db_path) = get_project_info(&scan_path, database_dir)?; - println!("{} {}...\n", style("Checking").green().bold(), &project_name); - + println!( + "{} {}...\n", + style("Checking").green().bold(), + &project_name + ); + let diags: Vec = if no_index { scan_filesystem(&scan_path, config)? } else { if rebuild_index || !db_path.exists() { tracing::debug!("Scanning filesystem index filesystem"); - crate::commands::index::build_index(&project_name,&scan_path, &db_path, config)?; + crate::commands::index::build_index(&project_name, &scan_path, &db_path, config)?; } let pool = Indexer::init(&db_path)?; @@ -53,9 +57,7 @@ pub fn handle( tracing::debug!("Found {:?} issues.", diags.len()); - if format == "console" - || (format.is_empty() && config.output.default_format == "console") - { + if format == "console" || (format.is_empty() && config.output.default_format == "console") { tracing::debug!("Printing to console"); let mut grouped: BTreeMap<&str, Vec<&Diag>> = BTreeMap::new(); for d in &diags { @@ -65,16 +67,23 @@ pub fn handle( for (path, issues) in &grouped { println!("{}", style(path).blue().underlined()); for d in issues { - println!(" {:>4}:{:<4} [{}] {}", - d.line, d.col, d.severity, style(&d.id).bold()); + println!( + " {:>4}:{:<4} [{}] {}", + d.line, + d.col, + d.severity, + style(&d.id).bold() + ); } println!(); } - println!("{} '{}' generated {} issues.", - style("warning").yellow().bold(), - style(project_name).white().bold(), - style(diags.len()).bold()); + println!( + "{} '{}' generated {} issues.", + style("warning").yellow().bold(), + style(project_name).white().bold(), + style(diags.len()).bold() + ); println!("\t"); // TODO: Add individual counts for different warning levels } Ok(()) @@ -84,22 +93,16 @@ pub fn handle( // Scanning helpers // -------------------------------------------------------------------------------------------- -fn scan_filesystem( - root: &Path, - cfg: &Config, -) -> NyxResult> { +fn scan_filesystem(root: &Path, cfg: &Config) -> NyxResult> { let rx = spawn_senders(root, cfg); let acc = Mutex::new(Vec::new()); - rx.into_iter() - .flatten() - .par_bridge() - .try_for_each(|path| { - let mut local = run_rules_on_file(&path, cfg)?; - acc.lock().unwrap().append(&mut local); - Ok::<(), DynError>(()) - })?; - + rx.into_iter().flatten().par_bridge().try_for_each(|path| { + let mut local = run_rules_on_file(&path, cfg)?; + acc.lock().unwrap().append(&mut local); + Ok::<(), DynError>(()) + })?; + Ok(acc.into_inner()?) } @@ -108,7 +111,6 @@ pub fn scan_with_index_parallel( pool: Arc>, cfg: &Config, ) -> NyxResult> { - let files = { let idx = Indexer::from_pool(project, &pool)?; idx.get_files(project)? @@ -117,40 +119,48 @@ pub fn scan_with_index_parallel( // ① Collect per-path Vec without a global mutex let diag_map: DashMap> = DashMap::new(); - files.into_par_iter() - .for_each_init( - // ② A single Indexer per Rayon worker thread - || Indexer::from_pool(project, &pool).expect("db pool"), - |idx, path| { - let needs_scan = idx.should_scan(&path).unwrap_or(true); + files.into_par_iter().for_each_init( + // ② A single Indexer per Rayon worker thread + || Indexer::from_pool(project, &pool).expect("db pool"), + |idx, path| { + let needs_scan = idx.should_scan(&path).unwrap_or(true); - let mut diags = if needs_scan { - let d = run_rules_on_file(&path, cfg).unwrap_or_default(); - let file_id = idx.upsert_file(&path).unwrap_or_default(); - idx.replace_issues( - file_id, - d.iter().map(|d| IssueRow { - rule_id: &d.id, - severity: d.severity.as_db_str(), - line: d.line as i64, - col: d.col as i64, - }), - ).ok(); - d - } else { - idx.get_issues_from_file(&path).unwrap_or_default() - }; - if !diags.is_empty() { - diag_map.entry(path.to_string_lossy().to_string()) + let mut diags = if needs_scan { + let d = run_rules_on_file(&path, cfg).unwrap_or_default(); + let file_id = idx.upsert_file(&path).unwrap_or_default(); + idx.replace_issues( + file_id, + d.iter().map(|d| IssueRow { + rule_id: &d.id, + severity: d.severity.as_db_str(), + line: d.line as i64, + col: d.col as i64, + }), + ) + .ok(); + d + } else { + idx.get_issues_from_file(&path).unwrap_or_default() + }; + if !diags.is_empty() { + diag_map + .entry(path.to_string_lossy().to_string()) .or_default() .append(&mut diags); - } - } - ); + } + }, + ); // Optional, heavy: only vacuum on --rebuild-index // if rebuild { idx.vacuum()?; } + // flatten + let mut diags: Vec = diag_map.into_iter().flat_map(|(_, v)| v).collect(); + + if let Some(max) = cfg.output.max_results { + diags.truncate(max as usize); + } + // Flatten - Ok(diag_map.into_iter().flat_map(|(_, v)| v).collect()) + Ok(diags) } diff --git a/src/database.rs b/src/database.rs index dfca9c78..37a13226 100644 --- a/src/database.rs +++ b/src/database.rs @@ -1,19 +1,19 @@ pub mod index { - use rusqlite::{params, Connection, OpenFlags, OptionalExtension}; - use std::fs; - use std::path::{Path, PathBuf}; - use std::str::FromStr; - use std::time::{SystemTime, UNIX_EPOCH}; - use crate::commands::scan::Diag; - use crate::patterns::Severity; - use r2d2_sqlite::{SqliteConnectionManager}; - use std::ops::Deref; - use std::sync::Arc; - use r2d2::{Pool, PooledConnection}; - use crate::errors::NyxResult; + use crate::commands::scan::Diag; + use crate::errors::NyxResult; + use crate::patterns::Severity; + use r2d2::{Pool, PooledConnection}; + use r2d2_sqlite::SqliteConnectionManager; + use rusqlite::{Connection, OpenFlags, OptionalExtension, params}; + use std::fs; + use std::ops::Deref; + use std::path::{Path, PathBuf}; + use std::str::FromStr; + use std::sync::Arc; + use std::time::{SystemTime, UNIX_EPOCH}; - /// DB schema (foreign‑keys enabled). - const SCHEMA: &str = r#" + /// DB schema (foreign‑keys enabled). + const SCHEMA: &str = r#" PRAGMA foreign_keys = ON; CREATE TABLE IF NOT EXISTS files ( @@ -38,165 +38,178 @@ pub mod index { ); "#; - /// A single issue row, ready for insertion. - #[derive(Debug, Clone)] - pub struct IssueRow<'a> { - pub rule_id: &'a str, - pub severity: &'a str, - pub line: i64, - pub col: i64, - } - - pub struct Indexer { - conn: PooledConnection, - project: String, - } - - impl Indexer { - - pub fn init( - database_path: &Path, - ) -> NyxResult>> { - let flags = OpenFlags::SQLITE_OPEN_READ_WRITE - | OpenFlags::SQLITE_OPEN_CREATE - | OpenFlags::SQLITE_OPEN_FULL_MUTEX; - let manager = SqliteConnectionManager::file(database_path).with_flags(flags); - let pool = Arc::new(Pool::new(manager)?); - - { - let conn = pool.get()?; - conn.pragma_update(None, "journal_mode", "WAL")?; - conn.execute_batch(SCHEMA)?; - } - Ok(pool) + /// A single issue row, ready for insertion. + #[derive(Debug, Clone)] + pub struct IssueRow<'a> { + pub rule_id: &'a str, + pub severity: &'a str, + pub line: i64, + pub col: i64, } - pub fn from_pool( - project: &str, - pool: &Pool, - ) -> NyxResult { - let conn = pool.get()?; - Ok(Self { conn, project: project.to_owned() }) + pub struct Indexer { + conn: PooledConnection, + project: String, } - // helper so code below can treat PooledConnection like &Connection - fn c(&self) -> &Connection { self.conn.deref() } + impl Indexer { + pub fn init(database_path: &Path) -> NyxResult>> { + let flags = OpenFlags::SQLITE_OPEN_READ_WRITE + | OpenFlags::SQLITE_OPEN_CREATE + | OpenFlags::SQLITE_OPEN_FULL_MUTEX; + let manager = SqliteConnectionManager::file(database_path).with_flags(flags); + let pool = Arc::new(Pool::new(manager)?); - /// Return true when the file *content* or *mtime* changed since the last scan. - pub fn should_scan(&self, path: &Path) -> NyxResult { - let meta = fs::metadata(path)?; - let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64; - let digest = Self::digest_file(path)?; + { + let conn = pool.get()?; + conn.pragma_update(None, "journal_mode", "WAL")?; + conn.execute_batch(SCHEMA)?; + } + Ok(pool) + } - let row: Option<(Vec, i64)> = self - .conn - .query_row( - "SELECT hash, mtime FROM files WHERE project = ?1 AND path = ?2", - params![self.project, path.to_string_lossy()], - |r| Ok((r.get(0)?, r.get(1)?)), - ) - .optional()?; + pub fn from_pool(project: &str, pool: &Pool) -> NyxResult { + let conn = pool.get()?; + Ok(Self { + conn, + project: project.to_owned(), + }) + } - Ok(match row { - Some((stored_hash, stored_mtime)) => stored_hash != digest || stored_mtime != mtime, - None => true, - }) - } + // helper so code below can treat PooledConnection like &Connection + fn c(&self) -> &Connection { + self.conn.deref() + } - /// Insert or update the `files` row and return its id. - pub fn upsert_file(&self, path: &Path) -> NyxResult { - let meta = fs::metadata(path)?; - let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64; - let scanned_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64; - let digest = Self::digest_file(path)?; + /// Return true when the file *content* or *mtime* changed since the last scan. + pub fn should_scan(&self, path: &Path) -> NyxResult { + let meta = fs::metadata(path)?; + let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64; + let digest = Self::digest_file(path)?; - self.c().execute( - "INSERT INTO files (project, path, hash, mtime, scanned_at) + let row: Option<(Vec, i64)> = self + .conn + .query_row( + "SELECT hash, mtime FROM files WHERE project = ?1 AND path = ?2", + params![self.project, path.to_string_lossy()], + |r| Ok((r.get(0)?, r.get(1)?)), + ) + .optional()?; + + Ok(match row { + Some((stored_hash, stored_mtime)) => stored_hash != digest || stored_mtime != mtime, + None => true, + }) + } + + /// Insert or update the `files` row and return its id. + pub fn upsert_file(&self, path: &Path) -> NyxResult { + let meta = fs::metadata(path)?; + let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64; + let scanned_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64; + let digest = Self::digest_file(path)?; + + self.c().execute( + "INSERT INTO files (project, path, hash, mtime, scanned_at) VALUES (?1, ?2, ?3, ?4, ?5) ON CONFLICT(project,path) DO UPDATE SET hash = excluded.hash, mtime = excluded.mtime, scanned_at = excluded.scanned_at", - params![self.project, path.to_string_lossy(), digest, mtime, scanned_at], - )?; + params![ + self.project, + path.to_string_lossy(), + digest, + mtime, + scanned_at + ], + )?; - let id: i64 = self.c().query_row( - "SELECT id FROM files WHERE project = ?1 AND path = ?2", - params![self.project, path.to_string_lossy()], - |r| r.get(0), - )?; - Ok(id) - } - - /// Replace all issues for `file_id` with the supplied set. - pub fn replace_issues<'a>(&mut self, file_id: i64, issues: impl IntoIterator>) - -> NyxResult<()> { - let tx = self.conn.transaction()?; - tx.execute("DELETE FROM issues WHERE file_id = ?", params![file_id])?; - - { - let mut stmt = tx.prepare( - "INSERT INTO issues (file_id, rule_id, severity, line, col) - VALUES (?1, ?2, ?3, ?4, ?5)", - )?; - for iss in issues { - stmt.execute(params![file_id, iss.rule_id, iss.severity, iss.line, iss.col])?; + let id: i64 = self.c().query_row( + "SELECT id FROM files WHERE project = ?1 AND path = ?2", + params![self.project, path.to_string_lossy()], + |r| r.get(0), + )?; + Ok(id) } - } - tx.commit()?; - Ok(()) - } - /// Gets the issues for a specific file so we don't have to rescan - pub fn get_issues_from_file( - &self, - path: &Path, - ) -> NyxResult> { - let file_id: i64 = self.c().query_row( - "SELECT id FROM files WHERE project = ?1 AND path = ?2", - params![self.project, path.to_string_lossy()], - |r| r.get(0), - )?; - - let mut stmt = self.c().prepare( - "SELECT rule_id, severity, line, col + /// Replace all issues for `file_id` with the supplied set. + pub fn replace_issues<'a>( + &mut self, + file_id: i64, + issues: impl IntoIterator>, + ) -> NyxResult<()> { + let tx = self.conn.transaction()?; + tx.execute("DELETE FROM issues WHERE file_id = ?", params![file_id])?; + + { + let mut stmt = tx.prepare( + "INSERT INTO issues (file_id, rule_id, severity, line, col) + VALUES (?1, ?2, ?3, ?4, ?5)", + )?; + for iss in issues { + stmt.execute(params![ + file_id, + iss.rule_id, + iss.severity, + iss.line, + iss.col + ])?; + } + } + tx.commit()?; + Ok(()) + } + + /// Gets the issues for a specific file so we don't have to rescan + pub fn get_issues_from_file(&self, path: &Path) -> NyxResult> { + let file_id: i64 = self.c().query_row( + "SELECT id FROM files WHERE project = ?1 AND path = ?2", + params![self.project, path.to_string_lossy()], + |r| r.get(0), + )?; + + let mut stmt = self.c().prepare( + "SELECT rule_id, severity, line, col FROM issues WHERE file_id = ?1", - )?; + )?; - let issue_iter = stmt.query_map([file_id], |row| { - let sev_str: String = row.get(1)?; - Ok(Diag { - path: path.to_string_lossy().to_string(), - id: row.get::<_, String>(0)?, // rule_id - line: row.get::<_, i64>(2)? as usize, - col: row.get::<_, i64>(3)? as usize, - severity: Severity::from_str(&sev_str).unwrap(), - }) - })?; + let issue_iter = stmt.query_map([file_id], |row| { + let sev_str: String = row.get(1)?; + Ok(Diag { + path: path.to_string_lossy().to_string(), + id: row.get::<_, String>(0)?, // rule_id + line: row.get::<_, i64>(2)? as usize, + col: row.get::<_, i64>(3)? as usize, + severity: Severity::from_str(&sev_str).unwrap(), + }) + })?; - Ok(issue_iter.filter_map(Result::ok).collect()) - } - - /// gets files from the database - pub fn get_files(&self, project: &str) -> NyxResult> { - let mut stmt = self.c().prepare( - "SELECT path + Ok(issue_iter.filter_map(Result::ok).collect()) + } + + /// gets files from the database + pub fn get_files(&self, project: &str) -> NyxResult> { + let mut stmt = self.c().prepare( + "SELECT path FROM files WHERE project = ?1", - )?; + )?; - let file_iter = stmt.query_map([project], |row| row.get::<_, String>(0))?; - - Ok(file_iter.map(|p| p.map(PathBuf::from)).collect::>()?) - } + let file_iter = stmt.query_map([project], |row| row.get::<_, String>(0))?; - // ------------------------------------------------------------------------- - // Maintenance utilities - // ------------------------------------------------------------------------- - pub fn clear(&self) -> NyxResult<()> { - self.c().execute_batch( - r#" + Ok(file_iter + .map(|p| p.map(PathBuf::from)) + .collect::>()?) + } + + // ------------------------------------------------------------------------- + // Maintenance utilities + // ------------------------------------------------------------------------- + pub fn clear(&self) -> NyxResult<()> { + self.c().execute_batch( + r#" PRAGMA foreign_keys = OFF; DROP TABLE IF EXISTS issues; @@ -205,25 +218,25 @@ pub mod index { PRAGMA foreign_keys = ON; VACUUM; "#, - )?; + )?; - self.c().execute_batch(SCHEMA)?; - Ok(()) - } - - pub fn vacuum(&self) -> NyxResult<()> { - self.c().execute("VACUUM;", [])?; - Ok(()) - } + self.c().execute_batch(SCHEMA)?; + Ok(()) + } - // ------------------------------------------------------------------------- - // Helpers - // ------------------------------------------------------------------------- - fn digest_file(path: &Path) -> NyxResult> { - let mut hasher = blake3::Hasher::new(); - let mut file = fs::File::open(path)?; - std::io::copy(&mut file, &mut hasher)?; - Ok(hasher.finalize().as_bytes().to_vec()) + pub fn vacuum(&self) -> NyxResult<()> { + self.c().execute("VACUUM;", [])?; + Ok(()) + } + + // ------------------------------------------------------------------------- + // Helpers + // ------------------------------------------------------------------------- + fn digest_file(path: &Path) -> NyxResult> { + let mut hasher = blake3::Hasher::new(); + let mut file = fs::File::open(path)?; + std::io::copy(&mut file, &mut hasher)?; + Ok(hasher.finalize().as_bytes().to_vec()) + } } - } } diff --git a/src/errors.rs b/src/errors.rs index dc221ba4..ce81de03 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,60 +1,60 @@ +use serde::de::StdError; use std::fmt; use std::sync::PoisonError; -use serde::de::StdError; use thiserror::Error; pub type NyxResult = Result; #[derive(Debug, Error)] pub enum NyxError { - #[error("I/O error: {0}")] - Io(#[from] std::io::Error), + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), - #[error("SQLite error: {0}")] - Sql(#[from] rusqlite::Error), + #[error("SQLite error: {0}")] + Sql(#[from] rusqlite::Error), - #[error("tree-sitter error: {0}")] - TreeSitter(#[from] tree_sitter::LanguageError), + #[error("tree-sitter error: {0}")] + TreeSitter(#[from] tree_sitter::LanguageError), - #[error("connection-pool error: {0}")] - Pool(#[from] r2d2::Error), + #[error("connection-pool error: {0}")] + Pool(#[from] r2d2::Error), - #[error("time error: {0}")] - Time(#[from] std::time::SystemTimeError), + #[error("time error: {0}")] + Time(#[from] std::time::SystemTimeError), - #[error("poisoned lock: {0}")] - Poison(String), - - #[error(transparent)] - Other(#[from] Box), - - #[error("{0}")] - Msg(String), + #[error("poisoned lock: {0}")] + Poison(String), + + #[error(transparent)] + Other(#[from] Box), + + #[error("{0}")] + Msg(String), } impl From> for NyxError where - T: fmt::Debug, + T: fmt::Debug, { - fn from(err: PoisonError) -> Self { - NyxError::Poison(err.to_string()) - } + fn from(err: PoisonError) -> Self { + NyxError::Poison(err.to_string()) + } } impl From<&str> for NyxError { - fn from(s: &str) -> Self { - NyxError::Msg(s.to_owned()) - } + fn from(s: &str) -> Self { + NyxError::Msg(s.to_owned()) + } } impl From for NyxError { - fn from(s: String) -> Self { - NyxError::Msg(s) - } + fn from(s: String) -> Self { + NyxError::Msg(s) + } } impl From> for NyxError { - fn from(err: Box) -> Self { - NyxError::Msg(err.to_string()) - } + fn from(err: Box) -> Self { + NyxError::Msg(err.to_string()) + } } diff --git a/src/main.rs b/src/main.rs index 1177a775..d1871a55 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,43 +1,43 @@ +mod ast; mod cli; mod commands; +mod database; +mod errors; +mod patterns; mod utils; mod walk; -mod database; -mod patterns; -mod errors; -mod ast; +use crate::errors::NyxResult; use crate::utils::Config; -use cli::Cli; use clap::Parser; +use cli::Cli; +use console::style; use directories::ProjectDirs; use std::fs; use std::time::Instant; -use console::style; -use tracing_subscriber::{fmt, EnvFilter, Registry}; -use tracing_subscriber::prelude::*; use tracing_subscriber::fmt::time; -use crate::errors::NyxResult; +use tracing_subscriber::prelude::*; +use tracing_subscriber::{EnvFilter, Registry, fmt}; // use tracing_appender::rolling::{RollingFileAppender, Rotation}; // use tracing_appender::non_blocking; fn init_tracing() { // let file_appender = RollingFileAppender::new(Rotation::HOURLY, "logs", "nyx-scanner.log"); // let (file_writer, guard) = non_blocking(file_appender); - + let fmt_layer = fmt::layer() - .pretty() - .with_thread_ids(true) + .pretty() + .with_thread_ids(true) .with_timer(time::UtcTime::rfc_3339()); - + // let file_layer = fmt::layer() - // .with_writer(file_writer) - // .without_time() + // .with_writer(file_writer) + // .without_time() // .json(); Registry::default() - .with(EnvFilter::from_default_env()) - .with(fmt_layer) + .with(EnvFilter::from_default_env()) + .with(fmt_layer) .init(); } @@ -68,4 +68,3 @@ fn main() -> NyxResult<()> { ); Ok(()) } - diff --git a/src/patterns/c.rs b/src/patterns/c.rs index e3ef156c..4ee38477 100644 --- a/src/patterns/c.rs +++ b/src/patterns/c.rs @@ -1,40 +1,40 @@ use crate::patterns::{Pattern, Severity}; pub const PATTERNS: &[Pattern] = &[ - Pattern { - id: "strcpy_call", - description: "strcpy() usage", - query: "(call_expression function: (identifier) @id (#eq? @id \"strcpy\")) @vuln", - severity: Severity::High, - }, - Pattern { - id: "strcat_call", - description: "strcat() usage", - query: "(call_expression function: (identifier) @id (#eq? @id \"strcat\")) @vuln", - severity: Severity::High, - }, - Pattern { - id: "sprintf_call", - description: "sprintf() (no length limit)", - query: "(call_expression function: (identifier) @id (#eq? @id \"sprintf\")) @vuln", - severity: Severity::High, - }, - Pattern { - id: "gets_call", - description: "gets() usage", - query: "(call_expression function: (identifier) @id (#eq? @id \"gets\")) @vuln", - severity: Severity::High, - }, - Pattern { - id: "scanf_with_percent_s", - description: "scanf(\"%s\") without length specifier", - query: "(call_expression function: (identifier) @id (#eq? @id \"scanf\") arguments: (argument_list (string_literal) @fmt (#match? @fmt \".*%s.*\"))) @vuln", - severity: Severity::High, - }, - Pattern { - id: "system_call", - description: "system() shell execution", - query: "(call_expression function: (identifier) @id (#eq? @id \"system\")) @vuln", - severity: Severity::Medium, - }, + Pattern { + id: "strcpy_call", + description: "strcpy() usage", + query: "(call_expression function: (identifier) @id (#eq? @id \"strcpy\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "strcat_call", + description: "strcat() usage", + query: "(call_expression function: (identifier) @id (#eq? @id \"strcat\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "sprintf_call", + description: "sprintf() (no length limit)", + query: "(call_expression function: (identifier) @id (#eq? @id \"sprintf\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "gets_call", + description: "gets() usage", + query: "(call_expression function: (identifier) @id (#eq? @id \"gets\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "scanf_with_percent_s", + description: "scanf(\"%s\") without length specifier", + query: "(call_expression function: (identifier) @id (#eq? @id \"scanf\") arguments: (argument_list (string_literal) @fmt (#match? @fmt \".*%s.*\"))) @vuln", + severity: Severity::High, + }, + Pattern { + id: "system_call", + description: "system() shell execution", + query: "(call_expression function: (identifier) @id (#eq? @id \"system\")) @vuln", + severity: Severity::Medium, + }, ]; diff --git a/src/patterns/cpp.rs b/src/patterns/cpp.rs index e67196a5..85ed7f60 100644 --- a/src/patterns/cpp.rs +++ b/src/patterns/cpp.rs @@ -1,40 +1,40 @@ use crate::patterns::{Pattern, Severity}; pub const PATTERNS: &[Pattern] = &[ - Pattern { - id: "strcpy_call", - description: "strcpy() usage", - query: "(call_expression function: (identifier) @id (#eq? @id \"strcpy\")) @vuln", - severity: Severity::High, - }, - Pattern { - id: "strcat_call", - description: "strcat() usage", - query: "(call_expression function: (identifier) @id (#eq? @id \"strcat\")) @vuln", - severity: Severity::High, - }, - Pattern { - id: "sprintf_call", - description: "sprintf() (no length limit)", - query: "(call_expression function: (identifier) @id (#eq? @id \"sprintf\")) @vuln", - severity: Severity::High, - }, - Pattern { - id: "gets_call", - description: "gets() usage", - query: "(call_expression function: (identifier) @id (#eq? @id \"gets\")) @vuln", - severity: Severity::High, - }, - Pattern { - id: "system_call", - description: "system() shell execution", - query: "(call_expression function: (identifier) @id (#eq? @id \"system\")) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "reinterpret_cast", - description: "reinterpret_cast usage", - query: "(reinterpret_cast_expression) @vuln", - severity: Severity::Medium, - }, + Pattern { + id: "strcpy_call", + description: "strcpy() usage", + query: "(call_expression function: (identifier) @id (#eq? @id \"strcpy\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "strcat_call", + description: "strcat() usage", + query: "(call_expression function: (identifier) @id (#eq? @id \"strcat\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "sprintf_call", + description: "sprintf() (no length limit)", + query: "(call_expression function: (identifier) @id (#eq? @id \"sprintf\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "gets_call", + description: "gets() usage", + query: "(call_expression function: (identifier) @id (#eq? @id \"gets\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "system_call", + description: "system() shell execution", + query: "(call_expression function: (identifier) @id (#eq? @id \"system\")) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "reinterpret_cast", + description: "reinterpret_cast usage", + query: "(reinterpret_cast_expression) @vuln", + severity: Severity::Medium, + }, ]; diff --git a/src/patterns/go.rs b/src/patterns/go.rs index c4d2efb5..2da7f831 100644 --- a/src/patterns/go.rs +++ b/src/patterns/go.rs @@ -1,34 +1,34 @@ use crate::patterns::{Pattern, Severity}; pub const PATTERNS: &[Pattern] = &[ - Pattern { - id: "exec_command", - description: "os/exec Command construction", - query: "(call_expression function: (selector_expression field: (field_identifier) @f (#eq? @f \"Command\"))) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "http_insecure_tls", - description: "&http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}", - query: "(composite_literal type: (selector_expression field: (field_identifier) @t (#eq? @t \"Transport\")) body: (literal_value (keyed_element key: (identifier) @k (#eq? @k \"TLSClientConfig\") value: (composite_literal body: (literal_value (keyed_element key: (identifier) @ik (#eq? @ik \"InsecureSkipVerify\") value: (true)))))) @vuln", - severity: Severity::High, - }, - Pattern { - id: "unsafe_pointer", - description: "Use of unsafe.Pointer", - query: "(qualified_type type: (selector_expression field: (field_identifier) @f (#eq? @f \"Pointer\"))) @vuln", - severity: Severity::High, - }, - Pattern { - id: "md5_sha1", - description: "crypto/md5 or crypto/sha1 usage", - query: "(call_expression function: (selector_expression object: (identifier) @pkg (#match? @pkg \"md5|sha1\"))) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "hardcoded_secret", - description: "Hard-coded string that looks like an API key/token", - query: "(interpreted_string_literal) @s (#match? @s \"(?i)(api|secret|token|password)[=:]?[ \\t]*[A-Za-z0-9_\\-]{8,}\")", - severity: Severity::Low, - }, + Pattern { + id: "exec_command", + description: "os/exec Command construction", + query: "(call_expression function: (selector_expression field: (field_identifier) @f (#eq? @f \"Command\"))) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "http_insecure_tls", + description: "&http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}", + query: "(composite_literal type: (selector_expression field: (field_identifier) @t (#eq? @t \"Transport\")) body: (literal_value (keyed_element key: (identifier) @k (#eq? @k \"TLSClientConfig\") value: (composite_literal body: (literal_value (keyed_element key: (identifier) @ik (#eq? @ik \"InsecureSkipVerify\") value: (true)))))) @vuln", + severity: Severity::High, + }, + Pattern { + id: "unsafe_pointer", + description: "Use of unsafe.Pointer", + query: "(qualified_type type: (selector_expression field: (field_identifier) @f (#eq? @f \"Pointer\"))) @vuln", + severity: Severity::High, + }, + Pattern { + id: "md5_sha1", + description: "crypto/md5 or crypto/sha1 usage", + query: "(call_expression function: (selector_expression object: (identifier) @pkg (#match? @pkg \"md5|sha1\"))) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "hardcoded_secret", + description: "Hard-coded string that looks like an API key/token", + query: "(interpreted_string_literal) @s (#match? @s \"(?i)(api|secret|token|password)[=:]?[ \\t]*[A-Za-z0-9_\\-]{8,}\")", + severity: Severity::Low, + }, ]; diff --git a/src/patterns/java.rs b/src/patterns/java.rs index cebdcf69..d6fb3451 100644 --- a/src/patterns/java.rs +++ b/src/patterns/java.rs @@ -1,40 +1,40 @@ use crate::patterns::{Pattern, Severity}; pub const PATTERNS: &[Pattern] = &[ - Pattern { - id: "runtime_exec", - description: "Runtime.getRuntime().exec(...) – arbitrary-command execution", - query: "(method_invocation object: (method_invocation name: (identifier) @n (#eq? @n \"getRuntime\")) name: (identifier) @id (#eq? @id \"exec\")) @vuln", - severity: Severity::High, - }, - Pattern { - id: "class_for_name", - description: "Dynamic reflection via Class.forName(...)", - query: "(method_invocation object: (identifier) @c (#eq? @c \"Class\") name: (identifier) @id (#eq? @id \"forName\")) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "object_deserialization", - description: "java.io.ObjectInputStream#readObject() deserialization", - query: "(method_invocation object: (identifier) @o (#eq? @o \"ObjectInputStream\") name: (identifier) @id (#eq? @id \"readObject\")) @vuln", - severity: Severity::High, - }, - Pattern { - id: "insecure_random", - description: "java.util.Random used where SecureRandom is expected", - query: "(object_creation_expression type: (identifier) @t (#eq? @t \"Random\")) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "thread_stop", - description: "Deprecated Thread.stop() invocation", - query: "(method_invocation name: (identifier) @id (#eq? @id \"stop\") object: (identifier) @obj (#eq? @obj \"Thread\")) @vuln", - severity: Severity::Low, - }, - Pattern { - id: "sql_concat", - description: "SQL built with string concatenation", - query: "(method_invocation name: (identifier) @id (#match? @id \"execute(Query|Update)?\") arguments: (argument_list (binary_expression) @concat)) @vuln", - severity: Severity::Medium, - }, + Pattern { + id: "runtime_exec", + description: "Runtime.getRuntime().exec(...) – arbitrary-command execution", + query: "(method_invocation object: (method_invocation name: (identifier) @n (#eq? @n \"getRuntime\")) name: (identifier) @id (#eq? @id \"exec\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "class_for_name", + description: "Dynamic reflection via Class.forName(...)", + query: "(method_invocation object: (identifier) @c (#eq? @c \"Class\") name: (identifier) @id (#eq? @id \"forName\")) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "object_deserialization", + description: "java.io.ObjectInputStream#readObject() deserialization", + query: "(method_invocation object: (identifier) @o (#eq? @o \"ObjectInputStream\") name: (identifier) @id (#eq? @id \"readObject\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "insecure_random", + description: "java.util.Random used where SecureRandom is expected", + query: "(object_creation_expression type: (identifier) @t (#eq? @t \"Random\")) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "thread_stop", + description: "Deprecated Thread.stop() invocation", + query: "(method_invocation name: (identifier) @id (#eq? @id \"stop\") object: (identifier) @obj (#eq? @obj \"Thread\")) @vuln", + severity: Severity::Low, + }, + Pattern { + id: "sql_concat", + description: "SQL built with string concatenation", + query: "(method_invocation name: (identifier) @id (#match? @id \"execute(Query|Update)?\") arguments: (argument_list (binary_expression) @concat)) @vuln", + severity: Severity::Medium, + }, ]; diff --git a/src/patterns/javascript.rs b/src/patterns/javascript.rs index 5f868a66..eb5fe47d 100644 --- a/src/patterns/javascript.rs +++ b/src/patterns/javascript.rs @@ -1,94 +1,94 @@ use crate::patterns::{Pattern, Severity}; pub const PATTERNS: &[Pattern] = &[ - Pattern { - id: "eval_call", - description: "Use of eval()", - query: "(call_expression function: (identifier) @id (#eq? @id \"eval\")) @vuln", - severity: Severity::High, - }, - Pattern { - id: "new_function", - description: "new Function() constructor", - query: "(new_expression constructor: (identifier) @id (#eq? @id \"Function\")) @vuln", - severity: Severity::High, - }, - Pattern { - id: "document_write", - description: "document.write() call", - query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"write\"))) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "inner_html_assignment", - description: "Assignment to element.innerHTML", - query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"innerHTML\"))) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "settimeout_string", - description: "setTimeout / setInterval with a string argument", - query: "(call_expression function: (identifier) @id (#match? @id \"setTimeout|setInterval\") arguments: (arguments (string) @code . _)) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "json_parse", - description: "JSON.parse on dynamic string", - query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"JSON\") property: (property_identifier) @prop (#eq? @prop \"parse\"))) @vuln", - severity: Severity::Low, - }, - Pattern { - id: "outer_html_assignment", - description: "Assignment to element.outerHTML", - query: "(assignment_expression + Pattern { + id: "eval_call", + description: "Use of eval()", + query: "(call_expression function: (identifier) @id (#eq? @id \"eval\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "new_function", + description: "new Function() constructor", + query: "(new_expression constructor: (identifier) @id (#eq? @id \"Function\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "document_write", + description: "document.write() call", + query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"write\"))) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "inner_html_assignment", + description: "Assignment to element.innerHTML", + query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"innerHTML\"))) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "settimeout_string", + description: "setTimeout / setInterval with a string argument", + query: "(call_expression function: (identifier) @id (#match? @id \"setTimeout|setInterval\") arguments: (arguments (string) @code . _)) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "json_parse", + description: "JSON.parse on dynamic string", + query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"JSON\") property: (property_identifier) @prop (#eq? @prop \"parse\"))) @vuln", + severity: Severity::Low, + }, + Pattern { + id: "outer_html_assignment", + description: "Assignment to element.outerHTML", + query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"outerHTML\"))) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "insert_adjacent_html", - description: "insertAdjacentHTML() call", - query: "(call_expression + severity: Severity::Medium, + }, + Pattern { + id: "insert_adjacent_html", + description: "insertAdjacentHTML() call", + query: "(call_expression function: (member_expression property: (property_identifier) @prop (#eq? @prop \"insertAdjacentHTML\"))) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "location_href_assignment", - description: "Assignment to window.location / location.href", - query: "(assignment_expression + severity: Severity::Medium, + }, + Pattern { + id: "location_href_assignment", + description: "Assignment to window.location / location.href", + query: "(assignment_expression left: (member_expression object: (identifier)? @obj property: (property_identifier) @prop (#match? @prop \"location|href\"))) @vuln", - severity: Severity::High, - }, - Pattern { - id: "cookie_assignment", - description: "Write to document.cookie", - query: "(assignment_expression + severity: Severity::High, + }, + Pattern { + id: "cookie_assignment", + description: "Write to document.cookie", + query: "(assignment_expression left: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"cookie\"))) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "proto_pollution", - description: "Assignment to __proto__ (prototype pollution)", - query: "(assignment_expression + severity: Severity::Medium, + }, + Pattern { + id: "proto_pollution", + description: "Assignment to __proto__ (prototype pollution)", + query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"__proto__\"))) @vuln", - severity: Severity::High, - }, - Pattern { - id: "weak_hash_md5", - description: "crypto.createHash(\"md5\")", - query: "(call_expression + severity: Severity::High, + }, + Pattern { + id: "weak_hash_md5", + description: "crypto.createHash(\"md5\")", + query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"crypto\") @@ -97,26 +97,26 @@ pub const PATTERNS: &[Pattern] = &[ arguments: (arguments (string) @alg (#eq? @alg \"md5\"))) @vuln", - severity: Severity::Low, - }, - Pattern { - id: "regexp_constructor_string", - description: "new RegExp() with a dynamic string", - query: "(new_expression + severity: Severity::Low, + }, + Pattern { + id: "regexp_constructor_string", + description: "new RegExp() with a dynamic string", + query: "(new_expression constructor: (identifier) @id (#eq? @id \"RegExp\") arguments: (arguments (string) @pattern)) @vuln", - severity: Severity::Low, - }, - Pattern { - id: "dangerous_extend_builtin", - description: "Extending Object.prototype (may lead to collisions/pollution)", - query: "(assignment_expression + severity: Severity::Low, + }, + Pattern { + id: "dangerous_extend_builtin", + description: "Extending Object.prototype (may lead to collisions/pollution)", + query: "(assignment_expression left: (member_expression object: (identifier) @obj (#eq? @obj \"Object\") property: (property_identifier) @prop (#eq? @prop \"prototype\"))) @vuln", - severity: Severity::Medium, - }, + severity: Severity::Medium, + }, ]; diff --git a/src/patterns/mod.rs b/src/patterns/mod.rs index 06f67b80..55ce40bc 100644 --- a/src/patterns/mod.rs +++ b/src/patterns/mod.rs @@ -1,116 +1,115 @@ -pub mod rust; -pub mod typescript; -pub mod javascript; -pub mod cpp; pub mod c; -mod java; +pub mod cpp; mod go; +mod java; +pub mod javascript; mod php; mod python; mod ruby; +pub mod rust; +pub mod typescript; +use console::style; +use once_cell::sync::Lazy; +use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::fmt; use std::str::FromStr; -use console::style; -use serde::{Deserialize, Serialize}; -use once_cell::sync::Lazy; #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)] -pub enum Severity { High, Medium, Low } +pub enum Severity { + High, + Medium, + Low, +} impl fmt::Display for Severity { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let s = match *self { - Severity::High => style("HIGH").red().bold().to_string(), - Severity::Medium => style("MEDIUM").yellow().bold().to_string(), - Severity::Low => style("LOW").cyan().bold().to_string(), - }; - f.write_str(&s) - } + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match *self { + Severity::High => style("HIGH").red().bold().to_string(), + Severity::Medium => style("MEDIUM").yellow().bold().to_string(), + Severity::Low => style("LOW").cyan().bold().to_string(), + }; + f.write_str(&s) + } } impl Severity { - /// Textual value stored in SQLite. - pub fn as_db_str(self) -> &'static str { - match self { - Severity::High => "HIGH", - Severity::Medium => "MEDIUM", - Severity::Low => "LOW", + /// Textual value stored in SQLite. + pub fn as_db_str(self) -> &'static str { + match self { + Severity::High => "HIGH", + Severity::Medium => "MEDIUM", + Severity::Low => "LOW", + } } - } } -impl FromStr for Severity { // TODO: FIX - type Err = (); +impl FromStr for Severity { + // TODO: FIX + type Err = (); - fn from_str(input: &str) -> Result { - match input.to_lowercase().as_str() { - "medium" => Ok(Severity::Medium), - "high" => Ok(Severity::High), - _ => Ok(Severity::Low), + fn from_str(input: &str) -> Result { + match input.to_lowercase().as_str() { + "medium" => Ok(Severity::Medium), + "high" => Ok(Severity::High), + _ => Ok(Severity::Low), + } } - } } /// One AST pattern with a tree-sitter query and meta-data. #[derive(Debug, Clone, Serialize)] pub struct Pattern { - /// Unique identifier (snake-case preferred). - pub id: &'static str, - /// Human-readable explanation. - pub description: &'static str, - /// tree-sitter query string. - pub query: &'static str, - /// Rough severity bucket. - pub severity: Severity, + /// Unique identifier (snake-case preferred). + pub id: &'static str, + /// Human-readable explanation. + pub description: &'static str, + /// tree-sitter query string. + pub query: &'static str, + /// Rough severity bucket. + pub severity: Severity, } - - /// Global, lazily-initialised registry: lang-name → pattern slice static REGISTRY: Lazy> = Lazy::new(|| { - let mut m = HashMap::new(); + let mut m = HashMap::new(); - // ---- Rust ---- - m.insert("rust", rust::PATTERNS); + // ---- Rust ---- + m.insert("rust", rust::PATTERNS); - // ---- TypeScript ---- - m.insert("typescript", typescript::PATTERNS); - m.insert("ts", typescript::PATTERNS); - m.insert("tsx", typescript::PATTERNS); + // ---- TypeScript ---- + m.insert("typescript", typescript::PATTERNS); + m.insert("ts", typescript::PATTERNS); + m.insert("tsx", typescript::PATTERNS); - // ---- JavaScript ---- - m.insert("javascript", javascript::PATTERNS); - m.insert("js", javascript::PATTERNS); + // ---- JavaScript ---- + m.insert("javascript", javascript::PATTERNS); + m.insert("js", javascript::PATTERNS); - // ---- C & C++ ---- - m.insert("c", c::PATTERNS); - m.insert("cpp", cpp::PATTERNS); - m.insert("c++", cpp::PATTERNS); + // ---- C & C++ ---- + m.insert("c", c::PATTERNS); + m.insert("cpp", cpp::PATTERNS); + m.insert("c++", cpp::PATTERNS); - // ---- Other languages in the folder ---- - m.insert("java", java::PATTERNS); - m.insert("go", go::PATTERNS); - m.insert("php", php::PATTERNS); - m.insert("python", python::PATTERNS); - m.insert("py", python::PATTERNS); - m.insert("ruby", ruby::PATTERNS); - m.insert("rb", ruby::PATTERNS); + // ---- Other languages in the folder ---- + m.insert("java", java::PATTERNS); + m.insert("go", go::PATTERNS); + m.insert("php", php::PATTERNS); + m.insert("python", python::PATTERNS); + m.insert("py", python::PATTERNS); + m.insert("ruby", ruby::PATTERNS); + m.insert("rb", ruby::PATTERNS); - tracing::debug!("AST-pattern registry initialised ({} languages)", m.len()); - - m + tracing::debug!("AST-pattern registry initialised ({} languages)", m.len()); + + m }); /// Return all patterns for the requested language (case-insensitive). /// /// Unknown languages yield an **empty** `Vec`. pub fn load(lang: &str) -> Vec { - let key = lang.to_ascii_lowercase(); - REGISTRY - .get(key.as_str()) - .copied() - .unwrap_or(&[]) - .to_vec() -} \ No newline at end of file + let key = lang.to_ascii_lowercase(); + REGISTRY.get(key.as_str()).copied().unwrap_or(&[]).to_vec() +} diff --git a/src/patterns/php.rs b/src/patterns/php.rs index fec96d31..3cbe16af 100644 --- a/src/patterns/php.rs +++ b/src/patterns/php.rs @@ -1,40 +1,40 @@ use crate::patterns::{Pattern, Severity}; pub const PATTERNS: &[Pattern] = &[ - Pattern { - id: "eval_call", - description: "eval($code) execution", - query: "(function_call_expression function: (name) @n (#eq? @n \"eval\")) @vuln", - severity: Severity::High, - }, - Pattern { - id: "preg_replace_e", - description: "preg_replace with deprecated /e modifier", - query: "(function_call_expression function: (name) @n (#eq? @n \"preg_replace\") arguments: (arguments (string) @pat (#match? @pat \"/.*e.*$/\"))) @vuln", - severity: Severity::High, - }, - Pattern { - id: "create_function", - description: "create_function(...) anonymous eval-like", - query: "(function_call_expression function: (name) @n (#eq? @n \"create_function\")) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "unserialize_call", - description: "unserialize(...) on user input", - query: "(function_call_expression function: (name) @n (#eq? @n \"unserialize\")) @vuln", - severity: Severity::High, - }, - Pattern { - id: "mysql_query_concat", - description: "mysql_query with concatenated SQL", - query: "(function_call_expression function: (name) @n (#eq? @n \"mysql_query\") arguments: (arguments (binary_expression) @concat)) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "system_call", - description: "system()/shell_exec()/exec() command execution", - query: "(function_call_expression function: (name) @n (#match? @n \"system|shell_exec|exec|passthru\")) @vuln", - severity: Severity::Medium, - }, + Pattern { + id: "eval_call", + description: "eval($code) execution", + query: "(function_call_expression function: (name) @n (#eq? @n \"eval\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "preg_replace_e", + description: "preg_replace with deprecated /e modifier", + query: "(function_call_expression function: (name) @n (#eq? @n \"preg_replace\") arguments: (arguments (string) @pat (#match? @pat \"/.*e.*$/\"))) @vuln", + severity: Severity::High, + }, + Pattern { + id: "create_function", + description: "create_function(...) anonymous eval-like", + query: "(function_call_expression function: (name) @n (#eq? @n \"create_function\")) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "unserialize_call", + description: "unserialize(...) on user input", + query: "(function_call_expression function: (name) @n (#eq? @n \"unserialize\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "mysql_query_concat", + description: "mysql_query with concatenated SQL", + query: "(function_call_expression function: (name) @n (#eq? @n \"mysql_query\") arguments: (arguments (binary_expression) @concat)) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "system_call", + description: "system()/shell_exec()/exec() command execution", + query: "(function_call_expression function: (name) @n (#match? @n \"system|shell_exec|exec|passthru\")) @vuln", + severity: Severity::Medium, + }, ]; diff --git a/src/patterns/python.rs b/src/patterns/python.rs index 86b6ae29..884af560 100644 --- a/src/patterns/python.rs +++ b/src/patterns/python.rs @@ -1,22 +1,22 @@ use crate::patterns::{Pattern, Severity}; pub const PATTERNS: &[Pattern] = &[ - Pattern { - id: "eval_call", - description: "eval() on dynamic input", - query: "(call function: (identifier) @id (#eq? @id \"eval\")) @vuln", - severity: Severity::High, - }, - Pattern { - id: "exec_call", - description: "exec(...) execution of dynamic code", - query: "(call function: (identifier) @id (#eq? @id \"exec\")) @vuln", - severity: Severity::High, - }, - Pattern { - id: "subprocess_shell_true", - description: "subprocess.* with shell=True", - query: "(call function: (attribute object: (identifier) @pkg (#eq? @pkg \"subprocess\")) arguments: (argument_list . (keyword_argument name: (identifier) @k (#eq? @k \"shell\")) (true) @val)) @vuln", - severity: Severity::Medium, - } + Pattern { + id: "eval_call", + description: "eval() on dynamic input", + query: "(call function: (identifier) @id (#eq? @id \"eval\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "exec_call", + description: "exec(...) execution of dynamic code", + query: "(call function: (identifier) @id (#eq? @id \"exec\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "subprocess_shell_true", + description: "subprocess.* with shell=True", + query: "(call function: (attribute object: (identifier) @pkg (#eq? @pkg \"subprocess\")) arguments: (argument_list . (keyword_argument name: (identifier) @k (#eq? @k \"shell\")) (true) @val)) @vuln", + severity: Severity::Medium, + }, ]; diff --git a/src/patterns/ruby.rs b/src/patterns/ruby.rs index 9fc939a3..47e80a9f 100644 --- a/src/patterns/ruby.rs +++ b/src/patterns/ruby.rs @@ -1,45 +1,44 @@ use crate::patterns::{Pattern, Severity}; pub const PATTERNS: &[Pattern] = &[ - // ---------- Runtime code-execution primitives ---------- - Pattern { - id: "eval_call", - description: "Kernel#eval usage", - query: r#" + // ---------- Runtime code-execution primitives ---------- + Pattern { + id: "eval_call", + description: "Kernel#eval usage", + query: r#" (call (identifier) @id (#eq? @id "eval") ) @vuln "#, - severity: Severity::High, - }, - Pattern { - id: "instance_eval_call", - description: "Object#instance_eval usage", - query: r#" + severity: Severity::High, + }, + Pattern { + id: "instance_eval_call", + description: "Object#instance_eval usage", + query: r#" (call (identifier) @id (#eq? @id "instance_eval") ) @vuln "#, - severity: Severity::High, - }, - Pattern { - id: "class_eval_call", - description: "Module#class_eval / module_eval usage", - query: r#" + severity: Severity::High, + }, + Pattern { + id: "class_eval_call", + description: "Module#class_eval / module_eval usage", + query: r#" (call (identifier) @id (#match? @id "^(class_eval|module_eval)$") ) @vuln "#, - severity: Severity::High, - }, - - // ---------- Shell execution ---------- - Pattern { - id: "system_exec_interp", - description: "system/exec with string interpolation", - query: r#" + severity: Severity::High, + }, + // ---------- Shell execution ---------- + Pattern { + id: "system_exec_interp", + description: "system/exec with string interpolation", + query: r#" (call method: (identifier) @m (#match? @m "^(system|exec)$") @@ -50,21 +49,20 @@ pub const PATTERNS: &[Pattern] = &[ ) ) "#, - severity: Severity::High, - }, - Pattern { - id: "backtick_command", - description: "Back-tick shell execution", - // `uname -a` - query: r#"(shell_command) @vuln"#, - severity: Severity::High, - }, - - // ---------- Dangerous deserialisation ---------- - Pattern { - id: "yaml_load", - description: "YAML.load / Psych.load (arbitrary object deserialisation)", - query: r#" + severity: Severity::High, + }, + Pattern { + id: "backtick_command", + description: "Back-tick shell execution", + // `uname -a` + query: r#"(shell_command) @vuln"#, + severity: Severity::High, + }, + // ---------- Dangerous deserialisation ---------- + Pattern { + id: "yaml_load", + description: "YAML.load / Psych.load (arbitrary object deserialisation)", + query: r#" (call receiver: (constant) @recv (#match? @recv "^(YAML|Psych)$") @@ -72,12 +70,12 @@ pub const PATTERNS: &[Pattern] = &[ (#eq? @m "load") ) @vuln "#, - severity: Severity::High, - }, - Pattern { - id: "marshal_load", - description: "Marshal.load usage", - query: r#" + severity: Severity::High, + }, + Pattern { + id: "marshal_load", + description: "Marshal.load usage", + query: r#" (call receiver: (constant) @recv (#eq? @recv "Marshal") @@ -85,14 +83,13 @@ pub const PATTERNS: &[Pattern] = &[ (#eq? @m "load") ) @vuln "#, - severity: Severity::High, - }, - - // ---------- Reflection / meta-programming ---------- - Pattern { - id: "send_dynamic", - description: "send() with dynamic first argument (not a literal symbol)", - query: r#" + severity: Severity::High, + }, + // ---------- Reflection / meta-programming ---------- + Pattern { + id: "send_dynamic", + description: "send() with dynamic first argument (not a literal symbol)", + query: r#" (call method: (identifier) @m (#eq? @m "send") @@ -104,25 +101,24 @@ pub const PATTERNS: &[Pattern] = &[ ) ) "#, - severity: Severity::Medium, - }, - Pattern { - id: "constantize_call", - description: "ActiveSupport constantize / safe_constantize on tainted data", - query: r#" + severity: Severity::Medium, + }, + Pattern { + id: "constantize_call", + description: "ActiveSupport constantize / safe_constantize on tainted data", + query: r#" (call method: (identifier) @m (#match? @m "^(constantize|safe_constantize)$") ) @vuln "#, - severity: Severity::Medium, - }, - - // ---------- Insecure resource access ---------- - Pattern { - id: "open_uri_http", - description: "Kernel#open with HTTP(S) URL (open-uri auto-follow)", - query: r#" + severity: Severity::Medium, + }, + // ---------- Insecure resource access ---------- + Pattern { + id: "open_uri_http", + description: "Kernel#open with HTTP(S) URL (open-uri auto-follow)", + query: r#" (call method: (identifier) @m (#eq? @m "open") @@ -132,6 +128,6 @@ pub const PATTERNS: &[Pattern] = &[ ) ) @vuln "#, - severity: Severity::Medium, - }, + severity: Severity::Medium, + }, ]; diff --git a/src/patterns/rust.rs b/src/patterns/rust.rs index f57f6e0c..3ef4a3db 100644 --- a/src/patterns/rust.rs +++ b/src/patterns/rust.rs @@ -1,118 +1,118 @@ use crate::patterns::{Pattern, Severity}; pub const PATTERNS: &[Pattern] = &[ - Pattern { - id: "unsafe_block", - description: "Use of an `unsafe` block", - query: "(unsafe_block) @vuln", - severity: Severity::High, - }, - Pattern { - id: "unsafe_fn", - description: "`unsafe fn` declaration", - query: "(function_item + Pattern { + id: "unsafe_block", + description: "Use of an `unsafe` block", + query: "(unsafe_block) @vuln", + severity: Severity::High, + }, + Pattern { + id: "unsafe_fn", + description: "`unsafe fn` declaration", + query: "(function_item (function_modifiers) @mods (#match? @mods \"^unsafe\\b\")) @vuln", - severity: Severity::High, - }, - Pattern { - id: "transmute_call", - description: "`std::mem::transmute` call", - query: "(call_expression + severity: Severity::High, + }, + Pattern { + id: "transmute_call", + description: "`std::mem::transmute` call", + query: "(call_expression function: (scoped_identifier path: (identifier) @p (#eq? @p \"mem\") name: (identifier) @f (#eq? @f \"transmute\"))) @vuln", - severity: Severity::High, - }, - Pattern { - id: "copy_nonoverlapping", - description: "Raw pointer `copy_nonoverlapping`", - query: "(call_expression + severity: Severity::High, + }, + Pattern { + id: "copy_nonoverlapping", + description: "Raw pointer `copy_nonoverlapping`", + query: "(call_expression function: (scoped_identifier path: (identifier) @p (#eq? @p \"ptr\") name: (identifier) @f (#eq? @f \"copy_nonoverlapping\"))) @vuln", - severity: Severity::High, - }, - Pattern { - id: "get_unchecked", - description: "`get_unchecked` / `get_unchecked_mut` slice access", - query: "(call_expression + severity: Severity::High, + }, + Pattern { + id: "get_unchecked", + description: "`get_unchecked` / `get_unchecked_mut` slice access", + query: "(call_expression function: (field_expression field: (field_identifier) @m (#match? @m \"get_unchecked(_mut)?\"))) @vuln", - severity: Severity::High, - }, - Pattern { - id: "unwrap_call", - description: "`.unwrap()` call (may panic)", - query: "(call_expression + severity: Severity::High, + }, + Pattern { + id: "unwrap_call", + description: "`.unwrap()` call (may panic)", + query: "(call_expression function: (field_expression field: (field_identifier) @name (#eq? @name \"unwrap\"))) ; exact match @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "expect_call", - description: "`.expect()` call (may panic)", - query: "(call_expression + severity: Severity::Medium, + }, + Pattern { + id: "expect_call", + description: "`.expect()` call (may panic)", + query: "(call_expression function: (field_expression field: (field_identifier) @name (#eq? @name \"expect\"))) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "panic_macro", - description: "`panic!` macro invocation", - query: "(macro_invocation (identifier) @id (#eq? @id \"panic\")) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "todo_or_unimplemented", - description: "`todo!()` / `unimplemented!()` placeholder", - query: "(macro_invocation + severity: Severity::Medium, + }, + Pattern { + id: "panic_macro", + description: "`panic!` macro invocation", + query: "(macro_invocation (identifier) @id (#eq? @id \"panic\")) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "todo_or_unimplemented", + description: "`todo!()` / `unimplemented!()` placeholder", + query: "(macro_invocation (identifier) @id (#match? @id \"todo|unimplemented\")) @vuln", - severity: Severity::Low, - }, - Pattern { - id: "narrow_cast_with_as", - description: "`as` cast to an 8-/16-bit integer (possible truncation)", - query: "(type_cast_expression + severity: Severity::Low, + }, + Pattern { + id: "narrow_cast_with_as", + description: "`as` cast to an 8-/16-bit integer (possible truncation)", + query: "(type_cast_expression type: (primitive_type) @to (#match? @to \"^u?i(8|16)$\")) @vuln", - severity: Severity::Low, - }, - Pattern { - id: "mem_zeroed", - description: "`std::mem::zeroed()`", - query: "(call_expression function:(scoped_identifier path:(identifier)@p (#eq? @p \"mem\") name:(identifier)@n (#eq? @n \"zeroed\")))@vuln", - severity: Severity::High - }, - Pattern { - id: "mem_forget", - description: "`std::mem::forget()`", - query: "(call_expression function:(scoped_identifier path:(identifier)@p (#eq? @p \"mem\") name:(identifier)@n (#eq? @n \"forget\")))@vuln", - severity: Severity::Medium - }, - Pattern { - id: "ptr_read", - description: "`ptr::read_*` raw-ptr read", - query: "(call_expression function:(scoped_identifier path:(identifier)@p (#eq? @p \"ptr\") name:(identifier)@n (#match? @n \"read(_volatile)?\")))@vuln", - severity: Severity::High - }, - Pattern { - id: "arc_unwrap", - description: "`Arc::unwrap_or_else_unchecked`", - query: "(call_expression function:(scoped_identifier name:(identifier)@n (#eq? @n \"unwrap_or_else_unchecked\")))@vuln", - severity: Severity::High - }, - Pattern { - id: "dbg_macro", - description: "`dbg!()` left in code", - query: "(macro_invocation (identifier)@id (#eq? @id \"dbg\"))@vuln", - severity: Severity::Low - }, + severity: Severity::Low, + }, + Pattern { + id: "mem_zeroed", + description: "`std::mem::zeroed()`", + query: "(call_expression function:(scoped_identifier path:(identifier)@p (#eq? @p \"mem\") name:(identifier)@n (#eq? @n \"zeroed\")))@vuln", + severity: Severity::High, + }, + Pattern { + id: "mem_forget", + description: "`std::mem::forget()`", + query: "(call_expression function:(scoped_identifier path:(identifier)@p (#eq? @p \"mem\") name:(identifier)@n (#eq? @n \"forget\")))@vuln", + severity: Severity::Medium, + }, + Pattern { + id: "ptr_read", + description: "`ptr::read_*` raw-ptr read", + query: "(call_expression function:(scoped_identifier path:(identifier)@p (#eq? @p \"ptr\") name:(identifier)@n (#match? @n \"read(_volatile)?\")))@vuln", + severity: Severity::High, + }, + Pattern { + id: "arc_unwrap", + description: "`Arc::unwrap_or_else_unchecked`", + query: "(call_expression function:(scoped_identifier name:(identifier)@n (#eq? @n \"unwrap_or_else_unchecked\")))@vuln", + severity: Severity::High, + }, + Pattern { + id: "dbg_macro", + description: "`dbg!()` left in code", + query: "(macro_invocation (identifier)@id (#eq? @id \"dbg\"))@vuln", + severity: Severity::Low, + }, ]; diff --git a/src/patterns/typescript.rs b/src/patterns/typescript.rs index 83647e5c..0aac1b1d 100644 --- a/src/patterns/typescript.rs +++ b/src/patterns/typescript.rs @@ -1,106 +1,106 @@ use crate::patterns::{Pattern, Severity}; pub const PATTERNS: &[Pattern] = &[ - Pattern { - id: "eval_call", - description: "Use of eval()", - query: "(call_expression function: (identifier) @id (#eq? @id \"eval\")) @vuln", - severity: Severity::High, - }, - Pattern { - id: "new_function", - description: "new Function() constructor", - query: "(new_expression constructor: (identifier) @id (#eq? @id \"Function\")) @vuln", - severity: Severity::High, - }, - Pattern { - id: "document_write", - description: "document.write() call", - query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"write\"))) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "inner_html_assignment", - description: "Assignment to element.innerHTML", - query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"innerHTML\"))) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "settimeout_string", - description: "setTimeout / setInterval with a string argument", - query: "(call_expression function: (identifier) @id (#match? @id \"setTimeout|setInterval\") arguments: (arguments (string) @code . _)) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "any_type", - description: "Type annotation of `any`", - query: "(type_annotation (predefined_type) @t (#eq? @t \"any\")) @vuln", - severity: Severity::Low, - }, - Pattern { - id: "json_parse", - description: "JSON.parse on dynamic string", - query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"JSON\") property: (property_identifier) @prop (#eq? @prop \"parse\"))) @vuln", - severity: Severity::Low, - }, - Pattern { - id: "as_any_assertion", - description: "Type assertion to `any` using `as any`", - query: "(as_expression type: (predefined_type) @t (#eq? @t \"any\")) @vuln", - severity: Severity::Low, - }, - Pattern { - id: "type_assertion_any", - description: "Type assertion to `any` using `` syntax", - query: "(type_assertion type: (predefined_type) @t (#eq? @t \"any\")) @vuln", - severity: Severity::Low, - }, - Pattern { - id: "outer_html_assignment", - description: "Assignment to element.outerHTML", - query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"outerHTML\"))) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "insert_adjacent_html", - description: "insertAdjacentHTML() call", - query: "(call_expression function: (member_expression property: (property_identifier) @prop (#eq? @prop \"insertAdjacentHTML\"))) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "document_cookie_write", - description: "Write to document.cookie", - query: "(assignment_expression left: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"cookie\"))) @vuln", - severity: Severity::Low, - }, - Pattern { - id: "onclick_setattribute", - description: "Element.setAttribute('onclick', …)", - query: "(call_expression function: (member_expression property: (property_identifier) @prop (#eq? @prop \"setAttribute\")) arguments: (arguments (string) @name (#eq? @name \"\\\"onclick\\\"\") . (string) @handler)) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "math_random_call", - description: "Use of Math.random() for security-sensitive randomness", - query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"Math\") property: (property_identifier) @prop (#eq? @prop \"random\"))) @vuln", - severity: Severity::Low, - }, - Pattern { - id: "crypto_createhash_md5", - description: "Insecure hash algorithm: crypto.createHash('md5')", - query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"crypto\") property: (property_identifier) @prop (#eq? @prop \"createHash\")) arguments: (arguments (string) @alg (#match? @alg \"(?i)\\\"md5\\\"\"))) @vuln", - severity: Severity::Medium, - }, - Pattern { - id: "fetch_http_url", - description: "fetch() over plain HTTP", - query: "(call_expression function: (identifier) @id (#eq? @id \"fetch\") arguments: (arguments (string) @url (#match? @url \"^\\\"http://\"))) @vuln", - severity: Severity::Low, - }, - Pattern { - id: "xhr_eval_response", - description: "eval() of XMLHttpRequest.responseText", - query: "(call_expression function: (identifier) @id (#eq? @id \"eval\") arguments: (arguments (member_expression property: (property_identifier) @prop (#eq? @prop \"responseText\")))) @vuln", - severity: Severity::High, - }, -]; \ No newline at end of file + Pattern { + id: "eval_call", + description: "Use of eval()", + query: "(call_expression function: (identifier) @id (#eq? @id \"eval\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "new_function", + description: "new Function() constructor", + query: "(new_expression constructor: (identifier) @id (#eq? @id \"Function\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "document_write", + description: "document.write() call", + query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"write\"))) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "inner_html_assignment", + description: "Assignment to element.innerHTML", + query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"innerHTML\"))) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "settimeout_string", + description: "setTimeout / setInterval with a string argument", + query: "(call_expression function: (identifier) @id (#match? @id \"setTimeout|setInterval\") arguments: (arguments (string) @code . _)) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "any_type", + description: "Type annotation of `any`", + query: "(type_annotation (predefined_type) @t (#eq? @t \"any\")) @vuln", + severity: Severity::Low, + }, + Pattern { + id: "json_parse", + description: "JSON.parse on dynamic string", + query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"JSON\") property: (property_identifier) @prop (#eq? @prop \"parse\"))) @vuln", + severity: Severity::Low, + }, + Pattern { + id: "as_any_assertion", + description: "Type assertion to `any` using `as any`", + query: "(as_expression type: (predefined_type) @t (#eq? @t \"any\")) @vuln", + severity: Severity::Low, + }, + Pattern { + id: "type_assertion_any", + description: "Type assertion to `any` using `` syntax", + query: "(type_assertion type: (predefined_type) @t (#eq? @t \"any\")) @vuln", + severity: Severity::Low, + }, + Pattern { + id: "outer_html_assignment", + description: "Assignment to element.outerHTML", + query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"outerHTML\"))) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "insert_adjacent_html", + description: "insertAdjacentHTML() call", + query: "(call_expression function: (member_expression property: (property_identifier) @prop (#eq? @prop \"insertAdjacentHTML\"))) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "document_cookie_write", + description: "Write to document.cookie", + query: "(assignment_expression left: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"cookie\"))) @vuln", + severity: Severity::Low, + }, + Pattern { + id: "onclick_setattribute", + description: "Element.setAttribute('onclick', …)", + query: "(call_expression function: (member_expression property: (property_identifier) @prop (#eq? @prop \"setAttribute\")) arguments: (arguments (string) @name (#eq? @name \"\\\"onclick\\\"\") . (string) @handler)) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "math_random_call", + description: "Use of Math.random() for security-sensitive randomness", + query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"Math\") property: (property_identifier) @prop (#eq? @prop \"random\"))) @vuln", + severity: Severity::Low, + }, + Pattern { + id: "crypto_createhash_md5", + description: "Insecure hash algorithm: crypto.createHash('md5')", + query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"crypto\") property: (property_identifier) @prop (#eq? @prop \"createHash\")) arguments: (arguments (string) @alg (#match? @alg \"(?i)\\\"md5\\\"\"))) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "fetch_http_url", + description: "fetch() over plain HTTP", + query: "(call_expression function: (identifier) @id (#eq? @id \"fetch\") arguments: (arguments (string) @url (#match? @url \"^\\\"http://\"))) @vuln", + severity: Severity::Low, + }, + Pattern { + id: "xhr_eval_response", + description: "eval() of XMLHttpRequest.responseText", + query: "(call_expression function: (identifier) @id (#eq? @id \"eval\") arguments: (arguments (member_expression property: (property_identifier) @prop (#eq? @prop \"responseText\")))) @vuln", + severity: Severity::High, + }, +]; diff --git a/src/utils/config.rs b/src/utils/config.rs index 9f864f7c..8b77f7be 100644 --- a/src/utils/config.rs +++ b/src/utils/config.rs @@ -1,25 +1,25 @@ -use serde::{Deserialize, Serialize}; -use std::path::{Path}; -use std::fs; -use console::style; -use toml; use crate::patterns::Severity; +use console::style; +use serde::{Deserialize, Serialize}; +use std::fs; +use std::path::Path; +use toml; #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(default)] pub struct ScannerConfig { /// The minimum severity level to output pub min_severity: Severity, - + /// The maximum file size to scan, in megabytes. pub max_file_size_mb: Option, - + /// File extensions to exclude from scanning. pub excluded_extensions: Vec, - + /// Directories to exclude from scanning. pub excluded_directories: Vec, - + /// Excluded files pub excluded_files: Vec, @@ -34,10 +34,10 @@ pub struct ScannerConfig { /// Whether to limit the search to starting file system or not. pub one_file_system: bool, - - /// Whether to follow symlinks or not. + + /// Whether to follow symlinks or not. pub follow_symlinks: bool, - + /// Whether to scan hidden files or not. pub scan_hidden_files: bool, } @@ -47,22 +47,24 @@ impl Default for ScannerConfig { min_severity: Severity::Low, max_file_size_mb: None, excluded_extensions: vec![ - "jpg", "png", "gif", "mp4", "avi", "mkv", - "zip", "tar", "gz", "exe", "dll", "so", + "jpg", "png", "gif", "mp4", "avi", "mkv", "zip", "tar", "gz", "exe", "dll", "so", ] - .into_iter() - .map(str::to_owned) - .collect(), + .into_iter() + .map(str::to_owned) + .collect(), excluded_directories: vec![ - "node_modules", ".git", "target", ".vscode", ".idea", "build", "dist", + "node_modules", + ".git", + "target", + ".vscode", + ".idea", + "build", + "dist", ] - .into_iter() - .map(str::to_owned) - .collect(), - excluded_files: vec![] - .into_iter() - .map(str::to_owned) - .collect(), + .into_iter() + .map(str::to_owned) + .collect(), + excluded_files: vec![].into_iter().map(str::to_owned).collect(), read_global_ignore: false, read_vcsignore: true, require_git_to_read_vcsignore: true, @@ -76,18 +78,22 @@ impl Default for ScannerConfig { #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(default)] pub struct DatabaseConfig { + /// Custom path for database + pub path: String, + /// The number of days to keep database files for. TODO: IMPLEMENT pub auto_cleanup_days: u32, - + /// The maximum size of the database, in megabytes. TODO: IMPLEMENT pub max_db_size_mb: u64, - + /// Whether to run a VACUUM on startup or not. TODO: IMPLEMENT pub vacuum_on_startup: bool, } impl Default for DatabaseConfig { fn default() -> Self { Self { + path: String::from(""), auto_cleanup_days: 30, max_db_size_mb: 1024, vacuum_on_startup: false, @@ -100,15 +106,12 @@ impl Default for DatabaseConfig { pub struct OutputConfig { /// The default output format. TODO: IMPLEMENT others pub default_format: String, - + /// Whether to show progress or not. TODO: IMPLEMENT pub show_progress: bool, - - /// Whether to colorize output or not. TODO: IMPLEMENT changing to non colored - pub color_output: bool, - - /// The maximum number of results to show. TODO: IMPLEMENT - pub max_results: Option, + + /// The maximum number of results to show. + pub max_results: Option, } impl Default for OutputConfig { @@ -116,7 +119,6 @@ impl Default for OutputConfig { Self { default_format: "console".into(), show_progress: true, - color_output: true, max_results: None, } } @@ -128,21 +130,27 @@ pub struct PerformanceConfig { /// The maximum search depth, or `None` if no maximum search depth should be set. /// /// A depth of `1` includes all files under the current directory, a depth of `2` also includes - /// all files under subdirectories of the current directory, etc. + /// all files under subdirectories of the current directory, etc. pub max_depth: Option, // TODO: IMPLEMENT /// The minimum depth for reported entries, or `None`. pub min_depth: Option, // TODO: IMPLEMENT /// Whether to stop traversing into matching directories. - pub prune: bool, // TODO: IMPLEMENT + pub prune: bool, /// The maximum number of worker threads to use., or `None` to auto-detect. - pub worker_threads: Option, // TODO: IMPLEMENT - + pub worker_threads: Option, + /// The maximum number of entries to index in a single chunk. - pub index_chunk_size: u32, // TODO: IMPLEMENT - + pub batch_size: usize, + + /// capacity = threads × this + pub channel_multiplier: usize, + + /// Timeout on individual files // TODO: IMPLEMENT + pub scan_timeout_secs: Option, + /// The maximum amount of memory to use, in megabytes. pub memory_limit_mb: u64, // TODO: IMPLEMENT } @@ -154,7 +162,9 @@ impl Default for PerformanceConfig { min_depth: None, prune: false, worker_threads: None, - index_chunk_size: 1_000, + batch_size: 100usize, + channel_multiplier: 4usize, + scan_timeout_secs: None, memory_limit_mb: 512, } } @@ -170,11 +180,8 @@ pub struct Config { pub performance: PerformanceConfig, } - impl Config { - pub fn load( - config_dir: &Path, - ) -> Result> { + pub fn load(config_dir: &Path) -> Result> { let mut config = Config::default(); let default_config_path = config_dir.join("nyx.conf"); @@ -188,24 +195,32 @@ impl Config { let user_config: Config = toml::from_str(&user_config_content)?; config = merge_configs(config, user_config); - - println!("{}: Loaded user config from: {}\n", - style("note").green().bold(), - style(user_config_path.display()).underlined().white().bold()); + + println!( + "{}: Loaded user config from: {}\n", + style("note").green().bold(), + style(user_config_path.display()) + .underlined() + .white() + .bold() + ); } else { - println!("{}: Using {} configuration.\n Create file in '{}'to customize.\n", - style("note").green().bold(), - style("default").bold(), - style(user_config_path.display()).underlined().white().bold()); + println!( + "{}: Using {} configuration.\n Create file in '{}'to customize.\n", + style("note").green().bold(), + style("default").bold(), + style(user_config_path.display()) + .underlined() + .white() + .bold() + ); } Ok(config) } } -fn create_example_config( - config_dir: &Path, -) -> Result<(), Box> { +fn create_example_config(config_dir: &Path) -> Result<(), Box> { let example_path = config_dir.join("nyx.conf"); let default_config = Config::default(); @@ -213,7 +228,7 @@ fn create_example_config( // Add comments to make it user-friendly let commented_content = format!( - "# nnyx Vulnerability Scanner Configuration\n\ + "# nnyx Vulnerability Scanner Configuration\n\ # YOU SHOULD NOT MODIFY THIS FILE.\n\ # Create/modify 'nyx.local' to set configs\n\ # Only include the sections you want to override\n\n{}", @@ -230,40 +245,46 @@ fn create_example_config( /// supply new exclusions and overriding everything else. fn merge_configs(mut default: Config, user: Config) -> Config { // --- ScannerConfig --- - default.scanner.max_file_size_mb = user.scanner.max_file_size_mb; - default.scanner.read_global_ignore = user.scanner.read_global_ignore; - default.scanner.read_vcsignore = user.scanner.read_vcsignore; - default.scanner.require_git_to_read_vcsignore = user.scanner.require_git_to_read_vcsignore; - default.scanner.one_file_system = user.scanner.one_file_system; - default.scanner.follow_symlinks = user.scanner.follow_symlinks; - default.scanner.scan_hidden_files = user.scanner.scan_hidden_files; + default.scanner.max_file_size_mb = user.scanner.max_file_size_mb; + default.scanner.read_global_ignore = user.scanner.read_global_ignore; + default.scanner.read_vcsignore = user.scanner.read_vcsignore; + default.scanner.require_git_to_read_vcsignore = user.scanner.require_git_to_read_vcsignore; + default.scanner.one_file_system = user.scanner.one_file_system; + default.scanner.follow_symlinks = user.scanner.follow_symlinks; + default.scanner.scan_hidden_files = user.scanner.scan_hidden_files; // Merge exclusion lists (default ⊔ user), then sort & dedupe - default.scanner.excluded_extensions.extend(user.scanner.excluded_extensions); - default.scanner.excluded_directories.extend(user.scanner.excluded_directories); + default + .scanner + .excluded_extensions + .extend(user.scanner.excluded_extensions); + default + .scanner + .excluded_directories + .extend(user.scanner.excluded_directories); default.scanner.excluded_extensions.sort_unstable(); default.scanner.excluded_extensions.dedup(); default.scanner.excluded_directories.sort_unstable(); default.scanner.excluded_directories.dedup(); // --- DatabaseConfig --- - default.database.auto_cleanup_days = user.database.auto_cleanup_days; - default.database.max_db_size_mb = user.database.max_db_size_mb; - default.database.vacuum_on_startup = user.database.vacuum_on_startup; + default.database.auto_cleanup_days = user.database.auto_cleanup_days; + default.database.max_db_size_mb = user.database.max_db_size_mb; + default.database.vacuum_on_startup = user.database.vacuum_on_startup; // --- OutputConfig --- - default.output.default_format = user.output.default_format; - default.output.show_progress = user.output.show_progress; - default.output.color_output = user.output.color_output; - default.output.max_results = user.output.max_results; + default.output.default_format = user.output.default_format; + default.output.show_progress = user.output.show_progress; + default.output.max_results = user.output.max_results; // --- PerformanceConfig --- - default.performance.max_depth = user.performance.max_depth; - default.performance.min_depth = user.performance.min_depth; - default.performance.prune = user.performance.prune; - default.performance.worker_threads = user.performance.worker_threads; - default.performance.index_chunk_size = user.performance.index_chunk_size; - default.performance.memory_limit_mb = user.performance.memory_limit_mb; + default.performance.max_depth = user.performance.max_depth; + default.performance.min_depth = user.performance.min_depth; + default.performance.prune = user.performance.prune; + default.performance.worker_threads = user.performance.worker_threads; + default.performance.batch_size = user.performance.batch_size; + default.performance.channel_multiplier = user.performance.channel_multiplier; + default.performance.memory_limit_mb = user.performance.memory_limit_mb; default -} \ No newline at end of file +} diff --git a/src/utils/ext.rs b/src/utils/ext.rs index f46e4d22..21cee881 100644 --- a/src/utils/ext.rs +++ b/src/utils/ext.rs @@ -1,15 +1,14 @@ pub fn lowercase_ext(path: &std::path::Path) -> Option<&'static str> { - path.extension() - .and_then(|s| match s.to_str()? { - "rs" | "RS" => Some("rs"), - "c" => Some("c"), - "cpp" | "c++" => Some("cpp"), - "java" => Some("java"), - "go" => Some("go"), - "php" => Some("php"), - "py" | "PY" => Some("py"), - "ts" | "TSX" | "tsx" => Some("ts"), - "js" => Some("js"), - _ => None, + path.extension().and_then(|s| match s.to_str()? { + "rs" | "RS" => Some("rs"), + "c" => Some("c"), + "cpp" | "c++" => Some("cpp"), + "java" => Some("java"), + "go" => Some("go"), + "php" => Some("php"), + "py" | "PY" => Some("py"), + "ts" | "TSX" | "tsx" => Some("ts"), + "js" => Some("js"), + _ => None, }) -} \ No newline at end of file +} diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 5149d181..b4d2301b 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1,8 +1,8 @@ -pub mod project; pub mod config; -pub(crate) mod query_cache; pub(crate) mod ext; +pub mod project; +pub(crate) mod query_cache; +pub use config::Config; // Re-export commonly used functions for convenience -pub use project::{get_project_info}; -pub use config::Config; \ No newline at end of file +pub use project::get_project_info; diff --git a/src/utils/project.rs b/src/utils/project.rs index e24c164d..50024f4e 100644 --- a/src/utils/project.rs +++ b/src/utils/project.rs @@ -1,34 +1,30 @@ -use std::path::{Path, PathBuf}; use crate::errors::{NyxError, NyxResult}; +use std::path::{Path, PathBuf}; /// Determine `.sqlite>`. -pub fn get_project_info( - project_path: &Path, - config_dir: &Path, -) -> NyxResult<(String, PathBuf)> { +pub fn get_project_info(project_path: &Path, config_dir: &Path) -> NyxResult<(String, PathBuf)> { + let project_name = project_path + .file_name() + .and_then(|n| n.to_str()) + .ok_or_else(|| NyxError::Other("Unable to determine project name".into()))?; - let project_name = project_path - .file_name() - .and_then(|n| n.to_str()) - .ok_or_else(|| NyxError::Other("Unable to determine project name".into()))?; + let db_name = sanitize_project_name(project_name); + let db_path = config_dir.join(format!("{}.sqlite", db_name)); - let db_name = sanitize_project_name(project_name); - let db_path = config_dir.join(format!("{}.sqlite", db_name)); - - Ok((project_name.to_owned(), db_path)) + Ok((project_name.to_owned(), db_path)) } pub fn sanitize_project_name(name: &str) -> String { - name.to_lowercase() - .chars() - .map(|c| match c { - ' ' | '\t' | '\n' | '\r' => '_', - c if c.is_alphanumeric() || c == '_' || c == '-' => c, - _ => '_', - }) - .collect::() - .split('_') - .filter(|s| !s.is_empty()) - .collect::>() - .join("_") -} \ No newline at end of file + name.to_lowercase() + .chars() + .map(|c| match c { + ' ' | '\t' | '\n' | '\r' => '_', + c if c.is_alphanumeric() || c == '_' || c == '-' => c, + _ => '_', + }) + .collect::() + .split('_') + .filter(|s| !s.is_empty()) + .collect::>() + .join("_") +} diff --git a/src/utils/query_cache.rs b/src/utils/query_cache.rs index 78f22332..c513bf6c 100644 --- a/src/utils/query_cache.rs +++ b/src/utils/query_cache.rs @@ -6,37 +6,41 @@ use crate::patterns::{self, Pattern}; #[derive(Clone)] pub struct CompiledQuery { - pub meta: Pattern, - pub query: Arc, + pub meta: Pattern, + pub query: Arc, } type QuerySet = Arc>; static CACHE: LazyLock>> = - LazyLock::new(|| RwLock::new(HashMap::new())); + LazyLock::new(|| RwLock::new(HashMap::new())); /// Return **one shared Arc** to the per-language query set. /// Cloning the `Arc` is O(1) and the underlying Vec lives for the /// lifetime of the process. pub fn for_lang(lang: &'static str, ts_lang: Language) -> std::sync::Arc> { - // fast path - if let Some(v) = CACHE.read().unwrap().get(lang) { - return v.clone(); - } - - // slow path — compile - let patterns = patterns::load(lang); - let compiled: Vec<_> = patterns.into_iter().filter_map(|p| { - match Query::new(&ts_lang, p.query) { - Ok(q) => Some(CompiledQuery { meta: p, query: std::sync::Arc::new(q) }), - Err(e)=> { - tracing::warn!(lang, id = p.id, "query compile error: {e}"); - None - } + // fast path + if let Some(v) = CACHE.read().unwrap().get(lang) { + return v.clone(); } - }).collect(); - let compiled = std::sync::Arc::new(compiled); + // slow path — compile + let patterns = patterns::load(lang); + let compiled: Vec<_> = patterns + .into_iter() + .filter_map(|p| match Query::new(&ts_lang, p.query) { + Ok(q) => Some(CompiledQuery { + meta: p, + query: std::sync::Arc::new(q), + }), + Err(e) => { + tracing::warn!(lang, id = p.id, "query compile error: {e}"); + None + } + }) + .collect(); - let mut w = CACHE.write().unwrap(); - w.entry(lang).or_insert_with(|| compiled.clone()).clone() -} \ No newline at end of file + let compiled = std::sync::Arc::new(compiled); + + let mut w = CACHE.write().unwrap(); + w.entry(lang).or_insert_with(|| compiled.clone()).clone() +} diff --git a/src/walk.rs b/src/walk.rs index c65212f8..ae1165a7 100644 --- a/src/walk.rs +++ b/src/walk.rs @@ -1,5 +1,5 @@ -use crossbeam_channel::{bounded, Receiver, Sender}; -use ignore::{overrides::OverrideBuilder, WalkBuilder, WalkState}; +use crossbeam_channel::{Receiver, Sender, bounded}; +use ignore::{WalkBuilder, WalkState, overrides::OverrideBuilder}; use std::{ mem, path::{Path, PathBuf}, @@ -11,19 +11,17 @@ use crate::utils::Config; // --------------------------------------------------------------------------- // Internal constants / helpers // --------------------------------------------------------------------------- -const DEFAULT_BATCH: usize = 8; // a tad larger for fewer sends -const CHANNEL_MULTIPLIER:usize = 4; // capacity = threads × this type Batch = Vec; struct Batcher { - tx: Sender, + tx: Sender, batch: Batch, } impl Batcher { - fn push(&mut self, p: PathBuf) { + fn push(&mut self, p: PathBuf, batch_size: usize) { self.batch.push(p); - if self.batch.len() == DEFAULT_BATCH { + if self.batch.len() == batch_size { self.flush(); } } @@ -34,7 +32,9 @@ impl Batcher { } } impl Drop for Batcher { - fn drop(&mut self) { self.flush(); } + fn drop(&mut self) { + self.flush(); + } } // --------------------------------------------------------------------------- @@ -52,54 +52,55 @@ pub fn spawn_senders(root: &Path, cfg: &Config) -> Receiver { tracing::warn!("cannot add ignore pattern ‘{dir}’: {e}"); } } - let overrides = ob.build().unwrap(); + let overrides = ob.build().unwrap(); // ----- 2 channel & thread pool parameters ----------------------------- - let workers = cfg.performance.worker_threads.unwrap_or(num_cpus::get()); - let (tx, rx) = bounded::(workers * CHANNEL_MULTIPLIER); + let workers = cfg.performance.worker_threads.unwrap_or(num_cpus::get()); + let (tx, rx) = bounded::(workers * cfg.performance.channel_multiplier); - let root = root.to_path_buf(); + let root = root.to_path_buf(); let scan_hidden = cfg.scanner.scan_hidden_files; - let follow = cfg.scanner.follow_symlinks; - let max_bytes = cfg.scanner.max_file_size_mb.unwrap_or(0) * 1_048_576; + let follow = cfg.scanner.follow_symlinks; + let max_bytes = cfg.scanner.max_file_size_mb.unwrap_or(0) * 1_048_576; + let batch_size = cfg.performance.batch_size; // ----- 3 the background walker thread --------------------------------- thread::spawn(move || { WalkBuilder::new(root) - .hidden(!scan_hidden) - .follow_links(follow) - .threads(workers) - .overrides(overrides) - .build_parallel() - .run(move || { - let mut b = Batcher { - tx: tx.clone(), - batch: Vec::with_capacity(DEFAULT_BATCH), - }; + .hidden(!scan_hidden) + .follow_links(follow) + .threads(workers) + .overrides(overrides) + .build_parallel() + .run(move || { + let mut b = Batcher { + tx: tx.clone(), + batch: Vec::with_capacity(batch_size), + }; - Box::new(move |entry| { - tracing::debug!("walking {:?}", entry); - let entry = match entry { - Ok(e) if e.file_type().map(|ft| ft.is_file()).unwrap_or(false) => e, - _ => return WalkState::Continue, - }; + Box::new(move |entry| { + tracing::debug!("walking {:?}", entry); + let entry = match entry { + Ok(e) if e.file_type().map(|ft| ft.is_file()).unwrap_or(false) => e, + _ => return WalkState::Continue, + }; - if max_bytes != 0 { - match entry.metadata() { - Ok(m) if m.len() > max_bytes => return WalkState::Continue, - Err(e) => { - tracing::debug!("metadata failed for {:?}: {e}", entry.path()); - return WalkState::Continue; - } - _ => {} - } - } + if max_bytes != 0 { + match entry.metadata() { + Ok(m) if m.len() > max_bytes => return WalkState::Continue, + Err(e) => { + tracing::debug!("metadata failed for {:?}: {e}", entry.path()); + return WalkState::Continue; + } + _ => {} + } + } - tracing::debug!("sending {:?}", entry); - b.push(entry.into_path()); - WalkState::Continue - }) - }); + tracing::debug!("sending {:?}", entry); + b.push(entry.into_path(), batch_size); + WalkState::Continue + }) + }); }); rx