Refactor codebase for consistent indentation and formatting

- Standardized spacing and indentation across multiple modules for improved readability.
- Reorganized `patterns` and `utils` imports for consistency.
- Updated `NyxError` and `NyxResult` related implementations to maintain consistent formatting.
- Enhanced readability in AST patterns for better clarity and maintainability.
This commit is contained in:
elipeter 2025-06-24 20:27:06 +02:00
parent b3870997d7
commit 14a549ac39
26 changed files with 1314 additions and 1221 deletions

View file

@ -1,71 +1,75 @@
use crate::commands::scan::Diag;
use crate::errors::{NyxError, NyxResult};
use crate::utils::ext::lowercase_ext;
use crate::utils::{Config, query_cache};
use std::cell::RefCell;
use std::path::Path;
use tree_sitter::{Language, QueryCursor, StreamingIterator};
use crate::commands::scan::Diag;
use crate::errors::{NyxResult, NyxError};
use crate::utils::{query_cache, Config};
use crate::utils::ext::lowercase_ext;
// One tree-sitter parser per thread, kept in a `RefCell` so callers can take a
// mutable borrow through `PARSER.with(..)`. `run_rules_on_file` sets the
// language on it before every parse, so the same instance is reused across
// files of different languages.
thread_local! {
static PARSER: RefCell<tree_sitter::Parser> = RefCell::new(tree_sitter::Parser::new());
}
pub(crate) fn run_rules_on_file(
path: &Path,
cfg: &Config,
) -> NyxResult<Vec<Diag>> {
tracing::debug!("Running rules on: {}", path.display());
let bytes = std::fs::read(path)?;
pub(crate) fn run_rules_on_file(path: &Path, cfg: &Config) -> NyxResult<Vec<Diag>> {
tracing::debug!("Running rules on: {}", path.display());
let bytes = std::fs::read(path)?;
// Fast binary-file guard (skip if >1% NULs)
if bytes.iter().filter(|b| **b == 0).count() * 100 / bytes.len().max(1) > 1 {
return Ok(vec![]);
}
let (ts_lang, lang_slug) = match lowercase_ext(path) {
Some("rs") => (Language::from(tree_sitter_rust::LANGUAGE), "rust"),
Some("c") => (Language::from(tree_sitter_c::LANGUAGE), "c"),
Some("cpp") => (Language::from(tree_sitter_cpp::LANGUAGE), "cpp"),
Some("java")=> (Language::from(tree_sitter_java::LANGUAGE), "java"),
Some("go") => (Language::from(tree_sitter_go::LANGUAGE), "go"),
Some("php") => (Language::from(tree_sitter_php::LANGUAGE_PHP), "php"),
Some("py") => (Language::from(tree_sitter_python::LANGUAGE), "python"),
Some("ts") => (Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT), "typescript"),
Some("js") => (Language::from(tree_sitter_javascript::LANGUAGE), "javascript"),
Some("rb") => (Language::from(tree_sitter_ruby::LANGUAGE), "ruby"),
_ => return Ok(vec![]),
};
let _tree = PARSER.with(|cell| {
let mut parser = cell.borrow_mut();
parser.set_language(&ts_lang)?;
parser.parse(&*bytes, None)
.ok_or_else(|| NyxError::Other("tree-sitter failed".into()))
})?;
let root = _tree.root_node();
let compiled = query_cache::for_lang(lang_slug, ts_lang);
let mut cursor = QueryCursor::new();
let mut out = Vec::new();
for cq in compiled.iter() {
if cfg.scanner.min_severity <= cq.meta.severity {
continue;
// Fast binary-file guard (skip if >1% NULs)
if bytes.iter().filter(|b| **b == 0).count() * 100 / bytes.len().max(1) > 1 {
return Ok(vec![]);
}
let mut matches = cursor.matches(&cq.query, root, &*bytes);
while let Some(m) = matches.next() {
if let Some(cap) = m.captures.iter().find(|c| c.index == 0) {
let point = cap.node.start_position();
out.push(Diag {
path: path.to_string_lossy().into_owned(),
line: point.row + 1,
col: point.column + 1,
severity: cq.meta.severity,
id: cq.meta.id.to_owned(),
});
}
let (ts_lang, lang_slug) = match lowercase_ext(path) {
Some("rs") => (Language::from(tree_sitter_rust::LANGUAGE), "rust"),
Some("c") => (Language::from(tree_sitter_c::LANGUAGE), "c"),
Some("cpp") => (Language::from(tree_sitter_cpp::LANGUAGE), "cpp"),
Some("java") => (Language::from(tree_sitter_java::LANGUAGE), "java"),
Some("go") => (Language::from(tree_sitter_go::LANGUAGE), "go"),
Some("php") => (Language::from(tree_sitter_php::LANGUAGE_PHP), "php"),
Some("py") => (Language::from(tree_sitter_python::LANGUAGE), "python"),
Some("ts") => (
Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT),
"typescript",
),
Some("js") => (
Language::from(tree_sitter_javascript::LANGUAGE),
"javascript",
),
Some("rb") => (Language::from(tree_sitter_ruby::LANGUAGE), "ruby"),
_ => return Ok(vec![]),
};
let _tree = PARSER.with(|cell| {
let mut parser = cell.borrow_mut();
parser.set_language(&ts_lang)?;
parser
.parse(&*bytes, None)
.ok_or_else(|| NyxError::Other("tree-sitter failed".into()))
})?;
let root = _tree.root_node();
let compiled = query_cache::for_lang(lang_slug, ts_lang);
let mut cursor = QueryCursor::new();
let mut out = Vec::new();
for cq in compiled.iter() {
if cfg.scanner.min_severity <= cq.meta.severity {
continue;
}
let mut matches = cursor.matches(&cq.query, root, &*bytes);
while let Some(m) = matches.next() {
if let Some(cap) = m.captures.iter().find(|c| c.index == 0) {
let point = cap.node.start_position();
out.push(Diag {
path: path.to_string_lossy().into_owned(),
line: point.row + 1,
col: point.column + 1,
severity: cq.meta.severity,
id: cq.meta.id.to_owned(),
});
}
}
}
}
Ok(out)
Ok(out)
}

View file

@ -1,13 +1,9 @@
use std::{env, fs};
use console::style;
use crate::errors::NyxResult;
use crate::utils::get_project_info;
use console::style;
use std::{env, fs};
pub fn handle(
project: Option<String>,
all: bool,
config_dir: &std::path::Path,
) -> NyxResult<()> {
pub fn handle(project: Option<String>, all: bool, config_dir: &std::path::Path) -> NyxResult<()> {
if all {
println!("{}", style("Cleaning all indexes...").cyan().bold());
if config_dir.exists() {
@ -19,9 +15,17 @@ pub fn handle(
let db_path = config_dir.join(format!("{}.sqlite", proj_name));
if db_path.exists() {
fs::remove_file(&db_path)?;
println!("{} {}", style("✔ Cleaned index for").green(), style(&proj_name).white().bold());
println!(
"{} {}",
style("✔ Cleaned index for").green(),
style(&proj_name).white().bold()
);
} else {
println!("{} {}", style("✖ No index found for").red(), style(&proj_name).white().bold());
println!(
"{} {}",
style("✖ No index found for").red(),
style(&proj_name).white().bold()
);
}
} else {
let current_dir = env::current_dir()?;
@ -29,11 +33,19 @@ pub fn handle(
if db_path.exists() {
fs::remove_file(&db_path)?;
println!("{} {}", style("✔ Cleaned index for").green(), style(&project_name).white().bold());
println!(
"{} {}",
style("✔ Cleaned index for").green(),
style(&project_name).white().bold()
);
} else {
println!("{} {}", style("✖ No index found for current project").red(), style(&project_name).white().bold());
println!(
"{} {}",
style("✖ No index found for current project").red(),
style(&project_name).white().bold()
);
}
}
std::process::exit(0);
}
}

View file

@ -1,16 +1,16 @@
use std::fs;
use std::process::exit;
use bytesize::ByteSize;
use chrono::{DateTime, Local};
use console::style;
use crate::cli::IndexAction;
use crate::database::index::{Indexer, IssueRow};
use crate::errors::NyxResult;
use crate::patterns::Severity;
use crate::utils::Config;
use crate::utils::project::get_project_info;
use crate::walk::spawn_senders;
use bytesize::ByteSize;
use chrono::{DateTime, Local};
use console::style;
use rayon::prelude::*;
use crate::errors::NyxResult;
use std::fs;
use std::process::exit;
pub fn handle(
action: IndexAction,
@ -24,10 +24,18 @@ pub fn handle(
if force || !db_path.exists() {
build_index(&project_name, &build_path, &db_path, config)?;
println!("{} {}", style("Index built:" ).green(), style(db_path.display()).white().bold());
println!(
"✔ {} {}",
style("Index built:").green(),
style(db_path.display()).white().bold()
);
exit(0);
} else {
println!("{} {}", style("↩ Index already exists").yellow(), style("(use --force to rebuild)").dim());
println!(
"{} {}",
style("↩ Index already exists").yellow(),
style("(use --force to rebuild)").dim()
);
exit(0);
}
}
@ -36,18 +44,34 @@ pub fn handle(
let (project_name, db_path) = get_project_info(&status_path, database_dir)?;
println!("{}", style("Project status").blue().bold().underlined());
println!(" {:14} {}", style("Project"), style(&project_name).white().bold());
println!(" {:14} {}", style("Index path"), style(db_path.display()).underlined());
println!(" {:14} {}", style("Exists"), style(db_path.exists()).bold());
println!(
" {:14} {}",
style("Project"),
style(&project_name).white().bold()
);
println!(
" {:14} {}",
style("Index path"),
style(db_path.display()).underlined()
);
println!(
" {:14} {}",
style("Exists"),
style(db_path.exists()).bold()
);
if db_path.exists() {
let meta = fs::metadata(&db_path)?;
let size = ByteSize::b(meta.len());
let mtime: DateTime<Local> = meta.modified()?.into();
println!(" {:14} {}", style("Size"), size);
println!(" {:14} {}", style("Modified"), mtime.format("%Y-%m-%d %H:%M:%S"));
println!(" {:14} {}", style("Size"), size);
println!(
" {:14} {}",
style("Modified"),
mtime.format("%Y-%m-%d %H:%M:%S")
);
}
exit(0);
}
}
@ -61,7 +85,7 @@ pub fn build_index(
) -> NyxResult<()> {
tracing::debug!("Building index for: {}", project_name);
fs::File::create(db_path)?;
let pool = Indexer::init(db_path)?;
{
let idx = Indexer::from_pool(project_name, &pool)?;
@ -69,34 +93,39 @@ pub fn build_index(
}
tracing::debug!("Cleaned index for: {}", project_name);
let rx = spawn_senders(project_path, config);
let paths: Vec<_> = rx.into_iter().flatten().collect();
paths.into_par_iter().try_for_each(|path| -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let issues = crate::commands::scan::run_rules_on_file(&path, config)?;
let mut idx = Indexer::from_pool(project_name, &pool)?;
let file_id = idx.upsert_file(&path)?;
let rows: Vec<IssueRow> = issues.iter().map(|d| IssueRow {
rule_id: d.id.as_ref(),
severity: match d.severity {
Severity::High => "HIGH",
Severity::Medium => "MEDIUM",
Severity::Low => "LOW",
},
line: d.line as i64,
col: d.col as i64,
}).collect();
idx.replace_issues(file_id, rows)?;
Ok(())
})?;
paths.into_par_iter().try_for_each(
|path| -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let issues = crate::commands::scan::run_rules_on_file(&path, config)?;
let mut idx = Indexer::from_pool(project_name, &pool)?;
let file_id = idx.upsert_file(&path)?;
let rows: Vec<IssueRow> = issues
.iter()
.map(|d| IssueRow {
rule_id: d.id.as_ref(),
severity: match d.severity {
Severity::High => "HIGH",
Severity::Medium => "MEDIUM",
Severity::Low => "LOW",
},
line: d.line as i64,
col: d.col as i64,
})
.collect();
idx.replace_issues(file_id, rows)?;
Ok(())
},
)?;
{
let idx = Indexer::from_pool(project_name, &pool)?;
idx.vacuum()?;
}
Ok(())
}
}

View file

@ -1,13 +1,10 @@
use std::fs;
use crate::errors::NyxResult;
use bytesize::ByteSize;
use chrono::{DateTime, Local};
use console::style;
use crate::errors::NyxResult;
use std::fs;
pub fn handle(
verbose: bool,
database_dir: &std::path::Path,
) -> NyxResult<()> {
pub fn handle(verbose: bool, database_dir: &std::path::Path) -> NyxResult<()> {
println!("{}", style("Indexed projects").blue().bold().underlined());
if !database_dir.exists() {
@ -21,18 +18,29 @@ pub fn handle(
continue;
}
let name = path.file_stem().and_then(|s| s.to_str()).unwrap_or("unknown");
let name = path
.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("unknown");
println!(" {}", style(name).white().bold());
if verbose {
let meta = fs::metadata(&path)?;
let size = ByteSize::b(meta.len());
let mtime: DateTime<Local> = meta.modified()?.into();
println!(" {:10} {}", style("Path"), style(path.display()).underlined());
println!(" {:10} {}", style("Size"), size);
println!(" {:10} {}", style("Modified"), mtime.format("%Y-%m-%d %H:%M:%S"));
println!(
" {:10} {}",
style("Path"),
style(path.display()).underlined()
);
println!(" {:10} {}", style("Size"), size);
println!(
" {:10} {}",
style("Modified"),
mtime.format("%Y-%m-%d %H:%M:%S")
);
}
}
std::process::exit(0);
}
}

View file

@ -1,33 +1,35 @@
pub mod scan;
pub mod clean;
pub mod index;
pub mod list;
pub mod clean;
pub mod scan;
use crate::cli::Commands;
use std::path::Path;
use crate::errors::NyxResult;
use crate::patterns::Severity;
use crate::utils::config::Config;
use std::path::Path;
pub fn handle_command(
command: Commands,
database_dir: &Path,
config: &mut Config
config: &mut Config,
) -> NyxResult<()> {
match command {
Commands::Scan { path, no_index, rebuild_index, format, high_only } => {
if high_only { config.scanner.min_severity = Severity::High };
Commands::Scan {
path,
no_index,
rebuild_index,
format,
high_only,
} => {
if high_only {
config.scanner.min_severity = Severity::High
};
scan::handle(&path, no_index, rebuild_index, format, database_dir, config)
}
Commands::Index { action } => {
index::handle(action, database_dir, config)
}
Commands::List { verbose } => {
list::handle(verbose, database_dir)
}
Commands::Clean { project, all } => {
clean::handle(project, all, database_dir)
}
Commands::Index { action } => index::handle(action, database_dir, config),
Commands::List { verbose } => list::handle(verbose, database_dir),
Commands::Clean { project, all } => clean::handle(project, all, database_dir),
}
}
}

View file

@ -1,18 +1,18 @@
use crate::utils::project::get_project_info;
use console::style;
use std::path::Path;
use std::sync::{Arc, Mutex};
use r2d2::Pool;
use r2d2_sqlite::SqliteConnectionManager;
use crate::database::index::{IssueRow, Indexer};
pub(crate) use crate::ast::run_rules_on_file;
use crate::database::index::{Indexer, IssueRow};
use crate::errors::NyxResult;
use crate::patterns::Severity;
use crate::utils::config::Config;
use crate::utils::project::get_project_info;
use crate::walk::spawn_senders;
use console::style;
use dashmap::DashMap;
use r2d2::Pool;
use r2d2_sqlite::SqliteConnectionManager;
use rayon::prelude::*;
use std::collections::BTreeMap;
use dashmap::DashMap;
use crate::errors::NyxResult;
pub(crate) use crate::ast::run_rules_on_file;
use std::path::Path;
use std::sync::{Arc, Mutex};
/// Boxed `Send + Sync` error used as the error type of the closure passed to
/// the parallel `try_for_each` in `scan_filesystem`.
type DynError = Box<dyn std::error::Error + Send + Sync>;
@ -37,14 +37,18 @@ pub fn handle(
let scan_path = Path::new(path).canonicalize()?;
let (project_name, db_path) = get_project_info(&scan_path, database_dir)?;
println!("{} {}...\n", style("Checking").green().bold(), &project_name);
println!(
"{} {}...\n",
style("Checking").green().bold(),
&project_name
);
let diags: Vec<Diag> = if no_index {
scan_filesystem(&scan_path, config)?
} else {
if rebuild_index || !db_path.exists() {
tracing::debug!("Scanning filesystem index filesystem");
crate::commands::index::build_index(&project_name,&scan_path, &db_path, config)?;
crate::commands::index::build_index(&project_name, &scan_path, &db_path, config)?;
}
let pool = Indexer::init(&db_path)?;
@ -53,9 +57,7 @@ pub fn handle(
tracing::debug!("Found {:?} issues.", diags.len());
if format == "console"
|| (format.is_empty() && config.output.default_format == "console")
{
if format == "console" || (format.is_empty() && config.output.default_format == "console") {
tracing::debug!("Printing to console");
let mut grouped: BTreeMap<&str, Vec<&Diag>> = BTreeMap::new();
for d in &diags {
@ -65,16 +67,23 @@ pub fn handle(
for (path, issues) in &grouped {
println!("{}", style(path).blue().underlined());
for d in issues {
println!(" {:>4}:{:<4} [{}] {}",
d.line, d.col, d.severity, style(&d.id).bold());
println!(
" {:>4}:{:<4} [{}] {}",
d.line,
d.col,
d.severity,
style(&d.id).bold()
);
}
println!();
}
println!("{} '{}' generated {} issues.",
style("warning").yellow().bold(),
style(project_name).white().bold(),
style(diags.len()).bold());
println!(
"{} '{}' generated {} issues.",
style("warning").yellow().bold(),
style(project_name).white().bold(),
style(diags.len()).bold()
);
println!("\t"); // TODO: Add individual counts for different warning levels
}
Ok(())
@ -84,22 +93,16 @@ pub fn handle(
// Scanning helpers
// --------------------------------------------------------------------------------------------
fn scan_filesystem(
root: &Path,
cfg: &Config,
) -> NyxResult<Vec<Diag>> {
fn scan_filesystem(root: &Path, cfg: &Config) -> NyxResult<Vec<Diag>> {
let rx = spawn_senders(root, cfg);
let acc = Mutex::new(Vec::new());
rx.into_iter()
.flatten()
.par_bridge()
.try_for_each(|path| {
let mut local = run_rules_on_file(&path, cfg)?;
acc.lock().unwrap().append(&mut local);
Ok::<(), DynError>(())
})?;
rx.into_iter().flatten().par_bridge().try_for_each(|path| {
let mut local = run_rules_on_file(&path, cfg)?;
acc.lock().unwrap().append(&mut local);
Ok::<(), DynError>(())
})?;
Ok(acc.into_inner()?)
}
@ -108,7 +111,6 @@ pub fn scan_with_index_parallel(
pool: Arc<Pool<SqliteConnectionManager>>,
cfg: &Config,
) -> NyxResult<Vec<Diag>> {
let files = {
let idx = Indexer::from_pool(project, &pool)?;
idx.get_files(project)?
@ -117,40 +119,48 @@ pub fn scan_with_index_parallel(
// ① Collect per-path Vec<Diag> without a global mutex
let diag_map: DashMap<String, Vec<Diag>> = DashMap::new();
files.into_par_iter()
.for_each_init(
// ② A single Indexer per Rayon worker thread
|| Indexer::from_pool(project, &pool).expect("db pool"),
|idx, path| {
let needs_scan = idx.should_scan(&path).unwrap_or(true);
files.into_par_iter().for_each_init(
// ② A single Indexer per Rayon worker thread
|| Indexer::from_pool(project, &pool).expect("db pool"),
|idx, path| {
let needs_scan = idx.should_scan(&path).unwrap_or(true);
let mut diags = if needs_scan {
let d = run_rules_on_file(&path, cfg).unwrap_or_default();
let file_id = idx.upsert_file(&path).unwrap_or_default();
idx.replace_issues(
file_id,
d.iter().map(|d| IssueRow {
rule_id: &d.id,
severity: d.severity.as_db_str(),
line: d.line as i64,
col: d.col as i64,
}),
).ok();
d
} else {
idx.get_issues_from_file(&path).unwrap_or_default()
};
if !diags.is_empty() {
diag_map.entry(path.to_string_lossy().to_string())
let mut diags = if needs_scan {
let d = run_rules_on_file(&path, cfg).unwrap_or_default();
let file_id = idx.upsert_file(&path).unwrap_or_default();
idx.replace_issues(
file_id,
d.iter().map(|d| IssueRow {
rule_id: &d.id,
severity: d.severity.as_db_str(),
line: d.line as i64,
col: d.col as i64,
}),
)
.ok();
d
} else {
idx.get_issues_from_file(&path).unwrap_or_default()
};
if !diags.is_empty() {
diag_map
.entry(path.to_string_lossy().to_string())
.or_default()
.append(&mut diags);
}
}
);
}
},
);
// Optional, heavy: only vacuum on --rebuild-index
// if rebuild { idx.vacuum()?; }
// flatten
let mut diags: Vec<Diag> = diag_map.into_iter().flat_map(|(_, v)| v).collect();
if let Some(max) = cfg.output.max_results {
diags.truncate(max as usize);
}
// Flatten
Ok(diag_map.into_iter().flat_map(|(_, v)| v).collect())
Ok(diags)
}

View file

@ -1,19 +1,19 @@
pub mod index {
use rusqlite::{params, Connection, OpenFlags, OptionalExtension};
use std::fs;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::time::{SystemTime, UNIX_EPOCH};
use crate::commands::scan::Diag;
use crate::patterns::Severity;
use r2d2_sqlite::{SqliteConnectionManager};
use std::ops::Deref;
use std::sync::Arc;
use r2d2::{Pool, PooledConnection};
use crate::errors::NyxResult;
use crate::commands::scan::Diag;
use crate::errors::NyxResult;
use crate::patterns::Severity;
use r2d2::{Pool, PooledConnection};
use r2d2_sqlite::SqliteConnectionManager;
use rusqlite::{Connection, OpenFlags, OptionalExtension, params};
use std::fs;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
/// DB schema (foreign keys enabled).
const SCHEMA: &str = r#"
/// DB schema (foreign keys enabled).
const SCHEMA: &str = r#"
PRAGMA foreign_keys = ON;
CREATE TABLE IF NOT EXISTS files (
@ -38,165 +38,178 @@ pub mod index {
);
"#;
/// A single issue row, ready for insertion.
#[derive(Debug, Clone)]
pub struct IssueRow<'a> {
pub rule_id: &'a str,
pub severity: &'a str,
pub line: i64,
pub col: i64,
}
pub struct Indexer {
conn: PooledConnection<SqliteConnectionManager>,
project: String,
}
impl Indexer {
pub fn init(
database_path: &Path,
) -> NyxResult<Arc<Pool<SqliteConnectionManager>>> {
let flags = OpenFlags::SQLITE_OPEN_READ_WRITE
| OpenFlags::SQLITE_OPEN_CREATE
| OpenFlags::SQLITE_OPEN_FULL_MUTEX;
let manager = SqliteConnectionManager::file(database_path).with_flags(flags);
let pool = Arc::new(Pool::new(manager)?);
{
let conn = pool.get()?;
conn.pragma_update(None, "journal_mode", "WAL")?;
conn.execute_batch(SCHEMA)?;
}
Ok(pool)
/// A single issue row, ready for insertion.
#[derive(Debug, Clone)]
pub struct IssueRow<'a> {
pub rule_id: &'a str,
pub severity: &'a str,
pub line: i64,
pub col: i64,
}
pub fn from_pool(
project: &str,
pool: &Pool<SqliteConnectionManager>,
) -> NyxResult<Self> {
let conn = pool.get()?;
Ok(Self { conn, project: project.to_owned() })
pub struct Indexer {
conn: PooledConnection<SqliteConnectionManager>,
project: String,
}
// helper so code below can treat PooledConnection like &Connection
fn c(&self) -> &Connection { self.conn.deref() }
impl Indexer {
pub fn init(database_path: &Path) -> NyxResult<Arc<Pool<SqliteConnectionManager>>> {
let flags = OpenFlags::SQLITE_OPEN_READ_WRITE
| OpenFlags::SQLITE_OPEN_CREATE
| OpenFlags::SQLITE_OPEN_FULL_MUTEX;
let manager = SqliteConnectionManager::file(database_path).with_flags(flags);
let pool = Arc::new(Pool::new(manager)?);
/// Return true when the file *content* or *mtime* changed since the last scan.
pub fn should_scan(&self, path: &Path) -> NyxResult<bool> {
let meta = fs::metadata(path)?;
let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
let digest = Self::digest_file(path)?;
{
let conn = pool.get()?;
conn.pragma_update(None, "journal_mode", "WAL")?;
conn.execute_batch(SCHEMA)?;
}
Ok(pool)
}
let row: Option<(Vec<u8>, i64)> = self
.conn
.query_row(
"SELECT hash, mtime FROM files WHERE project = ?1 AND path = ?2",
params![self.project, path.to_string_lossy()],
|r| Ok((r.get(0)?, r.get(1)?)),
)
.optional()?;
pub fn from_pool(project: &str, pool: &Pool<SqliteConnectionManager>) -> NyxResult<Self> {
let conn = pool.get()?;
Ok(Self {
conn,
project: project.to_owned(),
})
}
Ok(match row {
Some((stored_hash, stored_mtime)) => stored_hash != digest || stored_mtime != mtime,
None => true,
})
}
// helper so code below can treat PooledConnection like &Connection
fn c(&self) -> &Connection {
self.conn.deref()
}
/// Insert or update the `files` row and return its id.
pub fn upsert_file(&self, path: &Path) -> NyxResult<i64> {
let meta = fs::metadata(path)?;
let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
let scanned_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;
let digest = Self::digest_file(path)?;
/// Return true when the file *content* or *mtime* changed since the last scan.
pub fn should_scan(&self, path: &Path) -> NyxResult<bool> {
let meta = fs::metadata(path)?;
let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
let digest = Self::digest_file(path)?;
self.c().execute(
"INSERT INTO files (project, path, hash, mtime, scanned_at)
let row: Option<(Vec<u8>, i64)> = self
.conn
.query_row(
"SELECT hash, mtime FROM files WHERE project = ?1 AND path = ?2",
params![self.project, path.to_string_lossy()],
|r| Ok((r.get(0)?, r.get(1)?)),
)
.optional()?;
Ok(match row {
Some((stored_hash, stored_mtime)) => stored_hash != digest || stored_mtime != mtime,
None => true,
})
}
/// Insert or update the `files` row and return its id.
pub fn upsert_file(&self, path: &Path) -> NyxResult<i64> {
let meta = fs::metadata(path)?;
let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
let scanned_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;
let digest = Self::digest_file(path)?;
self.c().execute(
"INSERT INTO files (project, path, hash, mtime, scanned_at)
VALUES (?1, ?2, ?3, ?4, ?5)
ON CONFLICT(project,path) DO UPDATE
SET hash = excluded.hash,
mtime = excluded.mtime,
scanned_at = excluded.scanned_at",
params![self.project, path.to_string_lossy(), digest, mtime, scanned_at],
)?;
params![
self.project,
path.to_string_lossy(),
digest,
mtime,
scanned_at
],
)?;
let id: i64 = self.c().query_row(
"SELECT id FROM files WHERE project = ?1 AND path = ?2",
params![self.project, path.to_string_lossy()],
|r| r.get(0),
)?;
Ok(id)
}
/// Replace all issues for `file_id` with the supplied set.
pub fn replace_issues<'a>(&mut self, file_id: i64, issues: impl IntoIterator<Item = IssueRow<'a>>)
-> NyxResult<()> {
let tx = self.conn.transaction()?;
tx.execute("DELETE FROM issues WHERE file_id = ?", params![file_id])?;
{
let mut stmt = tx.prepare(
"INSERT INTO issues (file_id, rule_id, severity, line, col)
VALUES (?1, ?2, ?3, ?4, ?5)",
)?;
for iss in issues {
stmt.execute(params![file_id, iss.rule_id, iss.severity, iss.line, iss.col])?;
let id: i64 = self.c().query_row(
"SELECT id FROM files WHERE project = ?1 AND path = ?2",
params![self.project, path.to_string_lossy()],
|r| r.get(0),
)?;
Ok(id)
}
}
tx.commit()?;
Ok(())
}
/// Gets the issues for a specific file so we don't have to rescan
pub fn get_issues_from_file(
&self,
path: &Path,
) -> NyxResult<Vec<Diag>> {
let file_id: i64 = self.c().query_row(
"SELECT id FROM files WHERE project = ?1 AND path = ?2",
params![self.project, path.to_string_lossy()],
|r| r.get(0),
)?;
let mut stmt = self.c().prepare(
"SELECT rule_id, severity, line, col
/// Replace all issues for `file_id` with the supplied set.
pub fn replace_issues<'a>(
&mut self,
file_id: i64,
issues: impl IntoIterator<Item = IssueRow<'a>>,
) -> NyxResult<()> {
let tx = self.conn.transaction()?;
tx.execute("DELETE FROM issues WHERE file_id = ?", params![file_id])?;
{
let mut stmt = tx.prepare(
"INSERT INTO issues (file_id, rule_id, severity, line, col)
VALUES (?1, ?2, ?3, ?4, ?5)",
)?;
for iss in issues {
stmt.execute(params![
file_id,
iss.rule_id,
iss.severity,
iss.line,
iss.col
])?;
}
}
tx.commit()?;
Ok(())
}
/// Gets the issues for a specific file so we don't have to rescan
pub fn get_issues_from_file(&self, path: &Path) -> NyxResult<Vec<Diag>> {
let file_id: i64 = self.c().query_row(
"SELECT id FROM files WHERE project = ?1 AND path = ?2",
params![self.project, path.to_string_lossy()],
|r| r.get(0),
)?;
let mut stmt = self.c().prepare(
"SELECT rule_id, severity, line, col
FROM issues
WHERE file_id = ?1",
)?;
)?;
let issue_iter = stmt.query_map([file_id], |row| {
let sev_str: String = row.get(1)?;
Ok(Diag {
path: path.to_string_lossy().to_string(),
id: row.get::<_, String>(0)?, // rule_id
line: row.get::<_, i64>(2)? as usize,
col: row.get::<_, i64>(3)? as usize,
severity: Severity::from_str(&sev_str).unwrap(),
})
})?;
let issue_iter = stmt.query_map([file_id], |row| {
let sev_str: String = row.get(1)?;
Ok(Diag {
path: path.to_string_lossy().to_string(),
id: row.get::<_, String>(0)?, // rule_id
line: row.get::<_, i64>(2)? as usize,
col: row.get::<_, i64>(3)? as usize,
severity: Severity::from_str(&sev_str).unwrap(),
})
})?;
Ok(issue_iter.filter_map(Result::ok).collect())
}
/// gets files from the database
pub fn get_files(&self, project: &str) -> NyxResult<Vec<PathBuf>> {
let mut stmt = self.c().prepare(
"SELECT path
Ok(issue_iter.filter_map(Result::ok).collect())
}
/// gets files from the database
pub fn get_files(&self, project: &str) -> NyxResult<Vec<PathBuf>> {
let mut stmt = self.c().prepare(
"SELECT path
FROM files
WHERE project = ?1",
)?;
)?;
let file_iter = stmt.query_map([project], |row| row.get::<_, String>(0))?;
Ok(file_iter.map(|p| p.map(PathBuf::from)).collect::<Result<_, _>>()?)
}
let file_iter = stmt.query_map([project], |row| row.get::<_, String>(0))?;
// -------------------------------------------------------------------------
// Maintenance utilities
// -------------------------------------------------------------------------
pub fn clear(&self) -> NyxResult<()> {
self.c().execute_batch(
r#"
Ok(file_iter
.map(|p| p.map(PathBuf::from))
.collect::<Result<_, _>>()?)
}
// -------------------------------------------------------------------------
// Maintenance utilities
// -------------------------------------------------------------------------
pub fn clear(&self) -> NyxResult<()> {
self.c().execute_batch(
r#"
PRAGMA foreign_keys = OFF;
DROP TABLE IF EXISTS issues;
@ -205,25 +218,25 @@ pub mod index {
PRAGMA foreign_keys = ON;
VACUUM;
"#,
)?;
)?;
self.c().execute_batch(SCHEMA)?;
Ok(())
}
pub fn vacuum(&self) -> NyxResult<()> {
self.c().execute("VACUUM;", [])?;
Ok(())
}
self.c().execute_batch(SCHEMA)?;
Ok(())
}
// -------------------------------------------------------------------------
// Helpers
// -------------------------------------------------------------------------
fn digest_file(path: &Path) -> NyxResult<Vec<u8>> {
let mut hasher = blake3::Hasher::new();
let mut file = fs::File::open(path)?;
std::io::copy(&mut file, &mut hasher)?;
Ok(hasher.finalize().as_bytes().to_vec())
pub fn vacuum(&self) -> NyxResult<()> {
self.c().execute("VACUUM;", [])?;
Ok(())
}
// -------------------------------------------------------------------------
// Helpers
// -------------------------------------------------------------------------
fn digest_file(path: &Path) -> NyxResult<Vec<u8>> {
let mut hasher = blake3::Hasher::new();
let mut file = fs::File::open(path)?;
std::io::copy(&mut file, &mut hasher)?;
Ok(hasher.finalize().as_bytes().to_vec())
}
}
}
}

View file

@ -1,60 +1,60 @@
use serde::de::StdError;
use std::fmt;
use std::sync::PoisonError;
use serde::de::StdError;
use thiserror::Error;
/// Result alias whose error type defaults to [`NyxError`]; a different error
/// type can still be supplied through the second type parameter.
pub type NyxResult<T, E = NyxError> = Result<T, E>;
#[derive(Debug, Error)]
pub enum NyxError {
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
#[error("SQLite error: {0}")]
Sql(#[from] rusqlite::Error),
#[error("SQLite error: {0}")]
Sql(#[from] rusqlite::Error),
#[error("tree-sitter error: {0}")]
TreeSitter(#[from] tree_sitter::LanguageError),
#[error("tree-sitter error: {0}")]
TreeSitter(#[from] tree_sitter::LanguageError),
#[error("connection-pool error: {0}")]
Pool(#[from] r2d2::Error),
#[error("connection-pool error: {0}")]
Pool(#[from] r2d2::Error),
#[error("time error: {0}")]
Time(#[from] std::time::SystemTimeError),
#[error("time error: {0}")]
Time(#[from] std::time::SystemTimeError),
#[error("poisoned lock: {0}")]
Poison(String),
#[error(transparent)]
Other(#[from] Box<dyn StdError + Send + Sync + 'static>),
#[error("{0}")]
Msg(String),
#[error("poisoned lock: {0}")]
Poison(String),
#[error(transparent)]
Other(#[from] Box<dyn StdError + Send + Sync + 'static>),
#[error("{0}")]
Msg(String),
}
impl<T> From<PoisonError<T>> for NyxError
where
T: fmt::Debug,
T: fmt::Debug,
{
fn from(err: PoisonError<T>) -> Self {
NyxError::Poison(err.to_string())
}
fn from(err: PoisonError<T>) -> Self {
NyxError::Poison(err.to_string())
}
}
impl From<&str> for NyxError {
fn from(s: &str) -> Self {
NyxError::Msg(s.to_owned())
}
fn from(s: &str) -> Self {
NyxError::Msg(s.to_owned())
}
}
impl From<String> for NyxError {
fn from(s: String) -> Self {
NyxError::Msg(s)
}
fn from(s: String) -> Self {
NyxError::Msg(s)
}
}
impl From<Box<dyn std::error::Error>> for NyxError {
fn from(err: Box<dyn std::error::Error>) -> Self {
NyxError::Msg(err.to_string())
}
fn from(err: Box<dyn std::error::Error>) -> Self {
NyxError::Msg(err.to_string())
}
}

View file

@ -1,43 +1,43 @@
mod ast;
mod cli;
mod commands;
mod database;
mod errors;
mod patterns;
mod utils;
mod walk;
mod database;
mod patterns;
mod errors;
mod ast;
use crate::errors::NyxResult;
use crate::utils::Config;
use cli::Cli;
use clap::Parser;
use cli::Cli;
use console::style;
use directories::ProjectDirs;
use std::fs;
use std::time::Instant;
use console::style;
use tracing_subscriber::{fmt, EnvFilter, Registry};
use tracing_subscriber::prelude::*;
use tracing_subscriber::fmt::time;
use crate::errors::NyxResult;
use tracing_subscriber::prelude::*;
use tracing_subscriber::{EnvFilter, Registry, fmt};
// use tracing_appender::rolling::{RollingFileAppender, Rotation};
// use tracing_appender::non_blocking;
/// Builds and initialises the tracing subscriber.
///
/// The subscriber is a `Registry` combined with an `EnvFilter` read from the
/// environment and a pretty-printed `fmt` layer that records thread ids and
/// RFC 3339 UTC timestamps. The file-appender variant is kept commented out.
fn init_tracing() {
    // let file_appender = RollingFileAppender::new(Rotation::HOURLY, "logs", "nyx-scanner.log");
    // let (file_writer, guard) = non_blocking(file_appender);

    let fmt_layer = fmt::layer()
        .pretty()
        .with_thread_ids(true)
        .with_timer(time::UtcTime::rfc_3339());

    // let file_layer = fmt::layer()
    //     .with_writer(file_writer)
    //     .without_time()
    //     .json();

    // Each layer is attached exactly once: `fmt_layer` is moved into the registry here.
    Registry::default()
        .with(EnvFilter::from_default_env())
        .with(fmt_layer)
        .init();
}
@ -68,4 +68,3 @@ fn main() -> NyxResult<()> {
);
Ok(())
}

View file

@ -1,40 +1,40 @@
use crate::patterns::{Pattern, Severity};
/// Tree-sitter query patterns for C sources.
///
/// Each entry matches a call to a specific libc function by exact identifier
/// (`#eq?`) and captures the whole call as `@vuln`. Every id appears once.
pub const PATTERNS: &[Pattern] = &[
    Pattern {
        id: "strcpy_call",
        description: "strcpy() usage",
        query: "(call_expression function: (identifier) @id (#eq? @id \"strcpy\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "strcat_call",
        description: "strcat() usage",
        query: "(call_expression function: (identifier) @id (#eq? @id \"strcat\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "sprintf_call",
        description: "sprintf() (no length limit)",
        query: "(call_expression function: (identifier) @id (#eq? @id \"sprintf\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "gets_call",
        description: "gets() usage",
        query: "(call_expression function: (identifier) @id (#eq? @id \"gets\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "scanf_with_percent_s",
        description: "scanf(\"%s\") without length specifier",
        // Only fires when the format-string literal itself contains `%s`.
        query: "(call_expression function: (identifier) @id (#eq? @id \"scanf\") arguments: (argument_list (string_literal) @fmt (#match? @fmt \".*%s.*\"))) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "system_call",
        description: "system() shell execution",
        query: "(call_expression function: (identifier) @id (#eq? @id \"system\")) @vuln",
        severity: Severity::Medium,
    },
];

View file

@ -1,40 +1,40 @@
use crate::patterns::{Pattern, Severity};
/// Tree-sitter query patterns for C++ sources.
///
/// Covers the same unbounded string / shell calls as the C set (minus the
/// `scanf` check) plus any `reinterpret_cast` expression. Every id appears once.
pub const PATTERNS: &[Pattern] = &[
    Pattern {
        id: "strcpy_call",
        description: "strcpy() usage",
        query: "(call_expression function: (identifier) @id (#eq? @id \"strcpy\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "strcat_call",
        description: "strcat() usage",
        query: "(call_expression function: (identifier) @id (#eq? @id \"strcat\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "sprintf_call",
        description: "sprintf() (no length limit)",
        query: "(call_expression function: (identifier) @id (#eq? @id \"sprintf\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "gets_call",
        description: "gets() usage",
        query: "(call_expression function: (identifier) @id (#eq? @id \"gets\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "system_call",
        description: "system() shell execution",
        query: "(call_expression function: (identifier) @id (#eq? @id \"system\")) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "reinterpret_cast",
        description: "reinterpret_cast usage",
        // Matches the node kind directly; no predicate is needed.
        query: "(reinterpret_cast_expression) @vuln",
        severity: Severity::Medium,
    },
];

View file

@ -1,34 +1,34 @@
use crate::patterns::{Pattern, Severity};
/// Tree-sitter query patterns for Go sources. Every id appears once.
pub const PATTERNS: &[Pattern] = &[
    Pattern {
        id: "exec_command",
        description: "os/exec Command construction",
        // Matches any selector call whose field is named `Command`; the package is not checked.
        query: "(call_expression function: (selector_expression field: (field_identifier) @f (#eq? @f \"Command\"))) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "http_insecure_tls",
        description: "&http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}",
        query: "(composite_literal type: (selector_expression field: (field_identifier) @t (#eq? @t \"Transport\")) body: (literal_value (keyed_element key: (identifier) @k (#eq? @k \"TLSClientConfig\") value: (composite_literal body: (literal_value (keyed_element key: (identifier) @ik (#eq? @ik \"InsecureSkipVerify\") value: (true)))))) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "unsafe_pointer",
        description: "Use of unsafe.Pointer",
        query: "(qualified_type type: (selector_expression field: (field_identifier) @f (#eq? @f \"Pointer\"))) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "md5_sha1",
        description: "crypto/md5 or crypto/sha1 usage",
        // `#match?` is an unanchored regex: any package identifier containing `md5` or `sha1` matches.
        query: "(call_expression function: (selector_expression object: (identifier) @pkg (#match? @pkg \"md5|sha1\"))) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "hardcoded_secret",
        description: "Hard-coded string that looks like an API key/token",
        query: "(interpreted_string_literal) @s (#match? @s \"(?i)(api|secret|token|password)[=:]?[ \\t]*[A-Za-z0-9_\\-]{8,}\")",
        severity: Severity::Low,
    },
];

View file

@ -1,40 +1,40 @@
use crate::patterns::{Pattern, Severity};
/// Tree-sitter query patterns for Java sources. Every id appears once.
pub const PATTERNS: &[Pattern] = &[
    Pattern {
        id: "runtime_exec",
        description: "Runtime.getRuntime().exec(...) arbitrary-command execution",
        query: "(method_invocation object: (method_invocation name: (identifier) @n (#eq? @n \"getRuntime\")) name: (identifier) @id (#eq? @id \"exec\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "class_for_name",
        description: "Dynamic reflection via Class.forName(...)",
        query: "(method_invocation object: (identifier) @c (#eq? @c \"Class\") name: (identifier) @id (#eq? @id \"forName\")) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "object_deserialization",
        description: "java.io.ObjectInputStream#readObject() deserialization",
        // The receiver must be an identifier literally named `ObjectInputStream`.
        query: "(method_invocation object: (identifier) @o (#eq? @o \"ObjectInputStream\") name: (identifier) @id (#eq? @id \"readObject\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "insecure_random",
        description: "java.util.Random used where SecureRandom is expected",
        query: "(object_creation_expression type: (identifier) @t (#eq? @t \"Random\")) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "thread_stop",
        description: "Deprecated Thread.stop() invocation",
        query: "(method_invocation name: (identifier) @id (#eq? @id \"stop\") object: (identifier) @obj (#eq? @obj \"Thread\")) @vuln",
        severity: Severity::Low,
    },
    Pattern {
        id: "sql_concat",
        description: "SQL built with string concatenation",
        // Flags execute / executeQuery / executeUpdate calls whose argument is a binary expression.
        query: "(method_invocation name: (identifier) @id (#match? @id \"execute(Query|Update)?\") arguments: (argument_list (binary_expression) @concat)) @vuln",
        severity: Severity::Medium,
    },
];

View file

@ -1,94 +1,94 @@
use crate::patterns::{Pattern, Severity};
pub const PATTERNS: &[Pattern] = &[
Pattern {
id: "eval_call",
description: "Use of eval()",
query: "(call_expression function: (identifier) @id (#eq? @id \"eval\")) @vuln",
severity: Severity::High,
},
Pattern {
id: "new_function",
description: "new Function() constructor",
query: "(new_expression constructor: (identifier) @id (#eq? @id \"Function\")) @vuln",
severity: Severity::High,
},
Pattern {
id: "document_write",
description: "document.write() call",
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"write\"))) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "inner_html_assignment",
description: "Assignment to element.innerHTML",
query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"innerHTML\"))) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "settimeout_string",
description: "setTimeout / setInterval with a string argument",
query: "(call_expression function: (identifier) @id (#match? @id \"setTimeout|setInterval\") arguments: (arguments (string) @code . _)) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "json_parse",
description: "JSON.parse on dynamic string",
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"JSON\") property: (property_identifier) @prop (#eq? @prop \"parse\"))) @vuln",
severity: Severity::Low,
},
Pattern {
id: "outer_html_assignment",
description: "Assignment to element.outerHTML",
query: "(assignment_expression
Pattern {
id: "eval_call",
description: "Use of eval()",
query: "(call_expression function: (identifier) @id (#eq? @id \"eval\")) @vuln",
severity: Severity::High,
},
Pattern {
id: "new_function",
description: "new Function() constructor",
query: "(new_expression constructor: (identifier) @id (#eq? @id \"Function\")) @vuln",
severity: Severity::High,
},
Pattern {
id: "document_write",
description: "document.write() call",
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"write\"))) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "inner_html_assignment",
description: "Assignment to element.innerHTML",
query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"innerHTML\"))) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "settimeout_string",
description: "setTimeout / setInterval with a string argument",
query: "(call_expression function: (identifier) @id (#match? @id \"setTimeout|setInterval\") arguments: (arguments (string) @code . _)) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "json_parse",
description: "JSON.parse on dynamic string",
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"JSON\") property: (property_identifier) @prop (#eq? @prop \"parse\"))) @vuln",
severity: Severity::Low,
},
Pattern {
id: "outer_html_assignment",
description: "Assignment to element.outerHTML",
query: "(assignment_expression
left: (member_expression
property: (property_identifier) @prop
(#eq? @prop \"outerHTML\"))) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "insert_adjacent_html",
description: "insertAdjacentHTML() call",
query: "(call_expression
severity: Severity::Medium,
},
Pattern {
id: "insert_adjacent_html",
description: "insertAdjacentHTML() call",
query: "(call_expression
function: (member_expression
property: (property_identifier) @prop
(#eq? @prop \"insertAdjacentHTML\"))) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "location_href_assignment",
description: "Assignment to window.location / location.href",
query: "(assignment_expression
severity: Severity::Medium,
},
Pattern {
id: "location_href_assignment",
description: "Assignment to window.location / location.href",
query: "(assignment_expression
left: (member_expression
object: (identifier)? @obj
property: (property_identifier) @prop
(#match? @prop \"location|href\"))) @vuln",
severity: Severity::High,
},
Pattern {
id: "cookie_assignment",
description: "Write to document.cookie",
query: "(assignment_expression
severity: Severity::High,
},
Pattern {
id: "cookie_assignment",
description: "Write to document.cookie",
query: "(assignment_expression
left: (member_expression
object: (identifier) @obj
(#eq? @obj \"document\")
property: (property_identifier) @prop
(#eq? @prop \"cookie\"))) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "proto_pollution",
description: "Assignment to __proto__ (prototype pollution)",
query: "(assignment_expression
severity: Severity::Medium,
},
Pattern {
id: "proto_pollution",
description: "Assignment to __proto__ (prototype pollution)",
query: "(assignment_expression
left: (member_expression
property: (property_identifier) @prop
(#eq? @prop \"__proto__\"))) @vuln",
severity: Severity::High,
},
Pattern {
id: "weak_hash_md5",
description: "crypto.createHash(\"md5\")",
query: "(call_expression
severity: Severity::High,
},
Pattern {
id: "weak_hash_md5",
description: "crypto.createHash(\"md5\")",
query: "(call_expression
function: (member_expression
object: (identifier) @obj
(#eq? @obj \"crypto\")
@ -97,26 +97,26 @@ pub const PATTERNS: &[Pattern] = &[
arguments: (arguments
(string) @alg
(#eq? @alg \"md5\"))) @vuln",
severity: Severity::Low,
},
Pattern {
id: "regexp_constructor_string",
description: "new RegExp() with a dynamic string",
query: "(new_expression
severity: Severity::Low,
},
Pattern {
id: "regexp_constructor_string",
description: "new RegExp() with a dynamic string",
query: "(new_expression
constructor: (identifier) @id
(#eq? @id \"RegExp\")
arguments: (arguments (string) @pattern)) @vuln",
severity: Severity::Low,
},
Pattern {
id: "dangerous_extend_builtin",
description: "Extending Object.prototype (may lead to collisions/pollution)",
query: "(assignment_expression
severity: Severity::Low,
},
Pattern {
id: "dangerous_extend_builtin",
description: "Extending Object.prototype (may lead to collisions/pollution)",
query: "(assignment_expression
left: (member_expression
object: (identifier) @obj
(#eq? @obj \"Object\")
property: (property_identifier) @prop
(#eq? @prop \"prototype\"))) @vuln",
severity: Severity::Medium,
},
severity: Severity::Medium,
},
];

View file

@ -1,116 +1,115 @@
pub mod rust;
pub mod typescript;
pub mod javascript;
pub mod cpp;
pub mod c;
mod java;
pub mod cpp;
mod go;
mod java;
pub mod javascript;
mod php;
mod python;
mod ruby;
pub mod rust;
pub mod typescript;
use console::style;
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fmt;
use std::str::FromStr;
use console::style;
use serde::{Deserialize, Serialize};
use once_cell::sync::Lazy;
/// Severity bucket attached to every [`Pattern`].
///
/// The derived `Ord` follows declaration order, so `High < Medium < Low`;
/// sorting ascending therefore lists the most severe findings first.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
pub enum Severity {
    High,
    Medium,
    Low,
}
impl fmt::Display for Severity {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s = match *self {
Severity::High => style("HIGH").red().bold().to_string(),
Severity::Medium => style("MEDIUM").yellow().bold().to_string(),
Severity::Low => style("LOW").cyan().bold().to_string(),
};
f.write_str(&s)
}
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s = match *self {
Severity::High => style("HIGH").red().bold().to_string(),
Severity::Medium => style("MEDIUM").yellow().bold().to_string(),
Severity::Low => style("LOW").cyan().bold().to_string(),
};
f.write_str(&s)
}
}
impl Severity {
/// Textual value stored in SQLite.
pub fn as_db_str(self) -> &'static str {
match self {
Severity::High => "HIGH",
Severity::Medium => "MEDIUM",
Severity::Low => "LOW",
/// Textual value stored in SQLite.
pub fn as_db_str(self) -> &'static str {
match self {
Severity::High => "HIGH",
Severity::Medium => "MEDIUM",
Severity::Low => "LOW",
}
}
}
}
impl FromStr for Severity { // TODO: FIX
type Err = ();
impl FromStr for Severity {
// TODO: FIX
type Err = ();
fn from_str(input: &str) -> Result<Self, Self::Err> {
match input.to_lowercase().as_str() {
"medium" => Ok(Severity::Medium),
"high" => Ok(Severity::High),
_ => Ok(Severity::Low),
fn from_str(input: &str) -> Result<Self, Self::Err> {
match input.to_lowercase().as_str() {
"medium" => Ok(Severity::Medium),
"high" => Ok(Severity::High),
_ => Ok(Severity::Low),
}
}
}
}
/// One AST pattern with a tree-sitter query and meta-data.
#[derive(Debug, Clone, Serialize)]
pub struct Pattern {
/// Unique identifier (snake-case preferred).
pub id: &'static str,
/// Human-readable explanation.
pub description: &'static str,
/// tree-sitter query string.
pub query: &'static str,
/// Rough severity bucket.
pub severity: Severity,
/// Unique identifier (snake-case preferred).
pub id: &'static str,
/// Human-readable explanation.
pub description: &'static str,
/// tree-sitter query string.
pub query: &'static str,
/// Rough severity bucket.
pub severity: Severity,
}
/// Global, lazily-initialised registry: lang-name → pattern slice
static REGISTRY: Lazy<HashMap<&'static str, &'static [Pattern]>> = Lazy::new(|| {
let mut m = HashMap::new();
let mut m = HashMap::new();
// ---- Rust ----
m.insert("rust", rust::PATTERNS);
// ---- Rust ----
m.insert("rust", rust::PATTERNS);
// ---- TypeScript ----
m.insert("typescript", typescript::PATTERNS);
m.insert("ts", typescript::PATTERNS);
m.insert("tsx", typescript::PATTERNS);
// ---- TypeScript ----
m.insert("typescript", typescript::PATTERNS);
m.insert("ts", typescript::PATTERNS);
m.insert("tsx", typescript::PATTERNS);
// ---- JavaScript ----
m.insert("javascript", javascript::PATTERNS);
m.insert("js", javascript::PATTERNS);
// ---- JavaScript ----
m.insert("javascript", javascript::PATTERNS);
m.insert("js", javascript::PATTERNS);
// ---- C & C++ ----
m.insert("c", c::PATTERNS);
m.insert("cpp", cpp::PATTERNS);
m.insert("c++", cpp::PATTERNS);
// ---- C & C++ ----
m.insert("c", c::PATTERNS);
m.insert("cpp", cpp::PATTERNS);
m.insert("c++", cpp::PATTERNS);
// ---- Other languages in the folder ----
m.insert("java", java::PATTERNS);
m.insert("go", go::PATTERNS);
m.insert("php", php::PATTERNS);
m.insert("python", python::PATTERNS);
m.insert("py", python::PATTERNS);
m.insert("ruby", ruby::PATTERNS);
m.insert("rb", ruby::PATTERNS);
// ---- Other languages in the folder ----
m.insert("java", java::PATTERNS);
m.insert("go", go::PATTERNS);
m.insert("php", php::PATTERNS);
m.insert("python", python::PATTERNS);
m.insert("py", python::PATTERNS);
m.insert("ruby", ruby::PATTERNS);
m.insert("rb", ruby::PATTERNS);
tracing::debug!("AST-pattern registry initialised ({} languages)", m.len());
m
tracing::debug!("AST-pattern registry initialised ({} languages)", m.len());
m
});
/// Return all patterns for the requested language (case-insensitive).
///
/// Unknown languages yield an **empty** `Vec`.
pub fn load(lang: &str) -> Vec<Pattern> {
let key = lang.to_ascii_lowercase();
REGISTRY
.get(key.as_str())
.copied()
.unwrap_or(&[])
.to_vec()
}
let key = lang.to_ascii_lowercase();
REGISTRY.get(key.as_str()).copied().unwrap_or(&[]).to_vec()
}

View file

@ -1,40 +1,40 @@
use crate::patterns::{Pattern, Severity};
/// Tree-sitter query patterns for PHP sources. Every id appears once.
pub const PATTERNS: &[Pattern] = &[
    Pattern {
        id: "eval_call",
        description: "eval($code) execution",
        query: "(function_call_expression function: (name) @n (#eq? @n \"eval\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "preg_replace_e",
        description: "preg_replace with deprecated /e modifier",
        // The regex looks for a final `/` delimiter followed by modifier letters that
        // include `e`, then at most one trailing character, then end of text.
        // `.?` tolerates a closing quote if the node text includes one.
        // The previous form `/.*e.*$/` required a literal `/` *after* the
        // end-of-text anchor and therefore could never match.
        // NOTE(review): only `/`-delimited patterns are recognised — confirm that is enough.
        query: "(function_call_expression function: (name) @n (#eq? @n \"preg_replace\") arguments: (arguments (string) @pat (#match? @pat \"/[A-Za-z]*e[A-Za-z]*.?$\"))) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "create_function",
        description: "create_function(...) anonymous eval-like",
        query: "(function_call_expression function: (name) @n (#eq? @n \"create_function\")) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "unserialize_call",
        description: "unserialize(...) on user input",
        query: "(function_call_expression function: (name) @n (#eq? @n \"unserialize\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "mysql_query_concat",
        description: "mysql_query with concatenated SQL",
        query: "(function_call_expression function: (name) @n (#eq? @n \"mysql_query\") arguments: (arguments (binary_expression) @concat)) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "system_call",
        description: "system()/shell_exec()/exec() command execution",
        // NOTE(review): the regex is unanchored, so any function name merely containing
        // one of these words (e.g. `curl_exec`) also matches — confirm this is intended.
        query: "(function_call_expression function: (name) @n (#match? @n \"system|shell_exec|exec|passthru\")) @vuln",
        severity: Severity::Medium,
    },
];

View file

@ -1,22 +1,22 @@
use crate::patterns::{Pattern, Severity};
/// Tree-sitter query patterns for Python sources. Every id appears once.
pub const PATTERNS: &[Pattern] = &[
    Pattern {
        id: "eval_call",
        description: "eval() on dynamic input",
        query: "(call function: (identifier) @id (#eq? @id \"eval\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "exec_call",
        description: "exec(...) execution of dynamic code",
        query: "(call function: (identifier) @id (#eq? @id \"exec\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "subprocess_shell_true",
        description: "subprocess.* with shell=True",
        // NOTE(review): the query expects a `shell` keyword argument anchored as the first
        // argument, followed by a sibling `(true)` node — verify this shape against the
        // Python grammar, where the value is normally nested inside the keyword argument.
        query: "(call function: (attribute object: (identifier) @pkg (#eq? @pkg \"subprocess\")) arguments: (argument_list . (keyword_argument name: (identifier) @k (#eq? @k \"shell\")) (true) @val)) @vuln",
        severity: Severity::Medium,
    },
];

View file

@ -1,45 +1,44 @@
use crate::patterns::{Pattern, Severity};
pub const PATTERNS: &[Pattern] = &[
// ---------- Runtime code-execution primitives ----------
Pattern {
id: "eval_call",
description: "Kernel#eval usage",
query: r#"
// ---------- Runtime code-execution primitives ----------
Pattern {
id: "eval_call",
description: "Kernel#eval usage",
query: r#"
(call
(identifier) @id
(#eq? @id "eval")
) @vuln
"#,
severity: Severity::High,
},
Pattern {
id: "instance_eval_call",
description: "Object#instance_eval usage",
query: r#"
severity: Severity::High,
},
Pattern {
id: "instance_eval_call",
description: "Object#instance_eval usage",
query: r#"
(call
(identifier) @id
(#eq? @id "instance_eval")
) @vuln
"#,
severity: Severity::High,
},
Pattern {
id: "class_eval_call",
description: "Module#class_eval / module_eval usage",
query: r#"
severity: Severity::High,
},
Pattern {
id: "class_eval_call",
description: "Module#class_eval / module_eval usage",
query: r#"
(call
(identifier) @id
(#match? @id "^(class_eval|module_eval)$")
) @vuln
"#,
severity: Severity::High,
},
// ---------- Shell execution ----------
Pattern {
id: "system_exec_interp",
description: "system/exec with string interpolation",
query: r#"
severity: Severity::High,
},
// ---------- Shell execution ----------
Pattern {
id: "system_exec_interp",
description: "system/exec with string interpolation",
query: r#"
(call
method: (identifier) @m
(#match? @m "^(system|exec)$")
@ -50,21 +49,20 @@ pub const PATTERNS: &[Pattern] = &[
)
)
"#,
severity: Severity::High,
},
Pattern {
id: "backtick_command",
description: "Back-tick shell execution",
// `uname -a`
query: r#"(shell_command) @vuln"#,
severity: Severity::High,
},
// ---------- Dangerous deserialisation ----------
Pattern {
id: "yaml_load",
description: "YAML.load / Psych.load (arbitrary object deserialisation)",
query: r#"
severity: Severity::High,
},
Pattern {
id: "backtick_command",
description: "Back-tick shell execution",
// `uname -a`
query: r#"(shell_command) @vuln"#,
severity: Severity::High,
},
// ---------- Dangerous deserialisation ----------
Pattern {
id: "yaml_load",
description: "YAML.load / Psych.load (arbitrary object deserialisation)",
query: r#"
(call
receiver: (constant) @recv
(#match? @recv "^(YAML|Psych)$")
@ -72,12 +70,12 @@ pub const PATTERNS: &[Pattern] = &[
(#eq? @m "load")
) @vuln
"#,
severity: Severity::High,
},
Pattern {
id: "marshal_load",
description: "Marshal.load usage",
query: r#"
severity: Severity::High,
},
Pattern {
id: "marshal_load",
description: "Marshal.load usage",
query: r#"
(call
receiver: (constant) @recv
(#eq? @recv "Marshal")
@ -85,14 +83,13 @@ pub const PATTERNS: &[Pattern] = &[
(#eq? @m "load")
) @vuln
"#,
severity: Severity::High,
},
// ---------- Reflection / meta-programming ----------
Pattern {
id: "send_dynamic",
description: "send() with dynamic first argument (not a literal symbol)",
query: r#"
severity: Severity::High,
},
// ---------- Reflection / meta-programming ----------
Pattern {
id: "send_dynamic",
description: "send() with dynamic first argument (not a literal symbol)",
query: r#"
(call
method: (identifier) @m
(#eq? @m "send")
@ -104,25 +101,24 @@ pub const PATTERNS: &[Pattern] = &[
)
)
"#,
severity: Severity::Medium,
},
Pattern {
id: "constantize_call",
description: "ActiveSupport constantize / safe_constantize on tainted data",
query: r#"
severity: Severity::Medium,
},
Pattern {
id: "constantize_call",
description: "ActiveSupport constantize / safe_constantize on tainted data",
query: r#"
(call
method: (identifier) @m
(#match? @m "^(constantize|safe_constantize)$")
) @vuln
"#,
severity: Severity::Medium,
},
// ---------- Insecure resource access ----------
Pattern {
id: "open_uri_http",
description: "Kernel#open with HTTP(S) URL (open-uri auto-follow)",
query: r#"
severity: Severity::Medium,
},
// ---------- Insecure resource access ----------
Pattern {
id: "open_uri_http",
description: "Kernel#open with HTTP(S) URL (open-uri auto-follow)",
query: r#"
(call
method: (identifier) @m
(#eq? @m "open")
@ -132,6 +128,6 @@ pub const PATTERNS: &[Pattern] = &[
)
) @vuln
"#,
severity: Severity::Medium,
},
severity: Severity::Medium,
},
];

View file

@ -1,118 +1,118 @@
use crate::patterns::{Pattern, Severity};
pub const PATTERNS: &[Pattern] = &[
Pattern {
id: "unsafe_block",
description: "Use of an `unsafe` block",
query: "(unsafe_block) @vuln",
severity: Severity::High,
},
Pattern {
id: "unsafe_fn",
description: "`unsafe fn` declaration",
query: "(function_item
Pattern {
id: "unsafe_block",
description: "Use of an `unsafe` block",
query: "(unsafe_block) @vuln",
severity: Severity::High,
},
Pattern {
id: "unsafe_fn",
description: "`unsafe fn` declaration",
query: "(function_item
(function_modifiers) @mods
(#match? @mods \"^unsafe\\b\")) @vuln",
severity: Severity::High,
},
Pattern {
id: "transmute_call",
description: "`std::mem::transmute` call",
query: "(call_expression
severity: Severity::High,
},
Pattern {
id: "transmute_call",
description: "`std::mem::transmute` call",
query: "(call_expression
function: (scoped_identifier
path: (identifier) @p (#eq? @p \"mem\")
name: (identifier) @f (#eq? @f \"transmute\")))
@vuln",
severity: Severity::High,
},
Pattern {
id: "copy_nonoverlapping",
description: "Raw pointer `copy_nonoverlapping`",
query: "(call_expression
severity: Severity::High,
},
Pattern {
id: "copy_nonoverlapping",
description: "Raw pointer `copy_nonoverlapping`",
query: "(call_expression
function: (scoped_identifier
path: (identifier) @p (#eq? @p \"ptr\")
name: (identifier) @f (#eq? @f \"copy_nonoverlapping\")))
@vuln",
severity: Severity::High,
},
Pattern {
id: "get_unchecked",
description: "`get_unchecked` / `get_unchecked_mut` slice access",
query: "(call_expression
severity: Severity::High,
},
Pattern {
id: "get_unchecked",
description: "`get_unchecked` / `get_unchecked_mut` slice access",
query: "(call_expression
function: (field_expression
field: (field_identifier) @m
(#match? @m \"get_unchecked(_mut)?\"))) @vuln",
severity: Severity::High,
},
Pattern {
id: "unwrap_call",
description: "`.unwrap()` call (may panic)",
query: "(call_expression
severity: Severity::High,
},
Pattern {
id: "unwrap_call",
description: "`.unwrap()` call (may panic)",
query: "(call_expression
function: (field_expression
field: (field_identifier) @name
(#eq? @name \"unwrap\"))) ; exact match
@vuln",
severity: Severity::Medium,
},
Pattern {
id: "expect_call",
description: "`.expect()` call (may panic)",
query: "(call_expression
severity: Severity::Medium,
},
Pattern {
id: "expect_call",
description: "`.expect()` call (may panic)",
query: "(call_expression
function: (field_expression
field: (field_identifier) @name
(#eq? @name \"expect\"))) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "panic_macro",
description: "`panic!` macro invocation",
query: "(macro_invocation (identifier) @id (#eq? @id \"panic\")) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "todo_or_unimplemented",
description: "`todo!()` / `unimplemented!()` placeholder",
query: "(macro_invocation
severity: Severity::Medium,
},
Pattern {
id: "panic_macro",
description: "`panic!` macro invocation",
query: "(macro_invocation (identifier) @id (#eq? @id \"panic\")) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "todo_or_unimplemented",
description: "`todo!()` / `unimplemented!()` placeholder",
query: "(macro_invocation
(identifier) @id
(#match? @id \"todo|unimplemented\")) @vuln",
severity: Severity::Low,
},
Pattern {
id: "narrow_cast_with_as",
description: "`as` cast to an 8-/16-bit integer (possible truncation)",
query: "(type_cast_expression
severity: Severity::Low,
},
Pattern {
id: "narrow_cast_with_as",
description: "`as` cast to an 8-/16-bit integer (possible truncation)",
query: "(type_cast_expression
type: (primitive_type) @to
(#match? @to \"^u?i(8|16)$\")) @vuln",
severity: Severity::Low,
},
Pattern {
id: "mem_zeroed",
description: "`std::mem::zeroed()`",
query: "(call_expression function:(scoped_identifier path:(identifier)@p (#eq? @p \"mem\") name:(identifier)@n (#eq? @n \"zeroed\")))@vuln",
severity: Severity::High
},
Pattern {
id: "mem_forget",
description: "`std::mem::forget()`",
query: "(call_expression function:(scoped_identifier path:(identifier)@p (#eq? @p \"mem\") name:(identifier)@n (#eq? @n \"forget\")))@vuln",
severity: Severity::Medium
},
Pattern {
id: "ptr_read",
description: "`ptr::read_*` raw-ptr read",
query: "(call_expression function:(scoped_identifier path:(identifier)@p (#eq? @p \"ptr\") name:(identifier)@n (#match? @n \"read(_volatile)?\")))@vuln",
severity: Severity::High
},
Pattern {
id: "arc_unwrap",
description: "`Arc::unwrap_or_else_unchecked`",
query: "(call_expression function:(scoped_identifier name:(identifier)@n (#eq? @n \"unwrap_or_else_unchecked\")))@vuln",
severity: Severity::High
},
Pattern {
id: "dbg_macro",
description: "`dbg!()` left in code",
query: "(macro_invocation (identifier)@id (#eq? @id \"dbg\"))@vuln",
severity: Severity::Low
},
severity: Severity::Low,
},
Pattern {
id: "mem_zeroed",
description: "`std::mem::zeroed()`",
query: "(call_expression function:(scoped_identifier path:(identifier)@p (#eq? @p \"mem\") name:(identifier)@n (#eq? @n \"zeroed\")))@vuln",
severity: Severity::High,
},
Pattern {
id: "mem_forget",
description: "`std::mem::forget()`",
query: "(call_expression function:(scoped_identifier path:(identifier)@p (#eq? @p \"mem\") name:(identifier)@n (#eq? @n \"forget\")))@vuln",
severity: Severity::Medium,
},
Pattern {
id: "ptr_read",
description: "`ptr::read_*` raw-ptr read",
query: "(call_expression function:(scoped_identifier path:(identifier)@p (#eq? @p \"ptr\") name:(identifier)@n (#match? @n \"read(_volatile)?\")))@vuln",
severity: Severity::High,
},
Pattern {
id: "arc_unwrap",
description: "`Arc::unwrap_or_else_unchecked`",
query: "(call_expression function:(scoped_identifier name:(identifier)@n (#eq? @n \"unwrap_or_else_unchecked\")))@vuln",
severity: Severity::High,
},
Pattern {
id: "dbg_macro",
description: "`dbg!()` left in code",
query: "(macro_invocation (identifier)@id (#eq? @id \"dbg\"))@vuln",
severity: Severity::Low,
},
];

View file

@ -1,106 +1,106 @@
use crate::patterns::{Pattern, Severity};
pub const PATTERNS: &[Pattern] = &[
Pattern {
id: "eval_call",
description: "Use of eval()",
query: "(call_expression function: (identifier) @id (#eq? @id \"eval\")) @vuln",
severity: Severity::High,
},
Pattern {
id: "new_function",
description: "new Function() constructor",
query: "(new_expression constructor: (identifier) @id (#eq? @id \"Function\")) @vuln",
severity: Severity::High,
},
Pattern {
id: "document_write",
description: "document.write() call",
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"write\"))) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "inner_html_assignment",
description: "Assignment to element.innerHTML",
query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"innerHTML\"))) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "settimeout_string",
description: "setTimeout / setInterval with a string argument",
query: "(call_expression function: (identifier) @id (#match? @id \"setTimeout|setInterval\") arguments: (arguments (string) @code . _)) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "any_type",
description: "Type annotation of `any`",
query: "(type_annotation (predefined_type) @t (#eq? @t \"any\")) @vuln",
severity: Severity::Low,
},
Pattern {
id: "json_parse",
description: "JSON.parse on dynamic string",
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"JSON\") property: (property_identifier) @prop (#eq? @prop \"parse\"))) @vuln",
severity: Severity::Low,
},
Pattern {
id: "as_any_assertion",
description: "Type assertion to `any` using `as any`",
query: "(as_expression type: (predefined_type) @t (#eq? @t \"any\")) @vuln",
severity: Severity::Low,
},
Pattern {
id: "type_assertion_any",
description: "Type assertion to `any` using `<any>` syntax",
query: "(type_assertion type: (predefined_type) @t (#eq? @t \"any\")) @vuln",
severity: Severity::Low,
},
Pattern {
id: "outer_html_assignment",
description: "Assignment to element.outerHTML",
query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"outerHTML\"))) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "insert_adjacent_html",
description: "insertAdjacentHTML() call",
query: "(call_expression function: (member_expression property: (property_identifier) @prop (#eq? @prop \"insertAdjacentHTML\"))) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "document_cookie_write",
description: "Write to document.cookie",
query: "(assignment_expression left: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"cookie\"))) @vuln",
severity: Severity::Low,
},
Pattern {
id: "onclick_setattribute",
description: "Element.setAttribute('onclick', …)",
query: "(call_expression function: (member_expression property: (property_identifier) @prop (#eq? @prop \"setAttribute\")) arguments: (arguments (string) @name (#eq? @name \"\\\"onclick\\\"\") . (string) @handler)) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "math_random_call",
description: "Use of Math.random() for security-sensitive randomness",
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"Math\") property: (property_identifier) @prop (#eq? @prop \"random\"))) @vuln",
severity: Severity::Low,
},
Pattern {
id: "crypto_createhash_md5",
description: "Insecure hash algorithm: crypto.createHash('md5')",
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"crypto\") property: (property_identifier) @prop (#eq? @prop \"createHash\")) arguments: (arguments (string) @alg (#match? @alg \"(?i)\\\"md5\\\"\"))) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "fetch_http_url",
description: "fetch() over plain HTTP",
query: "(call_expression function: (identifier) @id (#eq? @id \"fetch\") arguments: (arguments (string) @url (#match? @url \"^\\\"http://\"))) @vuln",
severity: Severity::Low,
},
Pattern {
id: "xhr_eval_response",
description: "eval() of XMLHttpRequest.responseText",
query: "(call_expression function: (identifier) @id (#eq? @id \"eval\") arguments: (arguments (member_expression property: (property_identifier) @prop (#eq? @prop \"responseText\")))) @vuln",
severity: Severity::High,
},
];
Pattern {
id: "eval_call",
description: "Use of eval()",
query: "(call_expression function: (identifier) @id (#eq? @id \"eval\")) @vuln",
severity: Severity::High,
},
Pattern {
id: "new_function",
description: "new Function() constructor",
query: "(new_expression constructor: (identifier) @id (#eq? @id \"Function\")) @vuln",
severity: Severity::High,
},
Pattern {
id: "document_write",
description: "document.write() call",
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"write\"))) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "inner_html_assignment",
description: "Assignment to element.innerHTML",
query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"innerHTML\"))) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "settimeout_string",
description: "setTimeout / setInterval with a string argument",
query: "(call_expression function: (identifier) @id (#match? @id \"setTimeout|setInterval\") arguments: (arguments (string) @code . _)) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "any_type",
description: "Type annotation of `any`",
query: "(type_annotation (predefined_type) @t (#eq? @t \"any\")) @vuln",
severity: Severity::Low,
},
Pattern {
id: "json_parse",
description: "JSON.parse on dynamic string",
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"JSON\") property: (property_identifier) @prop (#eq? @prop \"parse\"))) @vuln",
severity: Severity::Low,
},
Pattern {
id: "as_any_assertion",
description: "Type assertion to `any` using `as any`",
query: "(as_expression type: (predefined_type) @t (#eq? @t \"any\")) @vuln",
severity: Severity::Low,
},
Pattern {
id: "type_assertion_any",
description: "Type assertion to `any` using `<any>` syntax",
query: "(type_assertion type: (predefined_type) @t (#eq? @t \"any\")) @vuln",
severity: Severity::Low,
},
Pattern {
id: "outer_html_assignment",
description: "Assignment to element.outerHTML",
query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"outerHTML\"))) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "insert_adjacent_html",
description: "insertAdjacentHTML() call",
query: "(call_expression function: (member_expression property: (property_identifier) @prop (#eq? @prop \"insertAdjacentHTML\"))) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "document_cookie_write",
description: "Write to document.cookie",
query: "(assignment_expression left: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"cookie\"))) @vuln",
severity: Severity::Low,
},
Pattern {
id: "onclick_setattribute",
description: "Element.setAttribute('onclick', …)",
query: "(call_expression function: (member_expression property: (property_identifier) @prop (#eq? @prop \"setAttribute\")) arguments: (arguments (string) @name (#eq? @name \"\\\"onclick\\\"\") . (string) @handler)) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "math_random_call",
description: "Use of Math.random() for security-sensitive randomness",
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"Math\") property: (property_identifier) @prop (#eq? @prop \"random\"))) @vuln",
severity: Severity::Low,
},
Pattern {
id: "crypto_createhash_md5",
description: "Insecure hash algorithm: crypto.createHash('md5')",
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"crypto\") property: (property_identifier) @prop (#eq? @prop \"createHash\")) arguments: (arguments (string) @alg (#match? @alg \"(?i)\\\"md5\\\"\"))) @vuln",
severity: Severity::Medium,
},
Pattern {
id: "fetch_http_url",
description: "fetch() over plain HTTP",
query: "(call_expression function: (identifier) @id (#eq? @id \"fetch\") arguments: (arguments (string) @url (#match? @url \"^\\\"http://\"))) @vuln",
severity: Severity::Low,
},
Pattern {
id: "xhr_eval_response",
description: "eval() of XMLHttpRequest.responseText",
query: "(call_expression function: (identifier) @id (#eq? @id \"eval\") arguments: (arguments (member_expression property: (property_identifier) @prop (#eq? @prop \"responseText\")))) @vuln",
severity: Severity::High,
},
];

View file

@ -1,25 +1,25 @@
use serde::{Deserialize, Serialize};
use std::path::{Path};
use std::fs;
use console::style;
use toml;
use crate::patterns::Severity;
use console::style;
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::Path;
use toml;
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(default)]
pub struct ScannerConfig {
/// The minimum severity level to output
pub min_severity: Severity,
/// The maximum file size to scan, in megabytes.
pub max_file_size_mb: Option<u64>,
/// File extensions to exclude from scanning.
pub excluded_extensions: Vec<String>,
/// Directories to exclude from scanning.
pub excluded_directories: Vec<String>,
/// Excluded files
pub excluded_files: Vec<String>,
@ -34,10 +34,10 @@ pub struct ScannerConfig {
/// Whether to limit the search to starting file system or not.
pub one_file_system: bool,
/// Whether to follow symlinks or not.
/// Whether to follow symlinks or not.
pub follow_symlinks: bool,
/// Whether to scan hidden files or not.
pub scan_hidden_files: bool,
}
@ -47,22 +47,24 @@ impl Default for ScannerConfig {
min_severity: Severity::Low,
max_file_size_mb: None,
excluded_extensions: vec![
"jpg", "png", "gif", "mp4", "avi", "mkv",
"zip", "tar", "gz", "exe", "dll", "so",
"jpg", "png", "gif", "mp4", "avi", "mkv", "zip", "tar", "gz", "exe", "dll", "so",
]
.into_iter()
.map(str::to_owned)
.collect(),
.into_iter()
.map(str::to_owned)
.collect(),
excluded_directories: vec![
"node_modules", ".git", "target", ".vscode", ".idea", "build", "dist",
"node_modules",
".git",
"target",
".vscode",
".idea",
"build",
"dist",
]
.into_iter()
.map(str::to_owned)
.collect(),
excluded_files: vec![]
.into_iter()
.map(str::to_owned)
.collect(),
.into_iter()
.map(str::to_owned)
.collect(),
excluded_files: vec![].into_iter().map(str::to_owned).collect(),
read_global_ignore: false,
read_vcsignore: true,
require_git_to_read_vcsignore: true,
@ -76,18 +78,22 @@ impl Default for ScannerConfig {
/// Settings for the scanner's database.
///
/// `#[serde(default)]` is applied at the container level, so any key missing
/// from a deserialized config is filled in from this type's `Default` impl.
/// Apart from `path`, every field is still marked TODO in its own comment.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(default)]
pub struct DatabaseConfig {
    /// Custom path for database
    pub path: String,
    /// The number of days to keep database files for. TODO: IMPLEMENT
    pub auto_cleanup_days: u32,
    /// The maximum size of the database, in megabytes. TODO: IMPLEMENT
    pub max_db_size_mb: u64,
    /// Whether to run a VACUUM on startup or not. TODO: IMPLEMENT
    pub vacuum_on_startup: bool,
}
impl Default for DatabaseConfig {
fn default() -> Self {
Self {
path: String::from(""),
auto_cleanup_days: 30,
max_db_size_mb: 1024,
vacuum_on_startup: false,
@ -100,15 +106,12 @@ impl Default for DatabaseConfig {
pub struct OutputConfig {
/// The default output format. TODO: IMPLEMENT others
pub default_format: String,
/// Whether to show progress or not. TODO: IMPLEMENT
pub show_progress: bool,
/// Whether to colorize output or not. TODO: IMPLEMENT changing to non colored
pub color_output: bool,
/// The maximum number of results to show. TODO: IMPLEMENT
pub max_results: Option<u32>,
/// The maximum number of results to show.
pub max_results: Option<u32>,
}
impl Default for OutputConfig {
@ -116,7 +119,6 @@ impl Default for OutputConfig {
Self {
default_format: "console".into(),
show_progress: true,
color_output: true,
max_results: None,
}
}
@ -128,21 +130,27 @@ pub struct PerformanceConfig {
/// The maximum search depth, or `None` if no maximum search depth should be set.
///
/// A depth of `1` includes all files under the current directory, a depth of `2` also includes
/// all files under subdirectories of the current directory, etc.
/// all files under subdirectories of the current directory, etc.
pub max_depth: Option<usize>, // TODO: IMPLEMENT
/// The minimum depth for reported entries, or `None`.
pub min_depth: Option<usize>, // TODO: IMPLEMENT
/// Whether to stop traversing into matching directories.
pub prune: bool, // TODO: IMPLEMENT
pub prune: bool,
/// The maximum number of worker threads to use, or `None` to auto-detect.
pub worker_threads: Option<usize>, // TODO: IMPLEMENT
pub worker_threads: Option<usize>,
/// The maximum number of entries to index in a single chunk.
pub index_chunk_size: u32, // TODO: IMPLEMENT
pub batch_size: usize,
/// capacity = threads × this
pub channel_multiplier: usize,
/// Timeout on individual files // TODO: IMPLEMENT
pub scan_timeout_secs: Option<u64>,
/// The maximum amount of memory to use, in megabytes.
pub memory_limit_mb: u64, // TODO: IMPLEMENT
}
@ -154,7 +162,9 @@ impl Default for PerformanceConfig {
min_depth: None,
prune: false,
worker_threads: None,
index_chunk_size: 1_000,
batch_size: 100usize,
channel_multiplier: 4usize,
scan_timeout_secs: None,
memory_limit_mb: 512,
}
}
@ -170,11 +180,8 @@ pub struct Config {
pub performance: PerformanceConfig,
}
impl Config {
pub fn load(
config_dir: &Path,
) -> Result<Self, Box<dyn std::error::Error>> {
pub fn load(config_dir: &Path) -> Result<Self, Box<dyn std::error::Error>> {
let mut config = Config::default();
let default_config_path = config_dir.join("nyx.conf");
@ -188,24 +195,32 @@ impl Config {
let user_config: Config = toml::from_str(&user_config_content)?;
config = merge_configs(config, user_config);
println!("{}: Loaded user config from: {}\n",
style("note").green().bold(),
style(user_config_path.display()).underlined().white().bold());
println!(
"{}: Loaded user config from: {}\n",
style("note").green().bold(),
style(user_config_path.display())
.underlined()
.white()
.bold()
);
} else {
println!("{}: Using {} configuration.\n Create file in '{}'to customize.\n",
style("note").green().bold(),
style("default").bold(),
style(user_config_path.display()).underlined().white().bold());
println!(
"{}: Using {} configuration.\n Create file in '{}'to customize.\n",
style("note").green().bold(),
style("default").bold(),
style(user_config_path.display())
.underlined()
.white()
.bold()
);
}
Ok(config)
}
}
fn create_example_config(
config_dir: &Path,
) -> Result<(), Box<dyn std::error::Error>> {
fn create_example_config(config_dir: &Path) -> Result<(), Box<dyn std::error::Error>> {
let example_path = config_dir.join("nyx.conf");
let default_config = Config::default();
@ -213,7 +228,7 @@ fn create_example_config(
// Add comments to make it user-friendly
let commented_content = format!(
"# nnyx Vulnerability Scanner Configuration\n\
"# nnyx Vulnerability Scanner Configuration\n\
# YOU SHOULD NOT MODIFY THIS FILE.\n\
# Create/modify 'nyx.local' to set configs\n\
# Only include the sections you want to override\n\n{}",
@ -230,40 +245,46 @@ fn create_example_config(
/// supply new exclusions and overriding everything else.
fn merge_configs(mut default: Config, user: Config) -> Config {
// --- ScannerConfig ---
default.scanner.max_file_size_mb = user.scanner.max_file_size_mb;
default.scanner.read_global_ignore = user.scanner.read_global_ignore;
default.scanner.read_vcsignore = user.scanner.read_vcsignore;
default.scanner.require_git_to_read_vcsignore = user.scanner.require_git_to_read_vcsignore;
default.scanner.one_file_system = user.scanner.one_file_system;
default.scanner.follow_symlinks = user.scanner.follow_symlinks;
default.scanner.scan_hidden_files = user.scanner.scan_hidden_files;
default.scanner.max_file_size_mb = user.scanner.max_file_size_mb;
default.scanner.read_global_ignore = user.scanner.read_global_ignore;
default.scanner.read_vcsignore = user.scanner.read_vcsignore;
default.scanner.require_git_to_read_vcsignore = user.scanner.require_git_to_read_vcsignore;
default.scanner.one_file_system = user.scanner.one_file_system;
default.scanner.follow_symlinks = user.scanner.follow_symlinks;
default.scanner.scan_hidden_files = user.scanner.scan_hidden_files;
// Merge exclusion lists (default ⊔ user), then sort & dedupe
default.scanner.excluded_extensions.extend(user.scanner.excluded_extensions);
default.scanner.excluded_directories.extend(user.scanner.excluded_directories);
default
.scanner
.excluded_extensions
.extend(user.scanner.excluded_extensions);
default
.scanner
.excluded_directories
.extend(user.scanner.excluded_directories);
default.scanner.excluded_extensions.sort_unstable();
default.scanner.excluded_extensions.dedup();
default.scanner.excluded_directories.sort_unstable();
default.scanner.excluded_directories.dedup();
// --- DatabaseConfig ---
default.database.auto_cleanup_days = user.database.auto_cleanup_days;
default.database.max_db_size_mb = user.database.max_db_size_mb;
default.database.vacuum_on_startup = user.database.vacuum_on_startup;
default.database.auto_cleanup_days = user.database.auto_cleanup_days;
default.database.max_db_size_mb = user.database.max_db_size_mb;
default.database.vacuum_on_startup = user.database.vacuum_on_startup;
// --- OutputConfig ---
default.output.default_format = user.output.default_format;
default.output.show_progress = user.output.show_progress;
default.output.color_output = user.output.color_output;
default.output.max_results = user.output.max_results;
default.output.default_format = user.output.default_format;
default.output.show_progress = user.output.show_progress;
default.output.max_results = user.output.max_results;
// --- PerformanceConfig ---
default.performance.max_depth = user.performance.max_depth;
default.performance.min_depth = user.performance.min_depth;
default.performance.prune = user.performance.prune;
default.performance.worker_threads = user.performance.worker_threads;
default.performance.index_chunk_size = user.performance.index_chunk_size;
default.performance.memory_limit_mb = user.performance.memory_limit_mb;
default.performance.max_depth = user.performance.max_depth;
default.performance.min_depth = user.performance.min_depth;
default.performance.prune = user.performance.prune;
default.performance.worker_threads = user.performance.worker_threads;
default.performance.batch_size = user.performance.batch_size;
default.performance.channel_multiplier = user.performance.channel_multiplier;
default.performance.memory_limit_mb = user.performance.memory_limit_mb;
default
}
}

View file

@ -1,15 +1,14 @@
/// Maps a path's extension to the canonical lowercase language key.
///
/// Matching ignores ASCII case, so `main.RS`, `App.Js` and `view.TSX` resolve
/// just like their lowercase spellings. Aliases collapse onto one key
/// (`c++` → `"cpp"`, `tsx` → `"ts"`).
///
/// Returns `None` when the path has no extension, the extension is not valid
/// UTF-8, or the extension is not one of the recognised languages.
pub fn lowercase_ext(path: &std::path::Path) -> Option<&'static str> {
    // (accepted spelling, canonical key)
    const KNOWN: &[(&str, &str)] = &[
        ("rs", "rs"),
        ("c", "c"),
        ("cpp", "cpp"),
        ("c++", "cpp"),
        ("java", "java"),
        ("go", "go"),
        ("php", "php"),
        ("py", "py"),
        ("ts", "ts"),
        ("tsx", "ts"),
        ("js", "js"),
    ];

    let ext = path.extension()?.to_str()?;
    // Case-insensitive comparison avoids allocating a lowercased copy.
    KNOWN
        .iter()
        .find(|(spelling, _)| ext.eq_ignore_ascii_case(spelling))
        .map(|&(_, key)| key)
}

View file

@ -1,8 +1,8 @@
pub mod project;
pub mod config;
pub(crate) mod query_cache;
pub(crate) mod ext;
pub mod project;
pub(crate) mod query_cache;
pub use config::Config;
// Re-export commonly used functions for convenience
pub use project::{get_project_info};
pub use config::Config;
pub use project::get_project_info;

View file

@ -1,34 +1,30 @@
use std::path::{Path, PathBuf};
use crate::errors::{NyxError, NyxResult};
use std::path::{Path, PathBuf};
/// Determine `<project-name, path/to/<project>.sqlite>`.
pub fn get_project_info(
project_path: &Path,
config_dir: &Path,
) -> NyxResult<(String, PathBuf)> {
pub fn get_project_info(project_path: &Path, config_dir: &Path) -> NyxResult<(String, PathBuf)> {
let project_name = project_path
.file_name()
.and_then(|n| n.to_str())
.ok_or_else(|| NyxError::Other("Unable to determine project name".into()))?;
let project_name = project_path
.file_name()
.and_then(|n| n.to_str())
.ok_or_else(|| NyxError::Other("Unable to determine project name".into()))?;
let db_name = sanitize_project_name(project_name);
let db_path = config_dir.join(format!("{}.sqlite", db_name));
let db_name = sanitize_project_name(project_name);
let db_path = config_dir.join(format!("{}.sqlite", db_name));
Ok((project_name.to_owned(), db_path))
Ok((project_name.to_owned(), db_path))
}
/// Turns a project name into a lowercase identifier for use in file names.
///
/// Alphanumeric characters and `-` are kept. Every other character,
/// including `_` itself, acts as a separator; runs of separators collapse to
/// a single `_`, and separators at either end are dropped.
pub fn sanitize_project_name(name: &str) -> String {
    let lowered = name.to_lowercase();
    let mut cleaned = String::with_capacity(lowered.len());
    // Set when one or more separators were seen since the last kept char.
    let mut gap = false;

    for ch in lowered.chars() {
        if ch.is_alphanumeric() || ch == '-' {
            // Emit the collapsed separator only between two kept segments.
            if gap && !cleaned.is_empty() {
                cleaned.push('_');
            }
            gap = false;
            cleaned.push(ch);
        } else {
            gap = true;
        }
    }

    cleaned
}
name.to_lowercase()
.chars()
.map(|c| match c {
' ' | '\t' | '\n' | '\r' => '_',
c if c.is_alphanumeric() || c == '_' || c == '-' => c,
_ => '_',
})
.collect::<String>()
.split('_')
.filter(|s| !s.is_empty())
.collect::<Vec<_>>()
.join("_")
}

View file

@ -6,37 +6,41 @@ use crate::patterns::{self, Pattern};
#[derive(Clone)]
pub struct CompiledQuery {
pub meta: Pattern,
pub query: Arc<Query>,
pub meta: Pattern,
pub query: Arc<Query>,
}
type QuerySet = Arc<Vec<CompiledQuery>>;
static CACHE: LazyLock<RwLock<HashMap<&'static str, QuerySet>>> =
LazyLock::new(|| RwLock::new(HashMap::new()));
LazyLock::new(|| RwLock::new(HashMap::new()));
/// Return **one shared Arc** to the per-language query set.
/// Cloning the `Arc` is O(1) and the underlying Vec lives for the
/// lifetime of the process.
pub fn for_lang(lang: &'static str, ts_lang: Language) -> std::sync::Arc<Vec<CompiledQuery>> {
// fast path
if let Some(v) = CACHE.read().unwrap().get(lang) {
return v.clone();
}
// slow path — compile
let patterns = patterns::load(lang);
let compiled: Vec<_> = patterns.into_iter().filter_map(|p| {
match Query::new(&ts_lang, p.query) {
Ok(q) => Some(CompiledQuery { meta: p, query: std::sync::Arc::new(q) }),
Err(e)=> {
tracing::warn!(lang, id = p.id, "query compile error: {e}");
None
}
// fast path
if let Some(v) = CACHE.read().unwrap().get(lang) {
return v.clone();
}
}).collect();
let compiled = std::sync::Arc::new(compiled);
// slow path — compile
let patterns = patterns::load(lang);
let compiled: Vec<_> = patterns
.into_iter()
.filter_map(|p| match Query::new(&ts_lang, p.query) {
Ok(q) => Some(CompiledQuery {
meta: p,
query: std::sync::Arc::new(q),
}),
Err(e) => {
tracing::warn!(lang, id = p.id, "query compile error: {e}");
None
}
})
.collect();
let mut w = CACHE.write().unwrap();
w.entry(lang).or_insert_with(|| compiled.clone()).clone()
}
let compiled = std::sync::Arc::new(compiled);
let mut w = CACHE.write().unwrap();
w.entry(lang).or_insert_with(|| compiled.clone()).clone()
}

View file

@ -1,5 +1,5 @@
use crossbeam_channel::{bounded, Receiver, Sender};
use ignore::{overrides::OverrideBuilder, WalkBuilder, WalkState};
use crossbeam_channel::{Receiver, Sender, bounded};
use ignore::{WalkBuilder, WalkState, overrides::OverrideBuilder};
use std::{
mem,
path::{Path, PathBuf},
@ -11,19 +11,17 @@ use crate::utils::Config;
// ---------------------------------------------------------------------------
// Internal constants / helpers
// ---------------------------------------------------------------------------
const DEFAULT_BATCH: usize = 8; // a tad larger for fewer sends
const CHANNEL_MULTIPLIER:usize = 4; // capacity = threads × this
type Batch = Vec<PathBuf>;
struct Batcher {
tx: Sender<Batch>,
tx: Sender<Batch>,
batch: Batch,
}
impl Batcher {
fn push(&mut self, p: PathBuf) {
fn push(&mut self, p: PathBuf, batch_size: usize) {
self.batch.push(p);
if self.batch.len() == DEFAULT_BATCH {
if self.batch.len() == batch_size {
self.flush();
}
}
@ -34,7 +32,9 @@ impl Batcher {
}
}
impl Drop for Batcher {
fn drop(&mut self) { self.flush(); }
fn drop(&mut self) {
self.flush();
}
}
// ---------------------------------------------------------------------------
@ -52,54 +52,55 @@ pub fn spawn_senders(root: &Path, cfg: &Config) -> Receiver<Batch> {
tracing::warn!("cannot add ignore pattern {dir}: {e}");
}
}
let overrides = ob.build().unwrap();
let overrides = ob.build().unwrap();
// ----- 2 channel & thread pool parameters -----------------------------
let workers = cfg.performance.worker_threads.unwrap_or(num_cpus::get());
let (tx, rx) = bounded::<Batch>(workers * CHANNEL_MULTIPLIER);
let workers = cfg.performance.worker_threads.unwrap_or(num_cpus::get());
let (tx, rx) = bounded::<Batch>(workers * cfg.performance.channel_multiplier);
let root = root.to_path_buf();
let root = root.to_path_buf();
let scan_hidden = cfg.scanner.scan_hidden_files;
let follow = cfg.scanner.follow_symlinks;
let max_bytes = cfg.scanner.max_file_size_mb.unwrap_or(0) * 1_048_576;
let follow = cfg.scanner.follow_symlinks;
let max_bytes = cfg.scanner.max_file_size_mb.unwrap_or(0) * 1_048_576;
let batch_size = cfg.performance.batch_size;
// ----- 3 the background walker thread ---------------------------------
thread::spawn(move || {
WalkBuilder::new(root)
.hidden(!scan_hidden)
.follow_links(follow)
.threads(workers)
.overrides(overrides)
.build_parallel()
.run(move || {
let mut b = Batcher {
tx: tx.clone(),
batch: Vec::with_capacity(DEFAULT_BATCH),
};
.hidden(!scan_hidden)
.follow_links(follow)
.threads(workers)
.overrides(overrides)
.build_parallel()
.run(move || {
let mut b = Batcher {
tx: tx.clone(),
batch: Vec::with_capacity(batch_size),
};
Box::new(move |entry| {
tracing::debug!("walking {:?}", entry);
let entry = match entry {
Ok(e) if e.file_type().map(|ft| ft.is_file()).unwrap_or(false) => e,
_ => return WalkState::Continue,
};
Box::new(move |entry| {
tracing::debug!("walking {:?}", entry);
let entry = match entry {
Ok(e) if e.file_type().map(|ft| ft.is_file()).unwrap_or(false) => e,
_ => return WalkState::Continue,
};
if max_bytes != 0 {
match entry.metadata() {
Ok(m) if m.len() > max_bytes => return WalkState::Continue,
Err(e) => {
tracing::debug!("metadata failed for {:?}: {e}", entry.path());
return WalkState::Continue;
}
_ => {}
}
}
if max_bytes != 0 {
match entry.metadata() {
Ok(m) if m.len() > max_bytes => return WalkState::Continue,
Err(e) => {
tracing::debug!("metadata failed for {:?}: {e}", entry.path());
return WalkState::Continue;
}
_ => {}
}
}
tracing::debug!("sending {:?}", entry);
b.push(entry.into_path());
WalkState::Continue
})
});
tracing::debug!("sending {:?}", entry);
b.push(entry.into_path(), batch_size);
WalkState::Continue
})
});
});
rx