mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-06 19:35:13 +02:00
Add error handling with NyxError and refactor console output formatting
- Introduced `NyxError` and `NyxResult` for unified error handling across modules.
- Refactored `scan.rs`, `index.rs`, and `walk.rs` with improved error management and consistent formatting.
- Replaced existing error handling in `database.rs` with `NyxResult`.
- Improved database maintenance by integrating `vacuum` and `clear` methods into workflows.
- Added `dashmap` for efficient parallel diagnostics result aggregation in `scan_with_index_parallel`.
- Enhanced readability and formatting of console outputs in multiple modules.
This commit is contained in:
parent
75a20eaa2a
commit
0a66a0ae2d
14 changed files with 360 additions and 240 deletions
16
Cargo.lock
generated
16
Cargo.lock
generated
|
|
@ -267,6 +267,20 @@ version = "0.8.21"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
||||
|
||||
[[package]]
|
||||
name = "dashmap"
|
||||
version = "7.0.0-rc2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e4a1e35a65fe0538a60167f0ada6e195ad5d477f6ddae273943596d4a1a5730b"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
"equivalent",
|
||||
"hashbrown",
|
||||
"lock_api",
|
||||
"parking_lot_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "deranged"
|
||||
version = "0.4.0"
|
||||
|
|
@ -579,6 +593,7 @@ dependencies = [
|
|||
"clap",
|
||||
"console",
|
||||
"crossbeam-channel",
|
||||
"dashmap",
|
||||
"directories",
|
||||
"ignore",
|
||||
"num_cpus",
|
||||
|
|
@ -588,6 +603,7 @@ dependencies = [
|
|||
"rayon",
|
||||
"rusqlite",
|
||||
"serde",
|
||||
"thiserror",
|
||||
"toml",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
|
|
|
|||
|
|
@ -32,3 +32,5 @@ r2d2_sqlite = "0.30.0"
|
|||
r2d2 = "0.8.10"
|
||||
bytesize = "2.0.1"
|
||||
chrono = { version = "0.4.41", default-features = false, features = ["std", "clock"] }
|
||||
thiserror = "2.0.12"
|
||||
dashmap = "7.0.0-rc2"
|
||||
|
|
|
|||
|
|
@ -24,8 +24,10 @@ pub fn handle(
|
|||
if force || !db_path.exists() {
|
||||
build_index(&project_name, &build_path, &db_path, config)?;
|
||||
println!("✔ {} {}", style("Index built:" ).green(), style(db_path.display()).white().bold());
|
||||
exit(0);
|
||||
} else {
|
||||
println!("{} {}", style("↩ Index already exists").yellow(), style("(use --force to rebuild)").dim());
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
IndexAction::Status { path } => {
|
||||
|
|
@ -48,7 +50,6 @@ pub fn handle(
|
|||
exit(0);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn build_index(
|
||||
|
|
|
|||
|
|
@ -7,11 +7,12 @@ use r2d2_sqlite::SqliteConnectionManager;
|
|||
use crate::database::index::{IssueRow, Indexer};
|
||||
use crate::patterns::Severity;
|
||||
use crate::utils::config::Config;
|
||||
use crate::utils::query_cache;
|
||||
use crate::walk::spawn_senders;
|
||||
use rayon::prelude::*;
|
||||
use std::collections::BTreeMap;
|
||||
use tree_sitter::{Language, Parser, QueryCursor, StreamingIterator};
|
||||
use dashmap::DashMap;
|
||||
use crate::errors::NyxResult;
|
||||
pub(crate) use crate::file::run_rules_on_file;
|
||||
|
||||
type DynError = Box<dyn std::error::Error + Send + Sync>;
|
||||
|
||||
|
|
@ -35,6 +36,8 @@ pub fn handle(
|
|||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let scan_path = Path::new(path).canonicalize()?;
|
||||
let (project_name, db_path) = get_project_info(&scan_path, database_dir)?;
|
||||
|
||||
println!("{} {}...\n", style("Checking").green().bold(), &project_name);
|
||||
|
||||
let diags: Vec<Diag> = if no_index {
|
||||
scan_filesystem(&scan_path, config)?
|
||||
|
|
@ -58,25 +61,18 @@ pub fn handle(
|
|||
for d in &diags {
|
||||
grouped.entry(&d.path).or_default().push(d);
|
||||
}
|
||||
|
||||
for (path, issues) in grouped {
|
||||
|
||||
for (path, issues) in &grouped {
|
||||
println!("{}", style(path).blue().underlined());
|
||||
for d in issues {
|
||||
let sev_str = match d.severity {
|
||||
Severity::High => style("HIGH").red().bold(),
|
||||
Severity::Medium => style("MEDIUM").yellow().bold(),
|
||||
Severity::Low => style("LOW").cyan().bold(),
|
||||
};
|
||||
println!(
|
||||
" {:>4}:{:<4} [{}] {}",
|
||||
d.line, d.col, sev_str, style(&d.id).bold()
|
||||
);
|
||||
println!(" {:>4}:{:<4} [{}] {}",
|
||||
d.line, d.col, d.severity, style(&d.id).bold());
|
||||
}
|
||||
println!();
|
||||
println!();
|
||||
}
|
||||
|
||||
println!("{} '{}' generated {} issues.",
|
||||
style("warning").yellow().bold(),
|
||||
println!("{} '{}' generated {} issues.",
|
||||
style("warning").yellow().bold(),
|
||||
style(project_name).white().bold(),
|
||||
style(diags.len()).bold());
|
||||
println!("\t"); // TODO: Add individual counts for different warning levels
|
||||
|
|
@ -94,11 +90,11 @@ fn scan_filesystem(
|
|||
) ->Result<Vec<Diag>, Box<dyn std::error::Error>> {
|
||||
let rx = spawn_senders(root, cfg);
|
||||
let acc = Mutex::new(Vec::new());
|
||||
|
||||
|
||||
rx.into_iter()
|
||||
.flatten()
|
||||
.par_bridge()
|
||||
.try_for_each(|path| {
|
||||
.try_for_each(|path| {
|
||||
let mut local = run_rules_on_file(&path, cfg).unwrap();
|
||||
acc.lock().unwrap().append(&mut local);
|
||||
Ok::<(), DynError>(())
|
||||
|
|
@ -107,113 +103,54 @@ fn scan_filesystem(
|
|||
Ok(acc.into_inner()?)
|
||||
}
|
||||
|
||||
fn scan_with_index_parallel(
|
||||
pub fn scan_with_index_parallel(
|
||||
project: &str,
|
||||
pool: Arc<Pool<SqliteConnectionManager>>,
|
||||
cfg: &Config,
|
||||
) -> Result<Vec<Diag>, Box<dyn std::error::Error>> {
|
||||
) -> NyxResult<Vec<Diag>> {
|
||||
|
||||
let files = {
|
||||
let idx = Indexer::from_pool(project, &pool)?;
|
||||
idx.get_files(project)?
|
||||
};
|
||||
|
||||
let acc = Mutex::new(Vec::new());
|
||||
// ① Collect per-path Vec<Diag> without a global mutex
|
||||
let diag_map: DashMap<String, Vec<Diag>> = DashMap::new();
|
||||
|
||||
files.into_par_iter()
|
||||
.try_for_each(|path| -> Result<(), DynError> {
|
||||
let mut idx = Indexer::from_pool(project, &pool).unwrap();
|
||||
.for_each_init(
|
||||
// ② A single Indexer per Rayon worker thread
|
||||
|| Indexer::from_pool(project, &pool).expect("db pool"),
|
||||
|idx, path| {
|
||||
let needs_scan = idx.should_scan(&path).unwrap_or(true);
|
||||
|
||||
if idx.should_scan(&path).unwrap() {
|
||||
let mut diags = run_rules_on_file(&path, cfg).unwrap();
|
||||
let file_id = idx.upsert_file(&path).unwrap();
|
||||
|
||||
let rows: Vec<IssueRow> = diags.iter().map(|d| IssueRow {
|
||||
rule_id: d.id.as_ref(),
|
||||
severity: match d.severity {
|
||||
Severity::High => "HIGH",
|
||||
Severity::Medium => "MEDIUM",
|
||||
Severity::Low => "LOW",
|
||||
},
|
||||
line: d.line as i64,
|
||||
col: d.col as i64,
|
||||
}).collect();
|
||||
|
||||
idx.replace_issues(file_id, rows).unwrap();
|
||||
acc.lock().unwrap().append(&mut diags);
|
||||
} else {
|
||||
let mut cached = idx.get_issues_from_file(&path).unwrap();
|
||||
acc.lock().unwrap().append(&mut cached);
|
||||
let mut diags = if needs_scan {
|
||||
let d = run_rules_on_file(&path, cfg).unwrap_or_default();
|
||||
let file_id = idx.upsert_file(&path).unwrap();
|
||||
idx.replace_issues(
|
||||
file_id,
|
||||
d.iter().map(|d| IssueRow {
|
||||
rule_id: &d.id,
|
||||
severity: d.severity.as_db_str(),
|
||||
line: d.line as i64,
|
||||
col: d.col as i64,
|
||||
}),
|
||||
).ok();
|
||||
d
|
||||
} else {
|
||||
idx.get_issues_from_file(&path).unwrap_or_default()
|
||||
};
|
||||
if !diags.is_empty() {
|
||||
diag_map.entry(path.to_string_lossy().to_string())
|
||||
.or_default()
|
||||
.append(&mut diags);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}).unwrap();
|
||||
);
|
||||
|
||||
{
|
||||
let idx = Indexer::from_pool(project, &pool)?;
|
||||
idx.vacuum()?;
|
||||
}
|
||||
// Optional, heavy: only vacuum on --rebuild-index
|
||||
// if rebuild { idx.vacuum()?; }
|
||||
|
||||
Ok(acc.into_inner().unwrap())
|
||||
// Flatten
|
||||
Ok(diag_map.into_iter().flat_map(|(_, v)| v).collect())
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------
|
||||
// Tree‑sitter‑based rule runner – returns a Vec<Diag>
|
||||
// --------------------------------------------------------------------------------------------
|
||||
pub(crate) fn run_rules_on_file(
|
||||
path: &Path,
|
||||
cfg: &Config,
|
||||
) -> Result<Vec<Diag>, Box<dyn std::error::Error>> {
|
||||
tracing::debug!("Running rules on {}", path.to_string_lossy());
|
||||
let bytes = std::fs::read(path)?;
|
||||
|
||||
let mut parser = Parser::new();
|
||||
|
||||
let lang_key = match path
|
||||
.extension()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or_default()
|
||||
.to_ascii_lowercase()
|
||||
.as_str()
|
||||
{
|
||||
"rs" => (Language::from(tree_sitter_rust::LANGUAGE), "rust"),
|
||||
"c" => (Language::from(tree_sitter_c::LANGUAGE), "c"),
|
||||
"cpp" | "c++" => (Language::from(tree_sitter_cpp::LANGUAGE), "cpp"),
|
||||
"java" => (Language::from(tree_sitter_java::LANGUAGE), "java"),
|
||||
"go" => (Language::from(tree_sitter_go::LANGUAGE), "go"),
|
||||
"php" => (Language::from(tree_sitter_php::LANGUAGE_PHP), "php"),
|
||||
"py" => (Language::from(tree_sitter_python::LANGUAGE), "python"),
|
||||
"ts" | "tsx" => (Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT), "typescript"),
|
||||
"js" => (Language::from(tree_sitter_javascript::LANGUAGE), "javascript"),
|
||||
_ => return Ok(Vec::new()),
|
||||
};
|
||||
let (ts_lang, lang_name) = lang_key;
|
||||
|
||||
parser.set_language(&ts_lang)?;
|
||||
let tree = parser.parse(&*bytes, None).ok_or("tree‑sitter failed")?;
|
||||
let root = tree.root_node();
|
||||
|
||||
let compiled = query_cache::for_lang(lang_name, ts_lang);
|
||||
let mut cursor = QueryCursor::new();
|
||||
let mut out = Vec::new();
|
||||
|
||||
for cq in &compiled {
|
||||
if cfg.scanner.min_severity > cq.meta.severity {
|
||||
tracing::debug!("Skipping rule {} because it's below the minimum severity", cq.meta.id);
|
||||
continue;
|
||||
}
|
||||
let mut matches = cursor.matches(&cq.query, root, &*bytes);
|
||||
while let Some(m) = matches.next() {
|
||||
for cap in m.captures.iter().filter(|c| c.index == 0) {
|
||||
let point = cap.node.start_position();
|
||||
tracing::debug!("Found match for rule {}", cq.meta.id);
|
||||
out.push(Diag {
|
||||
path: path.to_string_lossy().to_string(),
|
||||
line: point.row + 1,
|
||||
col: point.column + 1,
|
||||
severity: cq.meta.severity,
|
||||
id: String::from(cq.meta.id),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
|
@ -10,6 +10,7 @@ pub mod index {
|
|||
use std::ops::Deref;
|
||||
use std::sync::Arc;
|
||||
use r2d2::{Pool, PooledConnection};
|
||||
use crate::errors::NyxResult;
|
||||
|
||||
/// DB schema (foreign‑keys enabled).
|
||||
const SCHEMA: &str = r#"
|
||||
|
|
@ -55,7 +56,7 @@ pub mod index {
|
|||
|
||||
pub fn init(
|
||||
database_path: &Path,
|
||||
) -> Result<std::sync::Arc<Pool<SqliteConnectionManager>>, Box<dyn std::error::Error>> {
|
||||
) -> NyxResult<Arc<Pool<SqliteConnectionManager>>> {
|
||||
let flags = OpenFlags::SQLITE_OPEN_READ_WRITE
|
||||
| OpenFlags::SQLITE_OPEN_CREATE
|
||||
| OpenFlags::SQLITE_OPEN_FULL_MUTEX;
|
||||
|
|
@ -73,7 +74,7 @@ pub mod index {
|
|||
pub fn from_pool(
|
||||
project: &str,
|
||||
pool: &Pool<SqliteConnectionManager>,
|
||||
) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
) -> NyxResult<Self> {
|
||||
let conn = pool.get()?;
|
||||
Ok(Self { conn, project: project.to_owned() })
|
||||
}
|
||||
|
|
@ -82,7 +83,7 @@ pub mod index {
|
|||
fn c(&self) -> &Connection { self.conn.deref() }
|
||||
|
||||
/// Return true when the file *content* or *mtime* changed since the last scan.
|
||||
pub fn should_scan(&self, path: &Path) -> Result<bool, Box<dyn std::error::Error>> {
|
||||
pub fn should_scan(&self, path: &Path) -> NyxResult<bool> {
|
||||
let meta = fs::metadata(path)?;
|
||||
let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
|
||||
let digest = Self::digest_file(path)?;
|
||||
|
|
@ -103,7 +104,7 @@ pub mod index {
|
|||
}
|
||||
|
||||
/// Insert or update the `files` row and return its id.
|
||||
pub fn upsert_file(&self, path: &Path) -> Result<i64, Box<dyn std::error::Error>> {
|
||||
pub fn upsert_file(&self, path: &Path) -> NyxResult<i64> {
|
||||
let meta = fs::metadata(path)?;
|
||||
let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
|
||||
let scanned_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;
|
||||
|
|
@ -129,7 +130,7 @@ pub mod index {
|
|||
|
||||
/// Replace all issues for `file_id` with the supplied set.
|
||||
pub fn replace_issues<'a>(&mut self, file_id: i64, issues: impl IntoIterator<Item = IssueRow<'a>>)
|
||||
-> Result<(), Box<dyn std::error::Error>> {
|
||||
-> NyxResult<()> {
|
||||
let tx = self.conn.transaction()?;
|
||||
tx.execute("DELETE FROM issues WHERE file_id = ?", params![file_id])?;
|
||||
|
||||
|
|
@ -150,7 +151,7 @@ pub mod index {
|
|||
pub fn get_issues_from_file(
|
||||
&self,
|
||||
path: &Path,
|
||||
) -> Result<Vec<Diag>, Box<dyn std::error::Error>> {
|
||||
) -> NyxResult<Vec<Diag>> {
|
||||
let file_id: i64 = self.c().query_row(
|
||||
"SELECT id FROM files WHERE project = ?1 AND path = ?2",
|
||||
params![self.project, path.to_string_lossy()],
|
||||
|
|
@ -178,7 +179,7 @@ pub mod index {
|
|||
}
|
||||
|
||||
/// gets files from the database
|
||||
pub fn get_files(&self, project: &str) -> Result<Vec<std::path::PathBuf>, Box<dyn std::error::Error>> {
|
||||
pub fn get_files(&self, project: &str) -> NyxResult<Vec<PathBuf>> {
|
||||
let mut stmt = self.c().prepare(
|
||||
"SELECT path
|
||||
FROM files
|
||||
|
|
@ -190,8 +191,10 @@ pub mod index {
|
|||
Ok(file_iter.map(|p| p.map(PathBuf::from)).collect::<Result<_, _>>()?)
|
||||
}
|
||||
|
||||
/// Clears the tables to prep for a reindex
|
||||
pub fn clear(&self) -> rusqlite::Result<()> {
|
||||
// -------------------------------------------------------------------------
|
||||
// Maintenance utilities
|
||||
// -------------------------------------------------------------------------
|
||||
pub fn clear(&self) -> NyxResult<()> {
|
||||
self.c().execute_batch(
|
||||
r#"
|
||||
PRAGMA foreign_keys = OFF;
|
||||
|
|
@ -208,12 +211,15 @@ pub mod index {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
pub fn vacuum(&self) -> rusqlite::Result<()> {
|
||||
pub fn vacuum(&self) -> NyxResult<()> {
|
||||
self.c().execute("VACUUM;", [])?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn digest_file(path: &Path) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
|
||||
// -------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// -------------------------------------------------------------------------
|
||||
fn digest_file(path: &Path) -> NyxResult<Vec<u8>> {
|
||||
let mut hasher = blake3::Hasher::new();
|
||||
let mut file = fs::File::open(path)?;
|
||||
std::io::copy(&mut file, &mut hasher)?;
|
||||
|
|
|
|||
24
src/errors.rs
Normal file
24
src/errors.rs
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
use thiserror::Error;
|
||||
|
||||
pub type NyxResult<T, E = NyxError> = core::result::Result<T, E>;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum NyxError {
|
||||
#[error("I/O error: {0}")]
|
||||
Io(#[from] std::io::Error),
|
||||
|
||||
#[error("SQLite error: {0}")]
|
||||
Sql(#[from] rusqlite::Error),
|
||||
|
||||
#[error("tree-sitter error: {0}")]
|
||||
TreeSitter(#[from] tree_sitter::LanguageError),
|
||||
|
||||
#[error("connection-pool error: {0}")]
|
||||
Pool(#[from] r2d2::Error),
|
||||
|
||||
#[error("time error: {0}")]
|
||||
Time(#[from] std::time::SystemTimeError),
|
||||
|
||||
#[error("other: {0}")]
|
||||
Other(String),
|
||||
}
|
||||
74
src/file.rs
Normal file
74
src/file.rs
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
use std::cell::RefCell;
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Language, QueryCursor, StreamingIterator};
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::errors::{NyxResult, NyxError};
|
||||
use crate::utils::{query_cache, Config};
|
||||
use crate::utils::ext::lowercase_ext;
|
||||
|
||||
thread_local! {
|
||||
static PARSER: RefCell<tree_sitter::Parser> = RefCell::new(tree_sitter::Parser::new());
|
||||
}
|
||||
|
||||
pub(crate) fn run_rules_on_file(
|
||||
path: &Path,
|
||||
cfg: &Config,
|
||||
) -> NyxResult<Vec<Diag>> {
|
||||
let bytes = std::fs::read(path)?;
|
||||
|
||||
// Fast binary-file guard (skip if >1% NULs)
|
||||
if bytes.iter().filter(|b| **b == 0).count() * 100 / bytes.len().max(1) > 1 {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
let lang_name = match lowercase_ext(path) {
|
||||
Some(l) => l,
|
||||
None => return Ok(vec![]),
|
||||
};
|
||||
|
||||
let ts_lang = match lang_name {
|
||||
"rs" => Language::from(tree_sitter_rust::LANGUAGE),
|
||||
"c" => Language::from(tree_sitter_c::LANGUAGE),
|
||||
"cpp" => Language::from(tree_sitter_cpp::LANGUAGE),
|
||||
"java"=> Language::from(tree_sitter_java::LANGUAGE),
|
||||
"go" => Language::from(tree_sitter_go::LANGUAGE),
|
||||
"php" => Language::from(tree_sitter_php::LANGUAGE_PHP),
|
||||
"py" => Language::from(tree_sitter_python::LANGUAGE),
|
||||
"ts" => Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT),
|
||||
"js" => Language::from(tree_sitter_javascript::LANGUAGE),
|
||||
_ => return Ok(vec![]),
|
||||
};
|
||||
|
||||
let _tree = PARSER.with(|cell| {
|
||||
let mut parser = cell.borrow_mut();
|
||||
parser.set_language(&ts_lang)?;
|
||||
parser.parse(&*bytes, None)
|
||||
.ok_or_else(|| NyxError::Other("tree-sitter failed".into()))
|
||||
})?;
|
||||
|
||||
let root = _tree.root_node();
|
||||
|
||||
let compiled = query_cache::for_lang(lang_name, ts_lang);
|
||||
let mut cursor = QueryCursor::new();
|
||||
let mut out = Vec::new();
|
||||
|
||||
for cq in compiled.iter() {
|
||||
if cfg.scanner.min_severity > cq.meta.severity {
|
||||
continue;
|
||||
}
|
||||
let mut matches = cursor.matches(&cq.query, root, &*bytes);
|
||||
while let Some(m) = matches.next() {
|
||||
if let Some(cap) = m.captures.iter().find(|c| c.index == 0) {
|
||||
let point = cap.node.start_position();
|
||||
out.push(Diag {
|
||||
path: path.to_string_lossy().into_owned(),
|
||||
line: point.row + 1,
|
||||
col: point.column + 1,
|
||||
severity: cq.meta.severity,
|
||||
id: cq.meta.id.to_owned(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
11
src/main.rs
11
src/main.rs
|
|
@ -4,6 +4,8 @@ mod utils;
|
|||
mod walk;
|
||||
mod database;
|
||||
mod patterns;
|
||||
mod errors;
|
||||
mod file;
|
||||
|
||||
use crate::utils::Config;
|
||||
use cli::Cli;
|
||||
|
|
@ -59,10 +61,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
|
||||
commands::handle_command(cli.command, database_dir, &mut config)?;
|
||||
|
||||
let elapsed: f32 = now.elapsed().as_millis() as f32 / 1000f32;
|
||||
println!("{} in {} s.",
|
||||
style("Finished").green().bold(),
|
||||
style(elapsed).white().bold());
|
||||
println!(
|
||||
"{} in {:.3}s.",
|
||||
style("Finished").green().bold(),
|
||||
now.elapsed().as_secs_f32()
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -9,18 +9,63 @@ mod php;
|
|||
mod python;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
use console::style;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
/// How bad / noisy a pattern is considered.
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd)]
|
||||
pub enum Severity {
|
||||
Low,
|
||||
Medium,
|
||||
High,
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
|
||||
pub enum Severity { High, Medium, Low }
|
||||
|
||||
impl fmt::Display for Severity {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let s = match *self {
|
||||
Severity::High => style("HIGH").red().bold().to_string(),
|
||||
Severity::Medium => style("MEDIUM").yellow().bold().to_string(),
|
||||
Severity::Low => style("LOW").cyan().bold().to_string(),
|
||||
};
|
||||
f.write_str(&s)
|
||||
}
|
||||
}
|
||||
|
||||
impl Severity {
|
||||
/// Textual value stored in SQLite.
|
||||
pub fn as_db_str(self) -> &'static str {
|
||||
match self {
|
||||
Severity::High => "HIGH",
|
||||
Severity::Medium => "MEDIUM",
|
||||
Severity::Low => "LOW",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for Severity { // TODO: FIX
|
||||
type Err = ();
|
||||
|
||||
fn from_str(input: &str) -> Result<Self, Self::Err> {
|
||||
match input.to_lowercase().as_str() {
|
||||
"medium" => Ok(Severity::Medium),
|
||||
"high" => Ok(Severity::High),
|
||||
_ => Ok(Severity::Low),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// /// How bad / noisy a pattern is considered.
|
||||
// #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd)]
|
||||
// pub enum Severity {
|
||||
// Low,
|
||||
// Medium,
|
||||
// High,
|
||||
// }
|
||||
//
|
||||
// impl Severity {
|
||||
// pub(crate) fn as_db_str(&self) -> &str {
|
||||
// todo!()
|
||||
// }
|
||||
// }
|
||||
|
||||
/// One AST pattern with a tree-sitter query and meta-data.
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct Pattern {
|
||||
|
|
@ -34,17 +79,6 @@ pub struct Pattern {
|
|||
pub severity: Severity,
|
||||
}
|
||||
|
||||
impl FromStr for Severity { // TODO: FIX
|
||||
type Err = ();
|
||||
|
||||
fn from_str(input: &str) -> Result<Self, Self::Err> {
|
||||
match input.to_lowercase().as_str() {
|
||||
"medium" => Ok(Severity::Medium),
|
||||
"high" => Ok(Severity::High),
|
||||
_ => Ok(Severity::Low),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Global, lazily-initialised registry: lang-name → pattern slice
|
||||
|
|
|
|||
15
src/utils/ext.rs
Normal file
15
src/utils/ext.rs
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
/// Map a path's file extension to the short language key used by the scanner.
///
/// Matching is ASCII case-insensitive, so `main.RS`, `Main.Java` and `app.TSX`
/// resolve exactly like their lowercase spellings. `c++` is folded into
/// `"cpp"` and `tsx` into `"ts"`.
///
/// Returns `None` when the path has no extension, when the extension is not
/// valid UTF-8, or when it is not one of the supported languages.
pub fn lowercase_ext(path: &std::path::Path) -> Option<&'static str> {
    // (accepted spelling, canonical key)
    const KNOWN: &[(&str, &str)] = &[
        ("rs", "rs"),
        ("c", "c"),
        ("cpp", "cpp"),
        ("c++", "cpp"),
        ("java", "java"),
        ("go", "go"),
        ("php", "php"),
        ("py", "py"),
        ("ts", "ts"),
        ("tsx", "ts"),
        ("js", "js"),
    ];

    let ext = path.extension()?.to_str()?;
    // Compare without allocating a lowercased copy of the extension.
    KNOWN
        .iter()
        .find(|(spelling, _)| ext.eq_ignore_ascii_case(spelling))
        .map(|&(_, key)| key)
}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
pub mod project;
|
||||
pub mod config;
|
||||
pub(crate) mod query_cache;
|
||||
pub(crate) mod ext;
|
||||
|
||||
// Re-export commonly used functions for convenience
|
||||
pub use project::{get_project_info};
|
||||
|
|
|
|||
|
|
@ -1,27 +1,30 @@
|
|||
use std::path::{Path, PathBuf};
|
||||
use crate::errors::{NyxError, NyxResult};
|
||||
|
||||
/// Determine `<project-name, path/to/<project>.sqlite>`.
|
||||
pub fn get_project_info(
|
||||
project_path: &Path,
|
||||
config_dir: &Path,
|
||||
) -> Result<(String, PathBuf), Box<dyn std::error::Error>> {
|
||||
project_path: &Path,
|
||||
config_dir: &Path,
|
||||
) -> NyxResult<(String, PathBuf)> {
|
||||
|
||||
let project_name = project_path
|
||||
.file_name()
|
||||
.and_then(|name| name.to_str())
|
||||
.ok_or("Unable to determine project name")?;
|
||||
|
||||
.and_then(|n| n.to_str())
|
||||
.ok_or_else(|| NyxError::Other("Unable to determine project name".into()))?;
|
||||
|
||||
let db_name = sanitize_project_name(project_name);
|
||||
let db_path = config_dir.join(format!("{}.sqlite", db_name));
|
||||
|
||||
Ok((project_name.to_string(), db_path))
|
||||
|
||||
Ok((project_name.to_owned(), db_path))
|
||||
}
|
||||
|
||||
pub fn sanitize_project_name(name: &str) -> String {
|
||||
pub fn sanitize_project_name(name: &str) -> String {
|
||||
name.to_lowercase()
|
||||
.chars()
|
||||
.map(|c| match c {
|
||||
' ' | '\t' | '\n' | '\r' => '_',
|
||||
c if c.is_alphanumeric() || c == '_' || c == '-' => c,
|
||||
_ => '_'
|
||||
.map(|c| match c {
|
||||
' ' | '\t' | '\n' | '\r' => '_',
|
||||
c if c.is_alphanumeric() || c == '_' || c == '-' => c,
|
||||
_ => '_',
|
||||
})
|
||||
.collect::<String>()
|
||||
.split('_')
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::sync::RwLock;
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
use tree_sitter::{Language, Query};
|
||||
|
|
@ -8,30 +8,36 @@ use crate::patterns::{self, Pattern};
|
|||
|
||||
#[derive(Clone)]
|
||||
pub struct CompiledQuery {
|
||||
pub meta: Pattern,
|
||||
pub query: Arc<Query>,
|
||||
pub meta: Pattern,
|
||||
pub query: std::sync::Arc<Query>,
|
||||
}
|
||||
|
||||
static CACHE: Lazy<RwLock<HashMap<&'static str, Vec<CompiledQuery>>>> =
|
||||
static CACHE: Lazy<RwLock<HashMap<&'static str, std::sync::Arc<Vec<CompiledQuery>>>>> =
|
||||
Lazy::new(|| RwLock::new(HashMap::new()));
|
||||
|
||||
pub fn for_lang(lang: &'static str, ts_lang: Language) -> Vec<CompiledQuery> {
|
||||
// fast-path read
|
||||
/// Return **one shared Arc** to the per-language query set.
|
||||
/// Cloning the `Arc` is O(1) and the underlying Vec lives for the
|
||||
/// lifetime of the process.
|
||||
pub fn for_lang(lang: &'static str, ts_lang: Language) -> std::sync::Arc<Vec<CompiledQuery>> {
|
||||
// fast path
|
||||
if let Some(v) = CACHE.read().unwrap().get(lang) {
|
||||
return v.clone();
|
||||
}
|
||||
|
||||
// compile under write-lock exactly once
|
||||
let patterns = patterns::load(lang);
|
||||
let mut vec = Vec::with_capacity(patterns.len());
|
||||
|
||||
for p in patterns {
|
||||
// slow path — compile
|
||||
let patterns = patterns::load(lang);
|
||||
let compiled: Vec<_> = patterns.into_iter().filter_map(|p| {
|
||||
match Query::new(&ts_lang, p.query) {
|
||||
Ok(q) => vec.push(CompiledQuery { meta: p, query: Arc::new(q) }),
|
||||
Err(e) => tracing::warn!(lang, id = p.id, "query compile error: {e}"),
|
||||
Ok(q) => Some(CompiledQuery { meta: p, query: std::sync::Arc::new(q) }),
|
||||
Err(e)=> {
|
||||
tracing::warn!(lang, id = p.id, "query compile error: {e}");
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}).collect();
|
||||
|
||||
CACHE.write().unwrap().insert(lang, vec.clone());
|
||||
vec
|
||||
let compiled = std::sync::Arc::new(compiled);
|
||||
|
||||
let mut w = CACHE.write().unwrap();
|
||||
w.entry(lang).or_insert_with(|| compiled.clone()).clone()
|
||||
}
|
||||
128
src/walk.rs
128
src/walk.rs
|
|
@ -1,106 +1,104 @@
|
|||
use crossbeam_channel::{bounded, Receiver};
|
||||
use ignore::{WalkBuilder, WalkState};
|
||||
use std::{path::{Path, PathBuf}, thread};
|
||||
use ignore::overrides::OverrideBuilder;
|
||||
use crossbeam_channel::{bounded, Receiver, Sender};
|
||||
use ignore::{overrides::OverrideBuilder, WalkBuilder, WalkState};
|
||||
use std::{
|
||||
mem,
|
||||
path::{Path, PathBuf},
|
||||
thread,
|
||||
};
|
||||
|
||||
use crate::utils::Config;
|
||||
|
||||
const BATCH_SIZE: usize = 5;
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal constants / helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
const DEFAULT_BATCH: usize = 8; // a tad larger for fewer sends
|
||||
const CHANNEL_MULTIPLIER:usize = 4; // capacity = threads × this
|
||||
|
||||
type Batch = Vec<PathBuf>;
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Batcher {
|
||||
tx: crossbeam_channel::Sender<Batch>,
|
||||
tx: Sender<Batch>,
|
||||
batch: Batch,
|
||||
}
|
||||
|
||||
impl Batcher {
|
||||
fn push(&mut self, p: PathBuf) {
|
||||
self.batch.push(p);
|
||||
if self.batch.len() == BATCH_SIZE {
|
||||
if self.batch.len() == DEFAULT_BATCH {
|
||||
self.flush();
|
||||
}
|
||||
}
|
||||
fn flush(&mut self) {
|
||||
if !self.batch.is_empty() {
|
||||
let _ = self.tx.send(std::mem::take(&mut self.batch));
|
||||
let _ = self.tx.send(mem::take(&mut self.batch));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for Batcher {
|
||||
fn drop(&mut self) {
|
||||
// guarantees the remainder is sent when the worker is dropped
|
||||
self.flush();
|
||||
}
|
||||
fn drop(&mut self) { self.flush(); }
|
||||
}
|
||||
|
||||
|
||||
/// Walk `root`, send file paths to the returned receiver.
|
||||
pub fn spawn_senders(
|
||||
root: &Path,
|
||||
cfg: &Config
|
||||
) -> Receiver<Batch> {
|
||||
// ---------------------------------------------------------------------------
|
||||
/// Walk `root` and send *batches* of paths through the returned channel.
|
||||
pub fn spawn_senders(root: &Path, cfg: &Config) -> Receiver<Batch> {
|
||||
// ----- 1 build ignore/override rules ----------------------------------
|
||||
let mut ob = OverrideBuilder::new(root);
|
||||
|
||||
for ext in &cfg.scanner.excluded_extensions {
|
||||
if let Err(e) = ob.add(&format!("!*.{ext}")) {
|
||||
tracing::warn!("could not add ignore pattern: {e}");
|
||||
tracing::warn!("cannot add ignore pattern ‘{ext}’: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
for dir in &cfg.scanner.excluded_directories {
|
||||
if let Err(e) = ob.add(&format!("!**/{dir}/**")) {
|
||||
tracing::warn!("could not add ignore pattern: {e}");
|
||||
tracing::warn!("cannot add ignore pattern ‘{dir}’: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
let overrides = ob.build().unwrap();
|
||||
let worker_thrs = cfg.performance.worker_threads.unwrap_or(num_cpus::get());
|
||||
|
||||
let (tx, rx) = bounded::<Batch>(worker_thrs * 2usize);
|
||||
|
||||
let root = root.to_path_buf();
|
||||
let scan_hidden = cfg.scanner.scan_hidden_files;
|
||||
let follow_links = cfg.scanner.follow_symlinks;
|
||||
let max_bytes: u64 = (cfg.scanner.max_file_size_mb.unwrap_or(0)) * 1_048_576;
|
||||
let overrides = ob.build().unwrap();
|
||||
|
||||
// ----- 2 channel & thread pool parameters -----------------------------
|
||||
let workers = cfg.performance.worker_threads.unwrap_or(num_cpus::get());
|
||||
let (tx, rx) = bounded::<Batch>(workers * CHANNEL_MULTIPLIER);
|
||||
|
||||
let root = root.to_path_buf();
|
||||
let scan_hidden = cfg.scanner.scan_hidden_files;
|
||||
let follow = cfg.scanner.follow_symlinks;
|
||||
let max_bytes = cfg.scanner.max_file_size_mb.unwrap_or(0) as u64 * 1_048_576;
|
||||
|
||||
// ----- 3 the background walker thread ---------------------------------
|
||||
thread::spawn(move || {
|
||||
let walker = WalkBuilder::new(root)
|
||||
WalkBuilder::new(root)
|
||||
.hidden(!scan_hidden)
|
||||
.follow_links(follow_links)
|
||||
.threads(worker_thrs)
|
||||
.follow_links(follow)
|
||||
.threads(workers)
|
||||
.overrides(overrides)
|
||||
.build_parallel();
|
||||
.build_parallel()
|
||||
.run(move || {
|
||||
let mut b = Batcher {
|
||||
tx: tx.clone(),
|
||||
batch: Vec::with_capacity(DEFAULT_BATCH),
|
||||
};
|
||||
|
||||
walker.run(move || {
|
||||
let mut batcher = Batcher {
|
||||
tx: tx.clone(),
|
||||
batch: Vec::with_capacity(BATCH_SIZE),
|
||||
};
|
||||
Box::new(move |entry| {
|
||||
let entry = match entry {
|
||||
Ok(e) if e.file_type().map(|ft| ft.is_file()).unwrap_or(false) => e,
|
||||
_ => return WalkState::Continue,
|
||||
};
|
||||
|
||||
Box::new(move |entry| {
|
||||
tracing::debug!("walking: {:?}", entry);
|
||||
let e = match entry {
|
||||
Ok(e) if e.file_type().map(|ft| ft.is_file()).unwrap_or(false) => e,
|
||||
_ => return WalkState::Continue,
|
||||
};
|
||||
if max_bytes != 0 {
|
||||
match e.metadata() {
|
||||
Ok(m) if m.len() <= max_bytes => {},
|
||||
_ => return WalkState::Continue,
|
||||
}
|
||||
}
|
||||
tracing::debug!("scanning file: {:?}", e);
|
||||
batcher.push(e.into_path());
|
||||
if batcher.batch.len() == BATCH_SIZE {
|
||||
let _ = batcher.tx.send(std::mem::take(&mut batcher.batch));
|
||||
}
|
||||
WalkState::Continue
|
||||
})
|
||||
});
|
||||
if max_bytes != 0 {
|
||||
match entry.metadata() {
|
||||
Ok(m) if m.len() > max_bytes => return WalkState::Continue,
|
||||
Err(e) => {
|
||||
tracing::debug!("metadata failed for {:?}: {e}", entry.path());
|
||||
return WalkState::Continue;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
b.push(entry.into_path());
|
||||
WalkState::Continue
|
||||
})
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
rx
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue