Add error handling with NyxError and refactor console output formatting

- Introduced `NyxError` and `NyxResult` for unified error handling across modules.
- Refactored `scan.rs`, `index.rs`, and `walk.rs` with improved error management and consistent formatting.
- Replaced existing error handling in `database.rs` with `NyxResult`.
- Improved database maintenance by integrating `vacuum` and `clear` methods into workflows.
- Added `dashmap` for efficient parallel diagnostics result aggregation in `scan_with_index_parallel`.
- Enhanced readability and formatting of console outputs in multiple modules.
This commit is contained in:
elipeter 2025-06-23 20:27:16 +02:00
parent 75a20eaa2a
commit 0a66a0ae2d
14 changed files with 360 additions and 240 deletions

16
Cargo.lock generated
View file

@ -267,6 +267,20 @@ version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "dashmap"
version = "7.0.0-rc2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4a1e35a65fe0538a60167f0ada6e195ad5d477f6ddae273943596d4a1a5730b"
dependencies = [
"cfg-if",
"crossbeam-utils",
"equivalent",
"hashbrown",
"lock_api",
"parking_lot_core",
]
[[package]]
name = "deranged"
version = "0.4.0"
@ -579,6 +593,7 @@ dependencies = [
"clap",
"console",
"crossbeam-channel",
"dashmap",
"directories",
"ignore",
"num_cpus",
@ -588,6 +603,7 @@ dependencies = [
"rayon",
"rusqlite",
"serde",
"thiserror",
"toml",
"tracing",
"tracing-subscriber",

View file

@ -32,3 +32,5 @@ r2d2_sqlite = "0.30.0"
r2d2 = "0.8.10"
bytesize = "2.0.1"
chrono = { version = "0.4.41", default-features = false, features = ["std", "clock"] }
thiserror = "2.0.12"
dashmap = "7.0.0-rc2"

View file

@ -24,8 +24,10 @@ pub fn handle(
if force || !db_path.exists() {
build_index(&project_name, &build_path, &db_path, config)?;
println!("{} {}", style("Index built:" ).green(), style(db_path.display()).white().bold());
exit(0);
} else {
println!("{} {}", style("↩ Index already exists").yellow(), style("(use --force to rebuild)").dim());
exit(0);
}
}
IndexAction::Status { path } => {
@ -48,7 +50,6 @@ pub fn handle(
exit(0);
}
}
Ok(())
}
pub fn build_index(

View file

@ -7,11 +7,12 @@ use r2d2_sqlite::SqliteConnectionManager;
use crate::database::index::{IssueRow, Indexer};
use crate::patterns::Severity;
use crate::utils::config::Config;
use crate::utils::query_cache;
use crate::walk::spawn_senders;
use rayon::prelude::*;
use std::collections::BTreeMap;
use tree_sitter::{Language, Parser, QueryCursor, StreamingIterator};
use dashmap::DashMap;
use crate::errors::NyxResult;
pub(crate) use crate::file::run_rules_on_file;
type DynError = Box<dyn std::error::Error + Send + Sync>;
@ -35,6 +36,8 @@ pub fn handle(
) -> Result<(), Box<dyn std::error::Error>> {
let scan_path = Path::new(path).canonicalize()?;
let (project_name, db_path) = get_project_info(&scan_path, database_dir)?;
println!("{} {}...\n", style("Checking").green().bold(), &project_name);
let diags: Vec<Diag> = if no_index {
scan_filesystem(&scan_path, config)?
@ -58,25 +61,18 @@ pub fn handle(
for d in &diags {
grouped.entry(&d.path).or_default().push(d);
}
for (path, issues) in grouped {
for (path, issues) in &grouped {
println!("{}", style(path).blue().underlined());
for d in issues {
let sev_str = match d.severity {
Severity::High => style("HIGH").red().bold(),
Severity::Medium => style("MEDIUM").yellow().bold(),
Severity::Low => style("LOW").cyan().bold(),
};
println!(
" {:>4}:{:<4} [{}] {}",
d.line, d.col, sev_str, style(&d.id).bold()
);
println!(" {:>4}:{:<4} [{}] {}",
d.line, d.col, d.severity, style(&d.id).bold());
}
println!();
println!();
}
println!("{} '{}' generated {} issues.",
style("warning").yellow().bold(),
println!("{} '{}' generated {} issues.",
style("warning").yellow().bold(),
style(project_name).white().bold(),
style(diags.len()).bold());
println!("\t"); // TODO: Add individual counts for different warning levels
@ -94,11 +90,11 @@ fn scan_filesystem(
) ->Result<Vec<Diag>, Box<dyn std::error::Error>> {
let rx = spawn_senders(root, cfg);
let acc = Mutex::new(Vec::new());
rx.into_iter()
.flatten()
.par_bridge()
.try_for_each(|path| {
.try_for_each(|path| {
let mut local = run_rules_on_file(&path, cfg).unwrap();
acc.lock().unwrap().append(&mut local);
Ok::<(), DynError>(())
@ -107,113 +103,54 @@ fn scan_filesystem(
Ok(acc.into_inner()?)
}
fn scan_with_index_parallel(
pub fn scan_with_index_parallel(
project: &str,
pool: Arc<Pool<SqliteConnectionManager>>,
cfg: &Config,
) -> Result<Vec<Diag>, Box<dyn std::error::Error>> {
) -> NyxResult<Vec<Diag>> {
let files = {
let idx = Indexer::from_pool(project, &pool)?;
idx.get_files(project)?
};
let acc = Mutex::new(Vec::new());
// ① Collect per-path Vec<Diag> without a global mutex
let diag_map: DashMap<String, Vec<Diag>> = DashMap::new();
files.into_par_iter()
.try_for_each(|path| -> Result<(), DynError> {
let mut idx = Indexer::from_pool(project, &pool).unwrap();
.for_each_init(
// ② A single Indexer per Rayon worker thread
|| Indexer::from_pool(project, &pool).expect("db pool"),
|idx, path| {
let needs_scan = idx.should_scan(&path).unwrap_or(true);
if idx.should_scan(&path).unwrap() {
let mut diags = run_rules_on_file(&path, cfg).unwrap();
let file_id = idx.upsert_file(&path).unwrap();
let rows: Vec<IssueRow> = diags.iter().map(|d| IssueRow {
rule_id: d.id.as_ref(),
severity: match d.severity {
Severity::High => "HIGH",
Severity::Medium => "MEDIUM",
Severity::Low => "LOW",
},
line: d.line as i64,
col: d.col as i64,
}).collect();
idx.replace_issues(file_id, rows).unwrap();
acc.lock().unwrap().append(&mut diags);
} else {
let mut cached = idx.get_issues_from_file(&path).unwrap();
acc.lock().unwrap().append(&mut cached);
let mut diags = if needs_scan {
let d = run_rules_on_file(&path, cfg).unwrap_or_default();
let file_id = idx.upsert_file(&path).unwrap();
idx.replace_issues(
file_id,
d.iter().map(|d| IssueRow {
rule_id: &d.id,
severity: d.severity.as_db_str(),
line: d.line as i64,
col: d.col as i64,
}),
).ok();
d
} else {
idx.get_issues_from_file(&path).unwrap_or_default()
};
if !diags.is_empty() {
diag_map.entry(path.to_string_lossy().to_string())
.or_default()
.append(&mut diags);
}
}
Ok(())
}).unwrap();
);
{
let idx = Indexer::from_pool(project, &pool)?;
idx.vacuum()?;
}
// Optional, heavy: only vacuum on --rebuild-index
// if rebuild { idx.vacuum()?; }
Ok(acc.into_inner().unwrap())
// Flatten
Ok(diag_map.into_iter().flat_map(|(_, v)| v).collect())
}
// --------------------------------------------------------------------------------------------
// Tree-sitter-based rule runner – returns a Vec<Diag>
// --------------------------------------------------------------------------------------------
pub(crate) fn run_rules_on_file(
path: &Path,
cfg: &Config,
) -> Result<Vec<Diag>, Box<dyn std::error::Error>> {
tracing::debug!("Running rules on {}", path.to_string_lossy());
let bytes = std::fs::read(path)?;
let mut parser = Parser::new();
let lang_key = match path
.extension()
.and_then(|s| s.to_str())
.unwrap_or_default()
.to_ascii_lowercase()
.as_str()
{
"rs" => (Language::from(tree_sitter_rust::LANGUAGE), "rust"),
"c" => (Language::from(tree_sitter_c::LANGUAGE), "c"),
"cpp" | "c++" => (Language::from(tree_sitter_cpp::LANGUAGE), "cpp"),
"java" => (Language::from(tree_sitter_java::LANGUAGE), "java"),
"go" => (Language::from(tree_sitter_go::LANGUAGE), "go"),
"php" => (Language::from(tree_sitter_php::LANGUAGE_PHP), "php"),
"py" => (Language::from(tree_sitter_python::LANGUAGE), "python"),
"ts" | "tsx" => (Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT), "typescript"),
"js" => (Language::from(tree_sitter_javascript::LANGUAGE), "javascript"),
_ => return Ok(Vec::new()),
};
let (ts_lang, lang_name) = lang_key;
parser.set_language(&ts_lang)?;
let tree = parser.parse(&*bytes, None).ok_or("treesitter failed")?;
let root = tree.root_node();
let compiled = query_cache::for_lang(lang_name, ts_lang);
let mut cursor = QueryCursor::new();
let mut out = Vec::new();
for cq in &compiled {
if cfg.scanner.min_severity > cq.meta.severity {
tracing::debug!("Skipping rule {} because it's below the minimum severity", cq.meta.id);
continue;
}
let mut matches = cursor.matches(&cq.query, root, &*bytes);
while let Some(m) = matches.next() {
for cap in m.captures.iter().filter(|c| c.index == 0) {
let point = cap.node.start_position();
tracing::debug!("Found match for rule {}", cq.meta.id);
out.push(Diag {
path: path.to_string_lossy().to_string(),
line: point.row + 1,
col: point.column + 1,
severity: cq.meta.severity,
id: String::from(cq.meta.id),
});
}
}
}
Ok(out)
}

View file

@ -10,6 +10,7 @@ pub mod index {
use std::ops::Deref;
use std::sync::Arc;
use r2d2::{Pool, PooledConnection};
use crate::errors::NyxResult;
/// DB schema (foreign keys enabled).
const SCHEMA: &str = r#"
@ -55,7 +56,7 @@ pub mod index {
pub fn init(
database_path: &Path,
) -> Result<std::sync::Arc<Pool<SqliteConnectionManager>>, Box<dyn std::error::Error>> {
) -> NyxResult<Arc<Pool<SqliteConnectionManager>>> {
let flags = OpenFlags::SQLITE_OPEN_READ_WRITE
| OpenFlags::SQLITE_OPEN_CREATE
| OpenFlags::SQLITE_OPEN_FULL_MUTEX;
@ -73,7 +74,7 @@ pub mod index {
pub fn from_pool(
project: &str,
pool: &Pool<SqliteConnectionManager>,
) -> Result<Self, Box<dyn std::error::Error>> {
) -> NyxResult<Self> {
let conn = pool.get()?;
Ok(Self { conn, project: project.to_owned() })
}
@ -82,7 +83,7 @@ pub mod index {
fn c(&self) -> &Connection { self.conn.deref() }
/// Return true when the file *content* or *mtime* changed since the last scan.
pub fn should_scan(&self, path: &Path) -> Result<bool, Box<dyn std::error::Error>> {
pub fn should_scan(&self, path: &Path) -> NyxResult<bool> {
let meta = fs::metadata(path)?;
let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
let digest = Self::digest_file(path)?;
@ -103,7 +104,7 @@ pub mod index {
}
/// Insert or update the `files` row and return its id.
pub fn upsert_file(&self, path: &Path) -> Result<i64, Box<dyn std::error::Error>> {
pub fn upsert_file(&self, path: &Path) -> NyxResult<i64> {
let meta = fs::metadata(path)?;
let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
let scanned_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;
@ -129,7 +130,7 @@ pub mod index {
/// Replace all issues for `file_id` with the supplied set.
pub fn replace_issues<'a>(&mut self, file_id: i64, issues: impl IntoIterator<Item = IssueRow<'a>>)
-> Result<(), Box<dyn std::error::Error>> {
-> NyxResult<()> {
let tx = self.conn.transaction()?;
tx.execute("DELETE FROM issues WHERE file_id = ?", params![file_id])?;
@ -150,7 +151,7 @@ pub mod index {
pub fn get_issues_from_file(
&self,
path: &Path,
) -> Result<Vec<Diag>, Box<dyn std::error::Error>> {
) -> NyxResult<Vec<Diag>> {
let file_id: i64 = self.c().query_row(
"SELECT id FROM files WHERE project = ?1 AND path = ?2",
params![self.project, path.to_string_lossy()],
@ -178,7 +179,7 @@ pub mod index {
}
/// gets files from the database
pub fn get_files(&self, project: &str) -> Result<Vec<std::path::PathBuf>, Box<dyn std::error::Error>> {
pub fn get_files(&self, project: &str) -> NyxResult<Vec<PathBuf>> {
let mut stmt = self.c().prepare(
"SELECT path
FROM files
@ -190,8 +191,10 @@ pub mod index {
Ok(file_iter.map(|p| p.map(PathBuf::from)).collect::<Result<_, _>>()?)
}
/// Clears the tables to prep for a reindex
pub fn clear(&self) -> rusqlite::Result<()> {
// -------------------------------------------------------------------------
// Maintenance utilities
// -------------------------------------------------------------------------
pub fn clear(&self) -> NyxResult<()> {
self.c().execute_batch(
r#"
PRAGMA foreign_keys = OFF;
@ -208,12 +211,15 @@ pub mod index {
Ok(())
}
pub fn vacuum(&self) -> rusqlite::Result<()> {
pub fn vacuum(&self) -> NyxResult<()> {
self.c().execute("VACUUM;", [])?;
Ok(())
}
fn digest_file(path: &Path) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
// -------------------------------------------------------------------------
// Helpers
// -------------------------------------------------------------------------
fn digest_file(path: &Path) -> NyxResult<Vec<u8>> {
let mut hasher = blake3::Hasher::new();
let mut file = fs::File::open(path)?;
std::io::copy(&mut file, &mut hasher)?;

24
src/errors.rs Normal file
View file

@ -0,0 +1,24 @@
use thiserror::Error;
/// Crate-wide result alias. The error type defaults to [`NyxError`] but can be
/// overridden through the second type parameter.
pub type NyxResult<T, E = NyxError> = core::result::Result<T, E>;
/// Unified error type for the crate.
///
/// Every variant carrying `#[from]` gets a `From` impl generated by
/// `thiserror`, so the wrapped error types convert automatically when
/// propagated with `?` from a function returning this error type.
#[derive(Debug, Error)]
pub enum NyxError {
/// Wraps a `std::io::Error`.
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
/// Wraps a `rusqlite::Error`.
#[error("SQLite error: {0}")]
Sql(#[from] rusqlite::Error),
/// Wraps a `tree_sitter::LanguageError`.
#[error("tree-sitter error: {0}")]
TreeSitter(#[from] tree_sitter::LanguageError),
/// Wraps an `r2d2::Error`.
#[error("connection-pool error: {0}")]
Pool(#[from] r2d2::Error),
/// Wraps a `std::time::SystemTimeError`.
#[error("time error: {0}")]
Time(#[from] std::time::SystemTimeError),
/// Free-form message for failures that have no dedicated variant.
/// There is no `#[from]` here, so it must be constructed explicitly.
#[error("other: {0}")]
Other(String),
}

74
src/file.rs Normal file
View file

@ -0,0 +1,74 @@
use std::cell::RefCell;
use std::path::Path;
use tree_sitter::{Language, QueryCursor, StreamingIterator};
use crate::commands::scan::Diag;
use crate::errors::{NyxResult, NyxError};
use crate::utils::{query_cache, Config};
use crate::utils::ext::lowercase_ext;
// One tree-sitter parser per thread. The `RefCell` allows the parser to be
// borrowed mutably inside `PARSER.with(..)` so its language can be switched
// and it can be reused across calls made on the same thread.
thread_local! {
static PARSER: RefCell<tree_sitter::Parser> = RefCell::new(tree_sitter::Parser::new());
}
/// Run the compiled tree-sitter rules for the file's language against `path`.
///
/// For every query match, the first capture with index 0 is turned into one
/// [`Diag`] carrying the 1-based line/column of that capture's start.
///
/// Files whose extension is not recognised by `lowercase_ext`, and files that
/// look binary (more than 1% NUL bytes), yield an empty list instead of an
/// error. Unsupported files are never read from disk.
///
/// # Errors
/// * `NyxError::Io` if the file cannot be read.
/// * `NyxError::TreeSitter` if the parser rejects the grammar.
/// * `NyxError::Other` if tree-sitter returns no syntax tree.
pub(crate) fn run_rules_on_file(
    path: &Path,
    cfg: &Config,
) -> NyxResult<Vec<Diag>> {
    // Resolve the grammar before touching the file so unsupported extensions
    // cost no I/O. The second tuple element is the language *name* handed to
    // the query cache; it is deliberately not the extension key.
    // NOTE(review): confirm these names match the keys of the pattern registry.
    let (ts_lang, lang_name) = match lowercase_ext(path) {
        Some("rs") => (Language::from(tree_sitter_rust::LANGUAGE), "rust"),
        Some("c") => (Language::from(tree_sitter_c::LANGUAGE), "c"),
        Some("cpp") => (Language::from(tree_sitter_cpp::LANGUAGE), "cpp"),
        Some("java") => (Language::from(tree_sitter_java::LANGUAGE), "java"),
        Some("go") => (Language::from(tree_sitter_go::LANGUAGE), "go"),
        Some("php") => (Language::from(tree_sitter_php::LANGUAGE_PHP), "php"),
        Some("py") => (Language::from(tree_sitter_python::LANGUAGE), "python"),
        Some("ts") => (
            Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT),
            "typescript",
        ),
        Some("js") => (Language::from(tree_sitter_javascript::LANGUAGE), "javascript"),
        _ => return Ok(Vec::new()),
    };

    let bytes = std::fs::read(path)?;

    // Fast binary-file guard: skip when more than 1% of the bytes are NUL.
    // Comparing `count * 100 > len` avoids the truncating division that would
    // otherwise raise the effective threshold to 2%. An empty file passes.
    let nul_count = bytes.iter().filter(|&&b| b == 0).count();
    if nul_count * 100 > bytes.len() {
        return Ok(Vec::new());
    }

    // Reuse this thread's parser; only the grammar is switched per file.
    let tree = PARSER.with(|cell| -> NyxResult<_> {
        let mut parser = cell.borrow_mut();
        parser.set_language(&ts_lang)?;
        parser
            .parse(bytes.as_slice(), None)
            .ok_or_else(|| NyxError::Other("tree-sitter failed".into()))
    })?;
    let root = tree.root_node();

    let compiled = query_cache::for_lang(lang_name, ts_lang);
    let mut cursor = QueryCursor::new();
    let mut out = Vec::new();

    for cq in compiled.iter() {
        // Skip rules that rank below the configured minimum severity.
        if cfg.scanner.min_severity > cq.meta.severity {
            continue;
        }

        // `matches` is a streaming iterator, hence `while let` instead of `for`.
        let mut matches = cursor.matches(&cq.query, root, bytes.as_slice());
        while let Some(m) = matches.next() {
            // Report at most one diagnostic per match: its first index-0 capture.
            if let Some(cap) = m.captures.iter().find(|c| c.index == 0) {
                let point = cap.node.start_position();
                out.push(Diag {
                    path: path.to_string_lossy().into_owned(),
                    line: point.row + 1,
                    col: point.column + 1,
                    severity: cq.meta.severity,
                    id: cq.meta.id.to_owned(),
                });
            }
        }
    }

    Ok(out)
}

View file

@ -4,6 +4,8 @@ mod utils;
mod walk;
mod database;
mod patterns;
mod errors;
mod file;
use crate::utils::Config;
use cli::Cli;
@ -59,10 +61,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
commands::handle_command(cli.command, database_dir, &mut config)?;
let elapsed: f32 = now.elapsed().as_millis() as f32 / 1000f32;
println!("{} in {} s.",
style("Finished").green().bold(),
style(elapsed).white().bold());
println!(
"{} in {:.3}s.",
style("Finished").green().bold(),
now.elapsed().as_secs_f32()
);
Ok(())
}

View file

@ -9,18 +9,63 @@ mod php;
mod python;
use std::collections::HashMap;
use std::fmt;
use std::str::FromStr;
use console::style;
use serde::{Deserialize, Serialize};
use once_cell::sync::Lazy;
/// How bad / noisy a pattern is considered.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd)]
pub enum Severity {
Low,
Medium,
High,
/// Variants are declared from least to most severe. The derived
/// `PartialOrd`/`Ord` follow declaration order, so this gives
/// `Low < Medium < High`, which is what threshold checks of the form
/// `min_severity > rule_severity` need in order to drop the *less* severe rules.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
pub enum Severity {
    Low,
    Medium,
    High,
}
impl fmt::Display for Severity {
    /// Writes the severity as a bold, upper-case console label:
    /// red for `High`, yellow for `Medium`, cyan for `Low`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Self::High => style("HIGH").red().bold(),
            Self::Medium => style("MEDIUM").yellow().bold(),
            Self::Low => style("LOW").cyan().bold(),
        };
        // Render the styled label to a string first, then emit it verbatim.
        f.write_str(&label.to_string())
    }
}
impl Severity {
/// Textual value stored in SQLite.
pub fn as_db_str(self) -> &'static str {
match self {
Severity::High => "HIGH",
Severity::Medium => "MEDIUM",
Severity::Low => "LOW",
}
}
}
impl FromStr for Severity { // TODO: FIX
type Err = ();
fn from_str(input: &str) -> Result<Self, Self::Err> {
match input.to_lowercase().as_str() {
"medium" => Ok(Severity::Medium),
"high" => Ok(Severity::High),
_ => Ok(Severity::Low),
}
}
}
// /// How bad / noisy a pattern is considered.
// #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd)]
// pub enum Severity {
// Low,
// Medium,
// High,
// }
//
// impl Severity {
// pub(crate) fn as_db_str(&self) -> &str {
// todo!()
// }
// }
/// One AST pattern with a tree-sitter query and meta-data.
#[derive(Debug, Clone, Serialize)]
pub struct Pattern {
@ -34,17 +79,6 @@ pub struct Pattern {
pub severity: Severity,
}
impl FromStr for Severity { // TODO: FIX
type Err = ();
fn from_str(input: &str) -> Result<Self, Self::Err> {
match input.to_lowercase().as_str() {
"medium" => Ok(Severity::Medium),
"high" => Ok(Severity::High),
_ => Ok(Severity::Low),
}
}
}
/// Global, lazily-initialised registry: lang-name → pattern slice

15
src/utils/ext.rs Normal file
View file

@ -0,0 +1,15 @@
/// Map a path's file extension to the canonical extension key of a supported
/// language.
///
/// The extension is compared case-insensitively (ASCII), so `main.RS`,
/// `Main.Java` and `app.TSX` are all recognised. Aliases collapse onto one key:
/// `c++` becomes `"cpp"` and `tsx` becomes `"ts"`.
///
/// Returns `None` when the path has no extension, the extension is not valid
/// UTF-8, or the extension does not belong to a supported language.
pub fn lowercase_ext(path: &std::path::Path) -> Option<&'static str> {
    // Normalise once so every arm below only needs the lower-case spelling.
    let ext = path.extension()?.to_str()?.to_ascii_lowercase();
    match ext.as_str() {
        "rs" => Some("rs"),
        "c" => Some("c"),
        "cpp" | "c++" => Some("cpp"),
        "java" => Some("java"),
        "go" => Some("go"),
        "php" => Some("php"),
        "py" => Some("py"),
        "ts" | "tsx" => Some("ts"),
        "js" => Some("js"),
        _ => None,
    }
}

View file

@ -1,6 +1,7 @@
pub mod project;
pub mod config;
pub(crate) mod query_cache;
pub(crate) mod ext;
// Re-export commonly used functions for convenience
pub use project::{get_project_info};

View file

@ -1,27 +1,30 @@
use std::path::{Path, PathBuf};
use crate::errors::{NyxError, NyxResult};
/// Determine `<project-name, path/to/<project>.sqlite>`.
pub fn get_project_info(
project_path: &Path,
config_dir: &Path,
) -> Result<(String, PathBuf), Box<dyn std::error::Error>> {
project_path: &Path,
config_dir: &Path,
) -> NyxResult<(String, PathBuf)> {
let project_name = project_path
.file_name()
.and_then(|name| name.to_str())
.ok_or("Unable to determine project name")?;
.and_then(|n| n.to_str())
.ok_or_else(|| NyxError::Other("Unable to determine project name".into()))?;
let db_name = sanitize_project_name(project_name);
let db_path = config_dir.join(format!("{}.sqlite", db_name));
Ok((project_name.to_string(), db_path))
Ok((project_name.to_owned(), db_path))
}
pub fn sanitize_project_name(name: &str) -> String {
pub fn sanitize_project_name(name: &str) -> String {
name.to_lowercase()
.chars()
.map(|c| match c {
' ' | '\t' | '\n' | '\r' => '_',
c if c.is_alphanumeric() || c == '_' || c == '-' => c,
_ => '_'
.map(|c| match c {
' ' | '\t' | '\n' | '\r' => '_',
c if c.is_alphanumeric() || c == '_' || c == '-' => c,
_ => '_',
})
.collect::<String>()
.split('_')

View file

@ -1,5 +1,5 @@
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::sync::RwLock;
use once_cell::sync::Lazy;
use tree_sitter::{Language, Query};
@ -8,30 +8,36 @@ use crate::patterns::{self, Pattern};
#[derive(Clone)]
pub struct CompiledQuery {
pub meta: Pattern,
pub query: Arc<Query>,
pub meta: Pattern,
pub query: std::sync::Arc<Query>,
}
static CACHE: Lazy<RwLock<HashMap<&'static str, Vec<CompiledQuery>>>> =
static CACHE: Lazy<RwLock<HashMap<&'static str, std::sync::Arc<Vec<CompiledQuery>>>>> =
Lazy::new(|| RwLock::new(HashMap::new()));
pub fn for_lang(lang: &'static str, ts_lang: Language) -> Vec<CompiledQuery> {
// fast-path read
/// Return **one shared Arc** to the per-language query set.
/// Cloning the `Arc` is O(1) and the underlying Vec lives for the
/// lifetime of the process.
pub fn for_lang(lang: &'static str, ts_lang: Language) -> std::sync::Arc<Vec<CompiledQuery>> {
// fast path
if let Some(v) = CACHE.read().unwrap().get(lang) {
return v.clone();
}
// compile under write-lock exactly once
let patterns = patterns::load(lang);
let mut vec = Vec::with_capacity(patterns.len());
for p in patterns {
// slow path — compile
let patterns = patterns::load(lang);
let compiled: Vec<_> = patterns.into_iter().filter_map(|p| {
match Query::new(&ts_lang, p.query) {
Ok(q) => vec.push(CompiledQuery { meta: p, query: Arc::new(q) }),
Err(e) => tracing::warn!(lang, id = p.id, "query compile error: {e}"),
Ok(q) => Some(CompiledQuery { meta: p, query: std::sync::Arc::new(q) }),
Err(e)=> {
tracing::warn!(lang, id = p.id, "query compile error: {e}");
None
}
}
}
}).collect();
CACHE.write().unwrap().insert(lang, vec.clone());
vec
let compiled = std::sync::Arc::new(compiled);
let mut w = CACHE.write().unwrap();
w.entry(lang).or_insert_with(|| compiled.clone()).clone()
}

View file

@ -1,106 +1,104 @@
use crossbeam_channel::{bounded, Receiver};
use ignore::{WalkBuilder, WalkState};
use std::{path::{Path, PathBuf}, thread};
use ignore::overrides::OverrideBuilder;
use crossbeam_channel::{bounded, Receiver, Sender};
use ignore::{overrides::OverrideBuilder, WalkBuilder, WalkState};
use std::{
mem,
path::{Path, PathBuf},
thread,
};
use crate::utils::Config;
const BATCH_SIZE: usize = 5;
// ---------------------------------------------------------------------------
// Internal constants / helpers
// ---------------------------------------------------------------------------
const DEFAULT_BATCH: usize = 8; // a tad larger for fewer sends
const CHANNEL_MULTIPLIER:usize = 4; // capacity = threads × this
type Batch = Vec<PathBuf>;
#[derive(Debug)]
struct Batcher {
tx: crossbeam_channel::Sender<Batch>,
tx: Sender<Batch>,
batch: Batch,
}
impl Batcher {
fn push(&mut self, p: PathBuf) {
self.batch.push(p);
if self.batch.len() == BATCH_SIZE {
if self.batch.len() == DEFAULT_BATCH {
self.flush();
}
}
fn flush(&mut self) {
if !self.batch.is_empty() {
let _ = self.tx.send(std::mem::take(&mut self.batch));
let _ = self.tx.send(mem::take(&mut self.batch));
}
}
}
impl Drop for Batcher {
fn drop(&mut self) {
// guarantees the remainder is sent when the worker is dropped
self.flush();
}
fn drop(&mut self) { self.flush(); }
}
/// Walk `root`, send file paths to the returned receiver.
pub fn spawn_senders(
root: &Path,
cfg: &Config
) -> Receiver<Batch> {
// ---------------------------------------------------------------------------
/// Walk `root` and send *batches* of paths through the returned channel.
pub fn spawn_senders(root: &Path, cfg: &Config) -> Receiver<Batch> {
// ----- 1 build ignore/override rules ----------------------------------
let mut ob = OverrideBuilder::new(root);
for ext in &cfg.scanner.excluded_extensions {
if let Err(e) = ob.add(&format!("!*.{ext}")) {
tracing::warn!("could not add ignore pattern: {e}");
tracing::warn!("cannot add ignore pattern {ext}: {e}");
}
}
for dir in &cfg.scanner.excluded_directories {
if let Err(e) = ob.add(&format!("!**/{dir}/**")) {
tracing::warn!("could not add ignore pattern: {e}");
tracing::warn!("cannot add ignore pattern {dir}: {e}");
}
}
let overrides = ob.build().unwrap();
let worker_thrs = cfg.performance.worker_threads.unwrap_or(num_cpus::get());
let (tx, rx) = bounded::<Batch>(worker_thrs * 2usize);
let root = root.to_path_buf();
let scan_hidden = cfg.scanner.scan_hidden_files;
let follow_links = cfg.scanner.follow_symlinks;
let max_bytes: u64 = (cfg.scanner.max_file_size_mb.unwrap_or(0)) * 1_048_576;
let overrides = ob.build().unwrap();
// ----- 2 channel & thread pool parameters -----------------------------
let workers = cfg.performance.worker_threads.unwrap_or(num_cpus::get());
let (tx, rx) = bounded::<Batch>(workers * CHANNEL_MULTIPLIER);
let root = root.to_path_buf();
let scan_hidden = cfg.scanner.scan_hidden_files;
let follow = cfg.scanner.follow_symlinks;
let max_bytes = cfg.scanner.max_file_size_mb.unwrap_or(0) as u64 * 1_048_576;
// ----- 3 the background walker thread ---------------------------------
thread::spawn(move || {
let walker = WalkBuilder::new(root)
WalkBuilder::new(root)
.hidden(!scan_hidden)
.follow_links(follow_links)
.threads(worker_thrs)
.follow_links(follow)
.threads(workers)
.overrides(overrides)
.build_parallel();
.build_parallel()
.run(move || {
let mut b = Batcher {
tx: tx.clone(),
batch: Vec::with_capacity(DEFAULT_BATCH),
};
walker.run(move || {
let mut batcher = Batcher {
tx: tx.clone(),
batch: Vec::with_capacity(BATCH_SIZE),
};
Box::new(move |entry| {
let entry = match entry {
Ok(e) if e.file_type().map(|ft| ft.is_file()).unwrap_or(false) => e,
_ => return WalkState::Continue,
};
Box::new(move |entry| {
tracing::debug!("walking: {:?}", entry);
let e = match entry {
Ok(e) if e.file_type().map(|ft| ft.is_file()).unwrap_or(false) => e,
_ => return WalkState::Continue,
};
if max_bytes != 0 {
match e.metadata() {
Ok(m) if m.len() <= max_bytes => {},
_ => return WalkState::Continue,
}
}
tracing::debug!("scanning file: {:?}", e);
batcher.push(e.into_path());
if batcher.batch.len() == BATCH_SIZE {
let _ = batcher.tx.send(std::mem::take(&mut batcher.batch));
}
WalkState::Continue
})
});
if max_bytes != 0 {
match entry.metadata() {
Ok(m) if m.len() > max_bytes => return WalkState::Continue,
Err(e) => {
tracing::debug!("metadata failed for {:?}: {e}", entry.path());
return WalkState::Continue;
}
_ => {}
}
}
b.push(entry.into_path());
WalkState::Continue
})
});
});
rx
}