- Remove unused filetypes.rs and walk.rs modules

- Introduce `index.rs` for file indexing using SQLite
- Expand configuration options in `config.rs`, including `excluded_files`
- Update dependencies in `Cargo.toml` to include SQLite, hashing, and regex libraries
This commit is contained in:
elipeter 2025-06-16 23:47:50 +02:00
parent 646293ed72
commit ada7835efa
11 changed files with 551 additions and 766 deletions

View file

@ -2,6 +2,9 @@ use crate::cli::OutputFormat;
use crate::utils::project::get_project_info;
use std::path::Path;
use crate::utils::config::Config;
use tree_sitter::{Parser};
use crate::index::index::Indexer;
use crate::walk::spawn_senders;
pub fn handle(
path: &str,
@ -15,13 +18,13 @@ pub fn handle(
let scan_path = Path::new(path).canonicalize()?;
let (project_name, db_path) = get_project_info(&scan_path, database_dir)?;
tracing::info!("Config: {:?}", config);
tracing::debug!("Config: {:?}", config);
tracing::info!("Scanning project: {}", project_name);
tracing::info!("Scan path: {}", scan_path.display());
if no_index {
tracing::info!("Scanning without index...");
scan_filesystem(&scan_path)?;
scan_filesystem(&scan_path, config)?;
} else {
if rebuild_index || !db_path.exists() {
tracing::info!("Building/updating index...");
@ -29,7 +32,7 @@ pub fn handle(
}
tracing::info!("Using index: {}", db_path.display());
scan_with_index(&db_path)?;
scan_with_index(&scan_path, &db_path, config)?;
}
tracing::info!("Output format: {:?}", format);
@ -40,14 +43,63 @@ pub fn handle(
Ok(())
}
fn scan_filesystem(path: &Path) -> Result<(), Box<dyn std::error::Error>> {
// TODO: Implement direct filesystem scanning
tracing::info!("Direct filesystem scan of: {}", path.display());
fn scan_filesystem(root: &Path, cfg: &Config) -> Result<(), Box<dyn std::error::Error>> {
let rx = spawn_senders(root, cfg);
for batch in rx.iter().flatten() {
tracing::debug!("Scanning file: {}", batch.display());
scan_single_file(&batch, cfg)?; // <-- your actual scanner
}
Ok(())
}
fn scan_with_index(root: &Path, db_path: &Path, cfg: &Config) -> Result<(), Box<dyn std::error::Error>> {
let indexer = Indexer::new(db_path)
.map_err(|e| format!("opening index {}: {e}", db_path.display()))?;
let rx = spawn_senders(root, cfg);
for batch in rx.iter().flatten() {
let scan = indexer.should_scan(&batch)?;
tracing::debug!("Should scan: {}, file: {}", scan, batch.display());
if scan {
tracing::debug!("Scanning file: {}", batch.display());
scan_single_file(&batch, cfg)?; // your scanner
indexer.record_scan(&batch)?;
}
}
Ok(())
}
fn scan_with_index(db_path: &Path) -> Result<(), Box<dyn std::error::Error>> {
// TODO: Implement index-based scanning
tracing::info!("Index-based scan using: {}", db_path.display());
fn scan_single_file(
path: &Path,
_cfg: &Config,
) -> Result<(), Box<dyn std::error::Error>> {
if path.extension().and_then(|s| s.to_str()) != Some("rs") {
return Ok(());
}
let source = std::fs::read_to_string(path)?;
let mut parser = Parser::new();
parser.set_language(&tree_sitter_rust::LANGUAGE.into())?;
let tree = parser.parse(&source, None).ok_or("tree-sitter failed")?;
let root = tree.root_node();
let mut fn_count = 0;
let mut cursor = root.walk();
for child in root.children(&mut cursor) {
if child.kind() == "function_item" {
fn_count += 1;
}
}
tracing::info!(
"scanned {} found {} Rust function(s)",
path.display(),
fn_count
);
// TODO: real vulnerability/pattern checks go here
Ok(())
}

View file

View file

@ -1,43 +0,0 @@
// use crate::dir_entry;
// use crate::filesystem;
//
// use faccess::PathExt;
//
// /// Whether or not to show
// #[derive(Default)]
// pub struct FileTypes {
// pub files: bool,
// pub directories: bool,
// pub symlinks: bool,
// pub block_devices: bool,
// pub char_devices: bool,
// pub sockets: bool,
// pub pipes: bool,
// pub executables_only: bool,
// pub empty_only: bool,
// }
//
// impl FileTypes {
// pub fn should_ignore(&self, entry: &dir_entry::DirEntry) -> bool {
// if let Some(ref entry_type) = entry.file_type() {
// (!self.files && entry_type.is_file())
// || (!self.directories && entry_type.is_dir())
// || (!self.symlinks && entry_type.is_symlink())
// || (!self.block_devices && filesystem::is_block_device(*entry_type))
// || (!self.char_devices && filesystem::is_char_device(*entry_type))
// || (!self.sockets && filesystem::is_socket(*entry_type))
// || (!self.pipes && filesystem::is_pipe(*entry_type))
// || (self.executables_only && !entry.path().executable())
// || (self.empty_only && !filesystem::is_empty(entry))
// || !(entry_type.is_file()
// || entry_type.is_dir()
// || entry_type.is_symlink()
// || filesystem::is_block_device(*entry_type)
// || filesystem::is_char_device(*entry_type)
// || filesystem::is_socket(*entry_type)
// || filesystem::is_pipe(*entry_type))
// } else {
// true
// }
// }
// }

77
src/index.rs Normal file
View file

@ -0,0 +1,77 @@
pub mod index {
use blake3::Hasher;
use rusqlite::{params, Connection, OptionalExtension};
use std::fs;
use std::path::Path;
use std::time::{SystemTime, UNIX_EPOCH};
/// Schema: stores digest, file modification time (secs since epoch) and
/// last time we *fully* scanned the file.
const SCHEMA: &str = r#"
CREATE TABLE IF NOT EXISTS files (
path TEXT PRIMARY KEY,
hash BLOB NOT NULL,
mtime INTEGER NOT NULL,
scanned_at INTEGER NOT NULL
);"#;
pub(crate) struct Indexer {
conn: Connection,
}
impl Indexer {
pub fn new(database_path: &Path) -> Result<Self, Box<dyn std::error::Error>> {
let conn = Connection::open(database_path)?;
conn.execute_batch(SCHEMA)?;
Ok(Self { conn })
}
/// Returns `true` if the caller should analyze the file, i.e., we have
/// never seen it or something changed (mtime or content hash).
pub fn should_scan(&self, path: &Path) -> Result<bool, Box<dyn std::error::Error>> {
let meta = fs::metadata(path)?;
let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
let digest = Self::digest_file(path)?;
let row: Option<(Vec<u8>, i64)> = self
.conn
.query_row(
"SELECT hash, mtime FROM files WHERE path = ?1",
params![path.to_string_lossy()],
|r| Ok((r.get(0)?, r.get(1)?)),
)
.optional()?;
match row {
Some((stored_hash, stored_mtime)) => {
Ok(stored_hash != digest || stored_mtime != mtime)
}
None => Ok(true),
}
}
/// Persist a fresh scan result.
pub fn record_scan(&self, path: &Path) -> Result<(), Box<dyn std::error::Error>> {
let meta = fs::metadata(path)?;
let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
let scanned_at = SystemTime::now()
.duration_since(UNIX_EPOCH)?
.as_secs() as i64;
let digest = Self::digest_file(path)?;
self.conn.execute(
"REPLACE INTO files (path, hash, mtime, scanned_at) VALUES (?1, ?2, ?3, ?4)",
params![path.to_string_lossy(), digest, mtime, scanned_at],
)?;
Ok(())
}
fn digest_file(path: &Path) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
let mut hasher = Hasher::new();
let mut file = fs::File::open(path)?;
std::io::copy(&mut file, &mut hasher)?;
Ok(hasher.finalize().as_bytes().to_vec())
}
}
}

View file

@ -1,6 +1,8 @@
mod cli;
mod commands;
mod utils;
mod walk;
mod index;
use crate::utils::Config;
use cli::Cli;
@ -30,9 +32,9 @@ fn init_tracing() {
// .json();
Registry::default()
.with(EnvFilter::from_default_env()) // obey RUST_LOG
.with(EnvFilter::from_default_env())
.with(fmt_layer)
.init(); // install as the global subscriber
.init();
}
fn main() -> Result<(), Box<dyn std::error::Error>> {

View file

@ -14,6 +14,9 @@ pub struct ScannerConfig {
/// Directories to exclude from scanning. TODO: IMPLEMENT
pub excluded_directories: Vec<String>,
/// Excluded files
pub excluded_files: Vec<String>,
/// Whether to respect the global ignore file or not. TODO: IMPLEMENT
pub read_global_ignore: bool,
@ -24,10 +27,10 @@ pub struct ScannerConfig {
/// Whether to require a `.git` directory to respect gitignore files. TODO: IMPLEMENT
pub require_git_to_read_vcsignore: bool,
/// Whether to limit the search to starting file system or not. TODO: IMPLEMENT
/// Whether to limit the search to starting file system or not.
pub one_file_system: bool,
/// Whether to follow symlinks or not. TODO: IMPLEMENT
/// Whether to follow symlinks or not.
pub follow_symlinks: bool,
/// Whether to scan hidden files or not. TODO: IMPLEMENT
@ -50,6 +53,10 @@ impl Default for ScannerConfig {
.into_iter()
.map(str::to_owned)
.collect(),
excluded_files: vec![]
.into_iter()
.map(str::to_owned)
.collect(),
read_global_ignore: false,
read_vcsignore: true,
require_git_to_read_vcsignore: true,
@ -125,7 +132,7 @@ pub struct PerformanceConfig {
pub prune: bool, // TODO: IMPLEMENT
/// The maximum number of worker threads to use., or `None` to auto-detect.
pub worker_threads: Option<u32>, // TODO: IMPLEMENT
pub worker_threads: Option<usize>, // TODO: IMPLEMENT
/// The maximum number of entries to index in a single chunk.
pub index_chunk_size: u32, // TODO: IMPLEMENT

View file

@ -2,5 +2,5 @@ pub mod project;
pub mod config;
// Re-export commonly used functions for convenience
pub use project::{get_project_info, sanitize_project_name};
pub use project::{get_project_info};
pub use config::Config;

View file

@ -1,670 +1,95 @@
// use std::borrow::Cow;
// use std::ffi::OsStr;
// use std::io::{self, Write};
// use std::mem;
// use std::path::PathBuf;
// use std::sync::atomic::{AtomicBool, Ordering};
// use std::sync::{Arc, Mutex, MutexGuard};
// use std::thread;
// use std::time::{Duration, Instant};
//
// use anyhow::{anyhow, Result};
// use crossbeam_channel::{bounded, Receiver, RecvTimeoutError, SendError, Sender};
// use etcetera::BaseStrategy;
// use ignore::overrides::{Override, OverrideBuilder};
// use ignore::{WalkBuilder, WalkParallel, WalkState};
// use regex::bytes::Regex;
//
// use crate::config::Config;
// use crate::dir_entry::DirEntry;
// use crate::error::print_error;
// use crate::exec;
// use crate::exit_codes::{merge_exitcodes, ExitCode};
// use crate::filesystem;
// use crate::output;
//
// /// The receiver thread can either be buffering results or directly streaming to the console.
// #[derive(PartialEq)]
// enum ReceiverMode {
// /// Receiver is still buffering in order to sort the results, if the search finishes fast
// /// enough.
// Buffering,
//
// /// Receiver is directly printing results to the output.
// Streaming,
// }
//
// /// The Worker threads can result in a valid entry having PathBuf or an error.
// #[allow(clippy::large_enum_variant)]
// #[derive(Debug)]
// pub enum WorkerResult {
// // Errors should be rare, so it's probably better to allow large_enum_variant than
// // to box the Entry variant
// Entry(ignore::DirEntry), // TODO: CHECK IF ERRORS
// Error(ignore::Error),
// }
//
// /// A batch of WorkerResults to send over a channel.
// #[derive(Clone)]
// struct Batch {
// items: Arc<Mutex<Option<Vec<WorkerResult>>>>,
// }
//
// impl Batch {
// fn new() -> Self {
// Self {
// items: Arc::new(Mutex::new(Some(vec![]))),
// }
// }
//
// fn lock(&self) -> MutexGuard<'_, Option<Vec<WorkerResult>>> {
// self.items.lock().unwrap()
// }
// }
//
// impl IntoIterator for Batch {
// type Item = WorkerResult;
// type IntoIter = std::vec::IntoIter<WorkerResult>;
//
// fn into_iter(self) -> Self::IntoIter {
// self.lock().take().unwrap().into_iter()
// }
// }
//
// /// Wrapper that sends batches of items at once over a channel.
// struct BatchSender {
// batch: Batch,
// tx: Sender<Batch>,
// limit: usize,
// }
//
// impl BatchSender {
// fn new(tx: Sender<Batch>, limit: usize) -> Self {
// Self {
// batch: Batch::new(),
// tx,
// limit,
// }
// }
//
// /// Check if we need to flush a batch.
// fn needs_flush(&self, batch: Option<&Vec<WorkerResult>>) -> bool {
// match batch {
// // Limit the batch size to provide some backpressure
// Some(vec) => vec.len() >= self.limit,
// // Batch was already taken by the receiver, so make a new one
// None => true,
// }
// }
//
// /// Add an item to a batch.
// fn send(&mut self, item: WorkerResult) -> Result<(), SendError<()>> {
// let mut batch = self.batch.lock();
//
// if self.needs_flush(batch.as_ref()) {
// drop(batch);
// self.batch = Batch::new();
// batch = self.batch.lock();
// }
//
// let items = batch.as_mut().unwrap();
// items.push(item);
//
// if items.len() == 1 {
// // New batch, send it over the channel
// self.tx
// .send(self.batch.clone())
// .map_err(|_| SendError(()))?;
// }
//
// Ok(())
// }
// }
//
// /// Maximum size of the output buffer before flushing results to the console
// const MAX_BUFFER_LENGTH: usize = 1000;
// /// Default duration until output buffering switches to streaming.
// const DEFAULT_MAX_BUFFER_TIME: Duration = Duration::from_millis(100);
//
// /// Wrapper for the receiver thread's buffering behavior.
// struct ReceiverBuffer<'a, W> {
// /// The configuration.
// config: &'a Config,
// /// For shutting down the senders.
// quit_flag: &'a AtomicBool,
// /// The ^C notifier.
// interrupt_flag: &'a AtomicBool,
// /// Receiver for worker results.
// rx: Receiver<Batch>,
// /// Standard output.
// stdout: W,
// /// The current buffer mode.
// mode: ReceiverMode,
// /// The deadline to switch to streaming mode.
// deadline: Instant,
// /// The buffer of quickly received paths.
// buffer: Vec<ignore::DirEntry>,
// /// Result count.
// num_results: usize,
// }
//
// impl<'a, W: Write> ReceiverBuffer<'a, W> {
// /// Create a new receiver buffer.
// fn new(state: &'a WorkerState, rx: Receiver<Batch>, stdout: W) -> Self {
// let config = &state.config;
// let quit_flag = state.quit_flag.as_ref();
// let interrupt_flag = state.interrupt_flag.as_ref();
// let max_buffer_time = config.max_buffer_time.unwrap_or(DEFAULT_MAX_BUFFER_TIME);
// let deadline = Instant::now() + max_buffer_time;
//
// Self {
// config,
// quit_flag,
// interrupt_flag,
// rx,
// stdout,
// mode: ReceiverMode::Buffering,
// deadline,
// buffer: Vec::with_capacity(MAX_BUFFER_LENGTH),
// num_results: 0,
// }
// }
//
// /// Process results until finished.
// fn process(&mut self) -> ExitCode {
// loop {
// if let Err(ec) = self.poll() {
// self.quit_flag.store(true, Ordering::Relaxed);
// return ec;
// }
// }
// }
//
// /// Receive the next worker result.
// fn recv(&self) -> Result<Batch, RecvTimeoutError> {
// match self.mode {
// ReceiverMode::Buffering => {
// // Wait at most until we should switch to streaming
// self.rx.recv_deadline(self.deadline)
// }
// ReceiverMode::Streaming => {
// // Wait however long it takes for a result
// Ok(self.rx.recv()?)
// }
// }
// }
//
// /// Wait for a result or state change.
// fn poll(&mut self) -> Result<(), ExitCode> {
// match self.recv() {
// Ok(batch) => {
// for result in batch {
// match result {
// WorkerResult::Entry(dir_entry) => {
// if self.config.quiet {
// return Err(ExitCode::HasResults(true));
// }
//
// match self.mode {
// ReceiverMode::Buffering => {
// self.buffer.push(dir_entry);
// if self.buffer.len() > MAX_BUFFER_LENGTH {
// self.stream()?;
// }
// }
// ReceiverMode::Streaming => {
// self.print(&dir_entry)?;
// }
// }
//
// self.num_results += 1;
// if let Some(max_results) = self.config.max_results {
// if self.num_results >= max_results {
// return self.stop();
// }
// }
// }
// WorkerResult::Error(err) => {
// if self.config.show_filesystem_errors {
// print_error(err.to_string());
// }
// }
// }
// }
//
// // If we don't have another batch ready, flush before waiting
// if self.mode == ReceiverMode::Streaming && self.rx.is_empty() {
// self.flush()?;
// }
// }
// Err(RecvTimeoutError::Timeout) => {
// self.stream()?;
// }
// Err(RecvTimeoutError::Disconnected) => {
// return self.stop();
// }
// }
//
// Ok(())
// }
//
// /// Output a path.
// fn print(&mut self, entry: &DirEntry) -> Result<(), ExitCode> {
// if let Err(e) = output::print_entry(&mut self.stdout, entry, self.config) {
// if e.kind() != ::std::io::ErrorKind::BrokenPipe {
// print_error(format!("Could not write to output: {e}"));
// return Err(ExitCode::GeneralError);
// }
// }
//
// if self.interrupt_flag.load(Ordering::Relaxed) {
// // Ignore any errors on flush, because we're about to exit anyway
// let _ = self.flush();
// return Err(ExitCode::KilledBySigint);
// }
//
// Ok(())
// }
//
// /// Switch ourselves into streaming mode.
// fn stream(&mut self) -> Result<(), ExitCode> {
// self.mode = ReceiverMode::Streaming;
//
// let buffer = mem::take(&mut self.buffer);
// for path in buffer {
// self.print(&path)?;
// }
//
// self.flush()
// }
//
// /// Stop looping.
// fn stop(&mut self) -> Result<(), ExitCode> {
// if self.mode == ReceiverMode::Buffering {
// self.buffer.sort();
// self.stream()?;
// }
//
// if self.config.quiet {
// Err(ExitCode::HasResults(self.num_results > 0))
// } else {
// Err(ExitCode::Success)
// }
// }
//
// /// Flush stdout if necessary.
// fn flush(&mut self) -> Result<(), ExitCode> {
// if self.stdout.flush().is_err() {
// // Probably a broken pipe. Exit gracefully.
// return Err(ExitCode::GeneralError);
// }
// Ok(())
// }
// }
//
// /// State shared by the sender and receiver threads.
// struct WorkerState {
// /// The search patterns.
// patterns: Vec<Regex>,
// /// The command line configuration.
// config: Config,
// /// Flag for cleanly shutting down the parallel walk
// quit_flag: Arc<AtomicBool>,
// /// Flag specifically for quitting due to ^C
// interrupt_flag: Arc<AtomicBool>,
// }
//
// impl WorkerState {
// fn new(patterns: Vec<Regex>, config: Config) -> Self {
// let quit_flag = Arc::new(AtomicBool::new(false));
// let interrupt_flag = Arc::new(AtomicBool::new(false));
//
// Self {
// patterns,
// config,
// quit_flag,
// interrupt_flag,
// }
// }
//
// fn build_overrides(&self, paths: &[PathBuf]) -> Result<Override> {
// let first_path = &paths[0];
// let config = &self.config;
//
// let mut builder = OverrideBuilder::new(first_path);
//
// for pattern in &config.exclude_patterns {
// builder
// .add(pattern)
// .map_err(|e| anyhow!("Malformed exclude pattern: {}", e))?;
// }
//
// builder
// .build()
// .map_err(|_| anyhow!("Mismatch in exclude patterns"))
// }
//
// fn build_walker(&self, paths: &[PathBuf]) -> Result<WalkParallel> {
// let first_path = &paths[0];
// let config = &self.config;
// let overrides = self.build_overrides(paths)?;
//
// let mut builder = WalkBuilder::new(first_path);
// builder
// .hidden(config.ignore_hidden)
// .ignore(config.read_fdignore)
// .parents(config.read_parent_ignore && (config.read_fdignore || config.read_vcsignore))
// .git_ignore(config.read_vcsignore)
// .git_global(config.read_vcsignore)
// .git_exclude(config.read_vcsignore)
// .require_git(config.require_git_to_read_vcsignore)
// .overrides(overrides)
// .follow_links(config.follow_links)
// // No need to check for supported platforms, option is unavailable on unsupported ones
// .same_file_system(config.one_file_system)
// .max_depth(config.max_depth);
//
// if config.read_fdignore {
// builder.add_custom_ignore_filename(".fdignore");
// }
//
// if config.read_global_ignore {
// if let Ok(basedirs) = etcetera::choose_base_strategy() {
// let global_ignore_file = basedirs.config_dir().join("fd").join("ignore");
// if global_ignore_file.is_file() {
// let result = builder.add_ignore(global_ignore_file);
// match result {
// Some(ignore::Error::Partial(_)) => (),
// Some(err) => {
// print_error(format!("Malformed pattern in global ignore file. {err}."));
// }
// None => (),
// }
// }
// }
// }
//
// for ignore_file in &config.ignore_files {
// let result = builder.add_ignore(ignore_file);
// match result {
// Some(ignore::Error::Partial(_)) => (),
// Some(err) => {
// print_error(format!("Malformed pattern in custom ignore file. {err}."));
// }
// None => (),
// }
// }
//
// for path in &paths[1..] {
// builder.add(path);
// }
//
// let walker = builder.threads(config.threads).build_parallel();
// Ok(walker)
// }
//
// /// Run the receiver work, either on this thread or a pool of background
// /// threads (for --exec).
// fn receive(&self, rx: Receiver<Batch>) -> ExitCode {
// let config = &self.config;
//
// // This will be set to `Some` if the `--exec` argument was supplied.
// if let Some(ref cmd) = config.command {
// if cmd.in_batch_mode() {
// exec::batch(rx.into_iter().flatten(), cmd, config)
// } else {
// let out_perm = Mutex::new(());
//
// thread::scope(|scope| {
// // Each spawned job will store its thread handle in here.
// let threads = config.threads;
// let mut handles = Vec::with_capacity(threads);
// for _ in 0..threads {
// let rx = rx.clone();
//
// // Spawn a job thread that will listen for and execute inputs.
// let handle = scope
// .spawn(|| exec::job(rx.into_iter().flatten(), cmd, &out_perm, config));
//
// // Push the handle of the spawned thread into the vector for later joining.
// handles.push(handle);
// }
// let exit_codes = handles.into_iter().map(|handle| handle.join().unwrap());
// merge_exitcodes(exit_codes)
// })
// }
// } else {
// let stdout = io::stdout().lock();
// let stdout = io::BufWriter::new(stdout);
//
// ReceiverBuffer::new(self, rx, stdout).process()
// }
// }
//
// /// Spawn the sender threads.
// fn spawn_senders(&self, walker: WalkParallel, tx: Sender<Batch>) {
// walker.run(|| {
// let patterns = &self.patterns;
// let config = &self.config;
// let quit_flag = self.quit_flag.as_ref();
//
// let mut limit = 0x100;
// if let Some(cmd) = &config.command {
// if !cmd.in_batch_mode() && config.threads > 1 {
// // Evenly distribute work between multiple receivers
// limit = 1;
// }
// }
// let mut tx = BatchSender::new(tx.clone(), limit);
//
// Box::new(move |entry| {
// if quit_flag.load(Ordering::Relaxed) {
// return WalkState::Quit;
// }
//
// let entry = match entry {
// Ok(ref e) if e.depth() == 0 => {
// // Skip the root directory entry.
// return WalkState::Continue;
// }
// Ok(e) => DirEntry::normal(e),
// Err(ignore::Error::WithPath {
// path,
// err: inner_err,
// }) => match inner_err.as_ref() {
// ignore::Error::Io(io_error)
// if io_error.kind() == io::ErrorKind::NotFound
// && path
// .symlink_metadata()
// .ok()
// .is_some_and(|m| m.file_type().is_symlink()) =>
// {
// DirEntry::broken_symlink(path)
// }
// _ => {
// return match tx.send(WorkerResult::Error(ignore::Error::WithPath {
// path,
// err: inner_err,
// })) {
// Ok(_) => WalkState::Continue,
// Err(_) => WalkState::Quit,
// }
// }
// },
// Err(err) => {
// return match tx.send(WorkerResult::Error(err)) {
// Ok(_) => WalkState::Continue,
// Err(_) => WalkState::Quit,
// }
// }
// };
//
// if let Some(min_depth) = config.min_depth {
// if entry.depth().map_or(true, |d| d < min_depth) {
// return WalkState::Continue;
// }
// }
//
// // Check the name first, since it doesn't require metadata
// let entry_path = entry.path();
//
// let search_str: Cow<OsStr> = if config.search_full_path {
// let path_abs_buf = filesystem::path_absolute_form(entry_path)
// .expect("Retrieving absolute path succeeds");
// Cow::Owned(path_abs_buf.as_os_str().to_os_string())
// } else {
// match entry_path.file_name() {
// Some(filename) => Cow::Borrowed(filename),
// None => unreachable!(
// "Encountered file system entry without a file name. This should only \
// happen for paths like 'foo/bar/..' or '/' which are not supposed to \
// appear in a file system traversal."
// ),
// }
// };
//
// if !patterns
// .iter()
// .all(|pat| pat.is_match(&filesystem::osstr_to_bytes(search_str.as_ref())))
// {
// return WalkState::Continue;
// }
//
// // Filter out unwanted extensions.
// if let Some(ref exts_regex) = config.extensions {
// if let Some(path_str) = entry_path.file_name() {
// if !exts_regex.is_match(&filesystem::osstr_to_bytes(path_str)) {
// return WalkState::Continue;
// }
// } else {
// return WalkState::Continue;
// }
// }
//
// // Filter out unwanted file types.
// if let Some(ref file_types) = config.file_types {
// if file_types.should_ignore(&entry) {
// return WalkState::Continue;
// }
// }
//
// #[cfg(unix)]
// {
// if let Some(ref owner_constraint) = config.owner_constraint {
// if let Some(metadata) = entry.metadata() {
// if !owner_constraint.matches(metadata) {
// return WalkState::Continue;
// }
// } else {
// return WalkState::Continue;
// }
// }
// }
//
// // Filter out unwanted sizes if it is a file and we have been given size constraints.
// if !config.size_constraints.is_empty() {
// if entry_path.is_file() {
// if let Some(metadata) = entry.metadata() {
// let file_size = metadata.len();
// if config
// .size_constraints
// .iter()
// .any(|sc| !sc.is_within(file_size))
// {
// return WalkState::Continue;
// }
// } else {
// return WalkState::Continue;
// }
// } else {
// return WalkState::Continue;
// }
// }
//
// // Filter out unwanted modification times
// if !config.time_constraints.is_empty() {
// let mut matched = false;
// if let Some(metadata) = entry.metadata() {
// if let Ok(modified) = metadata.modified() {
// matched = config
// .time_constraints
// .iter()
// .all(|tf| tf.applies_to(&modified));
// }
// }
// if !matched {
// return WalkState::Continue;
// }
// }
//
// if config.is_printing() {
// if let Some(ls_colors) = &config.ls_colors {
// // Compute colors in parallel
// entry.style(ls_colors);
// }
// }
//
// let send_result = tx.send(WorkerResult::Entry(entry));
//
// if send_result.is_err() {
// return WalkState::Quit;
// }
//
// // Apply pruning.
// if config.prune {
// return WalkState::Skip;
// }
//
// WalkState::Continue
// })
// });
// }
//
// /// Perform the recursive scan.
// fn scan(&self, paths: &[PathBuf]) -> Result<ExitCode> {
// let config = &self.config;
// let walker = self.build_walker(paths)?;
//
// if config.ls_colors.is_some() && config.is_printing() {
// let quit_flag = Arc::clone(&self.quit_flag);
// let interrupt_flag = Arc::clone(&self.interrupt_flag);
//
// ctrlc::set_handler(move || {
// quit_flag.store(true, Ordering::Relaxed);
//
// if interrupt_flag.fetch_or(true, Ordering::Relaxed) {
// // Ctrl-C has been pressed twice, exit NOW
// ExitCode::KilledBySigint.exit();
// }
// })
// .unwrap();
// }
//
// let (tx, rx) = bounded(2 * config.threads);
//
// let exit_code = thread::scope(|scope| {
// // Spawn the receiver thread(s)
// let receiver = scope.spawn(|| self.receive(rx));
//
// // Spawn the sender threads.
// self.spawn_senders(walker, tx);
//
// receiver.join().unwrap()
// });
//
// if self.interrupt_flag.load(Ordering::Relaxed) {
// Ok(ExitCode::KilledBySigint)
// } else {
// Ok(exit_code)
// }
// }
// }
//
// /// Recursively scan the given search path for files / pathnames matching the patterns.
// ///
// /// If the `--exec` argument was supplied, this will create a thread pool for executing
// /// jobs in parallel from a given command line and the discovered paths. Otherwise, each
// /// path will simply be written to standard output.
// pub fn scan(paths: &[PathBuf], patterns: Vec<Regex>, config: Config) -> Result<ExitCode> {
// WorkerState::new(patterns, config).scan(paths)
// }
use crossbeam_channel::{bounded, Receiver};
use ignore::{WalkBuilder, WalkState};
use std::{path::{Path, PathBuf}, thread};
use ignore::overrides::OverrideBuilder;
use crate::utils::Config;
const BATCH_SIZE: usize = 5;
type Batch = Vec<PathBuf>;
struct Batcher {
tx: crossbeam_channel::Sender<Batch>,
batch: Batch,
}
impl Batcher {
fn push(&mut self, p: PathBuf) {
self.batch.push(p);
if self.batch.len() == BATCH_SIZE {
self.flush();
}
}
fn flush(&mut self) {
if !self.batch.is_empty() {
let _ = self.tx.send(std::mem::take(&mut self.batch));
}
}
}
impl Drop for Batcher {
fn drop(&mut self) {
// guarantees the remainder is sent when the worker is dropped
self.flush();
}
}
/// Walk `root`, send file paths to the returned receiver.
pub fn spawn_senders(
root: &Path,
cfg: &Config
) -> Receiver<Batch> {
let mut ob = OverrideBuilder::new(root);
for ext in &cfg.scanner.excluded_extensions {
ob.add(&format!("!*.{ext}")).unwrap();
}
for dir in &cfg.scanner.excluded_directories {
ob.add(&format!("!**/{dir}/**")).unwrap();
}
let overrides = ob.build().unwrap();
let worker_thrs = cfg.performance.worker_threads.unwrap_or(num_cpus::get());
let (tx, rx) = bounded::<Batch>(worker_thrs * 2usize);
let root = root.to_path_buf();
let scan_hidden = cfg.scanner.scan_hidden_files;
let follow_links = cfg.scanner.follow_symlinks;
thread::spawn(move || {
let walker = WalkBuilder::new(root)
.hidden(!scan_hidden)
.follow_links(follow_links)
.threads(worker_thrs)
.overrides(overrides)
.build_parallel();
walker.run(move || {
let tx = tx.clone();
let mut batch = Vec::<PathBuf>::with_capacity(256);
Box::new(move |entry| {
tracing::info!("walking: {:?}", entry);
let mut b = Batcher { tx: tx.clone(), batch: Vec::with_capacity(BATCH_SIZE) };
match entry {
Ok(e) if e.file_type().map_or(false, |ft| ft.is_file()) => {
b.push(e.into_path());
if batch.len() == BATCH_SIZE {
let _ = tx.send(std::mem::take(&mut batch));
}
}
Err(err) => eprintln!("walk error: {err}"),
_ => {}
}
WalkState::Continue
})
});
});
rx
}