Add `max_file_size_mb` filtering and rework `high_only` handling in the scanning process:

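- Implement `max_file_size_mb`: files larger than the configured limit (or whose metadata cannot be read) are skipped during scanning; no limit is applied when the option is unset or `0`.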
- Refactor `high_only` handling: `handle_command` now sets `config.scanner.min_severity` to `Severity::High` when the flag is passed, so `Config` is taken as `&mut`.
- Change `ScannerConfig::max_file_size_mb` from `u64` to `Option<u64>`; the default is now `None` (previously `100`).
- Remove redundant `high_only` parameter from `scan::handle` function.
- Improve batch processing in `walk`: `spawn_senders` now collects file paths in a `Batcher` and sends them over the channel in batches of `BATCH_SIZE`.
This commit is contained in:
elipeter 2025-06-23 16:51:39 +02:00
parent b3e0db449d
commit 80c0bc9845
6 changed files with 47 additions and 15 deletions

View file

@ -5,16 +5,19 @@ pub mod clean;
use crate::cli::Commands;
use std::path::Path;
use crate::patterns::Severity;
use crate::utils::config::Config;
pub fn handle_command(
command: Commands,
database_dir: &Path,
config: &Config
config: &mut Config
) -> Result<(), Box<dyn std::error::Error>> {
match command {
Commands::Scan { path, no_index, rebuild_index, format, high_only } => {
scan::handle(&path, no_index, rebuild_index, format, high_only, database_dir, config)
if high_only { config.scanner.min_severity = Severity::High };
scan::handle(&path, no_index, rebuild_index, format, database_dir, config)
}
Commands::Index { action } => {
index::handle(action, database_dir, config)

View file

@ -30,7 +30,6 @@ pub fn handle(
no_index: bool,
rebuild_index: bool,
format: String,
high_only: bool,
database_dir: &Path,
config: &Config,
) -> Result<(), Box<dyn std::error::Error>> {
@ -52,7 +51,7 @@ pub fn handle(
if format == "console" || format == "" && config.output.default_format == "console" {
for d in &diags {
if high_only && d.severity != Severity::High {
if d.severity != Severity::High {
continue;
}
let sev_str = match d.severity {

View file

@ -53,9 +53,9 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let database_dir = proj_dirs.data_local_dir();
fs::create_dir_all(database_dir)?;
let config = Config::load(config_dir)?;
let mut config = Config::load(config_dir)?;
commands::handle_command(cli.command, database_dir, &config)?;
commands::handle_command(cli.command, database_dir, &mut config)?;
Ok(())
}

View file

@ -10,25 +10,25 @@ pub struct ScannerConfig {
/// The minimum severity level to output
pub min_severity: Severity,
/// The maximum file size to scan, in megabytes. TODO: IMPLEMENT
pub max_file_size_mb: u64,
/// The maximum file size to scan, in megabytes.
pub max_file_size_mb: Option<u64>,
/// File extensions to exclude from scanning. TODO: IMPLEMENT
/// File extensions to exclude from scanning.
pub excluded_extensions: Vec<String>,
/// Directories to exclude from scanning. TODO: IMPLEMENT
/// Directories to exclude from scanning.
pub excluded_directories: Vec<String>,
/// Excluded files
pub excluded_files: Vec<String>,
/// Whether to respect the global ignore file or not. TODO: IMPLEMENT
/// Whether to respect the global ignore file or not.
pub read_global_ignore: bool,
/// Whether to respect VCS ignore files (`.gitignore`, ..) or not. TODO: IMPLEMENT
/// Whether to respect VCS ignore files (`.gitignore`, ..) or not.
pub read_vcsignore: bool,
/// Whether to require a `.git` directory to respect gitignore files. TODO: IMPLEMENT
/// Whether to require a `.git` directory to respect gitignore files.
pub require_git_to_read_vcsignore: bool,
/// Whether to limit the search to starting file system or not.
@ -37,14 +37,14 @@ pub struct ScannerConfig {
/// Whether to follow symlinks or not.
pub follow_symlinks: bool,
/// Whether to scan hidden files or not. TODO: IMPLEMENT
/// Whether to scan hidden files or not.
pub scan_hidden_files: bool,
}
impl Default for ScannerConfig {
fn default() -> Self {
Self {
min_severity: Severity::Low,
max_file_size_mb: 100,
max_file_size_mb: None,
excluded_extensions: vec![
"jpg", "png", "gif", "mp4", "avi", "mkv",
"zip", "tar", "gz", "exe", "dll", "so",

View file

@ -63,6 +63,7 @@ pub fn spawn_senders(
let root = root.to_path_buf();
let scan_hidden = cfg.scanner.scan_hidden_files;
let follow_links = cfg.scanner.follow_symlinks;
let max_bytes: u64 = (cfg.scanner.max_file_size_mb.unwrap_or(0)) * 1_048_576;
thread::spawn(move || {
let walker = WalkBuilder::new(root)
@ -72,6 +73,8 @@ pub fn spawn_senders(
.overrides(overrides)
.build_parallel();
/*
walker.run(move || {
let tx = tx.clone();
let mut batch = Vec::<PathBuf>::with_capacity(256);
@ -94,6 +97,33 @@ pub fn spawn_senders(
WalkState::Continue
})
});
*/
walker.run(move || {
let mut batcher = Batcher {
tx: tx.clone(),
batch: Vec::with_capacity(BATCH_SIZE),
};
Box::new(move |entry| {
tracing::debug!("walking: {:?}", entry);
let e = match entry {
Ok(e) if e.file_type().map(|ft| ft.is_file()).unwrap_or(false) => e,
_ => return WalkState::Continue,
};
if max_bytes != 0 {
match e.metadata() {
Ok(m) if m.len() <= max_bytes => {},
_ => return WalkState::Continue,
}
}
tracing::debug!("scanning file: {:?}", e);
batcher.push(e.into_path());
if batcher.batch.len() == BATCH_SIZE {
let _ = batcher.tx.send(std::mem::take(&mut batcher.batch));
}
WalkState::Continue
})
});
});