2026-02-24 23:44:07 -05:00
|
|
|
|
pub(crate) use crate::ast::{
|
2026-02-25 04:02:11 -05:00
|
|
|
|
analyse_file_fused, extract_summaries_from_bytes, run_rules_on_bytes, run_rules_on_file,
|
2026-02-24 23:44:07 -05:00
|
|
|
|
};
|
2025-06-24 20:27:06 +02:00
|
|
|
|
use crate::database::index::{Indexer, IssueRow};
|
|
|
|
|
|
use crate::errors::NyxResult;
|
|
|
|
|
|
use crate::patterns::Severity;
|
2026-02-25 04:02:11 -05:00
|
|
|
|
use crate::summary::{self, GlobalSummaries};
|
2025-06-24 20:27:06 +02:00
|
|
|
|
use crate::utils::config::Config;
|
2025-06-16 16:46:22 +02:00
|
|
|
|
use crate::utils::project::get_project_info;
|
2026-02-24 23:44:07 -05:00
|
|
|
|
use crate::walk::spawn_file_walker;
|
2025-06-17 16:46:45 +02:00
|
|
|
|
use console::style;
|
2025-06-24 20:27:06 +02:00
|
|
|
|
use dashmap::DashMap;
|
2026-02-25 04:02:11 -05:00
|
|
|
|
use indicatif::{ProgressBar, ProgressStyle};
|
2025-06-17 20:45:33 +02:00
|
|
|
|
use r2d2::Pool;
|
|
|
|
|
|
use r2d2_sqlite::SqliteConnectionManager;
|
2025-06-17 19:54:03 +02:00
|
|
|
|
use rayon::prelude::*;
|
2025-06-23 18:25:10 +02:00
|
|
|
|
use std::collections::BTreeMap;
|
2026-02-24 23:44:07 -05:00
|
|
|
|
use std::path::{Path, PathBuf};
|
|
|
|
|
|
use std::sync::Arc;
|
2025-06-17 16:46:45 +02:00
|
|
|
|
|
2026-02-25 04:02:11 -05:00
|
|
|
|
fn make_progress_bar(len: u64, msg: &str, show: bool) -> ProgressBar {
|
|
|
|
|
|
if !show {
|
|
|
|
|
|
return ProgressBar::hidden();
|
|
|
|
|
|
}
|
|
|
|
|
|
let pb = ProgressBar::new(len);
|
|
|
|
|
|
pb.set_style(
|
|
|
|
|
|
ProgressStyle::with_template(
|
|
|
|
|
|
"{spinner:.green} {msg} [{bar:30.cyan/blue}] {pos}/{len} ({eta})",
|
|
|
|
|
|
)
|
|
|
|
|
|
.unwrap()
|
|
|
|
|
|
.progress_chars("##-"),
|
|
|
|
|
|
);
|
|
|
|
|
|
pb.set_message(msg.to_string());
|
|
|
|
|
|
pb
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-24 23:44:07 -05:00
|
|
|
|
#[derive(Debug, Clone, serde::Serialize)]
|
2025-06-17 16:46:45 +02:00
|
|
|
|
pub struct Diag {
|
2026-02-24 23:44:07 -05:00
|
|
|
|
pub path: String,
|
|
|
|
|
|
pub line: usize,
|
|
|
|
|
|
pub col: usize,
|
|
|
|
|
|
pub severity: Severity,
|
|
|
|
|
|
pub id: String,
|
2025-06-17 16:46:45 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Entry point called by the CLI.
|
2025-06-16 16:46:22 +02:00
|
|
|
|
pub fn handle(
|
|
|
|
|
|
path: &str,
|
|
|
|
|
|
no_index: bool,
|
|
|
|
|
|
rebuild_index: bool,
|
2025-06-17 16:46:45 +02:00
|
|
|
|
format: String,
|
2025-06-16 16:46:22 +02:00
|
|
|
|
database_dir: &Path,
|
|
|
|
|
|
config: &Config,
|
2025-06-23 20:59:49 +02:00
|
|
|
|
) -> NyxResult<()> {
|
2025-06-16 16:46:22 +02:00
|
|
|
|
let scan_path = Path::new(path).canonicalize()?;
|
|
|
|
|
|
let (project_name, db_path) = get_project_info(&scan_path, database_dir)?;
|
2025-06-23 20:27:16 +02:00
|
|
|
|
|
2026-02-25 04:02:11 -05:00
|
|
|
|
let suppress_status = config.output.quiet || format == "json" || format == "sarif";
|
|
|
|
|
|
if !suppress_status {
|
|
|
|
|
|
println!(
|
|
|
|
|
|
"{} {}...\n",
|
|
|
|
|
|
style("Checking").green().bold(),
|
|
|
|
|
|
&project_name
|
|
|
|
|
|
);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let show_progress = format != "json" && format != "sarif" && !config.output.quiet;
|
2025-06-24 20:27:06 +02:00
|
|
|
|
|
2025-06-23 17:49:15 +02:00
|
|
|
|
let diags: Vec<Diag> = if no_index {
|
2026-02-25 04:02:11 -05:00
|
|
|
|
scan_filesystem(&scan_path, config, show_progress)?
|
2025-06-16 16:46:22 +02:00
|
|
|
|
} else {
|
|
|
|
|
|
if rebuild_index || !db_path.exists() {
|
2025-06-17 17:42:41 +02:00
|
|
|
|
tracing::debug!("Scanning filesystem index filesystem");
|
2026-02-25 04:02:11 -05:00
|
|
|
|
crate::commands::index::build_index(
|
|
|
|
|
|
&project_name,
|
|
|
|
|
|
&scan_path,
|
|
|
|
|
|
&db_path,
|
|
|
|
|
|
config,
|
|
|
|
|
|
show_progress,
|
|
|
|
|
|
)?;
|
2025-06-16 16:46:22 +02:00
|
|
|
|
}
|
2025-06-17 17:52:22 +02:00
|
|
|
|
|
2025-06-17 20:45:33 +02:00
|
|
|
|
let pool = Indexer::init(&db_path)?;
|
2026-02-25 04:02:11 -05:00
|
|
|
|
if config.database.vacuum_on_startup {
|
|
|
|
|
|
let idx = Indexer::from_pool(&project_name, &pool)?;
|
|
|
|
|
|
idx.vacuum()?;
|
|
|
|
|
|
}
|
|
|
|
|
|
scan_with_index_parallel(&project_name, pool, config, show_progress)?
|
2025-06-23 17:49:15 +02:00
|
|
|
|
};
|
2025-06-16 16:46:22 +02:00
|
|
|
|
|
2025-06-23 17:45:54 +02:00
|
|
|
|
tracing::debug!("Found {:?} issues.", diags.len());
|
2025-06-23 18:25:10 +02:00
|
|
|
|
|
2026-02-24 23:44:07 -05:00
|
|
|
|
if format == "json" {
|
|
|
|
|
|
let json = serde_json::to_string(&diags)
|
|
|
|
|
|
.map_err(|e| crate::errors::NyxError::Msg(e.to_string()))?;
|
|
|
|
|
|
println!("{json}");
|
|
|
|
|
|
return Ok(());
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-25 04:02:11 -05:00
|
|
|
|
if format == "sarif" {
|
|
|
|
|
|
let sarif = crate::output::build_sarif(&diags, &scan_path);
|
|
|
|
|
|
let json = serde_json::to_string_pretty(&sarif)
|
|
|
|
|
|
.map_err(|e| crate::errors::NyxError::Msg(e.to_string()))?;
|
|
|
|
|
|
println!("{json}");
|
|
|
|
|
|
return Ok(());
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-06-24 20:27:06 +02:00
|
|
|
|
if format == "console" || (format.is_empty() && config.output.default_format == "console") {
|
2025-06-23 17:45:54 +02:00
|
|
|
|
tracing::debug!("Printing to console");
|
2025-06-23 18:25:10 +02:00
|
|
|
|
let mut grouped: BTreeMap<&str, Vec<&Diag>> = BTreeMap::new();
|
2025-06-17 16:46:45 +02:00
|
|
|
|
for d in &diags {
|
2025-06-23 18:25:10 +02:00
|
|
|
|
grouped.entry(&d.path).or_default().push(d);
|
|
|
|
|
|
}
|
2025-06-23 20:27:16 +02:00
|
|
|
|
|
|
|
|
|
|
for (path, issues) in &grouped {
|
2025-06-23 18:25:10 +02:00
|
|
|
|
println!("{}", style(path).blue().underlined());
|
|
|
|
|
|
for d in issues {
|
2025-06-24 20:27:06 +02:00
|
|
|
|
println!(
|
2026-02-25 04:02:11 -05:00
|
|
|
|
" {:>4}:{:<4} {} {}",
|
2025-06-24 20:27:06 +02:00
|
|
|
|
d.line,
|
|
|
|
|
|
d.col,
|
2026-02-25 04:02:11 -05:00
|
|
|
|
d.severity.colored_tag(),
|
2025-06-24 20:27:06 +02:00
|
|
|
|
style(&d.id).bold()
|
|
|
|
|
|
);
|
2025-06-23 18:25:10 +02:00
|
|
|
|
}
|
2025-06-23 20:27:16 +02:00
|
|
|
|
println!();
|
2025-06-17 16:46:45 +02:00
|
|
|
|
}
|
2025-06-23 18:25:10 +02:00
|
|
|
|
|
2025-06-24 20:27:06 +02:00
|
|
|
|
println!(
|
|
|
|
|
|
"{} '{}' generated {} issues.",
|
|
|
|
|
|
style("warning").yellow().bold(),
|
|
|
|
|
|
style(project_name).white().bold(),
|
|
|
|
|
|
style(diags.len()).bold()
|
|
|
|
|
|
);
|
2026-02-24 23:44:07 -05:00
|
|
|
|
println!("\t");
|
2025-06-16 16:46:22 +02:00
|
|
|
|
}
|
|
|
|
|
|
Ok(())
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-06-17 16:46:45 +02:00
|
|
|
|
// --------------------------------------------------------------------------------------------
|
2026-02-24 23:44:07 -05:00
|
|
|
|
// Two‑pass scanning (no index)
|
2025-06-17 16:46:45 +02:00
|
|
|
|
// --------------------------------------------------------------------------------------------
|
2025-06-16 23:47:50 +02:00
|
|
|
|
|
2026-02-24 23:44:07 -05:00
|
|
|
|
/// Walk the filesystem and perform a two‑pass scan:
|
|
|
|
|
|
///
|
|
|
|
|
|
/// **Pass 1** – Parse every file and extract function summaries.
|
|
|
|
|
|
/// **Pass 2** – Re‑parse every file and run taint analysis with the
|
|
|
|
|
|
/// merged cross‑file summaries.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// AST pattern queries are run during pass 2 (they don't depend on summaries).
|
2026-02-25 04:02:11 -05:00
|
|
|
|
pub(crate) fn scan_filesystem(
|
|
|
|
|
|
root: &Path,
|
|
|
|
|
|
cfg: &Config,
|
|
|
|
|
|
show_progress: bool,
|
|
|
|
|
|
) -> NyxResult<Vec<Diag>> {
|
2026-02-24 23:44:07 -05:00
|
|
|
|
// ── Collect file list ────────────────────────────────────────────────
|
|
|
|
|
|
let all_paths: Vec<PathBuf> = {
|
|
|
|
|
|
let _span = tracing::info_span!("walk_files").entered();
|
|
|
|
|
|
let (rx, handle) = spawn_file_walker(root, cfg);
|
2026-02-25 04:02:11 -05:00
|
|
|
|
// Drain the channel BEFORE joining the walker thread.
|
|
|
|
|
|
// The channel is bounded, so joining first would deadlock once
|
|
|
|
|
|
// the walker fills it and blocks on send.
|
|
|
|
|
|
let paths: Vec<PathBuf> = rx.into_iter().flatten().collect();
|
2026-02-24 23:44:07 -05:00
|
|
|
|
if let Err(err) = handle.join() {
|
|
|
|
|
|
tracing::error!("walker thread panicked: {:#?}", err);
|
|
|
|
|
|
}
|
2026-02-25 04:02:11 -05:00
|
|
|
|
paths
|
2026-02-24 23:44:07 -05:00
|
|
|
|
};
|
|
|
|
|
|
tracing::info!(file_count = all_paths.len(), "file walk complete");
|
2025-06-23 20:27:16 +02:00
|
|
|
|
|
2026-02-24 23:44:07 -05:00
|
|
|
|
let needs_taint = cfg.scanner.mode == crate::utils::config::AnalysisMode::Full
|
|
|
|
|
|
|| cfg.scanner.mode == crate::utils::config::AnalysisMode::Taint;
|
|
|
|
|
|
|
2026-02-25 04:02:11 -05:00
|
|
|
|
if !needs_taint {
|
|
|
|
|
|
// ── AST-only: single fused pass (no cross-file context needed) ──
|
|
|
|
|
|
let _span = tracing::info_span!("ast_only_analysis", files = all_paths.len()).entered();
|
|
|
|
|
|
let pb = make_progress_bar(all_paths.len() as u64, "Running analysis", show_progress);
|
2026-02-24 23:44:07 -05:00
|
|
|
|
|
2026-02-25 04:02:11 -05:00
|
|
|
|
let mut diags: Vec<Diag> = all_paths
|
2026-02-24 23:44:07 -05:00
|
|
|
|
.par_iter()
|
2026-02-25 04:02:11 -05:00
|
|
|
|
.flat_map_iter(|path| {
|
|
|
|
|
|
let result = match analyse_file_fused(
|
|
|
|
|
|
&std::fs::read(path).unwrap_or_default(),
|
|
|
|
|
|
path,
|
|
|
|
|
|
cfg,
|
|
|
|
|
|
None,
|
|
|
|
|
|
Some(root),
|
|
|
|
|
|
) {
|
|
|
|
|
|
Ok(r) => r.diags,
|
|
|
|
|
|
Err(e) => {
|
|
|
|
|
|
tracing::warn!("analysis: {}: {e}", path.display());
|
|
|
|
|
|
vec![]
|
|
|
|
|
|
}
|
|
|
|
|
|
};
|
|
|
|
|
|
pb.inc(1);
|
|
|
|
|
|
result
|
2026-02-24 23:44:07 -05:00
|
|
|
|
})
|
|
|
|
|
|
.collect();
|
2026-02-25 04:02:11 -05:00
|
|
|
|
pb.finish_and_clear();
|
|
|
|
|
|
|
|
|
|
|
|
if let Some(max) = cfg.output.max_results {
|
|
|
|
|
|
diags.truncate(max as usize);
|
|
|
|
|
|
}
|
|
|
|
|
|
return Ok(diags);
|
|
|
|
|
|
}
|
2026-02-24 23:44:07 -05:00
|
|
|
|
|
2026-02-25 04:02:11 -05:00
|
|
|
|
// ── Taint mode: two-pass with fused pass 1 ──────────────────────────
|
|
|
|
|
|
//
|
|
|
|
|
|
// Pass 1 (fused): parse + CFG (once!) → extract summaries + run
|
|
|
|
|
|
// AST queries + local taint + CFG structural analyses.
|
|
|
|
|
|
// Summaries are collected for the cross-file merge.
|
|
|
|
|
|
//
|
|
|
|
|
|
// Pass 2: re-run full analysis with global summaries injected.
|
|
|
|
|
|
// This requires a second parse+CFG, but ONLY for taint-mode files
|
|
|
|
|
|
// that need cross-file context. For repos where most functions
|
|
|
|
|
|
// don't have unresolved callees, pass 1 results are already correct.
|
|
|
|
|
|
|
|
|
|
|
|
// ── Pass 1: fused summary extraction + parallel merge ──────────────
|
|
|
|
|
|
//
|
|
|
|
|
|
// Each rayon thread builds a local `GlobalSummaries` from its chunk,
|
|
|
|
|
|
// then the per-thread maps are merged in a binary reduce tree.
|
|
|
|
|
|
// This eliminates the serial merge_summaries bottleneck.
|
|
|
|
|
|
let global_summaries: GlobalSummaries = {
|
|
|
|
|
|
let _span = tracing::info_span!("pass1_fused", files = all_paths.len()).entered();
|
|
|
|
|
|
let pb = make_progress_bar(
|
|
|
|
|
|
all_paths.len() as u64,
|
|
|
|
|
|
"Pass 1: Extracting summaries",
|
|
|
|
|
|
show_progress,
|
|
|
|
|
|
);
|
2026-02-24 23:44:07 -05:00
|
|
|
|
let root_str = root.to_string_lossy();
|
2026-02-25 04:02:11 -05:00
|
|
|
|
|
|
|
|
|
|
let gs = all_paths
|
|
|
|
|
|
.par_iter()
|
|
|
|
|
|
.fold(GlobalSummaries::new, |mut local_gs, path| {
|
|
|
|
|
|
if let Ok(bytes) = std::fs::read(path) {
|
|
|
|
|
|
match analyse_file_fused(&bytes, path, cfg, None, Some(root)) {
|
|
|
|
|
|
Ok(r) => {
|
|
|
|
|
|
for s in r.summaries {
|
|
|
|
|
|
let key = s.func_key(Some(&root_str));
|
|
|
|
|
|
local_gs.insert(key, s);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
Err(e) => {
|
|
|
|
|
|
tracing::warn!("pass 1: {}: {e}", path.display());
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
} else {
|
|
|
|
|
|
tracing::warn!("pass 1: cannot read {}", path.display());
|
|
|
|
|
|
}
|
|
|
|
|
|
pb.inc(1);
|
|
|
|
|
|
local_gs
|
|
|
|
|
|
})
|
|
|
|
|
|
.reduce(GlobalSummaries::new, |mut a, b| {
|
|
|
|
|
|
a.merge(b);
|
|
|
|
|
|
a
|
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
|
|
pb.finish_and_clear();
|
|
|
|
|
|
tracing::info!("pass 1 complete");
|
|
|
|
|
|
gs
|
2026-02-24 23:44:07 -05:00
|
|
|
|
};
|
|
|
|
|
|
|
2026-02-25 04:02:11 -05:00
|
|
|
|
// ── Pass 2: re-run with cross-file global summaries ──────────────────
|
2026-02-24 23:44:07 -05:00
|
|
|
|
let mut diags: Vec<Diag> = {
|
|
|
|
|
|
let _span = tracing::info_span!("pass2_analysis", files = all_paths.len()).entered();
|
2026-02-25 04:02:11 -05:00
|
|
|
|
let pb = make_progress_bar(
|
|
|
|
|
|
all_paths.len() as u64,
|
|
|
|
|
|
"Pass 2: Running analysis",
|
|
|
|
|
|
show_progress,
|
|
|
|
|
|
);
|
2026-02-24 23:44:07 -05:00
|
|
|
|
|
2026-02-25 04:02:11 -05:00
|
|
|
|
let result: Vec<Diag> = all_paths
|
2026-02-24 23:44:07 -05:00
|
|
|
|
.par_iter()
|
2026-02-25 04:02:11 -05:00
|
|
|
|
.flat_map_iter(|path| {
|
|
|
|
|
|
let result = match run_rules_on_file(path, cfg, Some(&global_summaries), Some(root))
|
|
|
|
|
|
{
|
|
|
|
|
|
Ok(d) => d,
|
|
|
|
|
|
Err(e) => {
|
|
|
|
|
|
tracing::warn!("pass 2: {}: {e}", path.display());
|
|
|
|
|
|
vec![]
|
|
|
|
|
|
}
|
|
|
|
|
|
};
|
|
|
|
|
|
pb.inc(1);
|
|
|
|
|
|
result
|
|
|
|
|
|
})
|
|
|
|
|
|
.collect();
|
|
|
|
|
|
pb.finish_and_clear();
|
|
|
|
|
|
result
|
2026-02-24 23:44:07 -05:00
|
|
|
|
};
|
|
|
|
|
|
tracing::info!(diags = diags.len(), "pass 2 complete");
|
2025-06-24 20:27:06 +02:00
|
|
|
|
|
2025-06-24 22:44:57 +02:00
|
|
|
|
if let Some(max) = cfg.output.max_results {
|
2025-06-24 23:38:32 +02:00
|
|
|
|
diags.truncate(max as usize);
|
2025-06-24 22:44:57 +02:00
|
|
|
|
}
|
2025-06-24 23:38:32 +02:00
|
|
|
|
|
2025-06-24 22:44:57 +02:00
|
|
|
|
Ok(diags)
|
2025-06-16 23:47:50 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-24 23:44:07 -05:00
|
|
|
|
// --------------------------------------------------------------------------------------------
|
|
|
|
|
|
// Two‑pass scanning (with index)
|
|
|
|
|
|
// --------------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
/// Indexed two‑pass scan:
|
|
|
|
|
|
///
|
|
|
|
|
|
/// **Pass 1** – For every file that needs scanning, extract summaries and
|
|
|
|
|
|
/// persist them to the database. Unchanged files keep their
|
|
|
|
|
|
/// existing summaries.
|
|
|
|
|
|
/// **Pass 2** – Load *all* summaries from the DB, merge them, and re‑run
|
|
|
|
|
|
/// taint analysis on every file with the full cross‑file view.
|
|
|
|
|
|
/// Files whose *own* code has not changed AND whose
|
|
|
|
|
|
/// dependencies have not changed can serve cached issues
|
|
|
|
|
|
/// instead. (Today we conservatively re‑analyse every file in
|
|
|
|
|
|
/// pass 2; caching will be refined in approach 2 / 3.)
|
2025-06-23 20:27:16 +02:00
|
|
|
|
pub fn scan_with_index_parallel(
|
2025-06-17 16:46:45 +02:00
|
|
|
|
project: &str,
|
2025-06-17 20:45:33 +02:00
|
|
|
|
pool: Arc<Pool<SqliteConnectionManager>>,
|
2025-06-17 16:46:45 +02:00
|
|
|
|
cfg: &Config,
|
2026-02-25 04:02:11 -05:00
|
|
|
|
show_progress: bool,
|
2025-06-23 20:27:16 +02:00
|
|
|
|
) -> NyxResult<Vec<Diag>> {
|
2025-06-17 20:45:33 +02:00
|
|
|
|
let files = {
|
|
|
|
|
|
let idx = Indexer::from_pool(project, &pool)?;
|
|
|
|
|
|
idx.get_files(project)?
|
|
|
|
|
|
};
|
2025-06-24 23:38:32 +02:00
|
|
|
|
|
2026-02-24 23:44:07 -05:00
|
|
|
|
let needs_taint = cfg.scanner.mode == crate::utils::config::AnalysisMode::Full
|
|
|
|
|
|
|| cfg.scanner.mode == crate::utils::config::AnalysisMode::Taint;
|
|
|
|
|
|
|
|
|
|
|
|
// ── Pass 1: ensure summaries are up‑to‑date ──────────────────────────
|
|
|
|
|
|
if needs_taint {
|
|
|
|
|
|
let _span = tracing::info_span!("pass1_indexed", files = files.len()).entered();
|
2026-02-25 04:02:11 -05:00
|
|
|
|
let pb = make_progress_bar(
|
|
|
|
|
|
files.len() as u64,
|
|
|
|
|
|
"Pass 1: Extracting summaries",
|
|
|
|
|
|
show_progress,
|
|
|
|
|
|
);
|
2026-02-24 23:44:07 -05:00
|
|
|
|
|
|
|
|
|
|
files.par_iter().for_each_init(
|
|
|
|
|
|
|| Indexer::from_pool(project, &pool).expect("db pool"),
|
|
|
|
|
|
|idx, path| {
|
2026-02-25 04:02:11 -05:00
|
|
|
|
// Read once, hash once — use the hash for the change check
|
|
|
|
|
|
// to avoid a second file read inside should_scan.
|
|
|
|
|
|
if let Ok(bytes) = std::fs::read(path) {
|
|
|
|
|
|
let hash = Indexer::digest_bytes(&bytes);
|
|
|
|
|
|
let needs_scan = idx.should_scan_with_hash(path, &hash).unwrap_or(true);
|
|
|
|
|
|
if needs_scan {
|
|
|
|
|
|
match extract_summaries_from_bytes(&bytes, path, cfg) {
|
|
|
|
|
|
Ok(sums) => {
|
|
|
|
|
|
idx.replace_summaries_for_file(path, &hash, &sums).ok();
|
|
|
|
|
|
}
|
|
|
|
|
|
Err(e) => {
|
|
|
|
|
|
tracing::warn!("pass 1: {}: {e}", path.display());
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2026-02-24 23:44:07 -05:00
|
|
|
|
}
|
2026-02-25 04:02:11 -05:00
|
|
|
|
} else {
|
|
|
|
|
|
tracing::warn!("pass 1: cannot read {}", path.display());
|
2026-02-24 23:44:07 -05:00
|
|
|
|
}
|
2026-02-25 04:02:11 -05:00
|
|
|
|
pb.inc(1);
|
2026-02-24 23:44:07 -05:00
|
|
|
|
},
|
|
|
|
|
|
);
|
2026-02-25 04:02:11 -05:00
|
|
|
|
pb.finish_and_clear();
|
2026-02-24 23:44:07 -05:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ── Load global summaries ────────────────────────────────────────────
|
|
|
|
|
|
let global_summaries: Option<GlobalSummaries> = if needs_taint {
|
|
|
|
|
|
let _span = tracing::info_span!("load_summaries_db").entered();
|
|
|
|
|
|
let idx = Indexer::from_pool(project, &pool)?;
|
|
|
|
|
|
let all = idx.load_all_summaries()?;
|
|
|
|
|
|
tracing::info!(summaries = all.len(), "loaded cross-file summaries from DB");
|
|
|
|
|
|
Some(summary::merge_summaries(all, None))
|
|
|
|
|
|
} else {
|
|
|
|
|
|
None
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// ── Pass 2: full analysis ────────────────────────────────────────────
|
|
|
|
|
|
let _span = tracing::info_span!("pass2_indexed").entered();
|
2026-02-25 04:02:11 -05:00
|
|
|
|
let pb2 = make_progress_bar(
|
|
|
|
|
|
files.len() as u64,
|
|
|
|
|
|
"Pass 2: Running analysis",
|
|
|
|
|
|
show_progress,
|
|
|
|
|
|
);
|
2025-06-23 20:27:16 +02:00
|
|
|
|
let diag_map: DashMap<String, Vec<Diag>> = DashMap::new();
|
2025-06-17 20:45:33 +02:00
|
|
|
|
|
2025-06-24 20:27:06 +02:00
|
|
|
|
files.into_par_iter().for_each_init(
|
|
|
|
|
|
|| Indexer::from_pool(project, &pool).expect("db pool"),
|
|
|
|
|
|
|idx, path| {
|
2026-02-25 04:02:11 -05:00
|
|
|
|
// Read file once for both change-detection and analysis.
|
|
|
|
|
|
let bytes_opt = std::fs::read(&path).ok();
|
|
|
|
|
|
let hash = bytes_opt.as_ref().map(|b| Indexer::digest_bytes(b));
|
|
|
|
|
|
|
2026-02-24 23:44:07 -05:00
|
|
|
|
// In pass 2 we always re-analyse when taint is enabled because
|
|
|
|
|
|
// global summaries may have changed even if this file didn't.
|
|
|
|
|
|
// For AST-only mode, we can still use the cached issues.
|
|
|
|
|
|
let needs_scan = if needs_taint {
|
|
|
|
|
|
true // conservative: always re-analyse in taint mode
|
|
|
|
|
|
} else {
|
2026-02-25 04:02:11 -05:00
|
|
|
|
match (&hash, &bytes_opt) {
|
|
|
|
|
|
(Some(h), _) => idx.should_scan_with_hash(&path, h).unwrap_or(true),
|
|
|
|
|
|
_ => true,
|
|
|
|
|
|
}
|
2026-02-24 23:44:07 -05:00
|
|
|
|
};
|
2025-06-24 20:27:06 +02:00
|
|
|
|
|
|
|
|
|
|
let mut diags = if needs_scan {
|
2026-02-25 04:02:11 -05:00
|
|
|
|
let d = match &bytes_opt {
|
|
|
|
|
|
Some(bytes) => {
|
|
|
|
|
|
run_rules_on_bytes(bytes, &path, cfg, global_summaries.as_ref(), None)
|
|
|
|
|
|
.unwrap_or_default()
|
|
|
|
|
|
}
|
|
|
|
|
|
None => run_rules_on_file(&path, cfg, global_summaries.as_ref(), None)
|
|
|
|
|
|
.unwrap_or_default(),
|
|
|
|
|
|
};
|
2026-02-24 23:44:07 -05:00
|
|
|
|
|
2026-02-25 04:02:11 -05:00
|
|
|
|
// Persist issues + update file record (use pre-computed hash)
|
|
|
|
|
|
let file_id = match &hash {
|
|
|
|
|
|
Some(h) => idx.upsert_file_with_hash(&path, h).unwrap_or_default(),
|
|
|
|
|
|
None => idx.upsert_file(&path).unwrap_or_default(),
|
|
|
|
|
|
};
|
2025-06-24 20:27:06 +02:00
|
|
|
|
idx.replace_issues(
|
|
|
|
|
|
file_id,
|
|
|
|
|
|
d.iter().map(|d| IssueRow {
|
|
|
|
|
|
rule_id: &d.id,
|
|
|
|
|
|
severity: d.severity.as_db_str(),
|
|
|
|
|
|
line: d.line as i64,
|
|
|
|
|
|
col: d.col as i64,
|
|
|
|
|
|
}),
|
|
|
|
|
|
)
|
|
|
|
|
|
.ok();
|
|
|
|
|
|
d
|
|
|
|
|
|
} else {
|
|
|
|
|
|
idx.get_issues_from_file(&path).unwrap_or_default()
|
|
|
|
|
|
};
|
2025-06-28 17:36:14 +02:00
|
|
|
|
|
|
|
|
|
|
match cfg.scanner.mode {
|
|
|
|
|
|
crate::utils::config::AnalysisMode::Ast => {
|
2026-02-24 23:44:07 -05:00
|
|
|
|
diags.retain(|d| !d.id.starts_with("taint") && !d.id.starts_with("cfg-"));
|
2025-06-28 17:36:14 +02:00
|
|
|
|
}
|
|
|
|
|
|
crate::utils::config::AnalysisMode::Taint => {
|
2026-02-24 23:44:07 -05:00
|
|
|
|
diags.retain(|d| d.id.starts_with("taint") || d.id.starts_with("cfg-"));
|
2025-06-28 17:36:14 +02:00
|
|
|
|
}
|
|
|
|
|
|
crate::utils::config::AnalysisMode::Full => {}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-06-24 20:27:06 +02:00
|
|
|
|
if !diags.is_empty() {
|
|
|
|
|
|
diag_map
|
|
|
|
|
|
.entry(path.to_string_lossy().to_string())
|
2025-06-23 20:27:16 +02:00
|
|
|
|
.or_default()
|
|
|
|
|
|
.append(&mut diags);
|
2025-06-24 20:27:06 +02:00
|
|
|
|
}
|
2026-02-25 04:02:11 -05:00
|
|
|
|
pb2.inc(1);
|
2025-06-24 20:27:06 +02:00
|
|
|
|
},
|
|
|
|
|
|
);
|
2026-02-25 04:02:11 -05:00
|
|
|
|
pb2.finish_and_clear();
|
2025-06-17 20:45:33 +02:00
|
|
|
|
|
2025-06-24 20:27:06 +02:00
|
|
|
|
let mut diags: Vec<Diag> = diag_map.into_iter().flat_map(|(_, v)| v).collect();
|
|
|
|
|
|
|
|
|
|
|
|
if let Some(max) = cfg.output.max_results {
|
|
|
|
|
|
diags.truncate(max as usize);
|
|
|
|
|
|
}
|
2025-06-24 23:38:32 +02:00
|
|
|
|
|
2025-06-24 20:27:06 +02:00
|
|
|
|
Ok(diags)
|
2025-06-16 16:46:22 +02:00
|
|
|
|
}
|
2025-06-24 23:57:27 +02:00
|
|
|
|
|
|
|
|
|
|
/// Builds an index for a one-file project, then checks that an indexed scan
/// over that existing index succeeds and reports no findings.
#[test]
fn scan_with_index_parallel_uses_existing_index_without_rescanning() {
    // Arrange: a temp project holding a single plain-text file.
    let tmp = tempfile::tempdir().unwrap();
    let project_dir = tmp.path().join("proj");
    std::fs::create_dir(&project_dir).unwrap();
    std::fs::write(project_dir.join("foo.txt"), "abc").unwrap();

    let mut cfg = Config::default();
    cfg.performance.worker_threads = Some(1);
    cfg.performance.channel_multiplier = 1;
    cfg.performance.batch_size = 2;

    let (project_name, db_path) = get_project_info(&project_dir, tmp.path()).unwrap();
    crate::commands::index::build_index(&project_name, &project_dir, &db_path, &cfg, false)
        .unwrap();

    let pool = Indexer::init(&db_path).unwrap();

    // The index must know about exactly the one file written above.
    let indexed_count = Indexer::from_pool(&project_name, &pool)
        .unwrap()
        .get_files(&project_name)
        .unwrap()
        .len();
    assert_eq!(indexed_count, 1);

    // Act
    let diags = scan_with_index_parallel(&project_name, Arc::clone(&pool), &cfg, false)
        .expect("scan should succeed");

    // Assert
    assert!(diags.is_empty());
}
|