mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-18 20:15:14 +02:00
Feat/configurable sanitizers and js precision (#32)
* chore: Exclude CLAUDE.md from Cargo.toml
* feat: Add configurable analysis rules and CLI commands for custom sanitizers and terminators
* feat: Enhance resource management and analysis efficiency
  - Implemented parallel summary merging in `scan_filesystem` using rayon for improved performance.
  - Introduced `GlobalSummaries::merge()` for efficient merging of summaries.
  - Optimized file reading and hashing to eliminate redundant I/O operations.
  - Added `should_scan_with_hash()` and `upsert_file_with_hash()` methods to streamline file processing.
  - Enhanced taint analysis with in-place mutations to reduce memory allocations.
  - Updated resource acquisition patterns to exclude false positives for `freopen` and wrapper functions.
* feat: Implement severity downgrade for findings in non-production paths and add source kind inference
* feat: Update versioning information in SECURITY.md for new stable line
* feat: Update categories in Cargo.toml to include parser-implementations and text-processing
* feat: Update dependencies in Cargo.lock for improved compatibility and performance
* feat: Update dependencies in Cargo.lock and Cargo.toml for improved compatibility
This commit is contained in:
parent
f96a89e7c1
commit
19b578c5c4
37 changed files with 3775 additions and 432 deletions
213
src/commands/config.rs
Normal file
213
src/commands/config.rs
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
use crate::errors::NyxResult;
|
||||
use crate::utils::config::{AnalysisRulesConfig, Config, ConfigLabelRule};
|
||||
use console::style;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
/// Show the effective merged configuration as TOML.
|
||||
pub fn show(config: &Config) -> NyxResult<()> {
|
||||
let toml_str =
|
||||
toml::to_string_pretty(config).map_err(|e| format!("Failed to serialize config: {e}"))?;
|
||||
println!("{toml_str}");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Print the configuration directory path.
|
||||
pub fn path(config_dir: &Path) -> NyxResult<()> {
|
||||
println!("{}", config_dir.display());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add a label rule to `nyx.local`.
|
||||
pub fn add_rule(
|
||||
config_dir: &Path,
|
||||
lang: &str,
|
||||
matcher: &str,
|
||||
kind: &str,
|
||||
cap: &str,
|
||||
) -> NyxResult<()> {
|
||||
// Validate kind
|
||||
if !["source", "sanitizer", "sink"].contains(&kind) {
|
||||
return Err(
|
||||
format!("Invalid kind '{kind}'. Must be one of: source, sanitizer, sink").into(),
|
||||
);
|
||||
}
|
||||
|
||||
// Validate cap
|
||||
if crate::labels::parse_cap(cap).is_none() {
|
||||
return Err(format!(
|
||||
"Invalid cap '{cap}'. Must be one of: env_var, html_escape, shell_escape, url_encode, json_parse, file_io, all"
|
||||
)
|
||||
.into());
|
||||
}
|
||||
|
||||
let local_path = config_dir.join("nyx.local");
|
||||
let mut config: Config = if local_path.exists() {
|
||||
let content = fs::read_to_string(&local_path)?;
|
||||
toml::from_str(&content)?
|
||||
} else {
|
||||
Config::default()
|
||||
};
|
||||
|
||||
let lang_cfg = config
|
||||
.analysis
|
||||
.languages
|
||||
.entry(lang.to_string())
|
||||
.or_default();
|
||||
|
||||
let new_rule = ConfigLabelRule {
|
||||
matchers: vec![matcher.to_string()],
|
||||
kind: kind.to_string(),
|
||||
cap: cap.to_string(),
|
||||
};
|
||||
|
||||
// Dedup
|
||||
if !lang_cfg.rules.contains(&new_rule) {
|
||||
lang_cfg.rules.push(new_rule);
|
||||
}
|
||||
|
||||
write_local_config(&local_path, &config)?;
|
||||
|
||||
println!(
|
||||
"{}: Added {} rule for `{}` ({}) in {}",
|
||||
style("ok").green().bold(),
|
||||
kind,
|
||||
matcher,
|
||||
cap,
|
||||
lang
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add a terminator to `nyx.local`.
|
||||
pub fn add_terminator(config_dir: &Path, lang: &str, name: &str) -> NyxResult<()> {
|
||||
let local_path = config_dir.join("nyx.local");
|
||||
let mut config: Config = if local_path.exists() {
|
||||
let content = fs::read_to_string(&local_path)?;
|
||||
toml::from_str(&content)?
|
||||
} else {
|
||||
Config::default()
|
||||
};
|
||||
|
||||
let lang_cfg = config
|
||||
.analysis
|
||||
.languages
|
||||
.entry(lang.to_string())
|
||||
.or_default();
|
||||
|
||||
if !lang_cfg.terminators.contains(&name.to_string()) {
|
||||
lang_cfg.terminators.push(name.to_string());
|
||||
}
|
||||
|
||||
write_local_config(&local_path, &config)?;
|
||||
|
||||
println!(
|
||||
"{}: Added terminator `{}` for {}",
|
||||
style("ok").green().bold(),
|
||||
name,
|
||||
lang
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Write only the non-default portions to nyx.local.
|
||||
fn write_local_config(path: &Path, config: &Config) -> NyxResult<()> {
|
||||
// Only write the analysis section to nyx.local to keep it minimal.
|
||||
// Other settings keep their defaults unless previously customized.
|
||||
let mut local = Config {
|
||||
analysis: config.analysis.clone(),
|
||||
..Config::default()
|
||||
};
|
||||
|
||||
// Strip empty language entries
|
||||
local.analysis.languages.retain(|_, v| {
|
||||
!v.rules.is_empty() || !v.terminators.is_empty() || !v.event_handlers.is_empty()
|
||||
});
|
||||
|
||||
// If no analysis rules, only write the analysis section
|
||||
if local.analysis.languages.is_empty() {
|
||||
local.analysis = AnalysisRulesConfig::default();
|
||||
}
|
||||
|
||||
let toml_str =
|
||||
toml::to_string_pretty(&local).map_err(|e| format!("Failed to serialize config: {e}"))?;
|
||||
fs::write(path, toml_str)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Read the `nyx.local` written into `dir` and parse it back into a `Config`.
    fn read_local(dir: &Path) -> Config {
        let raw = fs::read_to_string(dir.join("nyx.local")).unwrap();
        toml::from_str(&raw).unwrap()
    }

    /// A single `add_rule` call produces a parseable file holding exactly that rule.
    #[test]
    fn add_rule_writes_valid_toml() {
        let tmp = tempfile::tempdir().unwrap();
        add_rule(tmp.path(), "javascript", "escapeHtml", "sanitizer", "html_escape").unwrap();

        let parsed = read_local(tmp.path());
        let js = parsed.analysis.languages.get("javascript").unwrap();
        assert_eq!(js.rules.len(), 1);

        let rule = &js.rules[0];
        assert_eq!(rule.matchers, vec!["escapeHtml"]);
        assert_eq!(rule.kind, "sanitizer");
        assert_eq!(rule.cap, "html_escape");
    }

    /// Adding the same rule twice leaves a single entry in the file.
    #[test]
    fn add_rule_deduplicates() {
        let tmp = tempfile::tempdir().unwrap();
        for _ in 0..2 {
            add_rule(tmp.path(), "javascript", "escapeHtml", "sanitizer", "html_escape").unwrap();
        }

        let parsed = read_local(tmp.path());
        let js = parsed.analysis.languages.get("javascript").unwrap();
        assert_eq!(js.rules.len(), 1);
    }

    /// `add_terminator` records the name under the requested language.
    #[test]
    fn add_terminator_works() {
        let tmp = tempfile::tempdir().unwrap();
        add_terminator(tmp.path(), "javascript", "process.exit").unwrap();

        let parsed = read_local(tmp.path());
        let js = parsed.analysis.languages.get("javascript").unwrap();
        assert_eq!(js.terminators, vec!["process.exit"]);
    }

    /// An unknown `kind` is rejected.
    #[test]
    fn add_rule_rejects_invalid_kind() {
        let tmp = tempfile::tempdir().unwrap();
        let outcome = add_rule(tmp.path(), "javascript", "foo", "invalid_kind", "all");
        assert!(outcome.is_err());
    }

    /// An unknown `cap` is rejected.
    #[test]
    fn add_rule_rejects_invalid_cap() {
        let tmp = tempfile::tempdir().unwrap();
        let outcome = add_rule(tmp.path(), "javascript", "foo", "sanitizer", "invalid_cap");
        assert!(outcome.is_err());
    }
}
|
||||
|
|
@ -5,10 +5,10 @@ use crate::patterns::Severity;
|
|||
use crate::utils::Config;
|
||||
use crate::utils::project::get_project_info;
|
||||
use crate::walk::spawn_file_walker;
|
||||
use blake3;
|
||||
use bytesize::ByteSize;
|
||||
use chrono::{DateTime, Local};
|
||||
use console::style;
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use rayon::prelude::*;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
|
@ -25,7 +25,13 @@ pub fn handle(
|
|||
let (project_name, db_path) = get_project_info(&build_path, database_dir)?;
|
||||
|
||||
if force || !db_path.exists() {
|
||||
build_index(&project_name, &build_path, &db_path, config)?;
|
||||
build_index(
|
||||
&project_name,
|
||||
&build_path,
|
||||
&db_path,
|
||||
config,
|
||||
!config.output.quiet,
|
||||
)?;
|
||||
println!(
|
||||
"✔ {} {}",
|
||||
style("Index built:").green(),
|
||||
|
|
@ -84,6 +90,7 @@ pub fn build_index(
|
|||
project_path: &std::path::Path,
|
||||
db_path: &std::path::Path,
|
||||
config: &Config,
|
||||
show_progress: bool,
|
||||
) -> NyxResult<()> {
|
||||
tracing::debug!("Building index for: {}", project_name);
|
||||
fs::File::create(db_path)?;
|
||||
|
|
@ -97,10 +104,27 @@ pub fn build_index(
|
|||
tracing::debug!("Cleaned index for: {}", project_name);
|
||||
|
||||
let (rx, handle) = spawn_file_walker(project_path, config);
|
||||
// Drain the channel BEFORE joining — the bounded channel will deadlock
|
||||
// if we join first and the walker blocks on send.
|
||||
let paths: Vec<PathBuf> = rx.into_iter().flatten().collect();
|
||||
if let Err(err) = handle.join() {
|
||||
tracing::error!("walker thread panicked: {:#?}", err);
|
||||
}
|
||||
let paths: Vec<PathBuf> = rx.into_iter().flatten().collect();
|
||||
|
||||
let pb = if show_progress {
|
||||
let pb = ProgressBar::new(paths.len() as u64);
|
||||
pb.set_style(
|
||||
ProgressStyle::with_template(
|
||||
"{spinner:.green} {msg} [{bar:30.cyan/blue}] {pos}/{len} ({eta})",
|
||||
)
|
||||
.unwrap()
|
||||
.progress_chars("##-"),
|
||||
);
|
||||
pb.set_message("Indexing files");
|
||||
pb
|
||||
} else {
|
||||
ProgressBar::hidden()
|
||||
};
|
||||
|
||||
paths
|
||||
.into_par_iter()
|
||||
|
|
@ -108,18 +132,15 @@ pub fn build_index(
|
|||
let mut idx = Indexer::from_pool(project_name, &pool)?;
|
||||
|
||||
// Read once, hash once — pass bytes to both rule execution and
|
||||
// summary extraction.
|
||||
// summary extraction. Use pre-computed hash for upsert to avoid
|
||||
// a redundant file read inside upsert_file.
|
||||
let bytes = std::fs::read(&path)?;
|
||||
let hash = {
|
||||
let mut hasher = blake3::Hasher::new();
|
||||
hasher.update(&bytes);
|
||||
hasher.finalize().as_bytes().to_vec()
|
||||
};
|
||||
let hash = Indexer::digest_bytes(&bytes);
|
||||
|
||||
// Run AST-only rules (no taint yet — summaries come later in scan)
|
||||
let issues =
|
||||
crate::commands::scan::run_rules_on_bytes(&bytes, &path, config, None, None)?;
|
||||
let file_id = idx.upsert_file(&path)?;
|
||||
let file_id = idx.upsert_file_with_hash(&path, &hash)?;
|
||||
|
||||
let rows: Vec<IssueRow> = issues
|
||||
.iter()
|
||||
|
|
@ -144,8 +165,10 @@ pub fn build_index(
|
|||
idx.replace_summaries_for_file(&path, &hash, &sums)?;
|
||||
}
|
||||
|
||||
pb.inc(1);
|
||||
Ok(())
|
||||
})?;
|
||||
pb.finish_and_clear();
|
||||
|
||||
{
|
||||
let idx = Indexer::from_pool(project_name, &pool)?;
|
||||
|
|
@ -170,7 +193,7 @@ fn build_index_creates_db_and_registers_files() {
|
|||
|
||||
let db_path = td.path().join("proj.sqlite");
|
||||
|
||||
build_index("proj", &project_dir, &db_path, &cfg).expect("index build should succeed");
|
||||
build_index("proj", &project_dir, &db_path, &cfg, false).expect("index build should succeed");
|
||||
|
||||
// ── Assert ────────────────────────────────────────────────────────────────
|
||||
assert!(db_path.is_file(), "SQLite file must exist");
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
pub mod clean;
|
||||
pub mod config;
|
||||
pub mod index;
|
||||
pub mod list;
|
||||
pub mod scan;
|
||||
|
|
@ -12,6 +13,7 @@ use std::path::Path;
|
|||
pub fn handle_command(
|
||||
command: Commands,
|
||||
database_dir: &Path,
|
||||
config_dir: &Path,
|
||||
config: &mut Config,
|
||||
) -> NyxResult<()> {
|
||||
match command {
|
||||
|
|
@ -24,6 +26,7 @@ pub fn handle_command(
|
|||
ast_only,
|
||||
cfg_only,
|
||||
all_targets,
|
||||
include_nonprod,
|
||||
} => {
|
||||
if high_only {
|
||||
config.scanner.min_severity = Severity::High
|
||||
|
|
@ -41,10 +44,37 @@ pub fn handle_command(
|
|||
config.scanner.mode = AnalysisMode::Full
|
||||
};
|
||||
|
||||
scan::handle(&path, no_index, rebuild_index, format, database_dir, config)
|
||||
if include_nonprod {
|
||||
config.scanner.include_nonprod = true
|
||||
};
|
||||
|
||||
scan::handle(&path, no_index, rebuild_index, format, database_dir, config)?;
|
||||
}
|
||||
Commands::Index { action } => {
|
||||
index::handle(action, database_dir, config)?;
|
||||
}
|
||||
Commands::List { verbose } => {
|
||||
list::handle(verbose, database_dir)?;
|
||||
}
|
||||
Commands::Clean { project, all } => {
|
||||
clean::handle(project, all, database_dir)?;
|
||||
}
|
||||
Commands::Config { action } => {
|
||||
use crate::cli::ConfigAction;
|
||||
match action {
|
||||
ConfigAction::Show => self::config::show(config)?,
|
||||
ConfigAction::Path => self::config::path(config_dir)?,
|
||||
ConfigAction::AddRule {
|
||||
lang,
|
||||
matcher,
|
||||
kind,
|
||||
cap,
|
||||
} => self::config::add_rule(config_dir, &lang, &matcher, &kind, &cap)?,
|
||||
ConfigAction::AddTerminator { lang, name } => {
|
||||
self::config::add_terminator(config_dir, &lang, &name)?
|
||||
}
|
||||
}
|
||||
}
|
||||
Commands::Index { action } => index::handle(action, database_dir, config),
|
||||
Commands::List { verbose } => list::handle(verbose, database_dir),
|
||||
Commands::Clean { project, all } => clean::handle(project, all, database_dir),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,16 +1,16 @@
|
|||
pub(crate) use crate::ast::{
|
||||
extract_summaries_from_bytes, extract_summaries_from_file, run_rules_on_bytes,
|
||||
run_rules_on_file,
|
||||
analyse_file_fused, extract_summaries_from_bytes, run_rules_on_bytes, run_rules_on_file,
|
||||
};
|
||||
use crate::database::index::{Indexer, IssueRow};
|
||||
use crate::errors::NyxResult;
|
||||
use crate::patterns::Severity;
|
||||
use crate::summary::{self, FuncSummary, GlobalSummaries};
|
||||
use crate::summary::{self, GlobalSummaries};
|
||||
use crate::utils::config::Config;
|
||||
use crate::utils::project::get_project_info;
|
||||
use crate::walk::spawn_file_walker;
|
||||
use console::style;
|
||||
use dashmap::DashMap;
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use r2d2::Pool;
|
||||
use r2d2_sqlite::SqliteConnectionManager;
|
||||
use rayon::prelude::*;
|
||||
|
|
@ -18,6 +18,22 @@ use std::collections::BTreeMap;
|
|||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
fn make_progress_bar(len: u64, msg: &str, show: bool) -> ProgressBar {
|
||||
if !show {
|
||||
return ProgressBar::hidden();
|
||||
}
|
||||
let pb = ProgressBar::new(len);
|
||||
pb.set_style(
|
||||
ProgressStyle::with_template(
|
||||
"{spinner:.green} {msg} [{bar:30.cyan/blue}] {pos}/{len} ({eta})",
|
||||
)
|
||||
.unwrap()
|
||||
.progress_chars("##-"),
|
||||
);
|
||||
pb.set_message(msg.to_string());
|
||||
pb
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, serde::Serialize)]
|
||||
pub struct Diag {
|
||||
pub path: String,
|
||||
|
|
@ -39,22 +55,37 @@ pub fn handle(
|
|||
let scan_path = Path::new(path).canonicalize()?;
|
||||
let (project_name, db_path) = get_project_info(&scan_path, database_dir)?;
|
||||
|
||||
println!(
|
||||
"{} {}...\n",
|
||||
style("Checking").green().bold(),
|
||||
&project_name
|
||||
);
|
||||
let suppress_status = config.output.quiet || format == "json" || format == "sarif";
|
||||
if !suppress_status {
|
||||
println!(
|
||||
"{} {}...\n",
|
||||
style("Checking").green().bold(),
|
||||
&project_name
|
||||
);
|
||||
}
|
||||
|
||||
let show_progress = format != "json" && format != "sarif" && !config.output.quiet;
|
||||
|
||||
let diags: Vec<Diag> = if no_index {
|
||||
scan_filesystem(&scan_path, config)?
|
||||
scan_filesystem(&scan_path, config, show_progress)?
|
||||
} else {
|
||||
if rebuild_index || !db_path.exists() {
|
||||
tracing::debug!("Scanning filesystem index filesystem");
|
||||
crate::commands::index::build_index(&project_name, &scan_path, &db_path, config)?;
|
||||
crate::commands::index::build_index(
|
||||
&project_name,
|
||||
&scan_path,
|
||||
&db_path,
|
||||
config,
|
||||
show_progress,
|
||||
)?;
|
||||
}
|
||||
|
||||
let pool = Indexer::init(&db_path)?;
|
||||
scan_with_index_parallel(&project_name, pool, config)?
|
||||
if config.database.vacuum_on_startup {
|
||||
let idx = Indexer::from_pool(&project_name, &pool)?;
|
||||
idx.vacuum()?;
|
||||
}
|
||||
scan_with_index_parallel(&project_name, pool, config, show_progress)?
|
||||
};
|
||||
|
||||
tracing::debug!("Found {:?} issues.", diags.len());
|
||||
|
|
@ -66,6 +97,14 @@ pub fn handle(
|
|||
return Ok(());
|
||||
}
|
||||
|
||||
if format == "sarif" {
|
||||
let sarif = crate::output::build_sarif(&diags, &scan_path);
|
||||
let json = serde_json::to_string_pretty(&sarif)
|
||||
.map_err(|e| crate::errors::NyxError::Msg(e.to_string()))?;
|
||||
println!("{json}");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if format == "console" || (format.is_empty() && config.output.default_format == "console") {
|
||||
tracing::debug!("Printing to console");
|
||||
let mut grouped: BTreeMap<&str, Vec<&Diag>> = BTreeMap::new();
|
||||
|
|
@ -77,10 +116,10 @@ pub fn handle(
|
|||
println!("{}", style(path).blue().underlined());
|
||||
for d in issues {
|
||||
println!(
|
||||
" {:>4}:{:<4} [{:}] {:}",
|
||||
" {:>4}:{:<4} {} {}",
|
||||
d.line,
|
||||
d.col,
|
||||
d.severity,
|
||||
d.severity.colored_tag(),
|
||||
style(&d.id).bold()
|
||||
);
|
||||
}
|
||||
|
|
@ -109,55 +148,144 @@ pub fn handle(
|
|||
/// merged cross‑file summaries.
|
||||
///
|
||||
/// AST pattern queries are run during pass 2 (they don't depend on summaries).
|
||||
pub(crate) fn scan_filesystem(root: &Path, cfg: &Config) -> NyxResult<Vec<Diag>> {
|
||||
pub(crate) fn scan_filesystem(
|
||||
root: &Path,
|
||||
cfg: &Config,
|
||||
show_progress: bool,
|
||||
) -> NyxResult<Vec<Diag>> {
|
||||
// ── Collect file list ────────────────────────────────────────────────
|
||||
let all_paths: Vec<PathBuf> = {
|
||||
let _span = tracing::info_span!("walk_files").entered();
|
||||
let (rx, handle) = spawn_file_walker(root, cfg);
|
||||
// Drain the channel BEFORE joining the walker thread.
|
||||
// The channel is bounded, so joining first would deadlock once
|
||||
// the walker fills it and blocks on send.
|
||||
let paths: Vec<PathBuf> = rx.into_iter().flatten().collect();
|
||||
if let Err(err) = handle.join() {
|
||||
tracing::error!("walker thread panicked: {:#?}", err);
|
||||
}
|
||||
rx.into_iter().flatten().collect()
|
||||
paths
|
||||
};
|
||||
tracing::info!(file_count = all_paths.len(), "file walk complete");
|
||||
|
||||
// ── Pass 1: extract summaries ────────────────────────────────────────
|
||||
let needs_taint = cfg.scanner.mode == crate::utils::config::AnalysisMode::Full
|
||||
|| cfg.scanner.mode == crate::utils::config::AnalysisMode::Taint;
|
||||
|
||||
let global_summaries: Option<GlobalSummaries> = if needs_taint {
|
||||
let _span = tracing::info_span!("pass1_summaries", files = all_paths.len()).entered();
|
||||
if !needs_taint {
|
||||
// ── AST-only: single fused pass (no cross-file context needed) ──
|
||||
let _span = tracing::info_span!("ast_only_analysis", files = all_paths.len()).entered();
|
||||
let pb = make_progress_bar(all_paths.len() as u64, "Running analysis", show_progress);
|
||||
|
||||
let collected: Vec<FuncSummary> = all_paths
|
||||
let mut diags: Vec<Diag> = all_paths
|
||||
.par_iter()
|
||||
.flat_map_iter(|path| match extract_summaries_from_file(path, cfg) {
|
||||
Ok(sums) => sums,
|
||||
Err(e) => {
|
||||
tracing::warn!("pass 1: failed to summarise {}: {e}", path.display());
|
||||
vec![]
|
||||
}
|
||||
.flat_map_iter(|path| {
|
||||
let result = match analyse_file_fused(
|
||||
&std::fs::read(path).unwrap_or_default(),
|
||||
path,
|
||||
cfg,
|
||||
None,
|
||||
Some(root),
|
||||
) {
|
||||
Ok(r) => r.diags,
|
||||
Err(e) => {
|
||||
tracing::warn!("analysis: {}: {e}", path.display());
|
||||
vec![]
|
||||
}
|
||||
};
|
||||
pb.inc(1);
|
||||
result
|
||||
})
|
||||
.collect();
|
||||
pb.finish_and_clear();
|
||||
|
||||
tracing::info!(summaries = collected.len(), "pass 1 complete");
|
||||
let _merge_span = tracing::info_span!("merge_summaries").entered();
|
||||
if let Some(max) = cfg.output.max_results {
|
||||
diags.truncate(max as usize);
|
||||
}
|
||||
return Ok(diags);
|
||||
}
|
||||
|
||||
// ── Taint mode: two-pass with fused pass 1 ──────────────────────────
|
||||
//
|
||||
// Pass 1 (fused): parse + CFG (once!) → extract summaries + run
|
||||
// AST queries + local taint + CFG structural analyses.
|
||||
// Summaries are collected for the cross-file merge.
|
||||
//
|
||||
// Pass 2: re-run full analysis with global summaries injected.
|
||||
// This requires a second parse+CFG, but ONLY for taint-mode files
|
||||
// that need cross-file context. For repos where most functions
|
||||
// don't have unresolved callees, pass 1 results are already correct.
|
||||
|
||||
// ── Pass 1: fused summary extraction + parallel merge ──────────────
|
||||
//
|
||||
// Each rayon thread builds a local `GlobalSummaries` from its chunk,
|
||||
// then the per-thread maps are merged in a binary reduce tree.
|
||||
// This eliminates the serial merge_summaries bottleneck.
|
||||
let global_summaries: GlobalSummaries = {
|
||||
let _span = tracing::info_span!("pass1_fused", files = all_paths.len()).entered();
|
||||
let pb = make_progress_bar(
|
||||
all_paths.len() as u64,
|
||||
"Pass 1: Extracting summaries",
|
||||
show_progress,
|
||||
);
|
||||
let root_str = root.to_string_lossy();
|
||||
Some(summary::merge_summaries(collected, Some(&root_str)))
|
||||
} else {
|
||||
None
|
||||
|
||||
let gs = all_paths
|
||||
.par_iter()
|
||||
.fold(GlobalSummaries::new, |mut local_gs, path| {
|
||||
if let Ok(bytes) = std::fs::read(path) {
|
||||
match analyse_file_fused(&bytes, path, cfg, None, Some(root)) {
|
||||
Ok(r) => {
|
||||
for s in r.summaries {
|
||||
let key = s.func_key(Some(&root_str));
|
||||
local_gs.insert(key, s);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("pass 1: {}: {e}", path.display());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
tracing::warn!("pass 1: cannot read {}", path.display());
|
||||
}
|
||||
pb.inc(1);
|
||||
local_gs
|
||||
})
|
||||
.reduce(GlobalSummaries::new, |mut a, b| {
|
||||
a.merge(b);
|
||||
a
|
||||
});
|
||||
|
||||
pb.finish_and_clear();
|
||||
tracing::info!("pass 1 complete");
|
||||
gs
|
||||
};
|
||||
|
||||
// ── Pass 2: full analysis with cross‑file context ────────────────────
|
||||
// ── Pass 2: re-run with cross-file global summaries ──────────────────
|
||||
let mut diags: Vec<Diag> = {
|
||||
let _span = tracing::info_span!("pass2_analysis", files = all_paths.len()).entered();
|
||||
let pb = make_progress_bar(
|
||||
all_paths.len() as u64,
|
||||
"Pass 2: Running analysis",
|
||||
show_progress,
|
||||
);
|
||||
|
||||
all_paths
|
||||
let result: Vec<Diag> = all_paths
|
||||
.par_iter()
|
||||
.map(|path| run_rules_on_file(path, cfg, global_summaries.as_ref(), Some(root)))
|
||||
.try_reduce(Vec::new, |mut a, mut b| {
|
||||
a.append(&mut b);
|
||||
Ok(a)
|
||||
})?
|
||||
.flat_map_iter(|path| {
|
||||
let result = match run_rules_on_file(path, cfg, Some(&global_summaries), Some(root))
|
||||
{
|
||||
Ok(d) => d,
|
||||
Err(e) => {
|
||||
tracing::warn!("pass 2: {}: {e}", path.display());
|
||||
vec![]
|
||||
}
|
||||
};
|
||||
pb.inc(1);
|
||||
result
|
||||
})
|
||||
.collect();
|
||||
pb.finish_and_clear();
|
||||
result
|
||||
};
|
||||
tracing::info!(diags = diags.len(), "pass 2 complete");
|
||||
|
||||
|
|
@ -187,6 +315,7 @@ pub fn scan_with_index_parallel(
|
|||
project: &str,
|
||||
pool: Arc<Pool<SqliteConnectionManager>>,
|
||||
cfg: &Config,
|
||||
show_progress: bool,
|
||||
) -> NyxResult<Vec<Diag>> {
|
||||
let files = {
|
||||
let idx = Indexer::from_pool(project, &pool)?;
|
||||
|
|
@ -199,39 +328,37 @@ pub fn scan_with_index_parallel(
|
|||
// ── Pass 1: ensure summaries are up‑to‑date ──────────────────────────
|
||||
if needs_taint {
|
||||
let _span = tracing::info_span!("pass1_indexed", files = files.len()).entered();
|
||||
let pb = make_progress_bar(
|
||||
files.len() as u64,
|
||||
"Pass 1: Extracting summaries",
|
||||
show_progress,
|
||||
);
|
||||
|
||||
files.par_iter().for_each_init(
|
||||
|| Indexer::from_pool(project, &pool).expect("db pool"),
|
||||
|idx, path| {
|
||||
let needs_scan = idx.should_scan(path).unwrap_or(true);
|
||||
if !needs_scan {
|
||||
return; // summaries in DB are still valid
|
||||
}
|
||||
|
||||
// Read once, hash once, extract summaries from bytes.
|
||||
let bytes = match std::fs::read(path) {
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
tracing::warn!("pass 1: cannot read {}: {e}", path.display());
|
||||
return;
|
||||
}
|
||||
};
|
||||
let hash = {
|
||||
let mut h = blake3::Hasher::new();
|
||||
h.update(&bytes);
|
||||
h.finalize().as_bytes().to_vec()
|
||||
};
|
||||
|
||||
match extract_summaries_from_bytes(&bytes, path, cfg) {
|
||||
Ok(sums) => {
|
||||
idx.replace_summaries_for_file(path, &hash, &sums).ok();
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("pass 1: {}: {e}", path.display());
|
||||
}
|
||||
// Read once, hash once — use the hash for the change check
|
||||
// to avoid a second file read inside should_scan.
|
||||
if let Ok(bytes) = std::fs::read(path) {
|
||||
let hash = Indexer::digest_bytes(&bytes);
|
||||
let needs_scan = idx.should_scan_with_hash(path, &hash).unwrap_or(true);
|
||||
if needs_scan {
|
||||
match extract_summaries_from_bytes(&bytes, path, cfg) {
|
||||
Ok(sums) => {
|
||||
idx.replace_summaries_for_file(path, &hash, &sums).ok();
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("pass 1: {}: {e}", path.display());
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
tracing::warn!("pass 1: cannot read {}", path.display());
|
||||
}
|
||||
pb.inc(1);
|
||||
},
|
||||
);
|
||||
pb.finish_and_clear();
|
||||
}
|
||||
|
||||
// ── Load global summaries ────────────────────────────────────────────
|
||||
|
|
@ -247,26 +374,47 @@ pub fn scan_with_index_parallel(
|
|||
|
||||
// ── Pass 2: full analysis ────────────────────────────────────────────
|
||||
let _span = tracing::info_span!("pass2_indexed").entered();
|
||||
let pb2 = make_progress_bar(
|
||||
files.len() as u64,
|
||||
"Pass 2: Running analysis",
|
||||
show_progress,
|
||||
);
|
||||
let diag_map: DashMap<String, Vec<Diag>> = DashMap::new();
|
||||
|
||||
files.into_par_iter().for_each_init(
|
||||
|| Indexer::from_pool(project, &pool).expect("db pool"),
|
||||
|idx, path| {
|
||||
// Read file once for both change-detection and analysis.
|
||||
let bytes_opt = std::fs::read(&path).ok();
|
||||
let hash = bytes_opt.as_ref().map(|b| Indexer::digest_bytes(b));
|
||||
|
||||
// In pass 2 we always re-analyse when taint is enabled because
|
||||
// global summaries may have changed even if this file didn't.
|
||||
// For AST-only mode, we can still use the cached issues.
|
||||
let needs_scan = if needs_taint {
|
||||
true // conservative: always re-analyse in taint mode
|
||||
} else {
|
||||
idx.should_scan(&path).unwrap_or(true)
|
||||
match (&hash, &bytes_opt) {
|
||||
(Some(h), _) => idx.should_scan_with_hash(&path, h).unwrap_or(true),
|
||||
_ => true,
|
||||
}
|
||||
};
|
||||
|
||||
let mut diags = if needs_scan {
|
||||
let d = run_rules_on_file(&path, cfg, global_summaries.as_ref(), None)
|
||||
.unwrap_or_default();
|
||||
let d = match &bytes_opt {
|
||||
Some(bytes) => {
|
||||
run_rules_on_bytes(bytes, &path, cfg, global_summaries.as_ref(), None)
|
||||
.unwrap_or_default()
|
||||
}
|
||||
None => run_rules_on_file(&path, cfg, global_summaries.as_ref(), None)
|
||||
.unwrap_or_default(),
|
||||
};
|
||||
|
||||
// Persist issues + update file record
|
||||
let file_id = idx.upsert_file(&path).unwrap_or_default();
|
||||
// Persist issues + update file record (use pre-computed hash)
|
||||
let file_id = match &hash {
|
||||
Some(h) => idx.upsert_file_with_hash(&path, h).unwrap_or_default(),
|
||||
None => idx.upsert_file(&path).unwrap_or_default(),
|
||||
};
|
||||
idx.replace_issues(
|
||||
file_id,
|
||||
d.iter().map(|d| IssueRow {
|
||||
|
|
@ -298,8 +446,10 @@ pub fn scan_with_index_parallel(
|
|||
.or_default()
|
||||
.append(&mut diags);
|
||||
}
|
||||
pb2.inc(1);
|
||||
},
|
||||
);
|
||||
pb2.finish_and_clear();
|
||||
|
||||
let mut diags: Vec<Diag> = diag_map.into_iter().flat_map(|(_, v)| v).collect();
|
||||
|
||||
|
|
@ -323,7 +473,8 @@ fn scan_with_index_parallel_uses_existing_index_without_rescanning() {
|
|||
std::fs::write(project_dir.join("foo.txt"), "abc").unwrap();
|
||||
|
||||
let (project_name, db_path) = get_project_info(&project_dir, td.path()).unwrap();
|
||||
crate::commands::index::build_index(&project_name, &project_dir, &db_path, &cfg).unwrap();
|
||||
crate::commands::index::build_index(&project_name, &project_dir, &db_path, &cfg, false)
|
||||
.unwrap();
|
||||
|
||||
let pool = Indexer::init(&db_path).unwrap();
|
||||
|
||||
|
|
@ -336,7 +487,7 @@ fn scan_with_index_parallel_uses_existing_index_without_rescanning() {
|
|||
1
|
||||
);
|
||||
|
||||
let diags = scan_with_index_parallel(&project_name, Arc::clone(&pool), &cfg)
|
||||
let diags = scan_with_index_parallel(&project_name, Arc::clone(&pool), &cfg, false)
|
||||
.expect("scan should succeed");
|
||||
|
||||
assert!(diags.is_empty());
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue