mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-06 19:35:13 +02:00
* refactor: Update comments for clarity and add expectations.json files for performance metrics * feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks * feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks * refactor: Simplify code formatting for better readability in multiple files * refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration * feat: Update Java and Python patterns to include new security rules * refactor: Improve comment clarity and consistency across multiple Rust files * refactor: Simplify code formatting for improved readability in integration tests and module files * refactor: Improve comment formatting and enhance clarity in assertions across multiple files
392 lines
13 KiB
Rust
392 lines
13 KiB
Rust
use crate::cli::IndexAction;
|
|
use crate::database::index::{Indexer, IssueRow};
|
|
use crate::errors::NyxResult;
|
|
use crate::patterns::Severity;
|
|
use crate::server::progress::{ScanMetrics, ScanProgress, ScanStage};
|
|
use crate::server::scan_log::ScanLogCollector;
|
|
use crate::utils::Config;
|
|
use crate::utils::project::get_project_info;
|
|
use crate::walk::spawn_file_walker;
|
|
use bytesize::ByteSize;
|
|
use chrono::{DateTime, Local};
|
|
use console::style;
|
|
use indicatif::{ProgressBar, ProgressStyle};
|
|
use rayon::prelude::*;
|
|
use std::fs;
|
|
use std::path::PathBuf;
|
|
use std::process::exit;
|
|
use std::sync::Arc;
|
|
use std::sync::atomic::Ordering::Relaxed;
|
|
|
|
pub fn handle(
|
|
action: IndexAction,
|
|
database_dir: &std::path::Path,
|
|
config: &Config,
|
|
) -> NyxResult<()> {
|
|
match action {
|
|
IndexAction::Build { path, force } => {
|
|
let build_path = std::path::Path::new(&path).canonicalize()?;
|
|
let (project_name, db_path) = get_project_info(&build_path, database_dir)?;
|
|
|
|
if force || !db_path.exists() {
|
|
build_index(
|
|
&project_name,
|
|
&build_path,
|
|
&db_path,
|
|
config,
|
|
!config.output.quiet,
|
|
)?;
|
|
println!(
|
|
"✔ {} {}",
|
|
style("Index built:").green(),
|
|
style(db_path.display()).white().bold()
|
|
);
|
|
exit(0);
|
|
} else {
|
|
println!(
|
|
"{} {}",
|
|
style("↩ Index already exists").yellow(),
|
|
style("(use --force to rebuild)").dim()
|
|
);
|
|
exit(0);
|
|
}
|
|
}
|
|
IndexAction::Status { path } => {
|
|
let status_path = std::path::Path::new(&path).canonicalize()?;
|
|
let (project_name, db_path) = get_project_info(&status_path, database_dir)?;
|
|
|
|
println!("{}", style("Index status").bold());
|
|
println!(
|
|
" {:10} {}",
|
|
style("Project").dim(),
|
|
style(&project_name).white().bold()
|
|
);
|
|
println!(
|
|
" {:10} {}",
|
|
style("Path").dim(),
|
|
style(db_path.display()).underlined()
|
|
);
|
|
|
|
if db_path.exists() {
|
|
let meta = fs::metadata(&db_path)?;
|
|
let size = ByteSize::b(meta.len());
|
|
let mtime: DateTime<Local> = meta.modified()?.into();
|
|
println!(
|
|
" {:10} {} {}",
|
|
style("Indexed").dim(),
|
|
style("✔").green().bold(),
|
|
style(mtime.format("%Y-%m-%d %H:%M:%S")).dim()
|
|
);
|
|
println!(" {:10} {}", style("Size").dim(), size);
|
|
} else {
|
|
println!(
|
|
" {:10} {} {}",
|
|
style("Indexed").dim(),
|
|
style("✖").red().bold(),
|
|
style("(run `nyx index build` to create)").dim()
|
|
);
|
|
}
|
|
|
|
exit(0);
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn build_index(
|
|
project_name: &str,
|
|
project_path: &std::path::Path,
|
|
db_path: &std::path::Path,
|
|
config: &Config,
|
|
show_progress: bool,
|
|
) -> NyxResult<()> {
|
|
build_index_with_observer(
|
|
project_name,
|
|
project_path,
|
|
db_path,
|
|
config,
|
|
show_progress,
|
|
None,
|
|
None,
|
|
None,
|
|
)
|
|
}
|
|
|
|
#[allow(clippy::too_many_arguments)]
|
|
pub fn build_index_with_observer(
|
|
project_name: &str,
|
|
project_path: &std::path::Path,
|
|
db_path: &std::path::Path,
|
|
config: &Config,
|
|
show_progress: bool,
|
|
progress: Option<&Arc<ScanProgress>>,
|
|
metrics: Option<&Arc<ScanMetrics>>,
|
|
logs: Option<&Arc<ScanLogCollector>>,
|
|
) -> NyxResult<()> {
|
|
// Pass 1 of the indexed scan reads persisted summaries produced here, so
|
|
// framework context must be populated at index-build time, otherwise
|
|
// framework-conditional label rules never contribute to the summaries
|
|
// and indexed scans diverge from non-indexed ones. Matches the
|
|
// auto-fill in scan_filesystem_with_observer /
|
|
// scan_with_index_parallel_observer.
|
|
let owned_cfg = crate::commands::scan::ensure_framework_ctx(project_path, config);
|
|
let config = owned_cfg.as_ref().unwrap_or(config);
|
|
|
|
tracing::debug!("Building index for: {}", project_name);
|
|
let pool = Indexer::init(db_path)?;
|
|
{
|
|
let idx = Indexer::from_pool(project_name, &pool)?;
|
|
idx.clear()?;
|
|
}
|
|
|
|
tracing::debug!("Cleaned index for: {}", project_name);
|
|
|
|
if let Some(p) = progress {
|
|
p.set_stage(ScanStage::Discovering);
|
|
}
|
|
if let Some(l) = logs {
|
|
l.info(
|
|
format!("Rebuilding index for {}", project_path.display()),
|
|
None,
|
|
);
|
|
}
|
|
|
|
let walk_start = std::time::Instant::now();
|
|
let (rx, handle) = spawn_file_walker(project_path, config);
|
|
// Drain the channel BEFORE joining, the bounded channel will deadlock
|
|
// if we join first and the walker blocks on send.
|
|
let paths: Vec<PathBuf> = rx.into_iter().flatten().collect();
|
|
if let Err(err) = handle.join() {
|
|
tracing::error!("walker thread panicked: {:#?}", err);
|
|
if let Some(l) = logs {
|
|
l.error(
|
|
"Walker thread panicked during index rebuild",
|
|
None,
|
|
Some(format!("{err:#?}")),
|
|
);
|
|
}
|
|
}
|
|
if let Some(p) = progress {
|
|
p.record_walk_ms(walk_start.elapsed().as_millis() as u64);
|
|
p.set_files_discovered(paths.len() as u64);
|
|
p.set_stage(ScanStage::Indexing);
|
|
}
|
|
if let Some(l) = logs {
|
|
l.info(
|
|
format!(
|
|
"Index rebuild discovered {} files in {}ms",
|
|
paths.len(),
|
|
walk_start.elapsed().as_millis()
|
|
),
|
|
None,
|
|
);
|
|
}
|
|
|
|
let pb = if show_progress {
|
|
let pb = ProgressBar::new(paths.len() as u64);
|
|
pb.set_style(
|
|
ProgressStyle::with_template(
|
|
"{spinner:.green} {msg} [{bar:30.cyan/blue}] {pos}/{len} ({eta})",
|
|
)
|
|
.unwrap()
|
|
.progress_chars("##-"),
|
|
);
|
|
pb.set_message("Indexing files");
|
|
pb
|
|
} else {
|
|
ProgressBar::hidden()
|
|
};
|
|
|
|
let progress = progress.cloned();
|
|
let metrics = metrics.cloned();
|
|
let logs = logs.cloned();
|
|
let pass1_start = std::time::Instant::now();
|
|
paths
|
|
.into_par_iter()
|
|
.try_for_each(|path| -> NyxResult<()> {
|
|
let mut idx = Indexer::from_pool(project_name, &pool)?;
|
|
|
|
// Read once, hash once, pass bytes to both rule execution and
|
|
// summary extraction. Use pre-computed hash for upsert to avoid
|
|
// a redundant file read inside upsert_file.
|
|
let bytes = std::fs::read(&path)?;
|
|
let hash = Indexer::digest_bytes(&bytes);
|
|
|
|
// Parse once and persist every artifact we can reuse later:
|
|
// findings, coarse summaries, and precise SSA summaries.
|
|
let fused = crate::commands::scan::analyse_file_fused(
|
|
&bytes,
|
|
&path,
|
|
config,
|
|
None,
|
|
Some(project_path),
|
|
)?;
|
|
if let Some(ref p) = progress {
|
|
p.inc_parsed(1);
|
|
p.set_current_file(&path.to_string_lossy());
|
|
if let Some(lang) = fused.summaries.first().map(|s| s.lang.as_str()) {
|
|
p.record_language(lang);
|
|
}
|
|
}
|
|
if let Some(ref m) = metrics {
|
|
m.cfg_nodes.fetch_add(fused.cfg_nodes as u64, Relaxed);
|
|
}
|
|
let file_id = idx.upsert_file_with_hash(&path, &hash)?;
|
|
|
|
let rows: Vec<IssueRow> = fused
|
|
.diags
|
|
.iter()
|
|
.map(|d| IssueRow {
|
|
rule_id: d.id.as_ref(),
|
|
severity: match d.severity {
|
|
Severity::High => "HIGH",
|
|
Severity::Medium => "MEDIUM",
|
|
Severity::Low => "LOW",
|
|
},
|
|
line: d.line as i64,
|
|
col: d.col as i64,
|
|
})
|
|
.collect();
|
|
|
|
idx.replace_issues(file_id, rows)?;
|
|
|
|
if !fused.summaries.is_empty() {
|
|
idx.replace_summaries_for_file(&path, &hash, &fused.summaries)?;
|
|
}
|
|
|
|
if !fused.ssa_summaries.is_empty() {
|
|
let ssa_rows: Vec<_> = fused
|
|
.ssa_summaries
|
|
.into_iter()
|
|
.map(|(key, sum)| {
|
|
(
|
|
key.name,
|
|
key.arity.unwrap_or(0),
|
|
key.lang.as_str().to_string(),
|
|
key.namespace,
|
|
key.container,
|
|
key.disambig,
|
|
key.kind,
|
|
sum,
|
|
)
|
|
})
|
|
.collect();
|
|
idx.replace_ssa_summaries_for_file(&path, &hash, &ssa_rows)?;
|
|
}
|
|
|
|
// Persist SSA callee bodies at index-build time so CLI-initiated
|
|
// rebuilds (`--index rebuild`) populate the same
|
|
// `ssa_function_bodies` rows that `scan_with_index_parallel`
|
|
// would have written via its pass-1 branch. Without this,
|
|
// indexed scans load zero cross-file bodies and cross-file
|
|
// inline silently falls back to summary resolution.
|
|
if !fused.ssa_bodies.is_empty() {
|
|
let body_rows: Vec<_> = fused
|
|
.ssa_bodies
|
|
.into_iter()
|
|
.map(|(key, body)| {
|
|
(
|
|
key.name,
|
|
key.arity.unwrap_or(0),
|
|
key.lang.as_str().to_string(),
|
|
key.namespace,
|
|
key.container,
|
|
key.disambig,
|
|
key.kind,
|
|
body,
|
|
)
|
|
})
|
|
.collect();
|
|
idx.replace_ssa_bodies_for_file(&path, &hash, &body_rows)?;
|
|
}
|
|
|
|
pb.inc(1);
|
|
Ok(())
|
|
})?;
|
|
pb.finish_and_clear();
|
|
if let Some(p) = &progress {
|
|
p.record_pass1_ms(pass1_start.elapsed().as_millis() as u64);
|
|
}
|
|
if let Some(l) = &logs {
|
|
l.info(
|
|
format!(
|
|
"Index rebuild complete in {}ms",
|
|
pass1_start.elapsed().as_millis()
|
|
),
|
|
None,
|
|
);
|
|
}
|
|
|
|
{
|
|
let idx = Indexer::from_pool(project_name, &pool)?;
|
|
idx.vacuum()?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Building an index over a project with one plain-text file must create the
/// SQLite file and register exactly that file.
#[test]
fn build_index_creates_db_and_registers_files() {
    // One worker thread and small channel/batch settings.
    let cfg = {
        let mut c = Config::default();
        c.performance.worker_threads = Some(1);
        c.performance.channel_multiplier = 1;
        c.performance.batch_size = 2;
        c
    };

    // Arrange: a temp project containing a single text file.
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path().join("proj");
    fs::create_dir(&root).unwrap();
    let readme = root.join("readme.txt");
    fs::write(&readme, "hello").unwrap();
    let db_file = tmp.path().join("proj.sqlite");

    // Act.
    build_index("proj", &root, &db_file, &cfg, false).expect("index build should succeed");

    // Assert.
    assert!(db_file.is_file(), "SQLite file must exist");

    let pool = Indexer::init(&db_file).unwrap();
    let indexer = Indexer::from_pool("proj", &pool).unwrap();
    let indexed = indexer.get_files("proj").unwrap();
    assert_eq!(indexed.len(), 1, "exactly one file indexed");
    assert_eq!(indexed[0], readme);
}
|
|
|
|
/// Building an index over a small Express-style JS file must leave at least
/// one SSA summary in the index.
#[test]
fn build_index_persists_ssa_summaries() {
    // One worker thread and small channel/batch settings.
    let cfg = {
        let mut c = Config::default();
        c.performance.worker_threads = Some(1);
        c.performance.channel_multiplier = 1;
        c.performance.batch_size = 2;
        c
    };

    // Fixture: a route handler that passes a query parameter through a
    // sanitising helper before sending it.
    let js_source = r#"var express = require('express');
var app = express();

function cleanHtml(input) {
return DOMPurify.sanitize(input);
}

app.get('/safe', function(req, res) {
var name = req.query.name;
var safe = cleanHtml(name);
res.send(safe);
});
"#;

    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path().join("proj");
    fs::create_dir(&root).unwrap();
    fs::write(root.join("app.js"), js_source).unwrap();

    let db_file = tmp.path().join("proj.sqlite");
    build_index("proj", &root, &db_file, &cfg, false).expect("index build should succeed");

    let pool = Indexer::init(&db_file).unwrap();
    let indexer = Indexer::from_pool("proj", &pool).unwrap();
    let summaries = indexer.load_all_ssa_summaries().unwrap();
    assert!(
        !summaries.is_empty(),
        "index build should persist SSA summaries for functions with non-trivial SSA effects"
    );
}
|