2025-06-16 16:46:22 +02:00
|
|
|
use crate::cli::IndexAction;
|
2025-06-17 17:42:41 +02:00
|
|
|
use crate::database::index::{Indexer, IssueRow};
|
2025-06-24 20:27:06 +02:00
|
|
|
use crate::errors::NyxResult;
|
2025-06-17 17:42:41 +02:00
|
|
|
use crate::patterns::Severity;
|
|
|
|
|
use crate::utils::Config;
|
2025-06-16 16:46:22 +02:00
|
|
|
use crate::utils::project::get_project_info;
|
2026-02-24 23:44:07 -05:00
|
|
|
use crate::walk::spawn_file_walker;
|
2025-06-24 20:27:06 +02:00
|
|
|
use bytesize::ByteSize;
|
|
|
|
|
use chrono::{DateTime, Local};
|
|
|
|
|
use console::style;
|
2026-02-25 04:02:11 -05:00
|
|
|
use indicatif::{ProgressBar, ProgressStyle};
|
2025-06-17 20:45:33 +02:00
|
|
|
use rayon::prelude::*;
|
2025-06-24 20:27:06 +02:00
|
|
|
use std::fs;
|
2026-02-24 23:44:07 -05:00
|
|
|
use std::path::PathBuf;
|
2025-06-24 20:27:06 +02:00
|
|
|
use std::process::exit;
|
2025-06-16 16:46:22 +02:00
|
|
|
|
|
|
|
|
pub fn handle(
|
|
|
|
|
action: IndexAction,
|
|
|
|
|
database_dir: &std::path::Path,
|
2025-06-17 17:42:41 +02:00
|
|
|
config: &Config,
|
2025-06-23 20:59:49 +02:00
|
|
|
) -> NyxResult<()> {
|
2025-06-16 16:46:22 +02:00
|
|
|
match action {
|
|
|
|
|
IndexAction::Build { path, force } => {
|
|
|
|
|
let build_path = std::path::Path::new(&path).canonicalize()?;
|
|
|
|
|
let (project_name, db_path) = get_project_info(&build_path, database_dir)?;
|
|
|
|
|
|
|
|
|
|
if force || !db_path.exists() {
|
2026-02-25 04:02:11 -05:00
|
|
|
build_index(
|
|
|
|
|
&project_name,
|
|
|
|
|
&build_path,
|
|
|
|
|
&db_path,
|
|
|
|
|
config,
|
|
|
|
|
!config.output.quiet,
|
|
|
|
|
)?;
|
2025-06-24 20:27:06 +02:00
|
|
|
println!(
|
|
|
|
|
"✔ {} {}",
|
|
|
|
|
style("Index built:").green(),
|
|
|
|
|
style(db_path.display()).white().bold()
|
|
|
|
|
);
|
2025-06-23 20:27:16 +02:00
|
|
|
exit(0);
|
2025-06-16 16:46:22 +02:00
|
|
|
} else {
|
2025-06-24 20:27:06 +02:00
|
|
|
println!(
|
|
|
|
|
"{} {}",
|
|
|
|
|
style("↩ Index already exists").yellow(),
|
|
|
|
|
style("(use --force to rebuild)").dim()
|
|
|
|
|
);
|
2025-06-23 20:27:16 +02:00
|
|
|
exit(0);
|
2025-06-16 16:46:22 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
IndexAction::Status { path } => {
|
|
|
|
|
let status_path = std::path::Path::new(&path).canonicalize()?;
|
|
|
|
|
let (project_name, db_path) = get_project_info(&status_path, database_dir)?;
|
|
|
|
|
|
2025-06-23 19:37:19 +02:00
|
|
|
println!("{}", style("Project status").blue().bold().underlined());
|
2025-06-24 20:27:06 +02:00
|
|
|
println!(
|
|
|
|
|
" {:14} {}",
|
|
|
|
|
style("Project"),
|
|
|
|
|
style(&project_name).white().bold()
|
|
|
|
|
);
|
|
|
|
|
println!(
|
|
|
|
|
" {:14} {}",
|
|
|
|
|
style("Index path"),
|
|
|
|
|
style(db_path.display()).underlined()
|
|
|
|
|
);
|
|
|
|
|
println!(
|
|
|
|
|
" {:14} {}",
|
|
|
|
|
style("Exists"),
|
|
|
|
|
style(db_path.exists()).bold()
|
|
|
|
|
);
|
2025-06-16 16:46:22 +02:00
|
|
|
|
|
|
|
|
if db_path.exists() {
|
2025-06-23 19:37:19 +02:00
|
|
|
let meta = fs::metadata(&db_path)?;
|
|
|
|
|
let size = ByteSize::b(meta.len());
|
|
|
|
|
let mtime: DateTime<Local> = meta.modified()?.into();
|
2025-06-24 20:27:06 +02:00
|
|
|
println!(" {:14} {}", style("Size"), size);
|
|
|
|
|
println!(
|
|
|
|
|
" {:14} {}",
|
|
|
|
|
style("Modified"),
|
|
|
|
|
mtime.format("%Y-%m-%d %H:%M:%S")
|
|
|
|
|
);
|
2025-06-16 16:46:22 +02:00
|
|
|
}
|
2025-06-24 20:27:06 +02:00
|
|
|
|
2025-06-23 19:37:19 +02:00
|
|
|
exit(0);
|
2025-06-16 16:46:22 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn build_index(
|
2025-06-17 17:42:41 +02:00
|
|
|
project_name: &str,
|
|
|
|
|
project_path: &std::path::Path,
|
2025-06-16 16:46:22 +02:00
|
|
|
db_path: &std::path::Path,
|
2025-06-17 17:42:41 +02:00
|
|
|
config: &Config,
|
2026-02-25 04:02:11 -05:00
|
|
|
show_progress: bool,
|
2025-06-23 20:59:49 +02:00
|
|
|
) -> NyxResult<()> {
|
2025-06-17 17:42:41 +02:00
|
|
|
tracing::debug!("Building index for: {}", project_name);
|
2025-06-16 16:46:22 +02:00
|
|
|
fs::File::create(db_path)?;
|
2025-06-24 20:27:06 +02:00
|
|
|
|
2025-06-17 20:45:33 +02:00
|
|
|
let pool = Indexer::init(db_path)?;
|
|
|
|
|
{
|
2025-06-23 20:59:49 +02:00
|
|
|
let idx = Indexer::from_pool(project_name, &pool)?;
|
2025-06-17 20:45:33 +02:00
|
|
|
idx.clear()?;
|
|
|
|
|
}
|
2025-06-17 17:42:41 +02:00
|
|
|
|
2025-06-17 20:45:33 +02:00
|
|
|
tracing::debug!("Cleaned index for: {}", project_name);
|
2025-06-24 20:27:06 +02:00
|
|
|
|
2026-02-24 23:44:07 -05:00
|
|
|
let (rx, handle) = spawn_file_walker(project_path, config);
|
2026-02-25 04:02:11 -05:00
|
|
|
// Drain the channel BEFORE joining — the bounded channel will deadlock
|
|
|
|
|
// if we join first and the walker blocks on send.
|
|
|
|
|
let paths: Vec<PathBuf> = rx.into_iter().flatten().collect();
|
2026-02-24 23:44:07 -05:00
|
|
|
if let Err(err) = handle.join() {
|
|
|
|
|
tracing::error!("walker thread panicked: {:#?}", err);
|
|
|
|
|
}
|
2026-02-25 04:02:11 -05:00
|
|
|
|
|
|
|
|
let pb = if show_progress {
|
|
|
|
|
let pb = ProgressBar::new(paths.len() as u64);
|
|
|
|
|
pb.set_style(
|
|
|
|
|
ProgressStyle::with_template(
|
|
|
|
|
"{spinner:.green} {msg} [{bar:30.cyan/blue}] {pos}/{len} ({eta})",
|
|
|
|
|
)
|
|
|
|
|
.unwrap()
|
|
|
|
|
.progress_chars("##-"),
|
|
|
|
|
);
|
|
|
|
|
pb.set_message("Indexing files");
|
|
|
|
|
pb
|
|
|
|
|
} else {
|
|
|
|
|
ProgressBar::hidden()
|
|
|
|
|
};
|
2025-06-17 17:42:41 +02:00
|
|
|
|
2026-02-24 23:44:07 -05:00
|
|
|
paths
|
|
|
|
|
.into_par_iter()
|
|
|
|
|
.try_for_each(|path| -> NyxResult<()> {
|
2025-06-24 20:27:06 +02:00
|
|
|
let mut idx = Indexer::from_pool(project_name, &pool)?;
|
2026-02-24 23:44:07 -05:00
|
|
|
|
|
|
|
|
// Read once, hash once — pass bytes to both rule execution and
|
2026-02-25 04:02:11 -05:00
|
|
|
// summary extraction. Use pre-computed hash for upsert to avoid
|
|
|
|
|
// a redundant file read inside upsert_file.
|
2026-02-24 23:44:07 -05:00
|
|
|
let bytes = std::fs::read(&path)?;
|
2026-02-25 04:02:11 -05:00
|
|
|
let hash = Indexer::digest_bytes(&bytes);
|
2026-02-24 23:44:07 -05:00
|
|
|
|
|
|
|
|
// Run AST-only rules (no taint yet — summaries come later in scan)
|
|
|
|
|
let issues =
|
|
|
|
|
crate::commands::scan::run_rules_on_bytes(&bytes, &path, config, None, None)?;
|
2026-02-25 04:02:11 -05:00
|
|
|
let file_id = idx.upsert_file_with_hash(&path, &hash)?;
|
2025-06-24 20:27:06 +02:00
|
|
|
|
|
|
|
|
let rows: Vec<IssueRow> = issues
|
|
|
|
|
.iter()
|
|
|
|
|
.map(|d| IssueRow {
|
|
|
|
|
rule_id: d.id.as_ref(),
|
|
|
|
|
severity: match d.severity {
|
|
|
|
|
Severity::High => "HIGH",
|
|
|
|
|
Severity::Medium => "MEDIUM",
|
|
|
|
|
Severity::Low => "LOW",
|
|
|
|
|
},
|
|
|
|
|
line: d.line as i64,
|
|
|
|
|
col: d.col as i64,
|
|
|
|
|
})
|
|
|
|
|
.collect();
|
|
|
|
|
|
|
|
|
|
idx.replace_issues(file_id, rows)?;
|
2026-02-24 23:44:07 -05:00
|
|
|
|
|
|
|
|
// Extract and persist function summaries for cross-file taint
|
|
|
|
|
let sums = crate::commands::scan::extract_summaries_from_bytes(&bytes, &path, config)
|
|
|
|
|
.unwrap_or_default();
|
|
|
|
|
if !sums.is_empty() {
|
|
|
|
|
idx.replace_summaries_for_file(&path, &hash, &sums)?;
|
|
|
|
|
}
|
|
|
|
|
|
2026-02-25 04:02:11 -05:00
|
|
|
pb.inc(1);
|
2025-06-24 20:27:06 +02:00
|
|
|
Ok(())
|
2026-02-24 23:44:07 -05:00
|
|
|
})?;
|
2026-02-25 04:02:11 -05:00
|
|
|
pb.finish_and_clear();
|
2025-06-24 20:27:06 +02:00
|
|
|
|
2025-06-17 21:00:24 +02:00
|
|
|
{
|
2025-06-23 17:49:15 +02:00
|
|
|
let idx = Indexer::from_pool(project_name, &pool)?;
|
2025-06-17 21:00:24 +02:00
|
|
|
idx.vacuum()?;
|
|
|
|
|
}
|
2025-06-24 20:27:06 +02:00
|
|
|
|
2025-06-16 16:46:22 +02:00
|
|
|
Ok(())
|
2025-06-24 20:27:06 +02:00
|
|
|
}
|
2025-06-24 23:57:27 +02:00
|
|
|
|
|
|
|
|
/// Indexing a project that contains a single file must create the SQLite
/// database and register exactly that file under the project name.
#[test]
fn build_index_creates_db_and_registers_files() {
    // Arrange: a temporary workspace holding a one-file project.
    let workspace = tempfile::tempdir().unwrap();
    let db_path = workspace.path().join("proj.sqlite");
    let project_dir = workspace.path().join("proj");
    fs::create_dir(&project_dir).unwrap();
    let readme = project_dir.join("readme.txt");
    fs::write(&readme, "hello").unwrap();

    // Small, explicit performance settings for the run.
    let mut settings = Config::default();
    settings.performance.worker_threads = Some(1);
    settings.performance.channel_multiplier = 1;
    settings.performance.batch_size = 2;

    // Act: build the index with the progress bar disabled.
    build_index("proj", &project_dir, &db_path, &settings, false)
        .expect("index build should succeed");

    // Assert: the database exists and lists only the file written above.
    assert!(db_path.is_file(), "SQLite file must exist");

    let pool = Indexer::init(&db_path).unwrap();
    let indexer = Indexer::from_pool("proj", &pool).unwrap();
    let files = indexer.get_files("proj").unwrap();
    assert_eq!(files.len(), 1, "exactly one file indexed");
    assert_eq!(files[0], readme);
}
|