mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-06 19:35:13 +02:00
Refactor database schema and scanning process:
- Introduced `issues` table for detailed vulnerability storage. - Enhanced `files` table with project scoping and unique constraints. - Replaced `OutputFormat` enum with `String` for flexibility. - Added support for formatted console output of scan results. - Integrated file and issue updating logic for incremental scans. - Optimized scanning by leveraging database-stored issues.
This commit is contained in:
parent
9ef591c7b1
commit
0eecf886f2
7 changed files with 302 additions and 357 deletions
138
Cargo.lock
generated
138
Cargo.lock
generated
|
|
@ -163,6 +163,19 @@ version = "1.0.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
|
||||
|
||||
[[package]]
|
||||
name = "console"
|
||||
version = "0.15.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8"
|
||||
dependencies = [
|
||||
"encode_unicode",
|
||||
"libc",
|
||||
"once_cell",
|
||||
"unicode-width",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "constant_time_eq"
|
||||
version = "0.3.1"
|
||||
|
|
@ -233,22 +246,18 @@ dependencies = [
|
|||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encode_unicode"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
|
||||
|
||||
[[package]]
|
||||
name = "errno"
|
||||
version = "0.3.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fallible-iterator"
|
||||
version = "0.3.0"
|
||||
|
|
@ -261,24 +270,6 @@ version = "0.1.9"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "2.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
|
||||
|
||||
[[package]]
|
||||
name = "filetime"
|
||||
version = "0.2.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "35c0522e981e68cbfa8c3f978441a5f34b30b96e146b33cd3359176b50fe8586"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"libredox",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "foldhash"
|
||||
version = "0.1.5"
|
||||
|
|
@ -293,19 +284,7 @@ checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
|
|||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi 0.11.1+wasi-snapshot-preview1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"r-efi",
|
||||
"wasi 0.14.2+wasi-0.2.4",
|
||||
"wasi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -409,7 +388,6 @@ checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d"
|
|||
dependencies = [
|
||||
"bitflags",
|
||||
"libc",
|
||||
"redox_syscall",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -422,12 +400,6 @@ dependencies = [
|
|||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.27"
|
||||
|
|
@ -481,15 +453,14 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"blake3",
|
||||
"clap",
|
||||
"console",
|
||||
"crossbeam-channel",
|
||||
"directories",
|
||||
"filetime",
|
||||
"ignore",
|
||||
"num_cpus",
|
||||
"once_cell",
|
||||
"rusqlite",
|
||||
"serde",
|
||||
"tempfile",
|
||||
"toml",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
|
|
@ -565,28 +536,13 @@ dependencies = [
|
|||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "r-efi"
|
||||
version = "5.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5"
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.5.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_users"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b"
|
||||
dependencies = [
|
||||
"getrandom 0.2.16",
|
||||
"getrandom",
|
||||
"libredox",
|
||||
"thiserror",
|
||||
]
|
||||
|
|
@ -649,19 +605,6 @@ dependencies = [
|
|||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.20"
|
||||
|
|
@ -763,19 +706,6 @@ dependencies = [
|
|||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tempfile"
|
||||
version = "3.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1"
|
||||
dependencies = [
|
||||
"fastrand",
|
||||
"getrandom 0.3.3",
|
||||
"once_cell",
|
||||
"rustix",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "2.0.12"
|
||||
|
|
@ -1068,6 +998,12 @@ version = "1.0.18"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c"
|
||||
|
||||
[[package]]
|
||||
name = "utf8parse"
|
||||
version = "0.2.2"
|
||||
|
|
@ -1102,15 +1038,6 @@ version = "0.11.1+wasi-snapshot-preview1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.14.2+wasi-0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
|
||||
dependencies = [
|
||||
"wit-bindgen-rt",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
|
|
@ -1223,12 +1150,3 @@ checksum = "74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd"
|
|||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wit-bindgen-rt"
|
||||
version = "0.39.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -3,9 +3,6 @@ name = "nyx"
|
|||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3"
|
||||
|
||||
[dependencies]
|
||||
directories = "6.0.0"
|
||||
clap = { version = "4.5.40", features = ["derive"] }
|
||||
|
|
@ -14,9 +11,7 @@ toml = "0.8.23"
|
|||
tracing-subscriber = { version = "0.3.19", features = ["env-filter", "json", "ansi","time"] }
|
||||
tracing = "0.1.41"
|
||||
num_cpus = "1.17.0"
|
||||
|
||||
rusqlite = "0.36.0"
|
||||
|
||||
ignore = "0.4.23"
|
||||
tree-sitter = "0.25.6"
|
||||
tree-sitter-rust = "0.24.0"
|
||||
|
|
@ -30,5 +25,5 @@ tree-sitter-php = "0.23.11"
|
|||
tree-sitter-python = "0.23.6"
|
||||
crossbeam-channel = "0.5.15"
|
||||
blake3 = "1.8.2"
|
||||
filetime = "0.2.25"
|
||||
once_cell = "1.21.3"
|
||||
console = "0.15.11"
|
||||
|
|
|
|||
12
src/cli.rs
12
src/cli.rs
|
|
@ -26,8 +26,8 @@ pub enum Commands {
|
|||
rebuild_index: bool,
|
||||
|
||||
/// Output format
|
||||
#[arg(short, long, value_enum, default_value = "table")]
|
||||
format: OutputFormat,
|
||||
#[arg(short, long, value_enum, default_value = "")]
|
||||
format: String,
|
||||
|
||||
/// Show only high severity issues
|
||||
#[arg(long)]
|
||||
|
|
@ -78,11 +78,3 @@ pub enum IndexAction {
|
|||
path: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(clap::ValueEnum, Clone, Debug)]
|
||||
pub enum OutputFormat {
|
||||
Table,
|
||||
Json,
|
||||
Csv,
|
||||
Sarif,
|
||||
}
|
||||
|
|
@ -1,18 +1,30 @@
|
|||
use crate::cli::OutputFormat;
|
||||
use crate::utils::project::get_project_info;
|
||||
use console::style;
|
||||
use std::path::Path;
|
||||
use crate::utils::config::Config;
|
||||
use tree_sitter::{Language, Parser, QueryCursor, StreamingIterator};
|
||||
use crate::database::index::Indexer;
|
||||
|
||||
use crate::database::index::{IssueRow, Indexer};
|
||||
use crate::patterns::Severity;
|
||||
use crate::utils::config::Config;
|
||||
use crate::utils::query_cache;
|
||||
use crate::walk::spawn_senders;
|
||||
|
||||
use tree_sitter::{Language, Parser, QueryCursor, StreamingIterator};
|
||||
|
||||
/// A single rule match (diagnostic) produced by a scan or loaded back from the index.
#[derive(Debug)]
|
||||
pub struct Diag {
|
||||
/// Path of the file the match was found in, stored as a (lossily converted) string.
pub(crate) path: String,
|
||||
/// Line of the match; the rule runner stores the tree-sitter row plus one.
pub(crate) line: usize,
|
||||
/// Column of the match; the rule runner stores the tree-sitter column plus one.
pub(crate) col: usize,
|
||||
/// Severity of the rule that matched.
pub(crate) severity: Severity,
|
||||
/// Identifier of the rule that matched.
pub(crate) id: String,
|
||||
}
|
||||
|
||||
/// Entry point called by the CLI.
|
||||
pub fn handle(
|
||||
path: &str,
|
||||
no_index: bool,
|
||||
rebuild_index: bool,
|
||||
format: OutputFormat,
|
||||
format: String,
|
||||
high_only: bool,
|
||||
database_dir: &Path,
|
||||
config: &Config,
|
||||
|
|
@ -20,73 +32,111 @@ pub fn handle(
|
|||
let scan_path = Path::new(path).canonicalize()?;
|
||||
let (project_name, db_path) = get_project_info(&scan_path, database_dir)?;
|
||||
|
||||
tracing::debug!("Config: {:?}", config);
|
||||
tracing::debug!("Scanning project: {}", project_name);
|
||||
tracing::debug!("Scan path: {}", scan_path.display());
|
||||
let mut indexer = Indexer::new(&project_name, &db_path)?;
|
||||
|
||||
let diags: Vec<Diag>;
|
||||
|
||||
if no_index {
|
||||
tracing::debug!("Scanning without index...");
|
||||
scan_filesystem(&scan_path, config)?;
|
||||
diags = scan_filesystem(&scan_path, config)?;
|
||||
} else {
|
||||
if rebuild_index || !db_path.exists() {
|
||||
tracing::debug!("Building/updating index...");
|
||||
crate::commands::index::build_index(&scan_path, &db_path)?;
|
||||
}
|
||||
|
||||
tracing::debug!("Using index: {}", db_path.display());
|
||||
scan_with_index(&scan_path, &db_path, config)?;
|
||||
diags = scan_with_index(&project_name, &db_path, config, &mut indexer)?;
|
||||
}
|
||||
|
||||
tracing::debug!("Output format: {:?}", format);
|
||||
if high_only {
|
||||
tracing::debug!("Filtering: High severity only");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn scan_filesystem(root: &Path, cfg: &Config) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let rx = spawn_senders(root, cfg);
|
||||
|
||||
for batch in rx.iter().flatten() {
|
||||
tracing::debug!("Scanning file: {}", batch.display());
|
||||
scan_single_file(&batch, cfg)?; // <-- your actual scanner
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
fn scan_with_index(root: &Path, db_path: &Path, cfg: &Config) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let indexer = Indexer::new(db_path)
|
||||
.map_err(|e| format!("opening index {}: {e}", db_path.display()))?;
|
||||
|
||||
let rx = spawn_senders(root, cfg);
|
||||
|
||||
for batch in rx.iter().flatten() {
|
||||
let scan = indexer.should_scan(&batch)?;
|
||||
tracing::debug!("Should scan: {}, file: {}", scan, batch.display());
|
||||
if scan {
|
||||
tracing::debug!("Scanning file: {}", batch.display());
|
||||
scan_single_file(&batch, cfg)?; // your scanner
|
||||
indexer.record_scan(&batch)?;
|
||||
if format == "console" || format == "" && config.output.default_format == "console" {
|
||||
for d in &diags {
|
||||
if high_only && d.severity != Severity::High {
|
||||
continue;
|
||||
}
|
||||
let sev_str = match d.severity {
|
||||
Severity::High => style("HIGH").red().bold(),
|
||||
Severity::Medium => style("MEDIUM").yellow().bold(),
|
||||
Severity::Low => style("LOW").cyan().bold(),
|
||||
};
|
||||
println!(
|
||||
"{}:{}:{} [{}] {}",
|
||||
style(d.path.clone()).blue().underlined(),
|
||||
d.line,
|
||||
d.col,
|
||||
sev_str,
|
||||
style(&d.id).bold(),
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn scan_single_file(
|
||||
// --------------------------------------------------------------------------------------------
|
||||
// Scanning helpers
|
||||
// --------------------------------------------------------------------------------------------
|
||||
|
||||
fn scan_filesystem(
|
||||
root: &Path,
|
||||
cfg: &Config,
|
||||
) -> Result<Vec<Diag>, Box<dyn std::error::Error>> {
|
||||
let rx = spawn_senders(root, cfg);
|
||||
let mut issues: Vec<Diag> = Vec::new();
|
||||
for batch in rx.iter().flatten() {
|
||||
issues.append(&mut run_rules_on_file(&batch, cfg)?);
|
||||
}
|
||||
Ok(issues)
|
||||
}
|
||||
|
||||
fn scan_with_index(
|
||||
project: &str,
|
||||
_db_path: &Path,
|
||||
cfg: &Config,
|
||||
indexer: &mut Indexer,
|
||||
) -> Result<Vec<Diag>, Box<dyn std::error::Error>> {
|
||||
let files = indexer.get_files(project).unwrap_or_default();
|
||||
let mut issues: Vec<Diag> = Vec::new();
|
||||
for file in files {
|
||||
if indexer.should_scan(&file)? {
|
||||
let mut diags = run_rules_on_file(&file, cfg)?;
|
||||
let file_id = indexer.upsert_file(&file)?;
|
||||
|
||||
let issue_rows: Vec<IssueRow> = diags
|
||||
.iter()
|
||||
.map(|d| IssueRow {
|
||||
rule_id: d.id.as_ref(),
|
||||
severity: match d.severity {
|
||||
Severity::High => "HIGH",
|
||||
Severity::Medium => "MEDIUM",
|
||||
Severity::Low => "LOW",
|
||||
},
|
||||
line: d.line as i64,
|
||||
col: d.col as i64,
|
||||
})
|
||||
.collect();
|
||||
|
||||
indexer.replace_issues(file_id, issue_rows)?;
|
||||
issues.append(&mut diags);
|
||||
continue;
|
||||
}
|
||||
issues.append(&mut indexer.get_issues_from_file(&file)?);
|
||||
}
|
||||
Ok(issues)
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------
|
||||
// Tree‑sitter‑based rule runner – returns a Vec<Diag>
|
||||
// --------------------------------------------------------------------------------------------
|
||||
fn run_rules_on_file(
|
||||
path: &Path,
|
||||
cfg: &Config, // assume cfg.high_only: bool
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
cfg: &Config,
|
||||
) -> Result<Vec<Diag>, Box<dyn std::error::Error>> {
|
||||
let source = std::fs::read_to_string(path)?;
|
||||
let mut parser = Parser::new();
|
||||
|
||||
let ext = path
|
||||
.extension()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or_default()
|
||||
.to_ascii_lowercase();
|
||||
|
||||
// Pick the right tree-sitter language *and* pre-compiled queries
|
||||
let (ts_lang, lang_key): (Language, &'static str) = match ext.as_str() {
|
||||
let lang_key = match path
|
||||
.extension()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or_default()
|
||||
.to_ascii_lowercase()
|
||||
.as_str()
|
||||
{
|
||||
"rs" => (Language::from(tree_sitter_rust::LANGUAGE), "rust"),
|
||||
"c" => (Language::from(tree_sitter_c::LANGUAGE), "c"),
|
||||
"cpp" | "c++" => (Language::from(tree_sitter_cpp::LANGUAGE), "cpp"),
|
||||
|
|
@ -96,69 +146,35 @@ fn scan_single_file(
|
|||
"py" => (Language::from(tree_sitter_python::LANGUAGE), "python"),
|
||||
"ts" | "tsx" => (Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT), "typescript"),
|
||||
"js" => (Language::from(tree_sitter_javascript::LANGUAGE), "javascript"),
|
||||
_ => return Ok(()),
|
||||
_ => return Ok(Vec::new()),
|
||||
};
|
||||
let (ts_lang, lang_name) = lang_key;
|
||||
|
||||
parser.set_language(&ts_lang)?;
|
||||
let tree = parser.parse(&source, None).ok_or("tree‑sitter failed")?;
|
||||
let root = tree.root_node();
|
||||
|
||||
let tree = parser.parse(&source, None).ok_or("tree-sitter failed")?;
|
||||
let root = tree.root_node();
|
||||
|
||||
// ----- run vulnerability patterns -----
|
||||
let compiled = query_cache::for_lang(lang_key, ts_lang);
|
||||
let compiled = query_cache::for_lang(lang_name, ts_lang);
|
||||
let mut cursor = QueryCursor::new();
|
||||
let mut out = Vec::new();
|
||||
|
||||
for cq in &compiled {
|
||||
if cfg.scanner.min_severity > cq.meta.severity {
|
||||
continue;
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut matches = cursor.matches(&cq.query, root, source.as_bytes());
|
||||
|
||||
while let Some(m) = matches.next() {
|
||||
// capture 0 is the one tagged @vuln
|
||||
for cap in m.captures.iter().filter(|c| c.index == 0) {
|
||||
let point = cap.node.start_position();
|
||||
let line = point.row;
|
||||
let col = point.column;
|
||||
|
||||
match cq.meta.severity {
|
||||
Severity::High => {
|
||||
tracing::error!(
|
||||
file = %path.display(),
|
||||
line = line + 1,
|
||||
column = col + 1,
|
||||
id = cq.meta.id,
|
||||
sev = ?Severity::High,
|
||||
"pattern matched"
|
||||
);
|
||||
},
|
||||
Severity::Medium => {
|
||||
tracing::warn!(
|
||||
file = %path.display(),
|
||||
line = line + 1,
|
||||
column = col + 1,
|
||||
id = cq.meta.id,
|
||||
sev = ?Severity::Medium,
|
||||
"pattern matched"
|
||||
);
|
||||
}
|
||||
Severity::Low => {
|
||||
tracing::info!(
|
||||
file = %path.display(),
|
||||
line = line + 1,
|
||||
column = col + 1,
|
||||
id = cq.meta.id,
|
||||
sev = ?Severity::Low,
|
||||
"pattern matched"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
out.push(Diag {
|
||||
path: path.to_string_lossy().to_string(),
|
||||
line: point.row + 1,
|
||||
col: point.column + 1,
|
||||
severity: cq.meta.severity,
|
||||
id: String::from(cq.meta.id),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
Ok(out)
|
||||
}
|
||||
258
src/database.rs
258
src/database.rs
|
|
@ -1,164 +1,174 @@
|
|||
pub mod index {
|
||||
use blake3::Hasher;
|
||||
use rusqlite::{params, Connection, OptionalExtension};
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str::FromStr;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::patterns::Severity;
|
||||
|
||||
/// Schema: stores digest, file modification time (secs since epoch) and
|
||||
/// last time we *fully* scanned the file.
|
||||
/// DB schema (foreign‑keys enabled).
|
||||
const SCHEMA: &str = r#"
|
||||
CREATE TABLE IF NOT EXISTS files (
|
||||
path TEXT PRIMARY KEY,
|
||||
hash BLOB NOT NULL,
|
||||
mtime INTEGER NOT NULL,
|
||||
scanned_at INTEGER NOT NULL
|
||||
);"#;
|
||||
PRAGMA foreign_keys = ON;
|
||||
|
||||
pub(crate) struct Indexer {
|
||||
CREATE TABLE IF NOT EXISTS files (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
project TEXT NOT NULL,
|
||||
path TEXT NOT NULL,
|
||||
hash BLOB NOT NULL,
|
||||
mtime INTEGER NOT NULL,
|
||||
scanned_at INTEGER NOT NULL,
|
||||
UNIQUE(project, path)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS issues (
|
||||
file_id INTEGER NOT NULL
|
||||
REFERENCES files(id)
|
||||
ON DELETE CASCADE,
|
||||
rule_id TEXT NOT NULL,
|
||||
severity TEXT NOT NULL,
|
||||
line INTEGER NOT NULL,
|
||||
col INTEGER NOT NULL,
|
||||
PRIMARY KEY (file_id, rule_id, line, col)
|
||||
);
|
||||
"#;
|
||||
|
||||
/// A single issue row, ready for insertion.
///
/// All string fields are borrowed, so a row cannot outlive the data it was built from.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IssueRow<'a> {
|
||||
/// Identifier of the rule that produced the issue.
pub rule_id: &'a str,
|
||||
/// Severity as text; the scanner writes "HIGH", "MEDIUM" or "LOW".
pub severity: &'a str,
|
||||
/// Line of the issue, as stored in the `issues.line` column.
pub line: i64,
|
||||
/// Column of the issue, as stored in the `issues.col` column.
pub col: i64,
|
||||
}
|
||||
|
||||
/// Handle on the SQLite index for one project.
pub struct Indexer {
|
||||
/// Open connection to the index database.
conn: Connection,
|
||||
/// Project name used to scope lookups and upserts in the `files` table.
project: String,
|
||||
}
|
||||
|
||||
impl Indexer {
|
||||
pub fn new(database_path: &Path) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
/// Open (or create) the DB at `database_path` for the given project name.
|
||||
pub fn new(project: &str, database_path: &Path) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let conn = Connection::open(database_path)?;
|
||||
conn.execute_batch(SCHEMA)?;
|
||||
Ok(Self { conn })
|
||||
Ok(Self { conn, project: project.to_owned() })
|
||||
}
|
||||
|
||||
/// Returns `true` if the caller should analyze the file, i.e., we have
|
||||
/// never seen it or something changed (mtime or content hash).
|
||||
/// Return true when the file *content* or *mtime* changed since the last scan.
|
||||
pub fn should_scan(&self, path: &Path) -> Result<bool, Box<dyn std::error::Error>> {
|
||||
let meta = fs::metadata(path)?;
|
||||
let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
|
||||
|
||||
let digest = Self::digest_file(path)?;
|
||||
|
||||
let row: Option<(Vec<u8>, i64)> = self
|
||||
.conn
|
||||
.query_row(
|
||||
"SELECT hash, mtime FROM files WHERE path = ?1",
|
||||
params![path.to_string_lossy()],
|
||||
|r| Ok((r.get(0)?, r.get(1)?)),
|
||||
)
|
||||
.optional()?;
|
||||
.conn
|
||||
.query_row(
|
||||
"SELECT hash, mtime FROM files WHERE project = ?1 AND path = ?2",
|
||||
params![self.project, path.to_string_lossy()],
|
||||
|r| Ok((r.get(0)?, r.get(1)?)),
|
||||
)
|
||||
.optional()?;
|
||||
|
||||
match row {
|
||||
Some((stored_hash, stored_mtime)) => {
|
||||
Ok(stored_hash != digest || stored_mtime != mtime)
|
||||
}
|
||||
None => Ok(true),
|
||||
}
|
||||
Ok(match row {
|
||||
Some((stored_hash, stored_mtime)) => stored_hash != digest || stored_mtime != mtime,
|
||||
None => true,
|
||||
})
|
||||
}
|
||||
|
||||
/// Persist a fresh scan result.
|
||||
pub fn record_scan(&self, path: &Path) -> Result<(), Box<dyn std::error::Error>> {
|
||||
/// Insert or update the `files` row and return its id.
|
||||
pub fn upsert_file(&self, path: &Path) -> Result<i64, Box<dyn std::error::Error>> {
|
||||
let meta = fs::metadata(path)?;
|
||||
let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
|
||||
let scanned_at = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)?
|
||||
.as_secs() as i64;
|
||||
let scanned_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;
|
||||
let digest = Self::digest_file(path)?;
|
||||
|
||||
self.conn.execute(
|
||||
"REPLACE INTO files (path, hash, mtime, scanned_at) VALUES (?1, ?2, ?3, ?4)",
|
||||
params![path.to_string_lossy(), digest, mtime, scanned_at],
|
||||
"INSERT INTO files (project, path, hash, mtime, scanned_at)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5)
|
||||
ON CONFLICT(project,path) DO UPDATE
|
||||
SET hash = excluded.hash,
|
||||
mtime = excluded.mtime,
|
||||
scanned_at = excluded.scanned_at",
|
||||
params![self.project, path.to_string_lossy(), digest, mtime, scanned_at],
|
||||
)?;
|
||||
|
||||
let id: i64 = self.conn.query_row(
|
||||
"SELECT id FROM files WHERE project = ?1 AND path = ?2",
|
||||
params![self.project, path.to_string_lossy()],
|
||||
|r| r.get(0),
|
||||
)?;
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
/// Replace all issues for `file_id` with the supplied set.
|
||||
pub fn replace_issues<'a>(&mut self, file_id: i64, issues: impl IntoIterator<Item = IssueRow<'a>>)
|
||||
-> Result<(), Box<dyn std::error::Error>> {
|
||||
let tx = self.conn.transaction()?;
|
||||
tx.execute("DELETE FROM issues WHERE file_id = ?", params![file_id])?;
|
||||
|
||||
{
|
||||
let mut stmt = tx.prepare(
|
||||
"INSERT INTO issues (file_id, rule_id, severity, line, col)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5)",
|
||||
)?;
|
||||
for iss in issues {
|
||||
stmt.execute(params![file_id, iss.rule_id, iss.severity, iss.line, iss.col])?;
|
||||
}
|
||||
}
|
||||
tx.commit()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Gets the issues for a specific file so we don't have to rescan
|
||||
pub fn get_issues_from_file(
|
||||
&self,
|
||||
path: &Path,
|
||||
) -> Result<Vec<Diag>, Box<dyn std::error::Error>> {
|
||||
let file_id: i64 = self.conn.query_row(
|
||||
"SELECT id FROM files WHERE project = ?1 AND path = ?2",
|
||||
params![self.project, path.to_string_lossy()],
|
||||
|r| r.get(0),
|
||||
)?;
|
||||
|
||||
let mut stmt = self.conn.prepare(
|
||||
"SELECT rule_id, severity, line, col
|
||||
FROM issues
|
||||
WHERE file_id = ?1",
|
||||
)?;
|
||||
|
||||
let issue_iter = stmt.query_map([file_id], |row| {
|
||||
let sev_str: String = row.get(1)?;
|
||||
Ok(Diag {
|
||||
path: path.to_string_lossy().to_string(),
|
||||
id: row.get::<_, String>(0)?, // rule_id
|
||||
line: row.get::<_, i64>(2)? as usize,
|
||||
col: row.get::<_, i64>(3)? as usize,
|
||||
severity: Severity::from_str(&sev_str).unwrap(),
|
||||
})
|
||||
})?;
|
||||
|
||||
Ok(issue_iter.filter_map(Result::ok).collect())
|
||||
}
|
||||
|
||||
/// gets files from the database
|
||||
pub fn get_files(&self, project: &str) -> Result<Vec<std::path::PathBuf>, Box<dyn std::error::Error>> {
|
||||
let mut stmt = self.conn.prepare(
|
||||
"SELECT path
|
||||
FROM files
|
||||
WHERE project = ?1",
|
||||
)?;
|
||||
|
||||
let file_iter = stmt.query_map([project], |row| row.get::<_, String>(0))?;
|
||||
|
||||
Ok(file_iter.map(|p| p.map(PathBuf::from)).collect::<Result<_, _>>()?)
|
||||
}
|
||||
|
||||
fn digest_file(path: &Path) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
|
||||
let mut hasher = Hasher::new();
|
||||
let mut hasher = blake3::Hasher::new();
|
||||
let mut file = fs::File::open(path)?;
|
||||
std::io::copy(&mut file, &mut hasher)?;
|
||||
Ok(hasher.finalize().as_bytes().to_vec())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::database::index::Indexer;
|
||||
use std::error::Error;
|
||||
use std::io::Write;
|
||||
use tempfile::tempdir;
|
||||
|
||||
/// Returns a freshly‑initialised `Indexer` backed by an *in‑memory* SQLite
|
||||
/// database. Using `:memory:` sidesteps file‑system lifetime issues that can
|
||||
/// occur when the temporary database file is deleted while a connection is
|
||||
/// still open.
|
||||
fn new_indexer() -> Indexer {
|
||||
Indexer::new(std::path::Path::new(":memory:"))
|
||||
.expect("create in‑memory Indexer")
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn new_file_is_flagged_for_scan() -> Result<(), Box<dyn Error>> {
|
||||
let indexer = new_indexer();
|
||||
|
||||
let dir = tempdir()?;
|
||||
let file_path = dir.path().join("hello.txt");
|
||||
std::fs::write(&file_path, b"hello world")?;
|
||||
|
||||
// File has never been seen ⇒ should be scanned.
|
||||
assert!(indexer.should_scan(&file_path)?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unchanged_file_is_not_flagged_again() -> Result<(), Box<dyn Error>> {
|
||||
let indexer = new_indexer();
|
||||
let dir = tempdir()?;
|
||||
let file_path = dir.path().join("foo.txt");
|
||||
std::fs::write(&file_path, b"abc123")?;
|
||||
|
||||
// First pass – record the scan result.
|
||||
indexer.record_scan(&file_path)?;
|
||||
|
||||
// Nothing changed – should_scan must return false.
|
||||
assert!(!indexer.should_scan(&file_path)?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn modified_content_triggers_rescan() -> Result<(), Box<dyn Error>> {
|
||||
let indexer = new_indexer();
|
||||
let dir = tempdir()?;
|
||||
let file_path = dir.path().join("bar.txt");
|
||||
std::fs::write(&file_path, b"first")?;
|
||||
indexer.record_scan(&file_path)?;
|
||||
|
||||
// Append data to change the hash.
|
||||
let mut file = std::fs::OpenOptions::new()
|
||||
.append(true)
|
||||
.open(&file_path)?;
|
||||
writeln!(file, "second line")?;
|
||||
|
||||
assert!(indexer.should_scan(&file_path)?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn modified_mtime_alone_triggers_rescan() -> Result<(), Box<dyn Error>> {
|
||||
// Compile this test only when the optional `filetime` feature is enabled.
|
||||
{
|
||||
use std::time::{Duration, SystemTime};
|
||||
use filetime::FileTime;
|
||||
|
||||
let indexer = new_indexer();
|
||||
let dir = tempdir()?;
|
||||
let file_path = dir.path().join("baz.txt");
|
||||
std::fs::write(&file_path, b"unchanged content")?;
|
||||
indexer.record_scan(&file_path)?;
|
||||
|
||||
// Bump the modification time without touching the contents.
|
||||
let now_plus = SystemTime::now() + Duration::from_secs(5);
|
||||
let new_mtime = FileTime::from_system_time(now_plus);
|
||||
filetime::set_file_mtime(&file_path, new_mtime)?;
|
||||
|
||||
assert!(indexer.should_scan(&file_path)?);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ mod php;
|
|||
mod python;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::str::FromStr;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
|
|
@ -33,6 +34,19 @@ pub struct Pattern {
|
|||
pub severity: Severity,
|
||||
}
|
||||
|
||||
impl FromStr for Severity { // TODO: FIX
|
||||
type Err = ();
|
||||
|
||||
fn from_str(input: &str) -> Result<Self, Self::Err> {
|
||||
match input.to_lowercase().as_str() {
|
||||
"medium" => Ok(Severity::Medium),
|
||||
"high" => Ok(Severity::High),
|
||||
_ => Ok(Severity::Low),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Global, lazily-initialised registry: lang-name → pattern slice
|
||||
static REGISTRY: Lazy<HashMap<&'static str, &'static [Pattern]>> = Lazy::new(|| {
|
||||
let mut m = HashMap::new();
|
||||
|
|
|
|||
|
|
@ -97,13 +97,13 @@ impl Default for DatabaseConfig {
|
|||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(default)]
|
||||
pub struct OutputConfig {
|
||||
/// The default output format. TODO: IMPLEMENT
|
||||
/// The default output format. TODO: IMPLEMENT others
|
||||
pub default_format: String,
|
||||
|
||||
/// Whether to show progress or not. TODO: IMPLEMENT
|
||||
pub show_progress: bool,
|
||||
|
||||
/// Whether to colorize output or not. TODO: IMPLEMENT
|
||||
/// Whether to colorize output or not. TODO: IMPLEMENT changing to non colored
|
||||
pub color_output: bool,
|
||||
|
||||
/// The maximum number of results to show. TODO: IMPLEMENT
|
||||
|
|
@ -113,7 +113,7 @@ pub struct OutputConfig {
|
|||
impl Default for OutputConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
default_format: "table".into(),
|
||||
default_format: "console".into(),
|
||||
show_progress: true,
|
||||
color_output: true,
|
||||
max_results: None,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue