Add multi-language AST-pattern scanning support

- Introduced `patterns` module with language-specific vulnerability patterns.
- Added `query_cache` utility for caching compiled queries.
- Expanded `scan.rs` to support scanning multiple languages dynamically.
- Updated `Cargo.toml` with additional tree-sitter dependencies.
- Added severity filtering to `ScannerConfig` for better configuration.
This commit is contained in:
elipeter 2025-06-17 01:17:48 +02:00
parent 0831b9fb48
commit 22369cc404
17 changed files with 665 additions and 25 deletions

View file

@ -2,8 +2,9 @@ use crate::cli::OutputFormat;
use crate::utils::project::get_project_info;
use std::path::Path;
use crate::utils::config::Config;
use tree_sitter::{Parser};
use tree_sitter::{Language, Parser, QueryCursor, StreamingIterator};
use crate::database::index::Indexer;
use crate::utils::query_cache;
use crate::walk::spawn_senders;
pub fn handle(
@ -72,34 +73,64 @@ fn scan_with_index(root: &Path, db_path: &Path, cfg: &Config) -> Result<(), Box<
fn scan_single_file(
path: &Path,
_cfg: &Config,
) -> Result<(), Box<dyn std::error::Error>> {
if path.extension().and_then(|s| s.to_str()) != Some("rs") {
return Ok(());
}
cfg: &Config, // assume cfg.high_only: bool
) -> Result<(), Box<dyn std::error::Error>> {
let source = std::fs::read_to_string(path)?;
let mut parser = Parser::new();
parser.set_language(&tree_sitter_rust::LANGUAGE.into())?;
let tree = parser.parse(&source, None).ok_or("tree-sitter failed")?;
let root = tree.root_node();
let mut fn_count = 0;
let mut cursor = root.walk();
for child in root.children(&mut cursor) {
if child.kind() == "function_item" {
fn_count += 1;
}
let ext = path
.extension()
.and_then(|s| s.to_str())
.unwrap_or_default()
.to_ascii_lowercase();
// Pick the right tree-sitter language *and* pre-compiled queries
let (ts_lang, lang_key): (Language, &'static str) = match ext.as_str() {
"rs" => (Language::from(tree_sitter_rust::LANGUAGE), "rust"),
"c" => (Language::from(tree_sitter_c::LANGUAGE), "c"),
"cpp" | "c++" => (Language::from(tree_sitter_cpp::LANGUAGE), "cpp"),
"java" => (Language::from(tree_sitter_java::LANGUAGE), "java"),
"go" => (Language::from(tree_sitter_go::LANGUAGE), "go"),
"php" => (Language::from(tree_sitter_php::LANGUAGE_PHP), "php"),
"py" => (Language::from(tree_sitter_python::LANGUAGE), "python"),
"ts" | "tsx" => (Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT), "typescript"),
"js" => (Language::from(tree_sitter_javascript::LANGUAGE), "javascript"),
_ => return Ok(()),
};
parser.set_language(&ts_lang)?;
let tree = parser.parse(&source, None).ok_or("tree-sitter failed")?;
let root = tree.root_node();
// ----- run vulnerability patterns -----
let compiled = query_cache::for_lang(lang_key, ts_lang);
let mut cursor = QueryCursor::new();
for cq in &compiled {
if cfg.scanner.min_severity > cq.meta.severity {
continue;
}
let mut matches = cursor.matches(&cq.query, root, source.as_bytes());
while let Some(m) = matches.next() {
// capture 0 is the one tagged @vuln
for cap in m.captures.iter().filter(|c| c.index == 0) {
let point = cap.node.start_position();
let line = point.row;
let col = point.column;
tracing::warn!(
file = %path.display(),
line = line + 1,
column = col + 1,
id = cq.meta.id,
sev = ?cq.meta.severity,
"pattern matched"
);
}
}
}
tracing::info!(
"scanned {} found {} Rust function(s)",
path.display(),
fn_count
);
// TODO: real vulnerability/pattern checks go here
Ok(())
}