Add multi-language AST-pattern scanning support

- Introduced `patterns` module with language-specific vulnerability patterns.
- Added `query_cache` utility for caching compiled queries.
- Expanded `scan.rs` to support scanning multiple languages dynamically.
- Updated `Cargo.toml` with additional tree-sitter dependencies.
- Added severity filtering to `ScannerConfig` for better configuration.
This commit is contained in:
elipeter 2025-06-17 01:17:48 +02:00
parent 0831b9fb48
commit 22369cc404
17 changed files with 665 additions and 25 deletions

89
Cargo.lock generated
View file

@ -13,6 +13,7 @@ dependencies = [
"filetime",
"ignore",
"num_cpus",
"once_cell",
"rusqlite",
"serde",
"tempfile",
@ -20,7 +21,15 @@ dependencies = [
"tracing",
"tracing-subscriber",
"tree-sitter",
"tree-sitter-c",
"tree-sitter-cpp",
"tree-sitter-go",
"tree-sitter-java",
"tree-sitter-javascript",
"tree-sitter-php",
"tree-sitter-python",
"tree-sitter-rust",
"tree-sitter-typescript",
]
[[package]]
@ -957,12 +966,82 @@ dependencies = [
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-c"
version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a3aad8f0129083a59fe8596157552d2bb7148c492d44c21558d68ca1c722707"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-cpp"
version = "0.23.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df2196ea9d47b4ab4a31b9297eaa5a5d19a0b121dceb9f118f6790ad0ab94743"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-go"
version = "0.23.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b13d476345220dbe600147dd444165c5791bf85ef53e28acbedd46112ee18431"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-java"
version = "0.23.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0aa6cbcdc8c679b214e616fd3300da67da0e492e066df01bcf5a5921a71e90d6"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-javascript"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf40bf599e0416c16c125c3cec10ee5ddc7d1bb8b0c60fa5c4de249ad34dc1b1"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-language"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8"
[[package]]
name = "tree-sitter-php"
version = "0.23.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f066e94e9272cfe4f1dcb07a1c50c66097eca648f2d7233d299c8ae9ed8c130c"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-python"
version = "0.23.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d065aaa27f3aaceaf60c1f0e0ac09e1cb9eb8ed28e7bcdaa52129cffc7f4b04"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-rust"
version = "0.24.0"
@ -973,6 +1052,16 @@ dependencies = [
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-typescript"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "unicode-ident"
version = "1.0.18"

View file

@ -20,6 +20,15 @@ rusqlite = "0.36.0"
ignore = "0.4.23"
tree-sitter = "0.25.6"
tree-sitter-rust = "0.24.0"
tree-sitter-c = "0.24.1"
tree-sitter-cpp = "0.23.4"
tree-sitter-java = "0.23.5"
tree-sitter-typescript = "0.23.2"
tree-sitter-javascript = "0.23.1"
tree-sitter-go = "0.23.4"
tree-sitter-php = "0.23.11"
tree-sitter-python = "0.23.6"
crossbeam-channel = "0.5.15"
blake3 = "1.8.2"
filetime = "0.2.25"
once_cell = "1.21.3"

View file

@ -2,8 +2,9 @@ use crate::cli::OutputFormat;
use crate::utils::project::get_project_info;
use std::path::Path;
use crate::utils::config::Config;
use tree_sitter::{Parser};
use tree_sitter::{Language, Parser, QueryCursor, StreamingIterator};
use crate::database::index::Indexer;
use crate::utils::query_cache;
use crate::walk::spawn_senders;
pub fn handle(
@ -72,34 +73,64 @@ fn scan_with_index(root: &Path, db_path: &Path, cfg: &Config) -> Result<(), Box<
fn scan_single_file(
path: &Path,
_cfg: &Config,
) -> Result<(), Box<dyn std::error::Error>> {
if path.extension().and_then(|s| s.to_str()) != Some("rs") {
return Ok(());
}
cfg: &Config, // assume cfg.high_only: bool
) -> Result<(), Box<dyn std::error::Error>> {
let source = std::fs::read_to_string(path)?;
let mut parser = Parser::new();
parser.set_language(&tree_sitter_rust::LANGUAGE.into())?;
let tree = parser.parse(&source, None).ok_or("tree-sitter failed")?;
let root = tree.root_node();
let mut fn_count = 0;
let mut cursor = root.walk();
for child in root.children(&mut cursor) {
if child.kind() == "function_item" {
fn_count += 1;
}
let ext = path
.extension()
.and_then(|s| s.to_str())
.unwrap_or_default()
.to_ascii_lowercase();
// Pick the right tree-sitter language *and* pre-compiled queries
let (ts_lang, lang_key): (Language, &'static str) = match ext.as_str() {
"rs" => (Language::from(tree_sitter_rust::LANGUAGE), "rust"),
"c" => (Language::from(tree_sitter_c::LANGUAGE), "c"),
"cpp" | "c++" => (Language::from(tree_sitter_cpp::LANGUAGE), "cpp"),
"java" => (Language::from(tree_sitter_java::LANGUAGE), "java"),
"go" => (Language::from(tree_sitter_go::LANGUAGE), "go"),
"php" => (Language::from(tree_sitter_php::LANGUAGE_PHP), "php"),
"py" => (Language::from(tree_sitter_python::LANGUAGE), "python"),
"ts" | "tsx" => (Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT), "typescript"),
"js" => (Language::from(tree_sitter_javascript::LANGUAGE), "javascript"),
_ => return Ok(()),
};
parser.set_language(&ts_lang)?;
let tree = parser.parse(&source, None).ok_or("tree-sitter failed")?;
let root = tree.root_node();
// ----- run vulnerability patterns -----
let compiled = query_cache::for_lang(lang_key, ts_lang);
let mut cursor = QueryCursor::new();
for cq in &compiled {
if cfg.scanner.min_severity > cq.meta.severity {
continue;
}
let mut matches = cursor.matches(&cq.query, root, source.as_bytes());
while let Some(m) = matches.next() {
// capture 0 is the one tagged @vuln
for cap in m.captures.iter().filter(|c| c.index == 0) {
let point = cap.node.start_position();
let line = point.row;
let col = point.column;
tracing::warn!(
file = %path.display(),
line = line + 1,
column = col + 1,
id = cq.meta.id,
sev = ?cq.meta.severity,
"pattern matched"
);
}
}
}
tracing::info!(
"scanned {} found {} Rust function(s)",
path.display(),
fn_count
);
// TODO: real vulnerability/pattern checks go here
Ok(())
}

View file

@ -3,6 +3,7 @@ mod commands;
mod utils;
mod walk;
mod database;
mod patterns;
use crate::utils::Config;
use cli::Cli;

40
src/patterns/c.rs Normal file
View file

@ -0,0 +1,40 @@
use crate::patterns::{Pattern, Severity};
/// Tree-sitter queries that flag risky libc calls in C sources.
///
/// Each query tags the offending call expression with `@vuln`.
pub const PATTERNS: &[Pattern] = &[
    // --- unbounded string copies / formatting ---
    Pattern {
        id: "strcpy_call",
        description: "strcpy() usage",
        severity: Severity::High,
        query: r#"(call_expression function: (identifier) @id (#eq? @id "strcpy")) @vuln"#,
    },
    Pattern {
        id: "strcat_call",
        description: "strcat() usage",
        severity: Severity::High,
        query: r#"(call_expression function: (identifier) @id (#eq? @id "strcat")) @vuln"#,
    },
    Pattern {
        id: "sprintf_call",
        description: "sprintf() (no length limit)",
        severity: Severity::High,
        query: r#"(call_expression function: (identifier) @id (#eq? @id "sprintf")) @vuln"#,
    },
    // --- unbounded input ---
    Pattern {
        id: "gets_call",
        description: "gets() usage",
        severity: Severity::High,
        query: r#"(call_expression function: (identifier) @id (#eq? @id "gets")) @vuln"#,
    },
    Pattern {
        id: "scanf_with_percent_s",
        description: r#"scanf("%s") without length specifier"#,
        severity: Severity::High,
        query: r#"(call_expression function: (identifier) @id (#eq? @id "scanf") arguments: (argument_list (string_literal) @fmt (#match? @fmt ".*%s.*"))) @vuln"#,
    },
    // --- shell execution ---
    Pattern {
        id: "system_call",
        description: "system() shell execution",
        severity: Severity::Medium,
        query: r#"(call_expression function: (identifier) @id (#eq? @id "system")) @vuln"#,
    },
];

40
src/patterns/cpp.rs Normal file
View file

@ -0,0 +1,40 @@
use crate::patterns::{Pattern, Severity};
/// Tree-sitter queries that flag risky constructs in C++ sources.
///
/// Covers the unsafe libc calls inherited from C plus `reinterpret_cast`.
/// Each query tags the offending expression with `@vuln`.
pub const PATTERNS: &[Pattern] = &[
    // --- unbounded string copies / formatting ---
    Pattern {
        id: "strcpy_call",
        description: "strcpy() usage",
        severity: Severity::High,
        query: r#"(call_expression function: (identifier) @id (#eq? @id "strcpy")) @vuln"#,
    },
    Pattern {
        id: "strcat_call",
        description: "strcat() usage",
        severity: Severity::High,
        query: r#"(call_expression function: (identifier) @id (#eq? @id "strcat")) @vuln"#,
    },
    Pattern {
        id: "sprintf_call",
        description: "sprintf() (no length limit)",
        severity: Severity::High,
        query: r#"(call_expression function: (identifier) @id (#eq? @id "sprintf")) @vuln"#,
    },
    // --- unbounded input ---
    Pattern {
        id: "gets_call",
        description: "gets() usage",
        severity: Severity::High,
        query: r#"(call_expression function: (identifier) @id (#eq? @id "gets")) @vuln"#,
    },
    // --- shell execution ---
    Pattern {
        id: "system_call",
        description: "system() shell execution",
        severity: Severity::Medium,
        query: r#"(call_expression function: (identifier) @id (#eq? @id "system")) @vuln"#,
    },
    // --- type punning ---
    Pattern {
        id: "reinterpret_cast",
        description: "reinterpret_cast usage",
        severity: Severity::Medium,
        query: "(reinterpret_cast_expression) @vuln",
    },
];

34
src/patterns/go.rs Normal file
View file

@ -0,0 +1,34 @@
use crate::patterns::{Pattern, Severity};
/// Tree-sitter queries that flag risky constructs in Go sources.
///
/// Each query tags the node to report with `@vuln`. Field and node names
/// follow the tree-sitter-go grammar; a query that names a field or node the
/// grammar does not define fails to compile and is dropped by the query cache,
/// so keep these in sync with the grammar's `node-types.json`.
pub const PATTERNS: &[Pattern] = &[
    Pattern {
        id: "exec_command",
        description: "os/exec Command construction",
        query: r#"(call_expression function: (selector_expression field: (field_identifier) @f (#eq? @f "Command"))) @vuln"#,
        severity: Severity::Medium,
    },
    Pattern {
        id: "http_insecure_tls",
        description: "&http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}",
        // Flags the `InsecureSkipVerify: true` element itself, wherever the
        // enclosing tls.Config literal appears. The previous query required a
        // composite literal whose type is a `selector_expression`, which the
        // grammar never produces (package-qualified types are `qualified_type`).
        // NOTE(review): assumes keyed elements wrap key and value in
        // `literal_element` nodes — confirm against the pinned grammar version.
        query: r#"(keyed_element . (literal_element (identifier) @k (#eq? @k "InsecureSkipVerify")) (literal_element (true))) @vuln"#,
        severity: Severity::High,
    },
    Pattern {
        id: "unsafe_pointer",
        description: "Use of unsafe.Pointer",
        // `qualified_type` exposes `package:` and `name:` fields.
        query: r#"(qualified_type package: (package_identifier) @p (#eq? @p "unsafe") name: (type_identifier) @f (#eq? @f "Pointer")) @vuln"#,
        severity: Severity::High,
    },
    Pattern {
        id: "md5_sha1",
        description: "crypto/md5 or crypto/sha1 usage",
        // The receiver of a Go selector is the `operand:` field. The regex is
        // anchored so identifiers that merely contain "md5"/"sha1" are ignored.
        query: r#"(call_expression function: (selector_expression operand: (identifier) @pkg (#match? @pkg "^(md5|sha1)$"))) @vuln"#,
        severity: Severity::Medium,
    },
    Pattern {
        id: "hardcoded_secret",
        description: "Hard-coded string that looks like an API key/token",
        // The predicate lives inside the parenthesised pattern it constrains,
        // and the literal is tagged `@vuln` like every other pattern.
        query: r#"((interpreted_string_literal) @vuln (#match? @vuln "(?i)(api|secret|token|password)[=:]?[ \t]*[A-Za-z0-9_\-]{8,}"))"#,
        severity: Severity::Low,
    },
];

40
src/patterns/java.rs Normal file
View file

@ -0,0 +1,40 @@
use crate::patterns::{Pattern, Severity};
/// Tree-sitter queries that flag risky constructs in Java sources.
///
/// Each query tags the node to report with `@vuln`. Child patterns are listed
/// in the order the children occur in the syntax tree (`object:` before
/// `name:` in a `method_invocation`), because sibling patterns are matched in
/// sequence.
pub const PATTERNS: &[Pattern] = &[
    Pattern {
        id: "runtime_exec",
        description: "Runtime.getRuntime().exec(...) arbitrary-command execution",
        query: r#"(method_invocation object: (method_invocation name: (identifier) @n (#eq? @n "getRuntime")) name: (identifier) @id (#eq? @id "exec")) @vuln"#,
        severity: Severity::High,
    },
    Pattern {
        id: "class_for_name",
        description: "Dynamic reflection via Class.forName(...)",
        query: r#"(method_invocation object: (identifier) @c (#eq? @c "Class") name: (identifier) @id (#eq? @id "forName")) @vuln"#,
        severity: Severity::Medium,
    },
    Pattern {
        id: "object_deserialization",
        description: "java.io.ObjectInputStream#readObject() deserialization",
        // `readObject` is an instance method, so the receiver is a variable of
        // arbitrary name; requiring a receiver literally called
        // `ObjectInputStream` could never match real code.
        query: r#"(method_invocation name: (identifier) @id (#eq? @id "readObject")) @vuln"#,
        severity: Severity::High,
    },
    Pattern {
        id: "insecure_random",
        description: "java.util.Random used where SecureRandom is expected",
        // The class name in `new Random()` is a `type_identifier` node.
        query: r#"(object_creation_expression type: (type_identifier) @t (#eq? @t "Random")) @vuln"#,
        severity: Severity::Medium,
    },
    Pattern {
        id: "thread_stop",
        description: "Deprecated Thread.stop() invocation",
        query: r#"(method_invocation object: (identifier) @obj (#eq? @obj "Thread") name: (identifier) @id (#eq? @id "stop")) @vuln"#,
        severity: Severity::Low,
    },
    Pattern {
        id: "sql_concat",
        description: "SQL built with string concatenation",
        // Anchored so only execute / executeQuery / executeUpdate match, not
        // every method whose name merely contains "execute".
        query: r#"(method_invocation name: (identifier) @id (#match? @id "^execute(Query|Update)?$") arguments: (argument_list (binary_expression) @concat)) @vuln"#,
        severity: Severity::Medium,
    },
];

View file

@ -0,0 +1,40 @@
use crate::patterns::{Pattern, Severity};
/// Tree-sitter queries that flag risky constructs in JavaScript sources.
///
/// Each query tags the node to report with `@vuln`.
pub const PATTERNS: &[Pattern] = &[
    Pattern {
        id: "eval_call",
        description: "Use of eval()",
        query: r#"(call_expression function: (identifier) @id (#eq? @id "eval")) @vuln"#,
        severity: Severity::High,
    },
    Pattern {
        id: "new_function",
        description: "new Function() constructor",
        query: r#"(new_expression constructor: (identifier) @id (#eq? @id "Function")) @vuln"#,
        severity: Severity::High,
    },
    Pattern {
        id: "document_write",
        description: "document.write() call",
        query: r#"(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj "document") property: (property_identifier) @prop (#eq? @prop "write"))) @vuln"#,
        severity: Severity::Medium,
    },
    Pattern {
        id: "inner_html_assignment",
        description: "Assignment to element.innerHTML",
        query: r#"(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop "innerHTML"))) @vuln"#,
        severity: Severity::Medium,
    },
    Pattern {
        id: "settimeout_string",
        description: "setTimeout / setInterval with a string argument",
        // The regex is anchored so names such as `resetTimeout` do not match.
        // The leading `.` pins the string to the first argument (the code to
        // run); no trailing argument is required, so `setTimeout("code")` is
        // reported as well.
        query: r#"(call_expression function: (identifier) @id (#match? @id "^(setTimeout|setInterval)$") arguments: (arguments . (string) @code)) @vuln"#,
        severity: Severity::Medium,
    },
    Pattern {
        id: "json_parse",
        description: "JSON.parse on dynamic string",
        query: r#"(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj "JSON") property: (property_identifier) @prop (#eq? @prop "parse"))) @vuln"#,
        severity: Severity::Low,
    },
];

79
src/patterns/mod.rs Normal file
View file

@ -0,0 +1,79 @@
pub mod rust;
pub mod typescript;
pub mod javascript;
pub mod cpp;
pub mod c;
mod java;
mod go;
mod php;
mod python;
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use once_cell::sync::Lazy;
/// How bad / noisy a pattern is considered.
///
/// Variants are declared from least to most severe. The derived ordering
/// follows declaration order (`Low < Medium < High`), which is what severity
/// thresholds compare against — do not reorder the variants.
///
/// The ordering is total, so `Eq`, `Ord` and `Hash` are derived as well; this
/// allows `max()`/`min()`, sorting, and use as a map or set key.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Severity {
    Low,
    Medium,
    High,
}
/// One AST pattern with a tree-sitter query and meta-data.
///
/// All fields are `'static` so that pattern tables can be declared as `const`
/// slices in the per-language modules.
#[derive(Debug, Clone, Serialize)]
pub struct Pattern {
/// Unique identifier (snake-case preferred).
pub id: &'static str,
/// Human-readable explanation.
pub description: &'static str,
/// tree-sitter query string; the pattern tables in this crate tag the node
/// to report with a `@vuln` capture.
pub query: &'static str,
/// Rough severity bucket.
pub severity: Severity,
}
/// Lazily-built lookup table from a lower-case language name (or alias such as
/// `"ts"`, `"py"`, `"c++"`) to that language's static pattern slice.
static REGISTRY: Lazy<HashMap<&'static str, &'static [Pattern]>> = Lazy::new(|| {
    let table: HashMap<&'static str, &'static [Pattern]> = HashMap::from([
        // Rust
        ("rust", rust::PATTERNS),
        // TypeScript and its aliases
        ("typescript", typescript::PATTERNS),
        ("ts", typescript::PATTERNS),
        ("tsx", typescript::PATTERNS),
        // JavaScript and its alias
        ("javascript", javascript::PATTERNS),
        ("js", javascript::PATTERNS),
        // C and C++
        ("c", c::PATTERNS),
        ("cpp", cpp::PATTERNS),
        ("c++", cpp::PATTERNS),
        // Remaining languages
        ("java", java::PATTERNS),
        ("go", go::PATTERNS),
        ("php", php::PATTERNS),
        ("python", python::PATTERNS),
        ("py", python::PATTERNS),
    ]);

    tracing::debug!("AST-pattern registry initialised ({} languages)", table.len());
    table
});
/// Look up the patterns registered for `lang`, ignoring ASCII case.
///
/// The returned `Vec` is an owned copy of the registry's static slice; a
/// language that is not registered produces an empty `Vec`.
pub fn load(lang: &str) -> Vec<Pattern> {
    let normalized = lang.to_ascii_lowercase();
    match REGISTRY.get(normalized.as_str()) {
        Some(patterns) => patterns.to_vec(),
        None => Vec::new(),
    }
}

40
src/patterns/php.rs Normal file
View file

@ -0,0 +1,40 @@
use crate::patterns::{Pattern, Severity};
/// Tree-sitter queries that flag risky constructs in PHP sources.
///
/// Each query tags the node to report with `@vuln`.
/// NOTE(review): the argument queries assume the grammar wraps every call
/// argument in an `argument` node and names double-quoted strings
/// `encapsed_string` — confirm against the pinned tree-sitter-php version.
pub const PATTERNS: &[Pattern] = &[
    Pattern {
        id: "eval_call",
        description: "eval($code) execution",
        query: r#"(function_call_expression function: (name) @n (#eq? @n "eval")) @vuln"#,
        severity: Severity::High,
    },
    Pattern {
        id: "preg_replace_e",
        description: "preg_replace with deprecated /e modifier",
        // Matches a first argument whose text ends in `/<modifiers>` followed
        // by the closing quote, where the modifiers contain `e`. The previous
        // regex `/.*e.*$/` demanded a `/` after end-of-text and so never matched.
        query: r#"(function_call_expression function: (name) @n (#eq? @n "preg_replace") arguments: (arguments . (argument [(string) (encapsed_string)] @pat (#match? @pat "/[A-Za-z]*e[A-Za-z]*['\"]$")))) @vuln"#,
        severity: Severity::High,
    },
    Pattern {
        id: "create_function",
        description: "create_function(...) anonymous eval-like",
        query: r#"(function_call_expression function: (name) @n (#eq? @n "create_function")) @vuln"#,
        severity: Severity::Medium,
    },
    Pattern {
        id: "unserialize_call",
        description: "unserialize(...) on user input",
        query: r#"(function_call_expression function: (name) @n (#eq? @n "unserialize")) @vuln"#,
        severity: Severity::High,
    },
    Pattern {
        id: "mysql_query_concat",
        description: "mysql_query with concatenated SQL",
        query: r#"(function_call_expression function: (name) @n (#eq? @n "mysql_query") arguments: (arguments (argument (binary_expression) @concat))) @vuln"#,
        severity: Severity::Medium,
    },
    Pattern {
        id: "system_call",
        description: "system()/shell_exec()/exec() command execution",
        // Anchored so that e.g. `curl_exec` or `mysqli_execute` are not flagged.
        query: r#"(function_call_expression function: (name) @n (#match? @n "^(system|shell_exec|exec|passthru)$")) @vuln"#,
        severity: Severity::Medium,
    },
];

40
src/patterns/python.rs Normal file
View file

@ -0,0 +1,40 @@
use crate::patterns::{Pattern, Severity};
/// Tree-sitter queries that flag risky constructs in Python sources.
///
/// Each query tags the node to report with `@vuln`. Child patterns are listed
/// in the order the children occur in the syntax tree (`object:` before
/// `attribute:` in an `attribute` node), because sibling patterns are matched
/// in sequence.
pub const PATTERNS: &[Pattern] = &[
    Pattern {
        id: "eval_call",
        description: "eval() on dynamic input",
        query: r#"(call function: (identifier) @id (#eq? @id "eval")) @vuln"#,
        severity: Severity::High,
    },
    Pattern {
        id: "exec_call",
        description: "exec(...) execution of dynamic code",
        query: r#"(call function: (identifier) @id (#eq? @id "exec")) @vuln"#,
        severity: Severity::High,
    },
    Pattern {
        id: "pickle_load",
        description: "pickle.load / loads unsafe deserialization",
        // Anchored so only `load` and `loads` match.
        query: r#"(call function: (attribute object: (identifier) @pkg (#eq? @pkg "pickle") attribute: (identifier) @attr (#match? @attr "^loads?$"))) @vuln"#,
        severity: Severity::High,
    },
    Pattern {
        id: "subprocess_shell_true",
        description: "subprocess.* with shell=True",
        // `True` is the `value:` of the `shell` keyword argument, and that
        // argument may appear at any position in the argument list.
        query: r#"(call function: (attribute object: (identifier) @pkg (#eq? @pkg "subprocess")) arguments: (argument_list (keyword_argument name: (identifier) @k (#eq? @k "shell") value: (true) @val))) @vuln"#,
        severity: Severity::Medium,
    },
    Pattern {
        id: "random_random",
        description: "random.random() for security-sensitive randomness",
        query: r#"(call function: (attribute object: (identifier) @pkg (#eq? @pkg "random") attribute: (identifier) @attr (#eq? @attr "random"))) @vuln"#,
        severity: Severity::Low,
    },
    Pattern {
        id: "sql_concat",
        description: "SQL query built via f-string or +-concat",
        // An f-string is a `string` node containing `interpolation` children;
        // concatenation is a `binary_operator`. The method name is anchored to
        // `execute` / `executemany`.
        query: r#"(call function: (attribute attribute: (identifier) @m (#match? @m "^execute(many)?$")) arguments: (argument_list [(string (interpolation)) (binary_operator)] @sql)) @vuln"#,
        severity: Severity::Medium,
    },
];

68
src/patterns/rust.rs Normal file
View file

@ -0,0 +1,68 @@
use crate::patterns::{Pattern, Severity};
/// The full catalogue of Rust patterns.
///
/// *Feel free to prune, extend, or tweak severities to suit your own threat
/// model.*
///
/// Each query tags the node to report with `@vuln`. Node and field names
/// follow the tree-sitter-rust grammar; a query naming a node the grammar does
/// not define fails to compile and is dropped by the query cache.
pub const PATTERNS: &[Pattern] = &[
    Pattern {
        id: "unsafe_block",
        description: "Use of an `unsafe` block",
        query: "(unsafe_block) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "unsafe_fn",
        description: "`unsafe fn` declaration",
        // Qualifiers such as `unsafe`/`const`/`async` are grouped in a
        // `function_modifiers` node; match on its text.
        query: r#"(function_item (function_modifiers) @kw (#match? @kw "unsafe")) @vuln"#,
        severity: Severity::High,
    },
    Pattern {
        id: "unwrap_call",
        description: "`.unwrap()` call (may panic)",
        query: r#"(call_expression function: (field_expression field: (field_identifier) @name (#eq? @name "unwrap"))) @vuln"#,
        severity: Severity::Medium,
    },
    Pattern {
        id: "expect_call",
        description: "`.expect()` call (may panic)",
        query: r#"(call_expression function: (field_expression field: (field_identifier) @name (#eq? @name "expect"))) @vuln"#,
        severity: Severity::Medium,
    },
    Pattern {
        id: "panic_macro",
        description: "`panic!` macro invocation",
        query: r#"(macro_invocation (identifier) @id (#eq? @id "panic")) @vuln"#,
        severity: Severity::Medium,
    },
    Pattern {
        id: "todo_or_unimplemented",
        description: "`todo!()` / `unimplemented!()` placeholder",
        // Anchored so unrelated macros containing these words are ignored.
        query: r#"(macro_invocation (identifier) @id (#match? @id "^(todo|unimplemented)$")) @vuln"#,
        severity: Severity::Low,
    },
    Pattern {
        id: "transmute_call",
        description: "`std::mem::transmute` call",
        // Match on the final path segment only, so `mem::transmute`,
        // `std::mem::transmute` and `core::mem::transmute` are all reported.
        query: r#"(call_expression function: (scoped_identifier name: (identifier) @f (#eq? @f "transmute"))) @vuln"#,
        severity: Severity::High,
    },
    Pattern {
        id: "get_unchecked",
        description: "`get_unchecked` or `get_unchecked_mut` slice access",
        query: r#"(call_expression function: (field_expression field: (field_identifier) @m (#match? @m "^get_unchecked(_mut)?$"))) @vuln"#,
        severity: Severity::High,
    },
    Pattern {
        id: "copy_nonoverlapping",
        description: "Raw pointer `copy_nonoverlapping`",
        // Final path segment only, so `ptr::…` and `std::ptr::…` both match.
        query: r#"(call_expression function: (scoped_identifier name: (identifier) @f (#eq? @f "copy_nonoverlapping"))) @vuln"#,
        severity: Severity::High,
    },
    Pattern {
        id: "narrow_cast_with_as",
        description: "`as` cast to an 8-/16-bit integer (possible truncation)",
        // `expr as T` is a `type_cast_expression` with `value:` and `type:` fields.
        query: r#"(type_cast_expression type: (primitive_type) @to (#match? @to "^(u8|i8|u16|i16)$")) @vuln"#,
        severity: Severity::Low,
    },
];

View file

@ -0,0 +1,46 @@
use crate::patterns::{Pattern, Severity};
/// Tree-sitter queries that flag risky constructs in TypeScript sources.
///
/// Mirrors the JavaScript catalogue and adds a check for `any` annotations.
/// Each query tags the node to report with `@vuln`.
pub const PATTERNS: &[Pattern] = &[
    Pattern {
        id: "eval_call",
        description: "Use of eval()",
        query: r#"(call_expression function: (identifier) @id (#eq? @id "eval")) @vuln"#,
        severity: Severity::High,
    },
    Pattern {
        id: "new_function",
        description: "new Function() constructor",
        query: r#"(new_expression constructor: (identifier) @id (#eq? @id "Function")) @vuln"#,
        severity: Severity::High,
    },
    Pattern {
        id: "document_write",
        description: "document.write() call",
        query: r#"(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj "document") property: (property_identifier) @prop (#eq? @prop "write"))) @vuln"#,
        severity: Severity::Medium,
    },
    Pattern {
        id: "inner_html_assignment",
        description: "Assignment to element.innerHTML",
        query: r#"(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop "innerHTML"))) @vuln"#,
        severity: Severity::Medium,
    },
    Pattern {
        id: "settimeout_string",
        description: "setTimeout / setInterval with a string argument",
        // The regex is anchored so names such as `resetTimeout` do not match.
        // The leading `.` pins the string to the first argument (the code to
        // run); no trailing argument is required, so `setTimeout("code")` is
        // reported as well.
        query: r#"(call_expression function: (identifier) @id (#match? @id "^(setTimeout|setInterval)$") arguments: (arguments . (string) @code)) @vuln"#,
        severity: Severity::Medium,
    },
    Pattern {
        id: "any_type",
        description: "Type annotation of `any`",
        query: r#"(type_annotation (predefined_type) @t (#eq? @t "any")) @vuln"#,
        severity: Severity::Low,
    },
    Pattern {
        id: "json_parse",
        description: "JSON.parse on dynamic string",
        query: r#"(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj "JSON") property: (property_identifier) @prop (#eq? @prop "parse"))) @vuln"#,
        severity: Severity::Low,
    },
];

View file

@ -2,10 +2,14 @@ use serde::{Deserialize, Serialize};
use std::path::{Path};
use std::fs;
use toml;
use crate::patterns::Severity;
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(default)]
pub struct ScannerConfig {
/// The minimum severity level to output
pub min_severity: Severity,
/// The maximum file size to scan, in megabytes. TODO: IMPLEMENT
pub max_file_size_mb: u64,
@ -39,6 +43,7 @@ pub struct ScannerConfig {
impl Default for ScannerConfig {
fn default() -> Self {
Self {
min_severity: Severity::Low,
max_file_size_mb: 100,
excluded_extensions: vec![
"jpg", "png", "gif", "mp4", "avi", "mkv",

View file

@ -1,5 +1,6 @@
pub mod project;
pub mod config;
pub(crate) mod query_cache;
// Re-export commonly used functions for convenience
pub use project::{get_project_info};

37
src/utils/query_cache.rs Normal file
View file

@ -0,0 +1,37 @@
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use once_cell::sync::Lazy;
use tree_sitter::{Language, Query};
use crate::patterns::{self, Pattern};
/// A pattern paired with its compiled tree-sitter query.
///
/// Cloning copies the `Pattern` metadata and bumps the `Arc` reference count;
/// the compiled `Query` itself is shared, not recompiled.
#[derive(Clone)]
pub struct CompiledQuery {
/// The pattern (id, description, query text, severity) this query came from.
pub meta: Pattern,
/// The compiled query, shared between all clones.
pub query: Arc<Query>,
}
/// Process-wide cache of compiled queries, keyed by the language key passed to
/// `for_lang`. Created empty on first access.
static CACHE: Lazy<RwLock<HashMap<&'static str, Vec<CompiledQuery>>>> =
Lazy::new(|| RwLock::new(HashMap::new()));
/// Returns the compiled queries for `lang`, compiling and caching them on the
/// first request.
///
/// * `lang` – cache key, also used to look the patterns up via
///   `patterns::load`.
/// * `ts_lang` – grammar the queries are compiled against. It is only used on
///   a cache miss, so callers must always pass the same grammar for a given
///   `lang`.
///
/// Patterns whose query fails to compile are logged with `tracing::warn!` and
/// left out of the result. The returned `Vec` is a clone of the cached entry.
///
/// Compilation happens while the write lock is held and goes through the map's
/// entry API, so concurrent first requests for the same language compile the
/// queries only once. A poisoned lock is recovered rather than propagated as a
/// panic: the map is only ever modified by whole-entry insertion, so it cannot
/// be observed half-updated.
pub fn for_lang(lang: &'static str, ts_lang: Language) -> Vec<CompiledQuery> {
    // Fast path: shared read lock. The guard is a temporary of this `let`
    // statement, so it is released before the write lock is requested below.
    let cached = CACHE
        .read()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
        .get(lang)
        .cloned();
    if let Some(hit) = cached {
        return hit;
    }

    // Slow path: `entry` re-checks under the write lock, so a thread that lost
    // the race reuses the winner's result instead of compiling again.
    let mut cache = CACHE
        .write()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    cache
        .entry(lang)
        .or_insert_with(|| {
            let patterns = patterns::load(lang);
            let mut compiled = Vec::with_capacity(patterns.len());
            for p in patterns {
                match Query::new(&ts_lang, p.query) {
                    Ok(q) => compiled.push(CompiledQuery { meta: p, query: Arc::new(q) }),
                    Err(e) => tracing::warn!(lang, id = p.id, "query compile error: {e}"),
                }
            }
            compiled
        })
        .clone()
}