Add multi-language AST-pattern scanning support

- Introduced `patterns` module with language-specific vulnerability patterns.
- Added `query_cache` utility for caching compiled queries.
- Expanded `scan.rs` to support scanning multiple languages dynamically.
- Updated `Cargo.toml` with additional tree-sitter dependencies.
- Added severity filtering to `ScannerConfig` for better configuration.
This commit is contained in:
elipeter 2025-06-17 01:17:48 +02:00
parent 0831b9fb48
commit 22369cc404
17 changed files with 665 additions and 25 deletions

40
src/patterns/c.rs Normal file
View file

@ -0,0 +1,40 @@
use crate::patterns::{Pattern, Severity};
/// AST patterns for C sources.
///
/// Each entry flags a call to a libc function by name; the whole call
/// expression is captured as `@vuln`.
pub const PATTERNS: &[Pattern] = &[
    // --- unbounded string / buffer writes ---
    Pattern {
        id: "strcpy_call",
        description: "strcpy() usage",
        query: "(call_expression function: (identifier) @id (#eq? @id \"strcpy\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "strcat_call",
        description: "strcat() usage",
        query: "(call_expression function: (identifier) @id (#eq? @id \"strcat\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "sprintf_call",
        description: "sprintf() (no length limit)",
        query: "(call_expression function: (identifier) @id (#eq? @id \"sprintf\")) @vuln",
        severity: Severity::High,
    },
    // --- unbounded input reads ---
    Pattern {
        id: "gets_call",
        description: "gets() usage",
        query: "(call_expression function: (identifier) @id (#eq? @id \"gets\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "scanf_with_percent_s",
        description: "scanf(\"%s\") without length specifier",
        // Only fires when a string-literal argument contains a bare `%s`.
        query: "(call_expression function: (identifier) @id (#eq? @id \"scanf\") arguments: (argument_list (string_literal) @fmt (#match? @fmt \".*%s.*\"))) @vuln",
        severity: Severity::High,
    },
    // --- shell execution ---
    Pattern {
        id: "system_call",
        description: "system() shell execution",
        query: "(call_expression function: (identifier) @id (#eq? @id \"system\")) @vuln",
        severity: Severity::Medium,
    },
];

40
src/patterns/cpp.rs Normal file
View file

@ -0,0 +1,40 @@
use crate::patterns::{Pattern, Severity};
/// AST patterns for C++ sources.
///
/// Covers the classic unbounded libc calls plus `reinterpret_cast`. Every
/// query captures the flagged expression as `@vuln`.
pub const PATTERNS: &[Pattern] = &[
    Pattern {
        id: "strcpy_call",
        description: "strcpy() usage",
        query: "(call_expression function: (identifier) @id (#eq? @id \"strcpy\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "strcat_call",
        description: "strcat() usage",
        query: "(call_expression function: (identifier) @id (#eq? @id \"strcat\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "sprintf_call",
        description: "sprintf() (no length limit)",
        query: "(call_expression function: (identifier) @id (#eq? @id \"sprintf\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "gets_call",
        description: "gets() usage",
        query: "(call_expression function: (identifier) @id (#eq? @id \"gets\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "system_call",
        description: "system() shell execution",
        query: "(call_expression function: (identifier) @id (#eq? @id \"system\")) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "reinterpret_cast",
        description: "reinterpret_cast usage",
        // tree-sitter-cpp has no dedicated cast node: `reinterpret_cast<T>(x)`
        // parses as a call whose callee is a `template_function` named
        // `reinterpret_cast`.
        query: "(call_expression function: (template_function name: (identifier) @id (#eq? @id \"reinterpret_cast\"))) @vuln",
        severity: Severity::Medium,
    },
];

34
src/patterns/go.rs Normal file
View file

@ -0,0 +1,34 @@
use crate::patterns::{Pattern, Severity};
/// AST patterns for Go sources.
///
/// Every query captures the flagged node as `@vuln`. Field and node names
/// follow the tree-sitter-go grammar (`selector_expression` exposes
/// `operand`/`field`, `qualified_type` exposes `package`/`name`).
pub const PATTERNS: &[Pattern] = &[
    Pattern {
        id: "exec_command",
        description: "os/exec Command construction",
        query: "(call_expression function: (selector_expression field: (field_identifier) @f (#eq? @f \"Command\"))) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "http_insecure_tls",
        description: "tls.Config{InsecureSkipVerify: true} (certificate verification disabled)",
        // Matches the keyed element itself, wherever the config literal is
        // built. Key and value are compared by text through wildcards, so the
        // query does not depend on how a grammar version wraps literal
        // elements.
        query: "(keyed_element (_) @k (#eq? @k \"InsecureSkipVerify\") (_) @v (#eq? @v \"true\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "unsafe_pointer",
        description: "Use of unsafe.Pointer",
        // `unsafe.Pointer` is a `qualified_type` in type position, but a
        // conversion such as `unsafe.Pointer(p)` parses as a call on a
        // `selector_expression`, so both shapes are covered.
        query: "([(qualified_type package: (package_identifier) @pkg name: (type_identifier) @f) (selector_expression operand: (identifier) @pkg field: (field_identifier) @f)] @vuln (#eq? @pkg \"unsafe\") (#eq? @f \"Pointer\"))",
        severity: Severity::High,
    },
    Pattern {
        id: "md5_sha1",
        description: "crypto/md5 or crypto/sha1 usage",
        // Anchored so that identifiers merely containing "md5"/"sha1" are
        // not reported.
        query: "(call_expression function: (selector_expression operand: (identifier) @pkg (#match? @pkg \"^(md5|sha1)$\"))) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "hardcoded_secret",
        description: "Hard-coded string that looks like an API key/token",
        // The predicate must live inside the parenthesised pattern it
        // constrains; the literal is captured as `@vuln` like every other
        // pattern in this table.
        query: "((interpreted_string_literal) @vuln (#match? @vuln \"(?i)(api|secret|token|password)[=:]?[ \\t]*[A-Za-z0-9_\\-]{8,}\"))",
        severity: Severity::Low,
    },
];

40
src/patterns/java.rs Normal file
View file

@ -0,0 +1,40 @@
use crate::patterns::{Pattern, Severity};
/// AST patterns for Java sources.
///
/// Every query captures the flagged expression as `@vuln`.
pub const PATTERNS: &[Pattern] = &[
    Pattern {
        id: "runtime_exec",
        description: "Runtime.getRuntime().exec(...) arbitrary-command execution",
        query: "(method_invocation object: (method_invocation name: (identifier) @n (#eq? @n \"getRuntime\")) name: (identifier) @id (#eq? @id \"exec\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "class_for_name",
        description: "Dynamic reflection via Class.forName(...)",
        query: "(method_invocation object: (identifier) @c (#eq? @c \"Class\") name: (identifier) @id (#eq? @id \"forName\")) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "object_deserialization",
        description: "java.io.ObjectInputStream#readObject() deserialization",
        // `readObject` is an instance method, so the receiver is a variable
        // or expression, never the class name; match on the method name only.
        query: "(method_invocation name: (identifier) @id (#eq? @id \"readObject\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "insecure_random",
        description: "java.util.Random used where SecureRandom is expected",
        // The type of a `new` expression is a `type_identifier` node.
        query: "(object_creation_expression type: (type_identifier) @t (#eq? @t \"Random\")) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "thread_stop",
        description: "Deprecated Thread.stop() invocation",
        // NOTE(review): this only fires when the receiver is an identifier
        // literally spelled `Thread`; calls through a thread variable are not
        // matched. Left as is because a bare `.stop()` match would be noisy.
        query: "(method_invocation name: (identifier) @id (#eq? @id \"stop\") object: (identifier) @obj (#eq? @obj \"Thread\")) @vuln",
        severity: Severity::Low,
    },
    Pattern {
        id: "sql_concat",
        description: "SQL built with string concatenation",
        // Anchored: only `execute`, `executeQuery` and `executeUpdate`.
        query: "(method_invocation name: (identifier) @id (#match? @id \"^execute(Query|Update)?$\") arguments: (argument_list (binary_expression) @concat)) @vuln",
        severity: Severity::Medium,
    },
];

View file

@ -0,0 +1,40 @@
use crate::patterns::{Pattern, Severity};
/// AST patterns for JavaScript sources.
///
/// Every query captures the flagged expression as `@vuln`.
pub const PATTERNS: &[Pattern] = &[
    Pattern {
        id: "eval_call",
        description: "Use of eval()",
        query: "(call_expression function: (identifier) @id (#eq? @id \"eval\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "new_function",
        description: "new Function() constructor",
        query: "(new_expression constructor: (identifier) @id (#eq? @id \"Function\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "document_write",
        description: "document.write() call",
        query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"write\"))) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "inner_html_assignment",
        description: "Assignment to element.innerHTML",
        query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"innerHTML\"))) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "settimeout_string",
        description: "setTimeout / setInterval with a string argument",
        // The code-to-run is the FIRST argument, so the string is anchored to
        // the start of the argument list (a string delay or extra argument
        // further right is harmless). The name match is anchored as well.
        query: "(call_expression function: (identifier) @id (#match? @id \"^(setTimeout|setInterval)$\") arguments: (arguments . (string) @code)) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "json_parse",
        description: "JSON.parse on dynamic string",
        query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"JSON\") property: (property_identifier) @prop (#eq? @prop \"parse\"))) @vuln",
        severity: Severity::Low,
    },
];

79
src/patterns/mod.rs Normal file
View file

@ -0,0 +1,79 @@
pub mod rust;
pub mod typescript;
pub mod javascript;
pub mod cpp;
pub mod c;
mod java;
mod go;
mod php;
mod python;
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use once_cell::sync::Lazy;
/// How bad / noisy a pattern is considered.
///
/// Variants are declared from least to most severe, so the derived ordering
/// yields `Low < Medium < High`. The total-order and hashing derives let
/// callers sort by severity or use it as a map key.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Severity {
    /// Lowest bucket.
    Low,
    /// Middle bucket.
    Medium,
    /// Highest bucket.
    High,
}
/// One AST pattern with a tree-sitter query and meta-data.
///
/// Every field is a `&'static str` or a `Copy` enum, so values can be written
/// directly into the per-language `const PATTERNS` tables.
// NOTE(review): the pattern tables capture the flagged node as `@vuln`;
// confirm against the scanner how captures are consumed.
#[derive(Debug, Clone, Serialize)]
pub struct Pattern {
/// Unique identifier (snake-case preferred).
pub id: &'static str,
/// Human-readable explanation.
pub description: &'static str,
/// tree-sitter query string.
pub query: &'static str,
/// Rough severity bucket.
pub severity: Severity,
}
/// Process-wide lookup table: lower-case language name or alias → pattern slice.
///
/// Built once on first access. Several keys may point at the same slice
/// (e.g. `"ts"` and `"tsx"` share the TypeScript table).
static REGISTRY: Lazy<HashMap<&'static str, &'static [Pattern]>> = Lazy::new(|| {
    let registry = HashMap::from([
        // Rust
        ("rust", rust::PATTERNS),
        // TypeScript and its aliases
        ("typescript", typescript::PATTERNS),
        ("ts", typescript::PATTERNS),
        ("tsx", typescript::PATTERNS),
        // JavaScript
        ("javascript", javascript::PATTERNS),
        ("js", javascript::PATTERNS),
        // C family
        ("c", c::PATTERNS),
        ("cpp", cpp::PATTERNS),
        ("c++", cpp::PATTERNS),
        // Remaining languages
        ("java", java::PATTERNS),
        ("go", go::PATTERNS),
        ("php", php::PATTERNS),
        ("python", python::PATTERNS),
        ("py", python::PATTERNS),
    ]);
    // The logged count is the number of keys, aliases included.
    tracing::debug!("AST-pattern registry initialised ({} languages)", registry.len());
    registry
});
/// Look up the patterns registered for `lang`.
///
/// The name is lower-cased (ASCII only) before the lookup, so `"Rust"` and
/// `"rust"` resolve to the same table. The returned `Vec` is an owned copy of
/// the registered slice; a language that is not registered produces an
/// **empty** `Vec`.
pub fn load(lang: &str) -> Vec<Pattern> {
    let normalised = lang.to_ascii_lowercase();
    match REGISTRY.get(normalised.as_str()) {
        Some(patterns) => patterns.to_vec(),
        None => Vec::new(),
    }
}

40
src/patterns/php.rs Normal file
View file

@ -0,0 +1,40 @@
use crate::patterns::{Pattern, Severity};
/// AST patterns for PHP sources.
///
/// Every query captures the flagged call as `@vuln`. In tree-sitter-php each
/// call argument is wrapped in an `(argument …)` node, so argument-level
/// patterns descend through it.
pub const PATTERNS: &[Pattern] = &[
    Pattern {
        id: "eval_call",
        description: "eval($code) execution",
        query: "(function_call_expression function: (name) @n (#eq? @n \"eval\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "preg_replace_e",
        description: "preg_replace with deprecated /e modifier",
        // Looks at the first argument only (the regex). The regex requires an
        // `e` among the modifier letters after the last `/`; the trailing `.`
        // consumes the closing quote that is part of the node text.
        query: "(function_call_expression function: (name) @n (#eq? @n \"preg_replace\") arguments: (arguments . (argument [(string) (encapsed_string)] @pat (#match? @pat \"/[A-Za-z]*e[A-Za-z]*.$\")))) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "create_function",
        description: "create_function(...) anonymous eval-like",
        query: "(function_call_expression function: (name) @n (#eq? @n \"create_function\")) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "unserialize_call",
        description: "unserialize(...) on user input",
        query: "(function_call_expression function: (name) @n (#eq? @n \"unserialize\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "mysql_query_concat",
        description: "mysql_query with concatenated SQL",
        query: "(function_call_expression function: (name) @n (#eq? @n \"mysql_query\") arguments: (arguments (argument (binary_expression) @concat))) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "system_call",
        description: "system()/shell_exec()/exec() command execution",
        // Anchored so that e.g. `curl_exec` is not reported.
        query: "(function_call_expression function: (name) @n (#match? @n \"^(system|shell_exec|exec|passthru)$\")) @vuln",
        severity: Severity::Medium,
    },
];

40
src/patterns/python.rs Normal file
View file

@ -0,0 +1,40 @@
use crate::patterns::{Pattern, Severity};
/// AST patterns for Python sources.
///
/// Every query captures the flagged call as `@vuln`.
pub const PATTERNS: &[Pattern] = &[
    Pattern {
        id: "eval_call",
        description: "eval() on dynamic input",
        query: "(call function: (identifier) @id (#eq? @id \"eval\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "exec_call",
        description: "exec(...) execution of dynamic code",
        query: "(call function: (identifier) @id (#eq? @id \"exec\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "pickle_load",
        description: "pickle.load / loads unsafe deserialization",
        // Anchored: exactly `load` or `loads`.
        query: "(call function: (attribute attribute: (identifier) @attr (#match? @attr \"^loads?$\") object: (identifier) @pkg (#eq? @pkg \"pickle\"))) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "subprocess_shell_true",
        description: "subprocess.* with shell=True",
        // `shell=True` is ONE `keyword_argument` node whose `value` is `true`,
        // and it may appear at any position in the argument list.
        query: "(call function: (attribute object: (identifier) @pkg (#eq? @pkg \"subprocess\")) arguments: (argument_list (keyword_argument name: (identifier) @k (#eq? @k \"shell\") value: (true) @val))) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "random_random",
        description: "random.random() for security-sensitive randomness",
        query: "(call function: (attribute attribute: (identifier) @attr (#eq? @attr \"random\") object: (identifier) @pkg (#eq? @pkg \"random\"))) @vuln",
        severity: Severity::Low,
    },
    Pattern {
        id: "sql_concat",
        description: "SQL query built via f-string or +-concat",
        // f-strings are ordinary `string` nodes that contain `interpolation`
        // children; operator-built strings are `binary_operator` nodes.
        query: "(call function: (attribute attribute: (identifier) @m (#match? @m \"^(execute|executemany)$\")) arguments: (argument_list [(string (interpolation)) (binary_operator)] @fstr)) @vuln",
        severity: Severity::Medium,
    },
];

68
src/patterns/rust.rs Normal file
View file

@ -0,0 +1,68 @@
use crate::patterns::{Pattern, Severity};
/// The full catalogue of Rust patterns.
///
/// Every query captures the flagged node as `@vuln`; node and field names
/// follow the tree-sitter-rust grammar.
///
/// *Feel free to prune, extend, or tweak severities to suit your own threat
/// model.*
pub const PATTERNS: &[Pattern] = &[
    Pattern {
        id: "unsafe_block",
        description: "Use of an `unsafe` block",
        query: "(unsafe_block) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "unsafe_fn",
        description: "`unsafe fn` declaration",
        // Qualifiers such as `unsafe`, `const`, `async` are grouped under a
        // single `function_modifiers` node, so match on its text.
        query: "(function_item (function_modifiers) @kw (#match? @kw \"unsafe\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "unwrap_call",
        description: "`.unwrap()` call (may panic)",
        query: "(call_expression function: (field_expression field: (field_identifier) @name (#eq? @name \"unwrap\"))) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "expect_call",
        description: "`.expect()` call (may panic)",
        query: "(call_expression function: (field_expression field: (field_identifier) @name (#eq? @name \"expect\"))) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "panic_macro",
        description: "`panic!` macro invocation",
        query: "(macro_invocation (identifier) @id (#eq? @id \"panic\")) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "todo_or_unimplemented",
        description: "`todo!()` / `unimplemented!()` placeholder",
        // Anchored so that macros merely containing these words are ignored.
        query: "(macro_invocation (identifier) @id (#match? @id \"^(todo|unimplemented)$\")) @vuln",
        severity: Severity::Low,
    },
    Pattern {
        id: "transmute_call",
        description: "`std::mem::transmute` call",
        // The path is either the bare `mem` identifier (`mem::transmute`) or a
        // longer scoped path whose last segment is `mem` (`std::mem::transmute`).
        query: "(call_expression function: (scoped_identifier path: [(identifier) @p (scoped_identifier name: (identifier) @p)] (#eq? @p \"mem\") name: (identifier) @f (#eq? @f \"transmute\"))) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "get_unchecked",
        description: "`get_unchecked` or `get_unchecked_mut` slice access",
        query: "(call_expression function: (field_expression field: (field_identifier) @m (#match? @m \"^get_unchecked(_mut)?$\"))) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "copy_nonoverlapping",
        description: "Raw pointer `copy_nonoverlapping`",
        // Same path handling as `transmute_call`: `ptr::…` or `std::ptr::…`.
        query: "(call_expression function: (scoped_identifier path: [(identifier) @p (scoped_identifier name: (identifier) @p)] (#eq? @p \"ptr\") name: (identifier) @f (#eq? @f \"copy_nonoverlapping\"))) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "narrow_cast_with_as",
        description: "`as` cast to an 8-/16-bit integer (possible truncation)",
        // An `as` cast is a `type_cast_expression` with `value`/`type` fields.
        query: "(type_cast_expression type: (primitive_type) @to (#match? @to \"^(u8|i8|u16|i16)$\")) @vuln",
        severity: Severity::Low,
    },
];

View file

@ -0,0 +1,46 @@
use crate::patterns::{Pattern, Severity};
/// AST patterns for TypeScript sources.
///
/// Mirrors the JavaScript table and adds a check for `any` annotations. Every
/// query captures the flagged node as `@vuln`.
pub const PATTERNS: &[Pattern] = &[
    Pattern {
        id: "eval_call",
        description: "Use of eval()",
        query: "(call_expression function: (identifier) @id (#eq? @id \"eval\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "new_function",
        description: "new Function() constructor",
        query: "(new_expression constructor: (identifier) @id (#eq? @id \"Function\")) @vuln",
        severity: Severity::High,
    },
    Pattern {
        id: "document_write",
        description: "document.write() call",
        query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"write\"))) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "inner_html_assignment",
        description: "Assignment to element.innerHTML",
        query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"innerHTML\"))) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "settimeout_string",
        description: "setTimeout / setInterval with a string argument",
        // The code-to-run is the FIRST argument, so the string is anchored to
        // the start of the argument list (a string delay or extra argument
        // further right is harmless). The name match is anchored as well.
        query: "(call_expression function: (identifier) @id (#match? @id \"^(setTimeout|setInterval)$\") arguments: (arguments . (string) @code)) @vuln",
        severity: Severity::Medium,
    },
    Pattern {
        id: "any_type",
        description: "Type annotation of `any`",
        query: "(type_annotation (predefined_type) @t (#eq? @t \"any\")) @vuln",
        severity: Severity::Low,
    },
    Pattern {
        id: "json_parse",
        description: "JSON.parse on dynamic string",
        query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"JSON\") property: (property_identifier) @prop (#eq? @prop \"parse\"))) @vuln",
        severity: Severity::Low,
    },
];