From 22369cc404995f38e8012d0728e87c94cb22face Mon Sep 17 00:00:00 2001 From: elipeter Date: Tue, 17 Jun 2025 01:17:48 +0200 Subject: [PATCH] Add multi-language AST-pattern scanning support - Introduced `patterns` module with language-specific vulnerability patterns. - Added `query_cache` utility for caching compiled queries. - Expanded `scan.rs` to support scanning multiple languages dynamically. - Updated `Cargo.toml` with additional tree-sitter dependencies. - Added severity filtering to `ScannerConfig` for better configuration. --- Cargo.lock | 89 ++++++++++++++++++++++++++++++++++++++ Cargo.toml | 9 ++++ src/commands/scan.rs | 81 +++++++++++++++++++++++----------- src/main.rs | 1 + src/patterns/c.rs | 40 +++++++++++++++++ src/patterns/cpp.rs | 40 +++++++++++++++++ src/patterns/go.rs | 34 +++++++++++++++ src/patterns/java.rs | 40 +++++++++++++++++ src/patterns/javascript.rs | 40 +++++++++++++++++ src/patterns/mod.rs | 79 +++++++++++++++++++++++++++++++++ src/patterns/php.rs | 40 +++++++++++++++++ src/patterns/python.rs | 40 +++++++++++++++++ src/patterns/rust.rs | 68 +++++++++++++++++++++++++++++ src/patterns/typescript.rs | 46 ++++++++++++++++++++ src/utils/config.rs | 5 +++ src/utils/mod.rs | 1 + src/utils/query_cache.rs | 37 ++++++++++++++++ 17 files changed, 665 insertions(+), 25 deletions(-) create mode 100644 src/patterns/c.rs create mode 100644 src/patterns/cpp.rs create mode 100644 src/patterns/go.rs create mode 100644 src/patterns/java.rs create mode 100644 src/patterns/javascript.rs create mode 100644 src/patterns/mod.rs create mode 100644 src/patterns/php.rs create mode 100644 src/patterns/python.rs create mode 100644 src/patterns/rust.rs create mode 100644 src/patterns/typescript.rs create mode 100644 src/utils/query_cache.rs diff --git a/Cargo.lock b/Cargo.lock index d082f325..0d8119b7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,6 +13,7 @@ dependencies = [ "filetime", "ignore", "num_cpus", + "once_cell", "rusqlite", "serde", "tempfile", @@ -20,7 +21,15 @@ dependencies = [ "tracing", "tracing-subscriber", "tree-sitter", + "tree-sitter-c", + "tree-sitter-cpp", + "tree-sitter-go", + "tree-sitter-java", + "tree-sitter-javascript", + "tree-sitter-php", + "tree-sitter-python", "tree-sitter-rust", + "tree-sitter-typescript", ] [[package]] @@ -957,12 +966,82 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-c" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a3aad8f0129083a59fe8596157552d2bb7148c492d44c21558d68ca1c722707" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-cpp" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2196ea9d47b4ab4a31b9297eaa5a5d19a0b121dceb9f118f6790ad0ab94743" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-go" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b13d476345220dbe600147dd444165c5791bf85ef53e28acbedd46112ee18431" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-java" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0aa6cbcdc8c679b214e616fd3300da67da0e492e066df01bcf5a5921a71e90d6" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-javascript" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf40bf599e0416c16c125c3cec10ee5ddc7d1bb8b0c60fa5c4de249ad34dc1b1" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-language" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8" +[[package]] +name = "tree-sitter-php" +version = "0.23.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f066e94e9272cfe4f1dcb07a1c50c66097eca648f2d7233d299c8ae9ed8c130c" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-python" +version = "0.23.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d065aaa27f3aaceaf60c1f0e0ac09e1cb9eb8ed28e7bcdaa52129cffc7f4b04" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-rust" version = "0.24.0" @@ -973,6 +1052,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-typescript" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "unicode-ident" version = "1.0.18" diff --git a/Cargo.toml b/Cargo.toml index 9b3f94c5..41c07734 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,15 @@ rusqlite = "0.36.0" ignore = "0.4.23" tree-sitter = "0.25.6" tree-sitter-rust = "0.24.0" +tree-sitter-c = "0.24.1" +tree-sitter-cpp = "0.23.4" +tree-sitter-java = "0.23.5" +tree-sitter-typescript = "0.23.2" +tree-sitter-javascript = "0.23.1" +tree-sitter-go = "0.23.4" +tree-sitter-php = "0.23.11" +tree-sitter-python = "0.23.6" crossbeam-channel = "0.5.15" blake3 = "1.8.2" filetime = "0.2.25" +once_cell = "1.21.3" diff --git a/src/commands/scan.rs b/src/commands/scan.rs index 42cc9032..ebdb7100 100644 --- a/src/commands/scan.rs +++ b/src/commands/scan.rs @@ -2,8 +2,9 @@ use crate::cli::OutputFormat; use crate::utils::project::get_project_info; use std::path::Path; use crate::utils::config::Config; -use tree_sitter::{Parser}; +use tree_sitter::{Language, Parser, QueryCursor, StreamingIterator}; use crate::database::index::Indexer; +use crate::utils::query_cache; use crate::walk::spawn_senders; pub fn handle( @@ -72,34 +73,64 @@ fn scan_with_index(root: &Path, db_path: &Path, cfg: &Config) -> Result<(), Box< fn scan_single_file( path: &Path, - _cfg: &Config, -) -> Result<(), Box> { - if path.extension().and_then(|s| s.to_str()) != Some("rs") { - return Ok(()); - } - + cfg: &Config, // assume cfg.high_only: bool +) -> Result<(), Box> { let source = std::fs::read_to_string(path)?; - let mut parser = Parser::new(); - parser.set_language(&tree_sitter_rust::LANGUAGE.into())?; - let tree = parser.parse(&source, None).ok_or("tree-sitter failed")?; - let root = tree.root_node(); - - let mut fn_count = 0; - let mut cursor = root.walk(); - for child in root.children(&mut cursor) { - if child.kind() == "function_item" { - fn_count += 1; - } + let ext = path + .extension() + .and_then(|s| s.to_str()) + .unwrap_or_default() + .to_ascii_lowercase(); + + // Pick the right tree-sitter language *and* pre-compiled queries + let (ts_lang, lang_key): (Language, &'static str) = match ext.as_str() { + "rs" => (Language::from(tree_sitter_rust::LANGUAGE), "rust"), + "c" => (Language::from(tree_sitter_c::LANGUAGE), "c"), + "cpp" | "c++" => (Language::from(tree_sitter_cpp::LANGUAGE), "cpp"), + "java" => (Language::from(tree_sitter_java::LANGUAGE), "java"), + "go" => (Language::from(tree_sitter_go::LANGUAGE), "go"), + "php" => (Language::from(tree_sitter_php::LANGUAGE_PHP), "php"), + "py" => (Language::from(tree_sitter_python::LANGUAGE), "python"), + "ts" | "tsx" => (Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT), "typescript"), + "js" => (Language::from(tree_sitter_javascript::LANGUAGE), "javascript"), + _ => return Ok(()), + }; + + parser.set_language(&ts_lang)?; + + let tree = parser.parse(&source, None).ok_or("tree-sitter failed")?; + let root = tree.root_node(); + + // ----- run vulnerability patterns ----- + let compiled = query_cache::for_lang(lang_key, ts_lang); + let mut cursor = QueryCursor::new(); + + for cq in &compiled { + if cfg.scanner.min_severity > cq.meta.severity { + continue; + } + + let mut matches = cursor.matches(&cq.query, root, source.as_bytes()); + + while let Some(m) = matches.next() { + // capture 0 is the one tagged @vuln + for cap in m.captures.iter().filter(|c| c.index == 0) { + let point = cap.node.start_position(); + let line = point.row; + let col = point.column; + tracing::warn!( + file = %path.display(), + line = line + 1, + column = col + 1, + id = cq.meta.id, + sev = ?cq.meta.severity, + "pattern matched" + ); + } + } } - tracing::info!( - "scanned {} – found {} Rust function(s)", - path.display(), - fn_count - ); - - // TODO: real vulnerability/pattern checks go here Ok(()) } \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index bb097c4a..8920bdef 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,6 +3,7 @@ mod commands; mod utils; mod walk; mod database; +mod patterns; use crate::utils::Config; use cli::Cli; diff --git a/src/patterns/c.rs b/src/patterns/c.rs new file mode 100644 index 00000000..e3ef156c --- /dev/null +++ b/src/patterns/c.rs @@ -0,0 +1,40 @@ +use crate::patterns::{Pattern, Severity}; + +pub const PATTERNS: &[Pattern] = &[ + Pattern { + id: "strcpy_call", + description: "strcpy() usage", + query: "(call_expression function: (identifier) @id (#eq? @id \"strcpy\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "strcat_call", + description: "strcat() usage", + query: "(call_expression function: (identifier) @id (#eq? @id \"strcat\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "sprintf_call", + description: "sprintf() (no length limit)", + query: "(call_expression function: (identifier) @id (#eq? @id \"sprintf\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "gets_call", + description: "gets() usage", + query: "(call_expression function: (identifier) @id (#eq? @id \"gets\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "scanf_with_percent_s", + description: "scanf(\"%s\") without length specifier", + query: "(call_expression function: (identifier) @id (#eq? @id \"scanf\") arguments: (argument_list (string_literal) @fmt (#match? @fmt \".*%s.*\"))) @vuln", + severity: Severity::High, + }, + Pattern { + id: "system_call", + description: "system() shell execution", + query: "(call_expression function: (identifier) @id (#eq? @id \"system\")) @vuln", + severity: Severity::Medium, + }, +]; diff --git a/src/patterns/cpp.rs b/src/patterns/cpp.rs new file mode 100644 index 00000000..e67196a5 --- /dev/null +++ b/src/patterns/cpp.rs @@ -0,0 +1,40 @@ +use crate::patterns::{Pattern, Severity}; + +pub const PATTERNS: &[Pattern] = &[ + Pattern { + id: "strcpy_call", + description: "strcpy() usage", + query: "(call_expression function: (identifier) @id (#eq? @id \"strcpy\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "strcat_call", + description: "strcat() usage", + query: "(call_expression function: (identifier) @id (#eq? @id \"strcat\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "sprintf_call", + description: "sprintf() (no length limit)", + query: "(call_expression function: (identifier) @id (#eq? @id \"sprintf\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "gets_call", + description: "gets() usage", + query: "(call_expression function: (identifier) @id (#eq? @id \"gets\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "system_call", + description: "system() shell execution", + query: "(call_expression function: (identifier) @id (#eq? @id \"system\")) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "reinterpret_cast", + description: "reinterpret_cast usage", + query: "(reinterpret_cast_expression) @vuln", + severity: Severity::Medium, + }, +]; diff --git a/src/patterns/go.rs b/src/patterns/go.rs new file mode 100644 index 00000000..c4d2efb5 --- /dev/null +++ b/src/patterns/go.rs @@ -0,0 +1,34 @@ +use crate::patterns::{Pattern, Severity}; + +pub const PATTERNS: &[Pattern] = &[ + Pattern { + id: "exec_command", + description: "os/exec Command construction", + query: "(call_expression function: (selector_expression field: (field_identifier) @f (#eq? @f \"Command\"))) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "http_insecure_tls", + description: "&http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}", + query: "(composite_literal type: (selector_expression field: (field_identifier) @t (#eq? @t \"Transport\")) body: (literal_value (keyed_element key: (identifier) @k (#eq? @k \"TLSClientConfig\") value: (composite_literal body: (literal_value (keyed_element key: (identifier) @ik (#eq? @ik \"InsecureSkipVerify\") value: (true)))))) @vuln", + severity: Severity::High, + }, + Pattern { + id: "unsafe_pointer", + description: "Use of unsafe.Pointer", + query: "(qualified_type type: (selector_expression field: (field_identifier) @f (#eq? @f \"Pointer\"))) @vuln", + severity: Severity::High, + }, + Pattern { + id: "md5_sha1", + description: "crypto/md5 or crypto/sha1 usage", + query: "(call_expression function: (selector_expression object: (identifier) @pkg (#match? @pkg \"md5|sha1\"))) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "hardcoded_secret", + description: "Hard-coded string that looks like an API key/token", + query: "(interpreted_string_literal) @s (#match? @s \"(?i)(api|secret|token|password)[=:]?[ \\t]*[A-Za-z0-9_\\-]{8,}\")", + severity: Severity::Low, + }, +]; diff --git a/src/patterns/java.rs b/src/patterns/java.rs new file mode 100644 index 00000000..cebdcf69 --- /dev/null +++ b/src/patterns/java.rs @@ -0,0 +1,40 @@ +use crate::patterns::{Pattern, Severity}; + +pub const PATTERNS: &[Pattern] = &[ + Pattern { + id: "runtime_exec", + description: "Runtime.getRuntime().exec(...) – arbitrary-command execution", + query: "(method_invocation object: (method_invocation name: (identifier) @n (#eq? @n \"getRuntime\")) name: (identifier) @id (#eq? @id \"exec\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "class_for_name", + description: "Dynamic reflection via Class.forName(...)", + query: "(method_invocation object: (identifier) @c (#eq? @c \"Class\") name: (identifier) @id (#eq? @id \"forName\")) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "object_deserialization", + description: "java.io.ObjectInputStream#readObject() deserialization", + query: "(method_invocation object: (identifier) @o (#eq? @o \"ObjectInputStream\") name: (identifier) @id (#eq? @id \"readObject\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "insecure_random", + description: "java.util.Random used where SecureRandom is expected", + query: "(object_creation_expression type: (identifier) @t (#eq? @t \"Random\")) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "thread_stop", + description: "Deprecated Thread.stop() invocation", + query: "(method_invocation name: (identifier) @id (#eq? @id \"stop\") object: (identifier) @obj (#eq? @obj \"Thread\")) @vuln", + severity: Severity::Low, + }, + Pattern { + id: "sql_concat", + description: "SQL built with string concatenation", + query: "(method_invocation name: (identifier) @id (#match? @id \"execute(Query|Update)?\") arguments: (argument_list (binary_expression) @concat)) @vuln", + severity: Severity::Medium, + }, +]; diff --git a/src/patterns/javascript.rs b/src/patterns/javascript.rs new file mode 100644 index 00000000..cce31a17 --- /dev/null +++ b/src/patterns/javascript.rs @@ -0,0 +1,40 @@ +use crate::patterns::{Pattern, Severity}; + +pub const PATTERNS: &[Pattern] = &[ + Pattern { + id: "eval_call", + description: "Use of eval()", + query: "(call_expression function: (identifier) @id (#eq? @id \"eval\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "new_function", + description: "new Function() constructor", + query: "(new_expression constructor: (identifier) @id (#eq? @id \"Function\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "document_write", + description: "document.write() call", + query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"write\"))) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "inner_html_assignment", + description: "Assignment to element.innerHTML", + query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"innerHTML\"))) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "settimeout_string", + description: "setTimeout / setInterval with a string argument", + query: "(call_expression function: (identifier) @id (#match? @id \"setTimeout|setInterval\") arguments: (arguments (string) @code . _)) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "json_parse", + description: "JSON.parse on dynamic string", + query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"JSON\") property: (property_identifier) @prop (#eq? @prop \"parse\"))) @vuln", + severity: Severity::Low, + }, +]; diff --git a/src/patterns/mod.rs b/src/patterns/mod.rs new file mode 100644 index 00000000..5c2c532a --- /dev/null +++ b/src/patterns/mod.rs @@ -0,0 +1,79 @@ +pub mod rust; +pub mod typescript; +pub mod javascript; +pub mod cpp; +pub mod c; +mod java; +mod go; +mod php; +mod python; + +use std::collections::HashMap; +use serde::{Deserialize, Serialize}; +use once_cell::sync::Lazy; + +/// How bad / noisy a pattern is considered. +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd)] +pub enum Severity { + Low, + Medium, + High, +} + +/// One AST pattern with a tree-sitter query and meta-data. +#[derive(Debug, Clone, Serialize)] +pub struct Pattern { + /// Unique identifier (snake-case preferred). + pub id: &'static str, + /// Human-readable explanation. + pub description: &'static str, + /// tree-sitter query string. + pub query: &'static str, + /// Rough severity bucket. + pub severity: Severity, +} + +/// Global, lazily-initialised registry: lang-name → pattern slice +static REGISTRY: Lazy> = Lazy::new(|| { + let mut m = HashMap::new(); + + // ---- Rust ---- + m.insert("rust", rust::PATTERNS); + + // ---- TypeScript ---- + m.insert("typescript", typescript::PATTERNS); + m.insert("ts", typescript::PATTERNS); + m.insert("tsx", typescript::PATTERNS); + + // ---- JavaScript ---- + m.insert("javascript", javascript::PATTERNS); + m.insert("js", javascript::PATTERNS); + + // ---- C & C++ ---- + m.insert("c", c::PATTERNS); + m.insert("cpp", cpp::PATTERNS); + m.insert("c++", cpp::PATTERNS); + + // ---- Other languages in the folder ---- + m.insert("java", java::PATTERNS); + m.insert("go", go::PATTERNS); + m.insert("php", php::PATTERNS); + m.insert("python", python::PATTERNS); + m.insert("py", python::PATTERNS); + + tracing::debug!("AST-pattern registry initialised ({} languages)", m.len()); + + m +}); + +/// Return all patterns for the requested language (case-insensitive). +/// +/// Unknown languages yield an **empty** `Vec`. +pub fn load(lang: &str) -> Vec { + let key = lang.to_ascii_lowercase(); + REGISTRY + .get(key.as_str()) + .copied() // `&'static [Pattern]` → *copy* the slice pointer + .unwrap_or(&[]) // unknown lang ⇒ empty slice + .to_vec() // caller owns the `Vec` +} \ No newline at end of file diff --git a/src/patterns/php.rs b/src/patterns/php.rs new file mode 100644 index 00000000..fec96d31 --- /dev/null +++ b/src/patterns/php.rs @@ -0,0 +1,40 @@ +use crate::patterns::{Pattern, Severity}; + +pub const PATTERNS: &[Pattern] = &[ + Pattern { + id: "eval_call", + description: "eval($code) execution", + query: "(function_call_expression function: (name) @n (#eq? @n \"eval\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "preg_replace_e", + description: "preg_replace with deprecated /e modifier", + query: "(function_call_expression function: (name) @n (#eq? @n \"preg_replace\") arguments: (arguments (string) @pat (#match? @pat \"/.*e.*$/\"))) @vuln", + severity: Severity::High, + }, + Pattern { + id: "create_function", + description: "create_function(...) anonymous eval-like", + query: "(function_call_expression function: (name) @n (#eq? @n \"create_function\")) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "unserialize_call", + description: "unserialize(...) on user input", + query: "(function_call_expression function: (name) @n (#eq? @n \"unserialize\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "mysql_query_concat", + description: "mysql_query with concatenated SQL", + query: "(function_call_expression function: (name) @n (#eq? @n \"mysql_query\") arguments: (arguments (binary_expression) @concat)) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "system_call", + description: "system()/shell_exec()/exec() command execution", + query: "(function_call_expression function: (name) @n (#match? @n \"system|shell_exec|exec|passthru\")) @vuln", + severity: Severity::Medium, + }, +]; diff --git a/src/patterns/python.rs b/src/patterns/python.rs new file mode 100644 index 00000000..cd605880 --- /dev/null +++ b/src/patterns/python.rs @@ -0,0 +1,40 @@ +use crate::patterns::{Pattern, Severity}; + +pub const PATTERNS: &[Pattern] = &[ + Pattern { + id: "eval_call", + description: "eval() on dynamic input", + query: "(call function: (identifier) @id (#eq? @id \"eval\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "exec_call", + description: "exec(...) execution of dynamic code", + query: "(call function: (identifier) @id (#eq? @id \"exec\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "pickle_load", + description: "pickle.load / loads – unsafe deserialization", + query: "(call function: (attribute attribute: (identifier) @attr (#match? @attr \"load(s)?\") object: (identifier) @pkg (#eq? @pkg \"pickle\"))) @vuln", + severity: Severity::High, + }, + Pattern { + id: "subprocess_shell_true", + description: "subprocess.* with shell=True", + query: "(call function: (attribute object: (identifier) @pkg (#eq? @pkg \"subprocess\")) arguments: (argument_list . (keyword_argument name: (identifier) @k (#eq? @k \"shell\")) (true) @val)) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "random_random", + description: "random.random() for security-sensitive randomness", + query: "(call function: (attribute attribute: (identifier) @attr (#eq? @attr \"random\") object: (identifier) @pkg (#eq? @pkg \"random\"))) @vuln", + severity: Severity::Low, + }, + Pattern { + id: "sql_concat", + description: "SQL query built via f-string or +-concat", + query: "(call function: (attribute attribute: (identifier) @m (#match? @m \"execute|executemany\")) arguments: (argument_list (f_string) @fstr)) @vuln", + severity: Severity::Medium, + }, +]; diff --git a/src/patterns/rust.rs b/src/patterns/rust.rs new file mode 100644 index 00000000..635608b5 --- /dev/null +++ b/src/patterns/rust.rs @@ -0,0 +1,68 @@ +use crate::patterns::{Pattern, Severity}; + +/// The full catalogue. +/// +/// *Feel free to prune, extend, or tweak severities to suit your own threat +/// model.* +pub const PATTERNS: &[Pattern] = &[ + Pattern { + id: "unsafe_block", + description: "Use of an `unsafe` block", + query: "(unsafe_block) @vuln", + severity: Severity::High, + }, + Pattern { + id: "unsafe_fn", + description: "`unsafe fn` declaration", + query: "(function_item (modifier) @kw (#eq? @kw \"unsafe\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "unwrap_call", + description: "`.unwrap()` call (may panic)", + query: "(call_expression function: (field_expression field: (field_identifier) @name (#eq? @name \"unwrap\"))) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "expect_call", + description: "`.expect()` call (may panic)", + query: "(call_expression function: (field_expression field: (field_identifier) @name (#eq? @name \"expect\"))) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "panic_macro", + description: "`panic!` macro invocation", + query: "(macro_invocation (identifier) @id (#eq? @id \"panic\")) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "todo_or_unimplemented", + description: "`todo!()` / `unimplemented!()` placeholder", + query: "(macro_invocation (identifier) @id (#match? @id \"todo|unimplemented\")) @vuln", + severity: Severity::Low, + }, + Pattern { + id: "transmute_call", + description: "`std::mem::transmute` call", + query: "(call_expression function: (scoped_identifier path: (identifier) @p (#eq? @p \"mem\") name: (identifier) @f (#eq? @f \"transmute\"))) @vuln", + severity: Severity::High, + }, + Pattern { + id: "get_unchecked", + description: "`get_unchecked` or `get_unchecked_mut` slice access", + query: "(call_expression function: (field_expression field: (field_identifier) @m (#match? @m \"get_unchecked(_mut)?\"))) @vuln", + severity: Severity::High, + }, + Pattern { + id: "copy_nonoverlapping", + description: "Raw pointer `copy_nonoverlapping`", + query: "(call_expression function: (scoped_identifier path: (identifier) @p (#eq? @p \"ptr\") name: (identifier) @f (#eq? @f \"copy_nonoverlapping\"))) @vuln", + severity: Severity::High, + }, + Pattern { + id: "narrow_cast_with_as", + description: "`as` cast to an 8-/16-bit integer (possible truncation)", + query: "(as_expression left: (_) right: (primitive_type) @to (#match? @to \"u8|i8|u16|i16\")) @vuln", + severity: Severity::Low, + }, +]; diff --git a/src/patterns/typescript.rs b/src/patterns/typescript.rs new file mode 100644 index 00000000..30627c16 --- /dev/null +++ b/src/patterns/typescript.rs @@ -0,0 +1,46 @@ +use crate::patterns::{Pattern, Severity}; + +pub const PATTERNS: &[Pattern] = &[ + Pattern { + id: "eval_call", + description: "Use of eval()", + query: "(call_expression function: (identifier) @id (#eq? @id \"eval\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "new_function", + description: "new Function() constructor", + query: "(new_expression constructor: (identifier) @id (#eq? @id \"Function\")) @vuln", + severity: Severity::High, + }, + Pattern { + id: "document_write", + description: "document.write() call", + query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"write\"))) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "inner_html_assignment", + description: "Assignment to element.innerHTML", + query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"innerHTML\"))) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "settimeout_string", + description: "setTimeout / setInterval with a string argument", + query: "(call_expression function: (identifier) @id (#match? @id \"setTimeout|setInterval\") arguments: (arguments (string) @code . _)) @vuln", + severity: Severity::Medium, + }, + Pattern { + id: "any_type", + description: "Type annotation of `any`", + query: "(type_annotation (predefined_type) @t (#eq? @t \"any\")) @vuln", + severity: Severity::Low, + }, + Pattern { + id: "json_parse", + description: "JSON.parse on dynamic string", + query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"JSON\") property: (property_identifier) @prop (#eq? @prop \"parse\"))) @vuln", + severity: Severity::Low, + }, +]; \ No newline at end of file diff --git a/src/utils/config.rs b/src/utils/config.rs index 3810b1ff..7ac2fe94 100644 --- a/src/utils/config.rs +++ b/src/utils/config.rs @@ -2,10 +2,14 @@ use serde::{Deserialize, Serialize}; use std::path::{Path}; use std::fs; use toml; +use crate::patterns::Severity; #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(default)] pub struct ScannerConfig { + /// The minimum severity level to output + pub min_severity: Severity, + /// The maximum file size to scan, in megabytes. TODO: IMPLEMENT pub max_file_size_mb: u64, @@ -39,6 +43,7 @@ pub struct ScannerConfig { impl Default for ScannerConfig { fn default() -> Self { Self { + min_severity: Severity::Low, max_file_size_mb: 100, excluded_extensions: vec![ "jpg", "png", "gif", "mp4", "avi", "mkv", diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 48b89867..080265c8 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1,5 +1,6 @@ pub mod project; pub mod config; +pub(crate) mod query_cache; // Re-export commonly used functions for convenience pub use project::{get_project_info}; diff --git a/src/utils/query_cache.rs b/src/utils/query_cache.rs new file mode 100644 index 00000000..e88bcd62 --- /dev/null +++ b/src/utils/query_cache.rs @@ -0,0 +1,37 @@ +use std::collections::HashMap; +use std::sync::{Arc, RwLock}; + +use once_cell::sync::Lazy; +use tree_sitter::{Language, Query}; + +use crate::patterns::{self, Pattern}; + +#[derive(Clone)] +pub struct CompiledQuery { + pub meta: Pattern, + pub query: Arc, +} + +static CACHE: Lazy>>> = + Lazy::new(|| RwLock::new(HashMap::new())); + +pub fn for_lang(lang: &'static str, ts_lang: Language) -> Vec { + // fast-path read + if let Some(v) = CACHE.read().unwrap().get(lang) { + return v.clone(); + } + + // compile under write-lock exactly once + let patterns = patterns::load(lang); + let mut vec = Vec::with_capacity(patterns.len()); + + for p in patterns { + match Query::new(&ts_lang, p.query) { + Ok(q) => vec.push(CompiledQuery { meta: p, query: Arc::new(q) }), + Err(e) => tracing::warn!(lang, id = p.id, "query compile error: {e}"), + } + } + + CACHE.write().unwrap().insert(lang, vec.clone()); + vec +} \ No newline at end of file