mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-06 19:35:13 +02:00
Add multi-language AST-pattern scanning support
- Introduced `patterns` module with language-specific vulnerability patterns. - Added `query_cache` utility for caching compiled queries. - Expanded `scan.rs` to support scanning multiple languages dynamically. - Updated `Cargo.toml` with additional tree-sitter dependencies. - Added severity filtering to `ScannerConfig` for better configuration.
This commit is contained in:
parent
0831b9fb48
commit
22369cc404
17 changed files with 665 additions and 25 deletions
89
Cargo.lock
generated
89
Cargo.lock
generated
|
|
@ -13,6 +13,7 @@ dependencies = [
|
|||
"filetime",
|
||||
"ignore",
|
||||
"num_cpus",
|
||||
"once_cell",
|
||||
"rusqlite",
|
||||
"serde",
|
||||
"tempfile",
|
||||
|
|
@ -20,7 +21,15 @@ dependencies = [
|
|||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"tree-sitter",
|
||||
"tree-sitter-c",
|
||||
"tree-sitter-cpp",
|
||||
"tree-sitter-go",
|
||||
"tree-sitter-java",
|
||||
"tree-sitter-javascript",
|
||||
"tree-sitter-php",
|
||||
"tree-sitter-python",
|
||||
"tree-sitter-rust",
|
||||
"tree-sitter-typescript",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -957,12 +966,82 @@ dependencies = [
|
|||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-c"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a3aad8f0129083a59fe8596157552d2bb7148c492d44c21558d68ca1c722707"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-cpp"
|
||||
version = "0.23.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df2196ea9d47b4ab4a31b9297eaa5a5d19a0b121dceb9f118f6790ad0ab94743"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-go"
|
||||
version = "0.23.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b13d476345220dbe600147dd444165c5791bf85ef53e28acbedd46112ee18431"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-java"
|
||||
version = "0.23.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0aa6cbcdc8c679b214e616fd3300da67da0e492e066df01bcf5a5921a71e90d6"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-javascript"
|
||||
version = "0.23.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bf40bf599e0416c16c125c3cec10ee5ddc7d1bb8b0c60fa5c4de249ad34dc1b1"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-language"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8"
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-php"
|
||||
version = "0.23.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f066e94e9272cfe4f1dcb07a1c50c66097eca648f2d7233d299c8ae9ed8c130c"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-python"
|
||||
version = "0.23.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3d065aaa27f3aaceaf60c1f0e0ac09e1cb9eb8ed28e7bcdaa52129cffc7f4b04"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-rust"
|
||||
version = "0.24.0"
|
||||
|
|
@ -973,6 +1052,16 @@ dependencies = [
|
|||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-typescript"
|
||||
version = "0.23.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.18"
|
||||
|
|
|
|||
|
|
@ -20,6 +20,15 @@ rusqlite = "0.36.0"
|
|||
ignore = "0.4.23"
|
||||
tree-sitter = "0.25.6"
|
||||
tree-sitter-rust = "0.24.0"
|
||||
tree-sitter-c = "0.24.1"
|
||||
tree-sitter-cpp = "0.23.4"
|
||||
tree-sitter-java = "0.23.5"
|
||||
tree-sitter-typescript = "0.23.2"
|
||||
tree-sitter-javascript = "0.23.1"
|
||||
tree-sitter-go = "0.23.4"
|
||||
tree-sitter-php = "0.23.11"
|
||||
tree-sitter-python = "0.23.6"
|
||||
crossbeam-channel = "0.5.15"
|
||||
blake3 = "1.8.2"
|
||||
filetime = "0.2.25"
|
||||
once_cell = "1.21.3"
|
||||
|
|
|
|||
|
|
@ -2,8 +2,9 @@ use crate::cli::OutputFormat;
|
|||
use crate::utils::project::get_project_info;
|
||||
use std::path::Path;
|
||||
use crate::utils::config::Config;
|
||||
use tree_sitter::{Parser};
|
||||
use tree_sitter::{Language, Parser, QueryCursor, StreamingIterator};
|
||||
use crate::database::index::Indexer;
|
||||
use crate::utils::query_cache;
|
||||
use crate::walk::spawn_senders;
|
||||
|
||||
pub fn handle(
|
||||
|
|
@ -72,34 +73,64 @@ fn scan_with_index(root: &Path, db_path: &Path, cfg: &Config) -> Result<(), Box<
|
|||
|
||||
fn scan_single_file(
|
||||
path: &Path,
|
||||
_cfg: &Config,
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
if path.extension().and_then(|s| s.to_str()) != Some("rs") {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
cfg: &Config, // assume cfg.high_only: bool
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let source = std::fs::read_to_string(path)?;
|
||||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(&tree_sitter_rust::LANGUAGE.into())?;
|
||||
|
||||
let tree = parser.parse(&source, None).ok_or("tree-sitter failed")?;
|
||||
let root = tree.root_node();
|
||||
|
||||
let mut fn_count = 0;
|
||||
let mut cursor = root.walk();
|
||||
for child in root.children(&mut cursor) {
|
||||
if child.kind() == "function_item" {
|
||||
fn_count += 1;
|
||||
}
|
||||
let ext = path
|
||||
.extension()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or_default()
|
||||
.to_ascii_lowercase();
|
||||
|
||||
// Pick the right tree-sitter language *and* pre-compiled queries
|
||||
let (ts_lang, lang_key): (Language, &'static str) = match ext.as_str() {
|
||||
"rs" => (Language::from(tree_sitter_rust::LANGUAGE), "rust"),
|
||||
"c" => (Language::from(tree_sitter_c::LANGUAGE), "c"),
|
||||
"cpp" | "c++" => (Language::from(tree_sitter_cpp::LANGUAGE), "cpp"),
|
||||
"java" => (Language::from(tree_sitter_java::LANGUAGE), "java"),
|
||||
"go" => (Language::from(tree_sitter_go::LANGUAGE), "go"),
|
||||
"php" => (Language::from(tree_sitter_php::LANGUAGE_PHP), "php"),
|
||||
"py" => (Language::from(tree_sitter_python::LANGUAGE), "python"),
|
||||
"ts" | "tsx" => (Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT), "typescript"),
|
||||
"js" => (Language::from(tree_sitter_javascript::LANGUAGE), "javascript"),
|
||||
_ => return Ok(()),
|
||||
};
|
||||
|
||||
parser.set_language(&ts_lang)?;
|
||||
|
||||
let tree = parser.parse(&source, None).ok_or("tree-sitter failed")?;
|
||||
let root = tree.root_node();
|
||||
|
||||
// ----- run vulnerability patterns -----
|
||||
let compiled = query_cache::for_lang(lang_key, ts_lang);
|
||||
let mut cursor = QueryCursor::new();
|
||||
|
||||
for cq in &compiled {
|
||||
if cfg.scanner.min_severity > cq.meta.severity {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut matches = cursor.matches(&cq.query, root, source.as_bytes());
|
||||
|
||||
while let Some(m) = matches.next() {
|
||||
// capture 0 is the one tagged @vuln
|
||||
for cap in m.captures.iter().filter(|c| c.index == 0) {
|
||||
let point = cap.node.start_position();
|
||||
let line = point.row;
|
||||
let col = point.column;
|
||||
tracing::warn!(
|
||||
file = %path.display(),
|
||||
line = line + 1,
|
||||
column = col + 1,
|
||||
id = cq.meta.id,
|
||||
sev = ?cq.meta.severity,
|
||||
"pattern matched"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"scanned {} – found {} Rust function(s)",
|
||||
path.display(),
|
||||
fn_count
|
||||
);
|
||||
|
||||
// TODO: real vulnerability/pattern checks go here
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -3,6 +3,7 @@ mod commands;
|
|||
mod utils;
|
||||
mod walk;
|
||||
mod database;
|
||||
mod patterns;
|
||||
|
||||
use crate::utils::Config;
|
||||
use cli::Cli;
|
||||
|
|
|
|||
40
src/patterns/c.rs
Normal file
40
src/patterns/c.rs
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
use crate::patterns::{Pattern, Severity};
|
||||
|
||||
pub const PATTERNS: &[Pattern] = &[
|
||||
Pattern {
|
||||
id: "strcpy_call",
|
||||
description: "strcpy() usage",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"strcpy\")) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "strcat_call",
|
||||
description: "strcat() usage",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"strcat\")) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "sprintf_call",
|
||||
description: "sprintf() (no length limit)",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"sprintf\")) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "gets_call",
|
||||
description: "gets() usage",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"gets\")) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "scanf_with_percent_s",
|
||||
description: "scanf(\"%s\") without length specifier",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"scanf\") arguments: (argument_list (string_literal) @fmt (#match? @fmt \".*%s.*\"))) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "system_call",
|
||||
description: "system() shell execution",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"system\")) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
];
|
||||
40
src/patterns/cpp.rs
Normal file
40
src/patterns/cpp.rs
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
use crate::patterns::{Pattern, Severity};
|
||||
|
||||
pub const PATTERNS: &[Pattern] = &[
|
||||
Pattern {
|
||||
id: "strcpy_call",
|
||||
description: "strcpy() usage",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"strcpy\")) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "strcat_call",
|
||||
description: "strcat() usage",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"strcat\")) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "sprintf_call",
|
||||
description: "sprintf() (no length limit)",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"sprintf\")) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "gets_call",
|
||||
description: "gets() usage",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"gets\")) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "system_call",
|
||||
description: "system() shell execution",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"system\")) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "reinterpret_cast",
|
||||
description: "reinterpret_cast usage",
|
||||
query: "(reinterpret_cast_expression) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
];
|
||||
34
src/patterns/go.rs
Normal file
34
src/patterns/go.rs
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
use crate::patterns::{Pattern, Severity};
|
||||
|
||||
pub const PATTERNS: &[Pattern] = &[
|
||||
Pattern {
|
||||
id: "exec_command",
|
||||
description: "os/exec Command construction",
|
||||
query: "(call_expression function: (selector_expression field: (field_identifier) @f (#eq? @f \"Command\"))) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "http_insecure_tls",
|
||||
description: "&http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}",
|
||||
query: "(composite_literal type: (selector_expression field: (field_identifier) @t (#eq? @t \"Transport\")) body: (literal_value (keyed_element key: (identifier) @k (#eq? @k \"TLSClientConfig\") value: (composite_literal body: (literal_value (keyed_element key: (identifier) @ik (#eq? @ik \"InsecureSkipVerify\") value: (true)))))) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "unsafe_pointer",
|
||||
description: "Use of unsafe.Pointer",
|
||||
query: "(qualified_type type: (selector_expression field: (field_identifier) @f (#eq? @f \"Pointer\"))) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "md5_sha1",
|
||||
description: "crypto/md5 or crypto/sha1 usage",
|
||||
query: "(call_expression function: (selector_expression object: (identifier) @pkg (#match? @pkg \"md5|sha1\"))) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "hardcoded_secret",
|
||||
description: "Hard-coded string that looks like an API key/token",
|
||||
query: "(interpreted_string_literal) @s (#match? @s \"(?i)(api|secret|token|password)[=:]?[ \\t]*[A-Za-z0-9_\\-]{8,}\")",
|
||||
severity: Severity::Low,
|
||||
},
|
||||
];
|
||||
40
src/patterns/java.rs
Normal file
40
src/patterns/java.rs
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
use crate::patterns::{Pattern, Severity};
|
||||
|
||||
pub const PATTERNS: &[Pattern] = &[
|
||||
Pattern {
|
||||
id: "runtime_exec",
|
||||
description: "Runtime.getRuntime().exec(...) – arbitrary-command execution",
|
||||
query: "(method_invocation object: (method_invocation name: (identifier) @n (#eq? @n \"getRuntime\")) name: (identifier) @id (#eq? @id \"exec\")) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "class_for_name",
|
||||
description: "Dynamic reflection via Class.forName(...)",
|
||||
query: "(method_invocation object: (identifier) @c (#eq? @c \"Class\") name: (identifier) @id (#eq? @id \"forName\")) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "object_deserialization",
|
||||
description: "java.io.ObjectInputStream#readObject() deserialization",
|
||||
query: "(method_invocation object: (identifier) @o (#eq? @o \"ObjectInputStream\") name: (identifier) @id (#eq? @id \"readObject\")) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "insecure_random",
|
||||
description: "java.util.Random used where SecureRandom is expected",
|
||||
query: "(object_creation_expression type: (identifier) @t (#eq? @t \"Random\")) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "thread_stop",
|
||||
description: "Deprecated Thread.stop() invocation",
|
||||
query: "(method_invocation name: (identifier) @id (#eq? @id \"stop\") object: (identifier) @obj (#eq? @obj \"Thread\")) @vuln",
|
||||
severity: Severity::Low,
|
||||
},
|
||||
Pattern {
|
||||
id: "sql_concat",
|
||||
description: "SQL built with string concatenation",
|
||||
query: "(method_invocation name: (identifier) @id (#match? @id \"execute(Query|Update)?\") arguments: (argument_list (binary_expression) @concat)) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
];
|
||||
40
src/patterns/javascript.rs
Normal file
40
src/patterns/javascript.rs
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
use crate::patterns::{Pattern, Severity};
|
||||
|
||||
pub const PATTERNS: &[Pattern] = &[
|
||||
Pattern {
|
||||
id: "eval_call",
|
||||
description: "Use of eval()",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"eval\")) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "new_function",
|
||||
description: "new Function() constructor",
|
||||
query: "(new_expression constructor: (identifier) @id (#eq? @id \"Function\")) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "document_write",
|
||||
description: "document.write() call",
|
||||
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"write\"))) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "inner_html_assignment",
|
||||
description: "Assignment to element.innerHTML",
|
||||
query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"innerHTML\"))) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "settimeout_string",
|
||||
description: "setTimeout / setInterval with a string argument",
|
||||
query: "(call_expression function: (identifier) @id (#match? @id \"setTimeout|setInterval\") arguments: (arguments (string) @code . _)) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "json_parse",
|
||||
description: "JSON.parse on dynamic string",
|
||||
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"JSON\") property: (property_identifier) @prop (#eq? @prop \"parse\"))) @vuln",
|
||||
severity: Severity::Low,
|
||||
},
|
||||
];
|
||||
79
src/patterns/mod.rs
Normal file
79
src/patterns/mod.rs
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
pub mod rust;
|
||||
pub mod typescript;
|
||||
pub mod javascript;
|
||||
pub mod cpp;
|
||||
pub mod c;
|
||||
mod java;
|
||||
mod go;
|
||||
mod php;
|
||||
mod python;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
/// How bad / noisy a pattern is considered.
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd)]
|
||||
pub enum Severity {
|
||||
Low,
|
||||
Medium,
|
||||
High,
|
||||
}
|
||||
|
||||
/// One AST pattern with a tree-sitter query and meta-data.
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct Pattern {
|
||||
/// Unique identifier (snake-case preferred).
|
||||
pub id: &'static str,
|
||||
/// Human-readable explanation.
|
||||
pub description: &'static str,
|
||||
/// tree-sitter query string.
|
||||
pub query: &'static str,
|
||||
/// Rough severity bucket.
|
||||
pub severity: Severity,
|
||||
}
|
||||
|
||||
/// Global, lazily-initialised registry: lang-name → pattern slice
|
||||
static REGISTRY: Lazy<HashMap<&'static str, &'static [Pattern]>> = Lazy::new(|| {
|
||||
let mut m = HashMap::new();
|
||||
|
||||
// ---- Rust ----
|
||||
m.insert("rust", rust::PATTERNS);
|
||||
|
||||
// ---- TypeScript ----
|
||||
m.insert("typescript", typescript::PATTERNS);
|
||||
m.insert("ts", typescript::PATTERNS);
|
||||
m.insert("tsx", typescript::PATTERNS);
|
||||
|
||||
// ---- JavaScript ----
|
||||
m.insert("javascript", javascript::PATTERNS);
|
||||
m.insert("js", javascript::PATTERNS);
|
||||
|
||||
// ---- C & C++ ----
|
||||
m.insert("c", c::PATTERNS);
|
||||
m.insert("cpp", cpp::PATTERNS);
|
||||
m.insert("c++", cpp::PATTERNS);
|
||||
|
||||
// ---- Other languages in the folder ----
|
||||
m.insert("java", java::PATTERNS);
|
||||
m.insert("go", go::PATTERNS);
|
||||
m.insert("php", php::PATTERNS);
|
||||
m.insert("python", python::PATTERNS);
|
||||
m.insert("py", python::PATTERNS);
|
||||
|
||||
tracing::debug!("AST-pattern registry initialised ({} languages)", m.len());
|
||||
|
||||
m
|
||||
});
|
||||
|
||||
/// Return all patterns for the requested language (case-insensitive).
|
||||
///
|
||||
/// Unknown languages yield an **empty** `Vec`.
|
||||
pub fn load(lang: &str) -> Vec<Pattern> {
|
||||
let key = lang.to_ascii_lowercase();
|
||||
REGISTRY
|
||||
.get(key.as_str())
|
||||
.copied() // `&'static [Pattern]` → *copy* the slice pointer
|
||||
.unwrap_or(&[]) // unknown lang ⇒ empty slice
|
||||
.to_vec() // caller owns the `Vec`
|
||||
}
|
||||
40
src/patterns/php.rs
Normal file
40
src/patterns/php.rs
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
use crate::patterns::{Pattern, Severity};
|
||||
|
||||
pub const PATTERNS: &[Pattern] = &[
|
||||
Pattern {
|
||||
id: "eval_call",
|
||||
description: "eval($code) execution",
|
||||
query: "(function_call_expression function: (name) @n (#eq? @n \"eval\")) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "preg_replace_e",
|
||||
description: "preg_replace with deprecated /e modifier",
|
||||
query: "(function_call_expression function: (name) @n (#eq? @n \"preg_replace\") arguments: (arguments (string) @pat (#match? @pat \"/.*e.*$/\"))) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "create_function",
|
||||
description: "create_function(...) anonymous eval-like",
|
||||
query: "(function_call_expression function: (name) @n (#eq? @n \"create_function\")) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "unserialize_call",
|
||||
description: "unserialize(...) on user input",
|
||||
query: "(function_call_expression function: (name) @n (#eq? @n \"unserialize\")) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "mysql_query_concat",
|
||||
description: "mysql_query with concatenated SQL",
|
||||
query: "(function_call_expression function: (name) @n (#eq? @n \"mysql_query\") arguments: (arguments (binary_expression) @concat)) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "system_call",
|
||||
description: "system()/shell_exec()/exec() command execution",
|
||||
query: "(function_call_expression function: (name) @n (#match? @n \"system|shell_exec|exec|passthru\")) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
];
|
||||
40
src/patterns/python.rs
Normal file
40
src/patterns/python.rs
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
use crate::patterns::{Pattern, Severity};
|
||||
|
||||
pub const PATTERNS: &[Pattern] = &[
|
||||
Pattern {
|
||||
id: "eval_call",
|
||||
description: "eval() on dynamic input",
|
||||
query: "(call function: (identifier) @id (#eq? @id \"eval\")) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "exec_call",
|
||||
description: "exec(...) execution of dynamic code",
|
||||
query: "(call function: (identifier) @id (#eq? @id \"exec\")) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "pickle_load",
|
||||
description: "pickle.load / loads – unsafe deserialization",
|
||||
query: "(call function: (attribute attribute: (identifier) @attr (#match? @attr \"load(s)?\") object: (identifier) @pkg (#eq? @pkg \"pickle\"))) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "subprocess_shell_true",
|
||||
description: "subprocess.* with shell=True",
|
||||
query: "(call function: (attribute object: (identifier) @pkg (#eq? @pkg \"subprocess\")) arguments: (argument_list . (keyword_argument name: (identifier) @k (#eq? @k \"shell\")) (true) @val)) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "random_random",
|
||||
description: "random.random() for security-sensitive randomness",
|
||||
query: "(call function: (attribute attribute: (identifier) @attr (#eq? @attr \"random\") object: (identifier) @pkg (#eq? @pkg \"random\"))) @vuln",
|
||||
severity: Severity::Low,
|
||||
},
|
||||
Pattern {
|
||||
id: "sql_concat",
|
||||
description: "SQL query built via f-string or +-concat",
|
||||
query: "(call function: (attribute attribute: (identifier) @m (#match? @m \"execute|executemany\")) arguments: (argument_list (f_string) @fstr)) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
];
|
||||
68
src/patterns/rust.rs
Normal file
68
src/patterns/rust.rs
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
use crate::patterns::{Pattern, Severity};
|
||||
|
||||
/// The full catalogue.
|
||||
///
|
||||
/// *Feel free to prune, extend, or tweak severities to suit your own threat
|
||||
/// model.*
|
||||
pub const PATTERNS: &[Pattern] = &[
|
||||
Pattern {
|
||||
id: "unsafe_block",
|
||||
description: "Use of an `unsafe` block",
|
||||
query: "(unsafe_block) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "unsafe_fn",
|
||||
description: "`unsafe fn` declaration",
|
||||
query: "(function_item (modifier) @kw (#eq? @kw \"unsafe\")) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "unwrap_call",
|
||||
description: "`.unwrap()` call (may panic)",
|
||||
query: "(call_expression function: (field_expression field: (field_identifier) @name (#eq? @name \"unwrap\"))) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "expect_call",
|
||||
description: "`.expect()` call (may panic)",
|
||||
query: "(call_expression function: (field_expression field: (field_identifier) @name (#eq? @name \"expect\"))) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "panic_macro",
|
||||
description: "`panic!` macro invocation",
|
||||
query: "(macro_invocation (identifier) @id (#eq? @id \"panic\")) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "todo_or_unimplemented",
|
||||
description: "`todo!()` / `unimplemented!()` placeholder",
|
||||
query: "(macro_invocation (identifier) @id (#match? @id \"todo|unimplemented\")) @vuln",
|
||||
severity: Severity::Low,
|
||||
},
|
||||
Pattern {
|
||||
id: "transmute_call",
|
||||
description: "`std::mem::transmute` call",
|
||||
query: "(call_expression function: (scoped_identifier path: (identifier) @p (#eq? @p \"mem\") name: (identifier) @f (#eq? @f \"transmute\"))) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "get_unchecked",
|
||||
description: "`get_unchecked` or `get_unchecked_mut` slice access",
|
||||
query: "(call_expression function: (field_expression field: (field_identifier) @m (#match? @m \"get_unchecked(_mut)?\"))) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "copy_nonoverlapping",
|
||||
description: "Raw pointer `copy_nonoverlapping`",
|
||||
query: "(call_expression function: (scoped_identifier path: (identifier) @p (#eq? @p \"ptr\") name: (identifier) @f (#eq? @f \"copy_nonoverlapping\"))) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "narrow_cast_with_as",
|
||||
description: "`as` cast to an 8-/16-bit integer (possible truncation)",
|
||||
query: "(as_expression left: (_) right: (primitive_type) @to (#match? @to \"u8|i8|u16|i16\")) @vuln",
|
||||
severity: Severity::Low,
|
||||
},
|
||||
];
|
||||
46
src/patterns/typescript.rs
Normal file
46
src/patterns/typescript.rs
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
use crate::patterns::{Pattern, Severity};
|
||||
|
||||
pub const PATTERNS: &[Pattern] = &[
|
||||
Pattern {
|
||||
id: "eval_call",
|
||||
description: "Use of eval()",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"eval\")) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "new_function",
|
||||
description: "new Function() constructor",
|
||||
query: "(new_expression constructor: (identifier) @id (#eq? @id \"Function\")) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "document_write",
|
||||
description: "document.write() call",
|
||||
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"write\"))) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "inner_html_assignment",
|
||||
description: "Assignment to element.innerHTML",
|
||||
query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"innerHTML\"))) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "settimeout_string",
|
||||
description: "setTimeout / setInterval with a string argument",
|
||||
query: "(call_expression function: (identifier) @id (#match? @id \"setTimeout|setInterval\") arguments: (arguments (string) @code . _)) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "any_type",
|
||||
description: "Type annotation of `any`",
|
||||
query: "(type_annotation (predefined_type) @t (#eq? @t \"any\")) @vuln",
|
||||
severity: Severity::Low,
|
||||
},
|
||||
Pattern {
|
||||
id: "json_parse",
|
||||
description: "JSON.parse on dynamic string",
|
||||
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"JSON\") property: (property_identifier) @prop (#eq? @prop \"parse\"))) @vuln",
|
||||
severity: Severity::Low,
|
||||
},
|
||||
];
|
||||
|
|
@ -2,10 +2,14 @@ use serde::{Deserialize, Serialize};
|
|||
use std::path::{Path};
|
||||
use std::fs;
|
||||
use toml;
|
||||
use crate::patterns::Severity;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(default)]
|
||||
pub struct ScannerConfig {
|
||||
/// The minimum severity level to output
|
||||
pub min_severity: Severity,
|
||||
|
||||
/// The maximum file size to scan, in megabytes. TODO: IMPLEMENT
|
||||
pub max_file_size_mb: u64,
|
||||
|
||||
|
|
@ -39,6 +43,7 @@ pub struct ScannerConfig {
|
|||
impl Default for ScannerConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
min_severity: Severity::Low,
|
||||
max_file_size_mb: 100,
|
||||
excluded_extensions: vec![
|
||||
"jpg", "png", "gif", "mp4", "avi", "mkv",
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
pub mod project;
|
||||
pub mod config;
|
||||
pub(crate) mod query_cache;
|
||||
|
||||
// Re-export commonly used functions for convenience
|
||||
pub use project::{get_project_info};
|
||||
|
|
|
|||
37
src/utils/query_cache.rs
Normal file
37
src/utils/query_cache.rs
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
use tree_sitter::{Language, Query};
|
||||
|
||||
use crate::patterns::{self, Pattern};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct CompiledQuery {
|
||||
pub meta: Pattern,
|
||||
pub query: Arc<Query>,
|
||||
}
|
||||
|
||||
static CACHE: Lazy<RwLock<HashMap<&'static str, Vec<CompiledQuery>>>> =
|
||||
Lazy::new(|| RwLock::new(HashMap::new()));
|
||||
|
||||
pub fn for_lang(lang: &'static str, ts_lang: Language) -> Vec<CompiledQuery> {
|
||||
// fast-path read
|
||||
if let Some(v) = CACHE.read().unwrap().get(lang) {
|
||||
return v.clone();
|
||||
}
|
||||
|
||||
// compile under write-lock exactly once
|
||||
let patterns = patterns::load(lang);
|
||||
let mut vec = Vec::with_capacity(patterns.len());
|
||||
|
||||
for p in patterns {
|
||||
match Query::new(&ts_lang, p.query) {
|
||||
Ok(q) => vec.push(CompiledQuery { meta: p, query: Arc::new(q) }),
|
||||
Err(e) => tracing::warn!(lang, id = p.id, "query compile error: {e}"),
|
||||
}
|
||||
}
|
||||
|
||||
CACHE.write().unwrap().insert(lang, vec.clone());
|
||||
vec
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue