From b3870997d703e9a10c38f1f9e273ecddada54fe5 Mon Sep 17 00:00:00 2001
From: elipeter
Date: Tue, 24 Jun 2025 18:53:31 +0200
Subject: [PATCH] Add Ruby AST support using `tree-sitter-ruby`

- Added `tree-sitter-ruby` dependency to `Cargo.toml` and `Cargo.lock`.
- Introduced `patterns/ruby.rs` with Ruby-specific AST patterns for vulnerability detection.
- Updated `patterns/mod.rs` and `ast.rs` to support Ruby AST parsing and pattern registry initialization.
- Ruby queries anchor on the `method:` field of `call` and use the `subshell` node for back-tick commands.
---
 Cargo.lock           |  11 ++++
 Cargo.toml           |   1 +
 src/ast.rs           |   1 +
 src/patterns/mod.rs  |  23 ++------
 src/patterns/ruby.rs | 138 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 157 insertions(+), 17 deletions(-)
 create mode 100644 src/patterns/ruby.rs

diff --git a/Cargo.lock b/Cargo.lock
index 1f6b6aef..2676bf42 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -615,6 +615,7 @@ dependencies = [
  "tree-sitter-javascript",
  "tree-sitter-php",
  "tree-sitter-python",
+ "tree-sitter-ruby",
  "tree-sitter-rust",
  "tree-sitter-typescript",
 ]
@@ -1254,6 +1255,16 @@ dependencies = [
  "tree-sitter-language",
 ]
 
+[[package]]
+name = "tree-sitter-ruby"
+version = "0.23.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be0484ea4ef6bb9c575b4fdabde7e31340a8d2dbc7d52b321ac83da703249f95"
+dependencies = [
+ "cc",
+ "tree-sitter-language",
+]
+
 [[package]]
 name = "tree-sitter-rust"
 version = "0.24.0"
diff --git a/Cargo.toml b/Cargo.toml
index efb7b5a3..5574ae83 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -23,6 +23,7 @@ tree-sitter-javascript = "0.23.1"
 tree-sitter-go = "0.23.4"
 tree-sitter-php = "0.23.11"
 tree-sitter-python = "0.23.6"
+tree-sitter-ruby = "0.23.1"
 crossbeam-channel = "0.5.15"
 blake3 = "1.8.2"
 once_cell = "1.21.3"
diff --git a/src/ast.rs b/src/ast.rs
index 15725f16..3bb85bf7 100644
--- a/src/ast.rs
+++ b/src/ast.rs
@@ -32,6 +32,7 @@ pub(crate) fn run_rules_on_file(
         Some("py") => (Language::from(tree_sitter_python::LANGUAGE), "python"),
         Some("ts") => (Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT), "typescript"),
         Some("js") => (Language::from(tree_sitter_javascript::LANGUAGE), "javascript"),
+        Some("rb") => (Language::from(tree_sitter_ruby::LANGUAGE), "ruby"),
         _ => return Ok(vec![]),
     };
 
diff --git a/src/patterns/mod.rs b/src/patterns/mod.rs
index f9c25609..06f67b80 100644
--- a/src/patterns/mod.rs
+++ b/src/patterns/mod.rs
@@ -7,6 +7,7 @@ mod java;
 mod go;
 mod php;
 mod python;
+mod ruby;
 
 use std::collections::HashMap;
 use std::fmt;
@@ -52,20 +53,6 @@ impl FromStr for Severity { // TODO: FIX
     }
 }
 
-// /// How bad / noisy a pattern is considered.
-// #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd)]
-// pub enum Severity {
-//     Low,
-//     Medium,
-//     High,
-// }
-//
-// impl Severity {
-//     pub(crate) fn as_db_str(&self) -> &str {
-//         todo!()
-//     }
-// }
-
 /// One AST pattern with a tree-sitter query and meta-data.
 #[derive(Debug, Clone, Serialize)]
 pub struct Pattern {
@@ -108,6 +95,8 @@ static REGISTRY: Lazy> = Lazy::new(||
     m.insert("php", php::PATTERNS);
     m.insert("python", python::PATTERNS);
     m.insert("py", python::PATTERNS);
+    m.insert("ruby", ruby::PATTERNS);
+    m.insert("rb", ruby::PATTERNS);
 
     tracing::debug!("AST-pattern registry initialised ({} languages)", m.len());
 
@@ -121,7 +110,7 @@ pub fn load(lang: &str) -> Vec {
     let key = lang.to_ascii_lowercase();
     REGISTRY
         .get(key.as_str())
-        .copied() // `&'static [Pattern]` → *copy* the slice pointer
-        .unwrap_or(&[]) // unknown lang ⇒ empty slice
-        .to_vec() // caller owns the `Vec`
+        .copied()
+        .unwrap_or(&[])
+        .to_vec()
 }
\ No newline at end of file
diff --git a/src/patterns/ruby.rs b/src/patterns/ruby.rs
new file mode 100644
index 00000000..9fc939a3
--- /dev/null
+++ b/src/patterns/ruby.rs
@@ -0,0 +1,138 @@
+use crate::patterns::{Pattern, Severity};
+/// Ruby AST patterns: each entry pairs a tree-sitter query with an id, a description and a severity.
+pub const PATTERNS: &[Pattern] = &[
+    // ---------- Runtime code-execution primitives ----------
+    Pattern {
+        id: "eval_call",
+        description: "Kernel#eval usage",
+        query: r#"
+          (call
+            method: (identifier) @id
+            (#eq? @id "eval")
+          ) @vuln
+        "#,
+        severity: Severity::High,
+    },
+    Pattern {
+        id: "instance_eval_call",
+        description: "Object#instance_eval usage",
+        query: r#"
+          (call
+            method: (identifier) @id
+            (#eq? @id "instance_eval")
+          ) @vuln
+        "#,
+        severity: Severity::High,
+    },
+    Pattern {
+        id: "class_eval_call",
+        description: "Module#class_eval / module_eval usage",
+        query: r#"
+          (call
+            method: (identifier) @id
+            (#match? @id "^(class_eval|module_eval)$")
+          ) @vuln
+        "#,
+        severity: Severity::High,
+    },
+
+    // ---------- Shell execution ----------
+    Pattern {
+        id: "system_exec_interp",
+        description: "system/exec with string interpolation",
+        query: r#"
+          (call
+            method: (identifier) @m
+            (#match? @m "^(system|exec)$")
+            arguments: (argument_list
+              (string
+                (interpolation)+ @vuln
+              )
+            )
+          )
+        "#,
+        severity: Severity::High,
+    },
+    Pattern {
+        id: "backtick_command",
+        description: "Back-tick shell execution",
+        // Back-tick literals such as `uname -a` parse as `subshell` nodes in tree-sitter-ruby.
+        query: r#"(subshell) @vuln"#,
+        severity: Severity::High,
+    },
+
+    // ---------- Dangerous deserialisation ----------
+    Pattern {
+        id: "yaml_load",
+        description: "YAML.load / Psych.load (arbitrary object deserialisation)",
+        query: r#"
+          (call
+            receiver: (constant) @recv
+            (#match? @recv "^(YAML|Psych)$")
+            method: (identifier) @m
+            (#eq? @m "load")
+          ) @vuln
+        "#,
+        severity: Severity::High,
+    },
+    Pattern {
+        id: "marshal_load",
+        description: "Marshal.load usage",
+        query: r#"
+          (call
+            receiver: (constant) @recv
+            (#eq? @recv "Marshal")
+            method: (identifier) @m
+            (#eq? @m "load")
+          ) @vuln
+        "#,
+        severity: Severity::High,
+    },
+
+    // ---------- Reflection / meta-programming ----------
+    Pattern {
+        id: "send_dynamic",
+        description: "send() with dynamic first argument (not a literal symbol)",
+        query: r#"
+          (call
+            method: (identifier) @m
+            (#eq? @m "send")
+            arguments: (argument_list
+              [
+                (identifier) ; send(method_name_var, …)
+                (string (interpolation)+) ; send("user_#{role}", …)
+              ] @vuln
+            )
+          )
+        "#,
+        severity: Severity::Medium,
+    },
+    Pattern {
+        id: "constantize_call",
+        description: "ActiveSupport constantize / safe_constantize on tainted data",
+        query: r#"
+          (call
+            method: (identifier) @m
+            (#match? @m "^(constantize|safe_constantize)$")
+          ) @vuln
+        "#,
+        severity: Severity::Medium,
+    },
+
+    // ---------- Insecure resource access ----------
+    Pattern {
+        id: "open_uri_http",
+        description: "Kernel#open with HTTP(S) URL (open-uri auto-follow)",
+        query: r#"
+          (call
+            method: (identifier) @m
+            (#eq? @m "open")
+            arguments: (argument_list
+              (string) @url
+              (#match? @url "^[\"']https?://")
+            )
+          ) @vuln
+        "#,
+        severity: Severity::Medium,
+    },
+];