Add Ruby AST support using tree-sitter-ruby

- Added `tree-sitter-ruby` dependency to `Cargo.toml` and `Cargo.lock`.
- Introduced `patterns/ruby.rs` with Ruby-specific AST patterns for vulnerability detection.
- Updated `patterns/mod.rs` and `ast.rs` to support Ruby AST parsing and pattern registry initialization.
This commit is contained in:
elipeter 2025-06-24 18:53:31 +02:00
parent 484f4b6d05
commit b3870997d7
5 changed files with 156 additions and 17 deletions

11
Cargo.lock generated
View file

@ -615,6 +615,7 @@ dependencies = [
"tree-sitter-javascript",
"tree-sitter-php",
"tree-sitter-python",
"tree-sitter-ruby",
"tree-sitter-rust",
"tree-sitter-typescript",
]
@ -1254,6 +1255,16 @@ dependencies = [
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-ruby"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be0484ea4ef6bb9c575b4fdabde7e31340a8d2dbc7d52b321ac83da703249f95"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-rust"
version = "0.24.0"

View file

@ -23,6 +23,7 @@ tree-sitter-javascript = "0.23.1"
tree-sitter-go = "0.23.4"
tree-sitter-php = "0.23.11"
tree-sitter-python = "0.23.6"
tree-sitter-ruby = "0.23.1"
crossbeam-channel = "0.5.15"
blake3 = "1.8.2"
once_cell = "1.21.3"

View file

@ -32,6 +32,7 @@ pub(crate) fn run_rules_on_file(
Some("py") => (Language::from(tree_sitter_python::LANGUAGE), "python"),
Some("ts") => (Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT), "typescript"),
Some("js") => (Language::from(tree_sitter_javascript::LANGUAGE), "javascript"),
Some("rb") => (Language::from(tree_sitter_ruby::LANGUAGE), "ruby"),
_ => return Ok(vec![]),
};

View file

@ -7,6 +7,7 @@ mod java;
mod go;
mod php;
mod python;
mod ruby;
use std::collections::HashMap;
use std::fmt;
@ -52,20 +53,6 @@ impl FromStr for Severity { // TODO: FIX
}
}
// /// How bad / noisy a pattern is considered.
// #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd)]
// pub enum Severity {
// Low,
// Medium,
// High,
// }
//
// impl Severity {
// pub(crate) fn as_db_str(&self) -> &str {
// todo!()
// }
// }
/// One AST pattern with a tree-sitter query and meta-data.
#[derive(Debug, Clone, Serialize)]
pub struct Pattern {
@ -108,6 +95,8 @@ static REGISTRY: Lazy<HashMap<&'static str, &'static [Pattern]>> = Lazy::new(||
m.insert("php", php::PATTERNS);
m.insert("python", python::PATTERNS);
m.insert("py", python::PATTERNS);
m.insert("ruby", ruby::PATTERNS);
m.insert("rb", ruby::PATTERNS);
tracing::debug!("AST-pattern registry initialised ({} languages)", m.len());
@ -121,7 +110,7 @@ pub fn load(lang: &str) -> Vec<Pattern> {
let key = lang.to_ascii_lowercase();
REGISTRY
.get(key.as_str())
.copied() // `&'static [Pattern]` → *copy* the slice pointer
.unwrap_or(&[]) // unknown lang ⇒ empty slice
.to_vec() // caller owns the `Vec`
.copied()
.unwrap_or(&[])
.to_vec()
}

137
src/patterns/ruby.rs Normal file
View file

@ -0,0 +1,137 @@
use crate::patterns::{Pattern, Severity};
/// Ruby AST patterns, expressed as tree-sitter queries against the
/// `tree-sitter-ruby` grammar.
///
/// Every query marks the node to report with the `@vuln` capture; the other
/// captures (`@id`, `@m`, `@recv`, `@url`) exist only to feed the `#eq?` /
/// `#match?` predicates.
///
/// Method-name checks are always constrained to the `method:` field of the
/// `call` node so that a receiver which merely happens to be an identifier
/// with the same name (e.g. a local variable called `eval`) is not reported.
pub const PATTERNS: &[Pattern] = &[
    // ---------- Runtime code-execution primitives ----------
    Pattern {
        id: "eval_call",
        description: "Kernel#eval usage",
        query: r#"
            (call
              method: (identifier) @id
              (#eq? @id "eval")
            ) @vuln
        "#,
        severity: Severity::High,
    },
    Pattern {
        id: "instance_eval_call",
        description: "Object#instance_eval usage",
        query: r#"
            (call
              method: (identifier) @id
              (#eq? @id "instance_eval")
            ) @vuln
        "#,
        severity: Severity::High,
    },
    Pattern {
        id: "class_eval_call",
        description: "Module#class_eval / module_eval usage",
        query: r#"
            (call
              method: (identifier) @id
              (#match? @id "^(class_eval|module_eval)$")
            ) @vuln
        "#,
        severity: Severity::High,
    },
    // ---------- Shell execution ----------
    Pattern {
        id: "system_exec_interp",
        description: "system/exec with string interpolation",
        // Reports each interpolation inside a string argument, not the call.
        query: r#"
            (call
              method: (identifier) @m
              (#match? @m "^(system|exec)$")
              arguments: (argument_list
                (string
                  (interpolation)+ @vuln
                )
              )
            )
        "#,
        severity: Severity::High,
    },
    Pattern {
        id: "backtick_command",
        description: "Back-tick shell execution",
        // `uname -a`
        query: r#"(shell_command) @vuln"#,
        severity: Severity::High,
    },
    // ---------- Dangerous deserialisation ----------
    Pattern {
        id: "yaml_load",
        description: "YAML.load / Psych.load (arbitrary object deserialisation)",
        query: r#"
            (call
              receiver: (constant) @recv
              (#match? @recv "^(YAML|Psych)$")
              method: (identifier) @m
              (#eq? @m "load")
            ) @vuln
        "#,
        severity: Severity::High,
    },
    Pattern {
        id: "marshal_load",
        description: "Marshal.load usage",
        query: r#"
            (call
              receiver: (constant) @recv
              (#eq? @recv "Marshal")
              method: (identifier) @m
              (#eq? @m "load")
            ) @vuln
        "#,
        severity: Severity::High,
    },
    // ---------- Reflection / meta-programming ----------
    Pattern {
        id: "send_dynamic",
        description: "send() with dynamic first argument (not a literal symbol)",
        // The leading `.` anchors the alternation to the first named child of
        // the argument list; without it `send(:literal, some_var)` would be
        // reported because of its *second* argument.
        query: r#"
            (call
              method: (identifier) @m
              (#eq? @m "send")
              arguments: (argument_list
                .
                [
                  ; send(method_name_var, ...)
                  (identifier)
                  ; send("user_#{role}", ...)
                  (string (interpolation)+)
                ] @vuln
              )
            )
        "#,
        severity: Severity::Medium,
    },
    Pattern {
        id: "constantize_call",
        description: "ActiveSupport constantize / safe_constantize on tainted data",
        query: r#"
            (call
              method: (identifier) @m
              (#match? @m "^(constantize|safe_constantize)$")
            ) @vuln
        "#,
        severity: Severity::Medium,
    },
    // ---------- Insecure resource access ----------
    Pattern {
        id: "open_uri_http",
        description: "Kernel#open with HTTP(S) URL (open-uri auto-follow)",
        // The captured `string` node text includes its delimiters, so the
        // regex must accept both "http://..." and 'http://...'.
        query: r#"
            (call
              method: (identifier) @m
              (#eq? @m "open")
              arguments: (argument_list
                (string) @url
                (#match? @url "^[\"']https?://")
              )
            ) @vuln
        "#,
        severity: Severity::Medium,
    },
];