mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
Python fp and docs updates (#58)
* refactor: Update comments for clarity and add expectations.json files for performance metrics * feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks * feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks * refactor: Simplify code formatting for better readability in multiple files * refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration * feat: Update Java and Python patterns to include new security rules * refactor: Improve comment clarity and consistency across multiple Rust files * refactor: Simplify code formatting for improved readability in integration tests and module files * refactor: Improve comment formatting and enhance clarity in assertions across multiple files
This commit is contained in:
parent
4db0805de6
commit
a438886217
291 changed files with 9485 additions and 3851 deletions
|
|
@ -43,7 +43,7 @@ pub fn scan_ejs_file(path: &Path, bytes: &[u8]) -> Vec<Diag> {
|
|||
// Advance past this match for the next iteration.
|
||||
search_from = abs_end + 2; // skip "%>"
|
||||
|
||||
// Skip <%- include(...) %> — EJS partial inclusion, not user-controlled.
|
||||
// Skip <%- include(...) %>, EJS partial inclusion, not user-controlled.
|
||||
if is_include_call(expr) {
|
||||
continue;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ pub const PATTERNS: &[Pattern] = &[
|
|||
Pattern {
|
||||
id: "java.deser.readobject",
|
||||
description: "ObjectInputStream.readObject() performs unsafe deserialization",
|
||||
// Match any .readObject() call — the method name is specific enough.
|
||||
// Match any .readObject() call, the method name is specific enough.
|
||||
query: r#"(method_invocation
|
||||
name: (identifier) @id (#eq? @id "readObject"))
|
||||
@vuln"#,
|
||||
|
|
@ -21,6 +21,46 @@ pub const PATTERNS: &[Pattern] = &[
|
|||
category: PatternCategory::Deserialization,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: SnakeYAML deserialization (CVE-2022-1471) ──────────────
|
||||
// `new Yaml()` constructed without a `SafeConstructor` argument
|
||||
// accepts arbitrary YAML tags (`!!javax.script.ScriptEngineManager`,
|
||||
// `!!java.net.URLClassLoader`, …) and instantiates any class via
|
||||
// reflection. SnakeYAML 2.0 swapped the default to SafeConstructor
|
||||
// but pre-2.0 deployments stay vulnerable until call sites are
|
||||
// patched. We match the empty-arg form `new Yaml()` only, so the
|
||||
// explicit-SafeConstructor remediation form
|
||||
// `new Yaml(new SafeConstructor(new LoaderOptions()))` is silent.
|
||||
Pattern {
|
||||
id: "java.deser.snakeyaml_unsafe_constructor",
|
||||
description: "new Yaml() without SafeConstructor accepts arbitrary class tags (CVE-2022-1471)",
|
||||
query: r#"(object_creation_expression
|
||||
type: (type_identifier) @t (#eq? @t "Yaml")
|
||||
arguments: (argument_list) @args (#eq? @args "()"))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Deserialization,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Apache Commons Text Text4Shell (CVE-2022-42889) ────────
|
||||
// `StringSubstitutor.createInterpolator()` enables `script:`,
|
||||
// `dns:`, and `url:` lookups by default; `${script:js:…}`
|
||||
// evaluates JavaScript via the JSR-223 ScriptEngineManager. The
|
||||
// factory call is itself the structural bug; the recommended app-
|
||||
// side mitigation builds a `StringSubstitutor` directly with a
|
||||
// restricted lookup map.
|
||||
Pattern {
|
||||
id: "java.code_exec.text4shell_interpolator",
|
||||
description: "StringSubstitutor.createInterpolator() enables script:/dns:/url: evaluation (CVE-2022-42889)",
|
||||
query: r#"(method_invocation
|
||||
object: (identifier) @c (#eq? @c "StringSubstitutor")
|
||||
name: (identifier) @id (#eq? @id "createInterpolator"))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Command execution ──────────────────────────────────────
|
||||
Pattern {
|
||||
id: "java.cmdi.runtime_exec",
|
||||
|
|
|
|||
|
|
@ -1,42 +1,4 @@
|
|||
//! # AST Pattern Conventions
|
||||
//!
|
||||
//! Each language file exports a `PATTERNS` slice of [`Pattern`] structs.
|
||||
//!
|
||||
//! ## ID format
|
||||
//!
|
||||
//! `<lang>.<category>.<specific>` — e.g. `java.deser.readobject`, `py.cmdi.os_system`.
|
||||
//!
|
||||
//! Language prefixes: `rs`, `java`, `py`, `js`, `ts`, `c`, `cpp`, `go`, `php`, `rb`.
|
||||
//!
|
||||
//! ## Tiers
|
||||
//!
|
||||
//! * **Tier A** — structural presence is high-signal (e.g. `gets()`, `eval()`).
|
||||
//! * **Tier B** — requires a heuristic guard in the query (e.g. SQL with concatenated
|
||||
//! arg, format-string with variable first arg).
|
||||
//!
|
||||
//! ## Severity
|
||||
//!
|
||||
//! * **High** — command exec, deserialization, banned C functions.
|
||||
//! * **Medium** — SQL concat, reflection, XSS sinks, casts.
|
||||
//! * **Low** — weak crypto, insecure randomness, code-quality (`unwrap`/`expect`/`panic`).
|
||||
//!
|
||||
//! Note: the default `min_severity` filter skips Low patterns; they only appear when
|
||||
//! the user explicitly lowers the threshold.
|
||||
//!
|
||||
//! ## No-duplicate rule
|
||||
//!
|
||||
//! If a vulnerability class is already detected by taint analysis (e.g. `eval` as a
|
||||
//! sink, `system` as a sink), the AST pattern is still kept for `--ast-only` mode but
|
||||
//! uses a distinct ID namespace (`js.code_exec.eval` vs `taint-unsanitised-flow`).
|
||||
//! The dedup pass in `ast.rs` prevents exact-duplicate findings at the same location.
|
||||
//!
|
||||
//! ## Adding a new pattern
|
||||
//!
|
||||
//! 1. Pick the language file under `src/patterns/<lang>.rs`.
|
||||
//! 2. Choose tier, category, severity per the rules above.
|
||||
//! 3. Write the tree-sitter query — test with `cargo test --test pattern_tests`.
|
||||
//! 4. Add a snippet to `tests/fixtures/patterns/<lang>/positive.<ext>`.
|
||||
//! 5. Add the ID to the positive test assertion in `tests/pattern_tests.rs`.
|
||||
#![doc = include_str!(concat!(env!("OUT_DIR"), "/patterns.md"))]
|
||||
|
||||
pub mod c;
|
||||
pub mod cpp;
|
||||
|
|
@ -68,7 +30,7 @@ pub enum Severity {
|
|||
impl Severity {
|
||||
/// Bracketed, colored, fixed-width tag for aligned console output.
|
||||
///
|
||||
/// Returns e.g. `"[HIGH] "` or `"[MEDIUM]"` — always 8 visible characters
|
||||
/// Returns e.g. `"[HIGH] "` or `"[MEDIUM]"`, always 8 visible characters
|
||||
/// so the column after the tag lines up regardless of severity.
|
||||
#[allow(dead_code)] // public API for lib consumers
|
||||
pub fn colored_tag(self) -> String {
|
||||
|
|
@ -123,9 +85,9 @@ impl FromStr for Severity {
|
|||
/// A parsed severity filter expression.
|
||||
///
|
||||
/// Supports three forms:
|
||||
/// - Single level: `"HIGH"` — matches only that level
|
||||
/// - Comma list: `"HIGH,MEDIUM"` — matches any listed level
|
||||
/// - Threshold: `">=MEDIUM"` — matches that level and above
|
||||
/// - Single level: `"HIGH"`, matches only that level
|
||||
/// - Comma list: `"HIGH,MEDIUM"`, matches any listed level
|
||||
/// - Threshold: `">=MEDIUM"`, matches that level and above
|
||||
///
|
||||
/// Parsing is case-insensitive and tolerates whitespace around tokens.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
|
|
@ -242,7 +204,7 @@ impl PatternCategory {
|
|||
/// One AST pattern with a tree-sitter query and meta-data.
|
||||
#[derive(Debug, Clone, Serialize, PartialEq)]
|
||||
pub struct Pattern {
|
||||
/// Unique identifier — `<lang>.<category>.<specific>` preferred.
|
||||
/// Unique identifier, `<lang>.<category>.<specific>` preferred.
|
||||
pub id: &'static str,
|
||||
/// Human-readable explanation.
|
||||
pub description: &'static str,
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ use crate::patterns::{Pattern, PatternCategory, PatternTier, Severity};
|
|||
///
|
||||
/// Taint rules cover `eval`/`exec`, `os.system`/`os.popen`/`subprocess.*`,
|
||||
/// and `cursor.execute`. AST patterns here add coverage for **deserialization**,
|
||||
/// **subprocess shell=True** (Tier B — taint doesn't check keyword args), and
|
||||
/// **subprocess shell=True** (Tier B, taint doesn't check keyword args), and
|
||||
/// **code execution** sinks that taint cannot structurally verify.
|
||||
pub const PATTERNS: &[Pattern] = &[
|
||||
// ── Tier A: Code execution ─────────────────────────────────────────
|
||||
|
|
@ -121,14 +121,45 @@ pub const PATTERNS: &[Pattern] = &[
|
|||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier B: SQL injection (format/concat heuristic) ────────────────
|
||||
// Catches both `cursor.execute(query + user)` (binary_operator concat)
|
||||
// and `cursor.execute(f"... {user} ...")` (f-string with interpolation).
|
||||
// f-strings appear as a `string` node with `interpolation` children in
|
||||
// tree-sitter-python; the alternation lets the same pattern cover both
|
||||
// the historical % / + concat shapes and the modern f-string SQLi shape
|
||||
// that surfaces in CVE-2025-24793 (snowflake-connector-python),
|
||||
// CVE-2025-69662 (geopandas), and dozens of similar cursor.execute
|
||||
// call sites across the corpus.
|
||||
Pattern {
|
||||
id: "py.sqli.execute_format",
|
||||
description: "cursor.execute with string concatenation risks SQL injection",
|
||||
description: "cursor.execute with string concatenation or f-string risks SQL injection",
|
||||
query: r#"(call
|
||||
function: (attribute
|
||||
attribute: (identifier) @fn (#eq? @fn "execute"))
|
||||
arguments: (argument_list
|
||||
(binary_operator) @arg))
|
||||
[(binary_operator)
|
||||
(string (interpolation))] @arg))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::B,
|
||||
category: PatternCategory::SqlInjection,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// SQLAlchemy `text(<concat-or-fstring>)`, same Tier B heuristic
|
||||
// applied to the SQLAlchemy raw-SQL constructor. Catches the
|
||||
// CVE-2025-69662 (geopandas) shape:
|
||||
// connection.execute(text(f"SELECT … '{geom_name}' …"))
|
||||
// where the f-string interpolation is the injection point and the
|
||||
// surrounding `connection.execute` would otherwise hide the unsafe
|
||||
// construction from the simple execute_format pattern.
|
||||
Pattern {
|
||||
id: "py.sqli.text_format",
|
||||
description: "sqlalchemy text() with f-string or string concat risks SQL injection",
|
||||
query: r#"(call
|
||||
function: [(identifier) @fn (attribute attribute: (identifier) @fn)]
|
||||
(#eq? @fn "text")
|
||||
arguments: (argument_list
|
||||
[(binary_operator)
|
||||
(string (interpolation))] @arg))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::B,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue