mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-12 19:55:14 +02:00
Dynamic (#77)
This commit is contained in:
parent
55247b7fcd
commit
991c84a1eb
1464 changed files with 225448 additions and 1985 deletions
|
|
@ -85,6 +85,7 @@ pub fn scan_ejs_file(path: &Path, bytes: &[u8]) -> Vec<Diag> {
|
|||
rollup: None,
|
||||
finding_id: String::new(),
|
||||
alternative_finding_ids: Vec::new(),
|
||||
stable_hash: 0,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -114,43 +114,72 @@ pub const PATTERNS: &[Pattern] = &[
|
|||
confidence: Confidence::Medium,
|
||||
},
|
||||
// ── Tier A: Weak crypto ────────────────────────────────────────────
|
||||
//
|
||||
// The `type:` node is matched through an alternation of node kinds
|
||||
// rather than only a bare `(type_identifier) (#eq? …)` so the
|
||||
// fully-qualified call shapes that dominate real code (and the entire
|
||||
// OWASP Benchmark) are caught: `new java.util.Random()` parses the type
|
||||
// as a `scoped_type_identifier`, not a `type_identifier`, which the old
|
||||
// `#eq? @t "Random"` query silently never matched (0 crypto findings on
|
||||
// the whole corpus). The fix keeps the reliable `#eq?` but captures the
|
||||
// LAST type-name segment from either a bare `(type_identifier)` or the
|
||||
// direct `(type_identifier)` child of a `(scoped_type_identifier)`, so
|
||||
// both `new Random()` and `new java.util.Random()` match while
|
||||
// `SecureRandom` (a different whole segment) does not.
|
||||
Pattern {
|
||||
id: "java.crypto.insecure_random",
|
||||
description: "new Random() (java.util.Random) is not cryptographically secure",
|
||||
query: r#"(object_creation_expression
|
||||
type: (type_identifier) @t (#eq? @t "Random"))
|
||||
type: [
|
||||
(type_identifier) @t
|
||||
(scoped_type_identifier (type_identifier) @t)
|
||||
]
|
||||
(#eq? @t "Random"))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Crypto,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// Weak crypto algorithm passed to a `getInstance("…")` factory, keyed on
|
||||
// the algorithm string so the qualifier (`javax.crypto.Cipher` /
|
||||
// `java.security.MessageDigest` FQN or a bare class) does not matter — the
|
||||
// old per-class queries pinned `object: (identifier) "MessageDigest"` /
|
||||
// `"Random"` and silently never matched the fully-qualified call shapes
|
||||
// that dominate real code (0 crypto findings on the whole OWASP corpus).
|
||||
// Three alternations, all proven to fire from this `(string_literal)`
|
||||
// position:
|
||||
// * `^.des/` — single-DES *cipher transforms* (`"DES/CBC/PKCS5Padding"`).
|
||||
// The trailing `/` (mode separator) is required so the genuinely-weak
|
||||
// single-DES Cipher fires while a bare `KeyGenerator.getInstance("DES")`
|
||||
// key-spec and the stronger triple-DES `"DESede/…"` (which the OWASP
|
||||
// Benchmark labels benign) do NOT — `"DESe"` has no `/` after `des`.
|
||||
// * `^.(rc2|rc4|blowfish)` — broken stream/block ciphers (rare, real).
|
||||
// * `^.(md2|md4|md5|sha1|sha-1).$` — broken hash digests as the WHOLE
|
||||
// algorithm string (the trailing `.$` matches the closing quote so
|
||||
// `"SHA1PRNG"` / `"HmacSHA1"` / `"SHA-256"` do NOT match).
|
||||
// `getInstance` with any of these is `Cipher`/`MessageDigest` by
|
||||
// construction; strong transforms (`AES/CBC`, `AES/GCM`, `SHA-256`) miss.
|
||||
Pattern {
|
||||
id: "java.crypto.weak_digest",
|
||||
description: "MessageDigest.getInstance(\"MD5\"/\"SHA1\") uses a weak hash algorithm",
|
||||
id: "java.crypto.weak_algorithm",
|
||||
description: "Cipher/MessageDigest.getInstance with a broken algorithm (DES/RC4/MD5/SHA-1)",
|
||||
query: r#"(method_invocation
|
||||
object: (identifier) @c (#eq? @c "MessageDigest")
|
||||
name: (identifier) @id (#eq? @id "getInstance")
|
||||
arguments: (argument_list
|
||||
(string_literal) @alg (#match? @alg "(?i)(md5|sha-?1)")))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Crypto,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// ── Tier A: XSS (servlet) ──────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "java.xss.getwriter_print",
|
||||
description: "response.getWriter().print/println writes output without encoding",
|
||||
query: r#"(method_invocation
|
||||
object: (method_invocation
|
||||
name: (identifier) @gw (#eq? @gw "getWriter"))
|
||||
name: (identifier) @id (#match? @id "^(print|println|write)$"))
|
||||
(string_literal) @alg (#match? @alg "(?i)(^.des/|^.(rc2|rc4|blowfish)|^.(md2|md4|md5|sha1|sha-1).$)")))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Xss,
|
||||
confidence: Confidence::High,
|
||||
category: PatternCategory::Crypto,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// Tier A reflected-XSS was previously a bare syntactic match on every
|
||||
// `response.getWriter().print/println/write(...)` regardless of whether the
|
||||
// written value was attacker-controlled or already HTML-encoded. On the
|
||||
// OWASP Benchmark that fired ~4400 times at precision 0.05 (it flagged
|
||||
// constant strings and `ESAPI.encoder().encodeForHTML(...)`-wrapped output
|
||||
// identically to a raw tainted write). Reflected XSS is now a taint sink
|
||||
// (`Sink(Cap::HTML_ESCAPE)` on the servlet writer verbs in
|
||||
// `labels/java.rs`), which fires only when an un-encoded tainted value
|
||||
// reaches the writer, so the syntactic pattern is retired.
|
||||
];
|
||||
|
|
|
|||
|
|
@ -162,6 +162,24 @@ pub const PATTERNS: &[Pattern] = &[
|
|||
category: PatternCategory::Secrets,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// ── Tier A: Hardcoded cryptographic key/secret config ──────────────
|
||||
// Crypto-key-shaped keys (`cookieSecret`, `cryptoKey`, `signingKey`, …) the
|
||||
// anchored `hardcoded_secret` regex misses. Emits a `crypto`-bucketing id
|
||||
// (a `*.secrets.*` id buckets as `other`). Benign `publicKey`/`primaryKey`/
|
||||
// `keyName`/bare `key` are rejected by the prefix requirement.
|
||||
Pattern {
|
||||
id: "js.crypto.hardcoded_key",
|
||||
description: "Hardcoded cryptographic key/secret in source config",
|
||||
query: r#"(pair
|
||||
key: (property_identifier) @key
|
||||
(#match? @key "(?i)^([a-z0-9]+secret|(crypto|cookie|session|signing|encryption|encrypt|private|master|jwt|hmac|secret)key|api[_-]?key|access[_-]?key|secret[_-]?key|private[_-]?key|encryption[_-]?key|signing[_-]?key)$")
|
||||
value: (string) @val (#match? @val "[^\"']{3,}"))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Crypto,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// ── Tier A: Open redirect ──────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "js.xss.location_assign",
|
||||
|
|
|
|||
|
|
@ -193,8 +193,8 @@ impl SeverityFilter {
|
|||
|
||||
/// Pattern confidence tier.
|
||||
///
|
||||
/// * **A** – Structural presence alone is high-signal (e.g. `gets()`, `eval()`).
|
||||
/// * **B** – Requires a simple heuristic guard in the query (e.g. SQL with
|
||||
/// * **A**: structural presence alone is high-signal (e.g. `gets()`, `eval()`).
|
||||
/// * **B**: requires a simple heuristic guard in the query (e.g. SQL with
|
||||
/// concatenated arg, file-open with non-literal path).
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize)]
|
||||
pub enum PatternTier {
|
||||
|
|
@ -220,6 +220,31 @@ impl std::fmt::Display for FindingCategory {
|
|||
}
|
||||
}
|
||||
|
||||
impl FindingCategory {
|
||||
/// Category for a structural / state-machine finding identified by its
|
||||
/// rule id.
|
||||
///
|
||||
/// Resource-management and error-handling defects (`state-resource-leak`,
|
||||
/// `cfg-resource-leak`, `cfg-error-fallthrough`) are *reliability* bugs,
|
||||
/// not security vulnerabilities: a leaked file handle or an unhandled
|
||||
/// error path is a correctness/robustness issue, not an exploitable flow.
|
||||
/// Emitting them as `Security` floods security reports (and security
|
||||
/// benchmarks) with non-security noise. Everything else routed through
|
||||
/// the structural/state pipeline — taint sinks (`cfg-unguarded-sink`),
|
||||
/// authorization gaps (`cfg-auth-gap`, `state-unauthed-access`) and
|
||||
/// memory-safety state errors (`state-use-after-close`,
|
||||
/// `state-double-close`) — stays `Security`.
|
||||
pub fn for_structural_rule(rule_id: &str) -> FindingCategory {
|
||||
match rule_id {
|
||||
"state-resource-leak"
|
||||
| "state-resource-leak-possible"
|
||||
| "cfg-resource-leak"
|
||||
| "cfg-error-fallthrough" => FindingCategory::Reliability,
|
||||
_ => FindingCategory::Security,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Vulnerability class that a pattern detects.
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize)]
|
||||
pub enum PatternCategory {
|
||||
|
|
|
|||
|
|
@ -193,6 +193,34 @@ pub const PATTERNS: &[Pattern] = &[
|
|||
category: PatternCategory::Crypto,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// Bare-call forms after `from hashlib import md5, sha1` (the qualified
|
||||
// `hashlib.md5(...)` form above is an `attribute` call and never matches
|
||||
// these `identifier`-function queries, so there is no double-count). Closes
|
||||
// the dvpwa weak-hash recall gap. Held at Low confidence: a project-local
|
||||
// function literally named `md5`/`sha1` is a rare incidental FP, so this
|
||||
// sits below the default high-confidence surface.
|
||||
Pattern {
|
||||
id: "py.crypto.md5_bare",
|
||||
description: "md5() (from hashlib) uses a weak hash algorithm",
|
||||
query: r#"(call
|
||||
function: (identifier) @fn (#eq? @fn "md5"))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Crypto,
|
||||
confidence: Confidence::Low,
|
||||
},
|
||||
Pattern {
|
||||
id: "py.crypto.sha1_bare",
|
||||
description: "sha1() (from hashlib) uses a weak hash algorithm",
|
||||
query: r#"(call
|
||||
function: (identifier) @fn (#eq? @fn "sha1"))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Crypto,
|
||||
confidence: Confidence::Low,
|
||||
},
|
||||
// ── Tier A: Template injection ─────────────────────────────────────
|
||||
Pattern {
|
||||
id: "py.xss.jinja_from_string",
|
||||
|
|
|
|||
|
|
@ -133,6 +133,22 @@ pub const PATTERNS: &[Pattern] = &[
|
|||
category: PatternCategory::Secrets,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// ── Tier A: Hardcoded cryptographic key/secret config ──────────────
|
||||
// Crypto-key-shaped keys the anchored `hardcoded_secret` regex misses;
|
||||
// emits a `crypto`-bucketing rule id. See javascript.rs for rationale.
|
||||
Pattern {
|
||||
id: "ts.crypto.hardcoded_key",
|
||||
description: "Hardcoded cryptographic key/secret in source config",
|
||||
query: r#"(pair
|
||||
key: (property_identifier) @key
|
||||
(#match? @key "(?i)^([a-z0-9]+secret|(crypto|cookie|session|signing|encryption|encrypt|private|master|jwt|hmac|secret)key|api[_-]?key|access[_-]?key|secret[_-]?key|private[_-]?key|encryption[_-]?key|signing[_-]?key)$")
|
||||
value: (string) @val (#match? @val "[^\"']{3,}"))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Crypto,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// ── Tier A: TypeScript-specific type-safety escapes ────────────────
|
||||
Pattern {
|
||||
id: "ts.quality.any_annotation",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue