new capacity bits (#67)

This commit is contained in:
Eli Peter 2026-05-07 01:29:31 -04:00 committed by GitHub
parent afaffc0df6
commit 7d0e7320e2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
261 changed files with 10591 additions and 231 deletions

View file

@ -1181,7 +1181,12 @@ fn type_kind_tag(k: &TypeKind) -> String {
TypeKind::LocalCollection => "LocalCollection".into(),
TypeKind::RequestBuilder => "RequestBuilder".into(),
TypeKind::JpaCriteriaQuery => "JpaCriteriaQuery".into(),
TypeKind::LdapClient => "LdapClient".into(),
TypeKind::XPathClient => "XPathClient".into(),
TypeKind::XmlParser => "XmlParser".into(),
TypeKind::Template => "Template".into(),
TypeKind::Dto(_) => "Dto".into(),
TypeKind::NullPrototypeObject => "NullPrototypeObject".into(),
}
}
@ -1538,6 +1543,8 @@ pub fn analyse_function_taint(
receiver_seed: None,
const_values: Some(&opt.const_values),
type_facts: Some(&opt.type_facts),
xml_parser_config: Some(&opt.xml_parser_config),
xpath_config: Some(&opt.xpath_config),
ssa_summaries: None,
extra_labels: None,
callee_bodies: None,

View file

@ -138,6 +138,7 @@ pub struct RuleListItem {
pub enabled: bool,
pub is_custom: bool,
pub is_gated: bool,
pub is_class: bool,
pub case_sensitive: bool,
pub finding_count: usize,
pub suppression_rate: f64,
@ -156,6 +157,7 @@ pub struct RuleDetailView {
pub enabled: bool,
pub is_custom: bool,
pub is_gated: bool,
pub is_class: bool,
pub finding_count: usize,
pub suppression_rate: f64,
pub example_findings: Vec<RelatedFindingView>,

View file

@ -25,6 +25,20 @@ fn extract_family(rule_id: &str) -> &str {
rule_id
}
/// True when `rule_id` either equals `prefix` or starts with `prefix`
/// followed by one of the recognised separator characters used by the
/// finding-id emitter. Prevents `taint-ssrf-allowlist-violation`
/// from silently inheriting `taint-ssrf`'s OWASP bucket.
fn matches_cap_rule_id(rule_id: &str, prefix: &str) -> bool {
if !rule_id.starts_with(prefix) {
return false;
}
matches!(
rule_id.as_bytes().get(prefix.len()),
None | Some(b' ') | Some(b'(') | Some(b'.')
)
}
/// Return the OWASP 2021 (code, label) pair for a given rule id, or `None` if unmapped.
pub fn owasp_bucket_for(rule_id: &str) -> Option<(&'static str, &'static str)> {
let family = extract_family(rule_id);
@ -32,6 +46,27 @@ pub fn owasp_bucket_for(rule_id: &str) -> Option<(&'static str, &'static str)> {
return None;
}
// Cap-class rule ids carry their canonical OWASP code in
// `CAP_RULE_REGISTRY`; consult that first so adding a new cap class
// does not require updating two tables. The legacy family-token
// dispatch below covers per-language tree-sitter pattern rules
// (`js.xss.outer_html` style) that have no cap entry.
//
// Match shape: exact equality, or registry id followed by a separator
// that the emitter actually uses (` ` for ` (source 1:1)` suffixes,
// `(` for `(source 1:1)` style without a leading space, `.` for
// dotted variants like `rs.auth.missing_ownership_check.taint`).
// Plain `starts_with` would silently bucket a future
// `taint-ssrf-allowlist-violation` under the SSRF entry; the
// separator gate keeps unrelated suffixes from inheriting a parent
// bucket.
if let Some(meta) = crate::labels::CAP_RULE_REGISTRY
.iter()
.find(|m| matches_cap_rule_id(rule_id, m.rule_id))
{
return Some((meta.owasp_code, meta.owasp_label));
}
Some(match family {
// A01, Broken Access Control
"auth" | "csrf" | "mass_assign" | "path" | "redirect" => ("A01", "Broken Access Control"),
@ -39,10 +74,10 @@ pub fn owasp_bucket_for(rule_id: &str) -> Option<(&'static str, &'static str)> {
"crypto" | "secrets" => ("A02", "Cryptographic Failures"),
// A03, Injection (covers SQLi, XSS, command, code-eval, template, NoSQL, LDAP, reflection,
// and engine-level taint findings without a more specific family tag).
"sqli" | "xss" | "cmdi" | "code_exec" | "template" | "nosql" | "ldap" | "reflection"
| "taint" => ("A03", "Injection"),
// A05, Security Misconfiguration (TLS verify off, cookie flags, prototype pollution)
"config" | "transport" | "prototype" => ("A05", "Security Misconfiguration"),
"sqli" | "xss" | "cmdi" | "code_exec" | "template" | "nosql" | "ldap" | "xpath"
| "header" | "reflection" | "taint" => ("A03", "Injection"),
// A05, Security Misconfiguration (TLS verify off, cookie flags, prototype pollution, XXE)
"config" | "transport" | "prototype" | "xxe" => ("A05", "Security Misconfiguration"),
// A08, Software and Data Integrity Failures
"deser" => ("A08", "Software and Data Integrity Failures"),
// A09, Logging & Monitoring Failures
@ -112,6 +147,30 @@ fn issue_category_label(rule_id: &str) -> &'static str {
if rule_id.starts_with("taint-data-exfiltration") {
return "Data Exfiltration";
}
// Cap-class rule ids share the `taint` family token but each represent
// a distinct vulnerability class. Match them before falling through
// to family-based dispatch so the dashboard surfaces the right badge.
if rule_id.starts_with("taint-ldap-injection") {
return "LDAP Injection";
}
if rule_id.starts_with("taint-xpath-injection") {
return "XPath Injection";
}
if rule_id.starts_with("taint-header-injection") {
return "Header Injection";
}
if rule_id.starts_with("taint-open-redirect") {
return "Open Redirect";
}
if rule_id.starts_with("taint-template-injection") {
return "Template Injection";
}
if rule_id.starts_with("taint-xxe") {
return "XXE";
}
if rule_id.starts_with("taint-prototype-pollution") {
return "Prototype Pollution";
}
match extract_family(rule_id) {
"sqli" => "SQL Injection",
"xss" => "Cross-Site Scripting",
@ -229,6 +288,40 @@ mod tests {
assert_eq!(out[2].count, 2);
}
#[test]
fn cap_rule_id_match_requires_separator() {
// Exact match → bucketed.
assert_eq!(
owasp_bucket_for("taint-ssrf"),
Some(("A10", "Server-Side Request Forgery"))
);
// Suffix after recognised separators is bucketed.
assert_eq!(
owasp_bucket_for("taint-ssrf (source 1:1)"),
Some(("A10", "Server-Side Request Forgery"))
);
assert_eq!(
owasp_bucket_for("taint-ssrf(source 1:1)"),
Some(("A10", "Server-Side Request Forgery"))
);
// Dotted suffix (used by `rs.auth.missing_ownership_check.taint`).
assert_eq!(
owasp_bucket_for("rs.auth.missing_ownership_check.taint"),
Some(("A01", "Broken Access Control"))
);
// Hyphenated suffix without separator must NOT silently inherit
// the parent bucket. Falls through to the family-token table,
// where `ssrf` still resolves to A10, so use a hypothetical
// sibling that would only resolve via the cap registry.
assert_eq!(
owasp_bucket_for("taint-ldap-injection-allowlist"),
// Family token "taint" → A03; without separator gating this
// would have inherited the LDAP entry's A03 anyway, but the
// important property is that the registry match was rejected.
Some(("A03", "Injection"))
);
}
#[test]
fn issue_category_label_routes_data_exfil_to_dedicated_bucket() {
// `taint-data-exfiltration` shares the `taint` family token with

View file

@ -53,6 +53,8 @@ fn build_rule_list(state: &AppState) -> Vec<RuleInfo> {
case_sensitive: cr.case_sensitive,
is_custom: true,
is_gated: false,
is_class: false,
emission_active: true,
enabled,
});
}
@ -89,6 +91,7 @@ async fn list_rules(State(state): State<AppState>) -> Json<Vec<RuleListItem>> {
enabled: r.enabled,
is_custom: r.is_custom,
is_gated: r.is_gated,
is_class: r.is_class,
case_sensitive: r.case_sensitive,
finding_count: count,
suppression_rate: rate,
@ -134,6 +137,7 @@ async fn get_rule(
enabled: rule.enabled,
is_custom: rule.is_custom,
is_gated: rule.is_gated,
is_class: rule.is_class,
finding_count: total,
suppression_rate: rate,
example_findings: examples,