nyx/src/labels/c.rs
2026-05-07 01:29:31 -04:00

187 lines
7.1 KiB
Rust

use crate::labels::{Cap, DataLabel, GateActivation, Kind, LabelRule, ParamConfig, SinkGate};
use phf::{Map, phf_map};
/// Flat (ungated) taint label rules for C.
///
/// Each rule pairs a list of name matchers with the [`DataLabel`] applied on
/// a match. Every rule in this table is case-insensitive. The table is laid
/// out in three sections: sources, sanitizers, then sinks.
pub static RULES: &[LabelRule] = &[
    // ===================== Sources =====================
    // Process environment.
    LabelRule {
        matchers: &["getenv"],
        label: DataLabel::Source(Cap::all()),
        case_sensitive: false,
    },
    // stdio / file-descriptor input.
    LabelRule {
        matchers: &["fgets", "scanf", "fscanf", "gets", "read"],
        label: DataLabel::Source(Cap::all()),
        case_sensitive: false,
    },
    // Socket input.
    LabelRule {
        matchers: &["recv", "recvfrom"],
        label: DataLabel::Source(Cap::all()),
        case_sensitive: false,
    },
    // ==================== Sanitizers ===================
    // `sanitize_*` prefix rule. A function the developer chose to call
    // `sanitize_*` is treated as a general-purpose sanitizer, so it clears
    // the whole cap mask unless a more specific sanitizer rule with a
    // narrower cap exists. Restricting this rule to one cap (HTML_ESCAPE,
    // say) would under-clear and yield false positives whenever the sink
    // downstream carries a different cap, e.g. FMT_STRING through printf,
    // which is the usual situation in C/C++ code.
    LabelRule {
        matchers: &["sanitize_"],
        label: DataLabel::Sanitizer(Cap::all()),
        case_sensitive: false,
    },
    // String-to-integer conversions, treated as sanitizers for every cap.
    LabelRule {
        matchers: &["atoi", "atol", "strtol", "strtoul"],
        label: DataLabel::Sanitizer(Cap::all()),
        case_sensitive: false,
    },
    // ====================== Sinks ======================
    // Command execution.
    LabelRule {
        matchers: &[
            "system", "popen", "exec", "execl", "execlp", "execle", "execve", "execvp",
        ],
        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
        case_sensitive: false,
    },
    // Buffer-writing string functions.
    // NOTE(review): these carry `Cap::HTML_ESCAPE` although none of them
    // produces HTML; presumably the cap is a stand-in for unsafe buffer
    // copies. Confirm this is the intended cap.
    LabelRule {
        matchers: &["sprintf", "strcpy", "strcat"],
        label: DataLabel::Sink(Cap::HTML_ESCAPE),
        case_sensitive: false,
    },
    // Format-string output.
    LabelRule {
        matchers: &["printf", "fprintf"],
        label: DataLabel::Sink(Cap::FMT_STRING),
        case_sensitive: false,
    },
    // File opening.
    LabelRule {
        matchers: &["fopen", "open"],
        label: DataLabel::Sink(Cap::FILE_IO),
        case_sensitive: false,
    },
    // Outbound HTTP request through libcurl.
    LabelRule {
        matchers: &["curl_easy_perform"],
        label: DataLabel::Sink(Cap::SSRF),
        case_sensitive: false,
    },
    // ---- LDAP injection ----
    //
    // OpenLDAP / libldap search calls:
    //   ldap_search_s(ld, base, scope, filter, ...)
    //   ldap_search_ext_s(ld, base, scope, filter, attrs, attrsonly,
    //                     serverctrls, clientctrls, timeout, sizelimit, **res)
    // Both are synchronous (the `_s` suffix); `ldap_search_ext_s` is the
    // extended form that adds controls and limits. The filter (argument
    // position 3) is the injection vector. The C surface of libldap ships no
    // standard escape helper, so sanitisation is normally written by the
    // caller; the `sanitize_` prefix rule earlier in this table covers the
    // developer-named case.
    LabelRule {
        matchers: &["ldap_search_s", "ldap_search_ext_s"],
        label: DataLabel::Sink(Cap::LDAP_INJECTION),
        case_sensitive: false,
    },
    // ---- XPath injection ----
    //
    // libxml2 evaluation entry points:
    //   xmlXPathEvalExpression(expr, ctx)
    //   xmlXPathEval(expr, ctx)
    //   xmlXPathCompile(expr)
    // The expression string is argument 0 in all three and is the canonical
    // XPath-injection vector.
    LabelRule {
        matchers: &["xmlXPathEvalExpression", "xmlXPathEval", "xmlXPathCompile"],
        label: DataLabel::Sink(Cap::XPATH_INJECTION),
        case_sensitive: false,
    },
];
/// Sinks for C that only fire when a selector argument has a specific value.
///
/// libcurl binds options through `curl_easy_setopt(handle, option, payload)`.
/// The option identifier (arg 1) decides which slot the payload fills:
/// `CURLOPT_POSTFIELDS` and `CURLOPT_COPYPOSTFIELDS` set the request body,
/// whereas the remaining `CURLOPT_*` constants configure URL / auth / TLS
/// behaviour and have no bearing on DATA_EXFIL. Gating on the macro name
/// stops the rule from firing on `curl_easy_setopt(h, CURLOPT_URL, url)`,
/// which is handled separately by the flat `curl_easy_perform` SSRF sink.
///
/// Matching on an identifier (rather than a literal value) relies on the
/// macro-arg fallback in `cfg::mod::classify_gated_sink` for `lang == "c"`.
/// Header-parsing libraries such as libmicrohttpd or mongoose have no stable
/// surface to key on and are left to project-specific configuration.
pub static GATED_SINKS: &[SinkGate] = &[SinkGate {
    callee_matcher: "curl_easy_setopt",
    // The CURLOPT_* option identifier is the second argument.
    arg_index: 1,
    // Only the request-body options activate the sink.
    dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"],
    dangerous_prefixes: &[],
    label: DataLabel::Sink(Cap::DATA_EXFIL),
    // Unlike the flat rules for C, this gate compares case-sensitively.
    case_sensitive: true,
    // The payload bound to the option is the third argument.
    payload_args: &[2],
    keyword_name: None,
    dangerous_kwargs: &[],
    activation: GateActivation::ValueMatch,
}];
/// Compile-time lookup from a syntax-node kind string to the internal
/// [`Kind`] classification.
///
/// NOTE(review): the keys look like tree-sitter C grammar node names; confirm
/// against the parser actually in use.
pub static KINDS: Map<&'static str, Kind> = phf_map! {
    // ---- control flow ----
    "if_statement" => Kind::If,
    "while_statement" => Kind::While,
    "for_statement" => Kind::For,
    // `do { .. } while (..)` shares the While kind.
    "do_statement" => Kind::While,
    "switch_statement" => Kind::Switch,
    // `case` arms and labels are handled as plain blocks.
    "case_statement" => Kind::Block,
    "labeled_statement" => Kind::Block,
    "return_statement" => Kind::Return,
    "break_statement" => Kind::Break,
    "continue_statement" => Kind::Continue,

    // ---- structure ----
    "translation_unit" => Kind::SourceFile,
    "compound_statement" => Kind::Block,
    "else_clause" => Kind::Block,
    "function_definition" => Kind::Function,

    // ---- data flow ----
    "call_expression" => Kind::CallFn,
    "assignment_expression" => Kind::Assignment,
    // Declarations and expression statements both map to CallWrapper.
    "declaration" => Kind::CallWrapper,
    "expression_statement" => Kind::CallWrapper,

    // ---- trivia: comments, punctuation, preprocessor lines ----
    "comment" => Kind::Trivia,
    ";" => Kind::Trivia,
    "," => Kind::Trivia,
    "(" => Kind::Trivia,
    ")" => Kind::Trivia,
    "{" => Kind::Trivia,
    "}" => Kind::Trivia,
    "\n" => Kind::Trivia,
    "preproc_include" => Kind::Trivia,
    "preproc_def" => Kind::Trivia,
};
/// Field and node-kind names used to locate function parameters for C.
///
/// NOTE(review): presumably consumed by the shared parameter-extraction code;
/// the exact lookup order of `ident_fields` is not visible from this file.
pub static PARAM_CONFIG: ParamConfig = ParamConfig {
    // Field holding the parameter list.
    params_field: "parameters",
    // Node kind of a single parameter within that list.
    param_node_kinds: &["parameter_declaration"],
    // No self/receiver parameter kinds are configured for C.
    self_param_kinds: &[],
    // Fields consulted for the parameter's identifier.
    ident_fields: &["declarator", "name"],
};
/// Output-parameter source positions for known C APIs (benchmark-driven).
///
/// Each entry is `(callee name, argument positions)`: the listed zero-based
/// arguments receive Source taint.
///
/// NOTE(review): `read`, `scanf` and `fscanf` are labelled as sources in
/// `RULES` but have no entry here; confirm whether their buffer arguments
/// should be listed as well.
pub static OUTPUT_PARAM_SOURCES: &[(&str, &[usize])] = &[
    // fgets(buf, size, stream): input lands in `buf`.
    ("fgets", &[0]),
    // gets(buf): input lands in `buf`.
    ("gets", &[0]),
    // recv(fd, buf, len, flags): input lands in `buf`.
    ("recv", &[1]),
    // recvfrom(fd, buf, len, flags, ...): input lands in `buf`.
    ("recvfrom", &[1]),
];
/// Taint propagation between arguments of known C functions.
///
/// For each callee, taint on any of the `from_args` positions flows into the
/// `to_args` positions (zero-based argument indices).
pub static ARG_PROPAGATIONS: &[super::ArgPropagation] = &[
    // inet_pton(af, src, dst): the textual address `src` is written to `dst`.
    super::ArgPropagation {
        callee: "inet_pton",
        from_args: &[1],
        to_args: &[2],
    },
    // inet_aton(cp, inp): the textual address `cp` is written to `inp`.
    super::ArgPropagation {
        callee: "inet_aton",
        from_args: &[0],
        to_args: &[1],
    },
];