new capacity bits (#67)

This commit is contained in:
Eli Peter 2026-05-07 01:29:31 -04:00 committed by GitHub
parent afaffc0df6
commit 7d0e7320e2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
261 changed files with 10591 additions and 231 deletions

View file

@ -580,9 +580,19 @@ pub(crate) fn analyse_file_with_lowered(
f.source.index(),
!f.path_validated,
f.path_hash,
f.effective_sink_caps.bits(),
)
});
all_findings.dedup_by_key(|f| {
(
f.body_id,
f.sink,
f.source,
f.path_validated,
f.path_hash,
f.effective_sink_caps.bits(),
)
});
all_findings.dedup_by_key(|f| (f.body_id, f.sink, f.source, f.path_validated, f.path_hash));
// 5. Assign stable finding IDs now that `body_id` has been set and
// the dedup has picked the final set of distinct flows. The ID
@ -679,9 +689,118 @@ fn containment_order(bodies: &[BodyCfg]) -> Vec<usize> {
order
}
/// Build a `var_name → TypeKind` map from a body's optimised SSA + type-fact
/// result. Used by [`analyse_multi_body`] to forward closure-captured types
/// from a parent body into its children, so that bound-variable receiver
/// idioms (`const c = ldap.createClient(...); function f() { c.search(...) }`)
/// pick up `TypeKind::LdapClient` on the inner reference via the
/// [`ssa_transfer::resolve_type_qualified_labels`] receiver scan.
///
/// Conflict policy: if the same `var_name` reaches multiple SSA values with
/// distinct `TypeKind`s the entry is dropped — propagating an ambiguous type
/// into a child body would fabricate facts, while dropping it just falls back
/// to the existing structural resolution paths.
fn extract_named_type_facts(
ssa: &crate::ssa::SsaBody,
type_facts: &crate::ssa::type_facts::TypeFactResult,
) -> HashMap<String, crate::ssa::type_facts::TypeKind> {
use crate::ssa::type_facts::TypeKind;
let mut acc: HashMap<String, TypeKind> = HashMap::new();
let mut conflicts: HashSet<String> = HashSet::new();
for block in &ssa.blocks {
for inst in block.phis.iter().chain(block.body.iter()) {
let Some(name) = inst.var_name.as_deref() else {
continue;
};
if conflicts.contains(name) {
continue;
}
let Some(kind) = type_facts.get_type(inst.value) else {
continue;
};
if matches!(kind, TypeKind::Unknown) {
continue;
}
match acc.get(name) {
Some(existing) if existing != kind => {
acc.remove(name);
conflicts.insert(name.to_string());
}
Some(_) => {}
None => {
acc.insert(name.to_string(), kind.clone());
}
}
}
}
acc
}
/// Copy parent-body types onto the closure-captured parameters of a child
/// body's [`crate::ssa::type_facts::TypeFactResult`].
///
/// Scoped lowering ([`crate::ssa::lower_to_ssa_with_params`]) represents
/// every free / closure-captured variable a body reads as an
/// `SsaOp::Param` (or `SsaOp::SelfParam`) in the entry block. The per-body
/// type analysis can only seed declared formal-parameter types (via
/// `BodyMeta.param_types`), so captured variables stay `TypeKind::Unknown`:
/// their definition lives in an enclosing body whose SSA is not in scope.
///
/// For each `Param` / `SelfParam` instruction in the entry block whose
/// `var_name` is present in `parent_var_types`, this inserts a
/// [`crate::ssa::type_facts::TypeFact`] with the parent's kind
/// (`nullable` is set only when that kind is `TypeKind::Null`).
///
/// An existing fact is replaced only when its kind is `TypeKind::Unknown`;
/// any other existing fact (e.g. one seeded from `BodyMeta.param_types` for
/// a real formal that happens to share a name) is left untouched.
///
/// Does nothing when `parent_var_types` is empty or the body has no blocks.
fn inject_external_type_facts(
    ssa: &crate::ssa::SsaBody,
    type_facts: &mut crate::ssa::type_facts::TypeFactResult,
    parent_var_types: &HashMap<String, crate::ssa::type_facts::TypeKind>,
) {
    use crate::ssa::ir::SsaOp;
    use crate::ssa::type_facts::{TypeFact, TypeKind};

    if parent_var_types.is_empty() || ssa.blocks.is_empty() {
        return;
    }

    for inst in ssa.blocks[0].body.iter() {
        let is_param = matches!(inst.op, SsaOp::Param { .. } | SsaOp::SelfParam);
        if !is_param {
            continue;
        }

        // `analyze_types_with_param_types` may already have typed this value
        // from BodyMeta.param_types; a declared formal wins. The analysis
        // also seeds an Unknown placeholder for unparameterised Param ops,
        // and those placeholders must still be overridden.
        let already_typed = type_facts
            .facts
            .get(&inst.value)
            .is_some_and(|fact| !matches!(fact.kind, TypeKind::Unknown));
        if already_typed {
            continue;
        }

        let parent_kind = inst
            .var_name
            .as_deref()
            .and_then(|name| parent_var_types.get(name));
        let Some(kind) = parent_kind else {
            continue;
        };

        type_facts.facts.insert(
            inst.value,
            TypeFact {
                kind: kind.clone(),
                nullable: matches!(kind, TypeKind::Null),
            },
        );
    }
}
/// Analyse a single body with an optional parent seed.
///
/// Shared logic extracted from `analyse_multi_body` to avoid deep nesting.
#[allow(clippy::type_complexity)]
fn analyse_body_with_seed(
body: &BodyCfg,
lang: Lang,
@ -698,9 +817,11 @@ fn analyse_body_with_seed(
seed: Option<&HashMap<ssa_transfer::BindingKey, crate::taint::domain::VarTaint>>,
import_bindings: Option<&crate::cfg::ImportBindings>,
cross_file_bodies: Option<&std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>>,
parent_var_types: Option<&HashMap<String, crate::ssa::type_facts::TypeKind>>,
) -> (
Vec<Finding>,
Option<HashMap<ssa_transfer::BindingKey, crate::taint::domain::VarTaint>>,
Option<HashMap<String, crate::ssa::type_facts::TypeKind>>,
) {
let cfg = &body.graph;
let entry = body.entry;
@ -757,12 +878,21 @@ fn analyse_body_with_seed(
match ssa_result {
Ok(mut ssa_body) => {
let opt = crate::ssa::optimize_ssa_with_param_types(
let mut opt = crate::ssa::optimize_ssa_with_param_types(
&mut ssa_body,
cfg,
Some(lang),
&body.meta.param_types,
);
// Forward parent-body type facts onto closure-captured Param ops
// before any consumer reads `opt.type_facts`. This is the lever
// that makes bound-variable receiver idioms work in scoped bodies
// (`let c = ldap.createClient(...); function f() { c.search(...) }`)
// — without it the inner `c` SSA value stays Unknown because the
// per-body type-fact pass cannot see the enclosing definition.
if let Some(pvt) = parent_var_types {
inject_external_type_facts(&ssa_body, &mut opt.type_facts, pvt);
}
if tracing::enabled!(tracing::Level::TRACE) {
tracing::trace!(
func = body.meta.name.as_deref().unwrap_or("<anon>"),
@ -811,6 +941,8 @@ fn analyse_body_with_seed(
receiver_seed: None,
const_values: Some(&opt.const_values),
type_facts: Some(&opt.type_facts),
xml_parser_config: Some(&opt.xml_parser_config),
xpath_config: Some(&opt.xpath_config),
ssa_summaries,
extra_labels,
base_aliases: Some(&opt.alias_result),
@ -909,7 +1041,16 @@ fn analyse_body_with_seed(
&transfer,
body_id,
);
(findings, Some(exit_state))
// Snapshot named TypeKinds so child bodies can pick up
// closure-captured types (e.g. an outer `LdapClient` flowing
// into an inner function via free-variable read).
let named_types = extract_named_type_facts(&ssa_body, &opt.type_facts);
let named_types = if named_types.is_empty() {
None
} else {
Some(named_types)
};
(findings, Some(exit_state), named_types)
}
Err(e) => {
// SSA lowering produced no analyzable body. We still surface
@ -929,7 +1070,7 @@ fn analyse_body_with_seed(
// Drain the collector so the note does not bleed into the
// next body (which will call reset on entry, but be explicit).
let _ = ssa_transfer::take_body_engine_notes();
(Vec::new(), None)
(Vec::new(), None, None)
}
}
}
@ -967,6 +1108,14 @@ fn analyse_multi_body(
HashMap<ssa_transfer::BindingKey, crate::taint::domain::VarTaint>,
> = HashMap::new();
// Per-body `var_name → TypeKind` snapshots, used to forward closure-
// captured types from parent bodies into their children's type-fact
// results. Only populated when a body produces a non-empty set of
// typed named values, i.e. it has at least one named SSA value with
// a concrete `TypeKind` after optimisation.
let mut body_var_types: HashMap<BodyId, HashMap<String, crate::ssa::type_facts::TypeKind>> =
HashMap::new();
// ── Pass 1: lexical containment propagation ──────────────────────
for &idx in &order {
let body = &file_cfg.bodies[idx];
@ -975,8 +1124,12 @@ fn analyse_multi_body(
.meta
.parent_body_id
.and_then(|pid| body_exit_states.get(&pid));
let parent_var_types = body
.meta
.parent_body_id
.and_then(|pid| body_var_types.get(&pid));
let (findings, exit_state) = analyse_body_with_seed(
let (findings, exit_state, var_types) = analyse_body_with_seed(
body,
lang,
namespace,
@ -990,6 +1143,7 @@ fn analyse_multi_body(
parent_seed,
import_bindings,
cross_file_bodies,
parent_var_types,
);
tracing::debug!(
body_id = body.meta.id.0,
@ -1003,6 +1157,9 @@ fn analyse_multi_body(
if let Some(es) = exit_state {
body_exit_states.insert(body.meta.id, es);
}
if let Some(vt) = var_types {
body_var_types.insert(body.meta.id, vt);
}
}
// ── Pass 2: JS/TS iterative convergence ──────────────────────────
@ -1163,8 +1320,12 @@ fn analyse_multi_body(
.meta
.parent_body_id
.and_then(|pid| body_exit_states.get(&pid));
let parent_var_types = body
.meta
.parent_body_id
.and_then(|pid| body_var_types.get(&pid));
let (findings, exit_state) = analyse_body_with_seed(
let (findings, exit_state, var_types) = analyse_body_with_seed(
body,
lang,
namespace,
@ -1178,11 +1339,15 @@ fn analyse_multi_body(
parent_seed,
import_bindings,
cross_file_bodies,
parent_var_types,
);
// Phase-B: replace (not append) this body's findings
// in the cache. Previous rounds' findings for this
// body are superseded by the new round's output.
findings_by_body.insert(body.meta.id, findings);
if let Some(vt) = var_types {
body_var_types.insert(body.meta.id, vt);
}
if let Some(es) = exit_state {
// Phase-C Gauss-Seidel: immediately publish this
// body's filtered exit into `current_seed` and
@ -2073,6 +2238,8 @@ fn augment_summaries_with_child_sinks(
receiver_seed: None,
const_values: None,
type_facts: None,
xml_parser_config: None,
xpath_config: None,
ssa_summaries: Some(summaries),
extra_labels: None,
base_aliases: None,
@ -2135,6 +2302,8 @@ fn augment_summaries_with_child_sinks(
receiver_seed: None,
const_values: None,
type_facts: None,
xml_parser_config: None,
xpath_config: None,
ssa_summaries: Some(summaries),
extra_labels: None,
base_aliases: None,

View file

@ -30,6 +30,26 @@ pub enum PredicateKind {
/// and the **false branch is the validated path**. Use inverted polarity
/// when applying branch predicates.
ShellMetaValidated,
/// Inline relative-URL validation: `x.startsWith("/")` / `x.starts_with("/")`
/// / `x.startswith("/")` / `strpos(x, "/") === 0`. The TRUE branch
/// constrains `x` to a relative path (no scheme, no `//host`), which is
/// the standard inline form of an open-redirect sanitiser when the
/// developer didn't extract a named helper. Cap-aware: clears
/// [`crate::labels::Cap::OPEN_REDIRECT`] only on the validated branch
/// so non-redirect sinks downstream still fire on the residual taint.
/// Mirrors [`ShellMetaValidated`](Self::ShellMetaValidated) but with
/// non-inverted polarity (true branch is the validated path).
RelativeUrlValidated,
/// Inline URL-parse + host-allowlist validation:
/// `new URL(x).host === ALLOWED` (JS/TS),
/// `urlparse(x).netloc == ALLOWED` (Python),
/// `urlparse(x).hostname in ALLOWED_HOSTS` (Python).
/// The TRUE branch constrains the parsed URL's host to a developer-chosen
/// allowlist value, the canonical multi-statement open-redirect sanitiser
/// for absolute URLs. Cap-aware: clears
/// [`crate::labels::Cap::OPEN_REDIRECT`] only on the validated branch so
/// non-redirect sinks downstream still fire on residual taint.
HostAllowlistValidated,
/// Bounded-length rejection: `x.len() > N` / `x.length < N` with N >= 2.
///
/// Commonly paired with `ShellMetaValidated` in OR-chain rejection
@ -178,6 +198,324 @@ fn is_metachar_regex_class(text: &str) -> bool {
false
}
/// Decide whether `text` is an inline relative-URL validation, i.e. a check
/// that a string begins with `/`.
///
/// Matching is done on an ASCII-lowercased copy of `text`. Recognised
/// shapes:
///
/// * `<X>.startsWith("/")`, JS/TS/Java/Kotlin
/// * `<X>.starts_with("/")`, Rust
/// * `<X>.startswith("/")`, Python
/// * `strpos($X, "/") === 0` / `mb_strpos(...)`, PHP (`==` also accepted)
/// * `<X>[0] === "/"` / `<X>[0] == '/'`, JS/TS direct index
///
/// A leading `!` / `not` is NOT stripped here; the caller's classification
/// path deals with negation through its polarity-inversion machinery.
fn is_leading_slash_check(text: &str) -> bool {
    // `rest` is whatever follows an equality operator; true when it opens
    // with a quoted single `/` (either quote style).
    fn begins_with_slash_literal(rest: &str) -> bool {
        let rest = rest.trim_start();
        rest.starts_with("'/'") || rest.starts_with("\"/\"")
    }

    // Method-call shape. `startsWith` lower-cases to `startswith`, so the
    // first probe covers JS/TS/Java and Python; the underscore probe covers
    // Rust. Only the first occurrence of each probe is examined.
    fn prefix_method_matches(lower: &str) -> bool {
        for method in [".startswith(", ".starts_with("] {
            let Some(at) = lower.find(method) else {
                continue;
            };
            let first_arg = extract_first_string_arg(&lower[at + method.len()..]);
            if first_arg.is_some_and(|needle| needle == "/") {
                return true;
            }
        }
        false
    }

    // PHP shape: `strpos($x, "/") === 0` / `mb_strpos($x, "/") == 0`.
    // The comparison against the literal `0` is mandatory; a bare truthy
    // `strpos(...)` is deliberately not recognised.
    fn strpos_at_zero_matches(lower: &str) -> bool {
        for prefix in ["strpos(", "mb_strpos("] {
            let Some(call_at) = lower.find(prefix) else {
                continue;
            };
            let after_open = &lower[call_at + prefix.len()..];

            // Find the `)` that balances the call's own opening paren.
            let mut open_parens = 1usize;
            let mut close_at = None;
            for (i, byte) in after_open.bytes().enumerate() {
                if byte == b'(' {
                    open_parens += 1;
                } else if byte == b')' {
                    open_parens -= 1;
                    if open_parens == 0 {
                        close_at = Some(i);
                        break;
                    }
                }
            }
            let Some(close_at) = close_at else {
                continue;
            };
            let args = &after_open[..close_at];

            // The first comma outside any nested bracket separates the
            // haystack from the needle; no comma means fewer than two args.
            let mut nesting = 0i32;
            let mut comma_at = None;
            for (i, ch) in args.char_indices() {
                match ch {
                    '(' | '[' | '{' => nesting += 1,
                    ')' | ']' | '}' => nesting -= 1,
                    ',' if nesting == 0 => {
                        comma_at = Some(i);
                        break;
                    }
                    _ => {}
                }
            }
            let Some(comma_at) = comma_at else {
                continue;
            };

            // Needle with its surrounding quotes removed.
            let needle = args[comma_at + 1..]
                .trim()
                .trim_matches(|c: char| c == '"' || c == '\'');
            if needle != "/" {
                continue;
            }

            // What follows the call must be exactly `=== 0` or `== 0`
            // (whitespace-tolerant).
            let tail = after_open[close_at + 1..].trim_start();
            let compared = tail
                .strip_prefix("===")
                .or_else(|| tail.strip_prefix("=="));
            if compared.is_some_and(|rhs| rhs.trim() == "0") {
                return true;
            }
        }
        false
    }

    // Direct subscript shape: the literal `[0]`, an equality operator with
    // zero or one space before it, then a quoted `/`.
    fn subscript_zero_matches(lower: &str) -> bool {
        ["[0] ===", "[0]===", "[0] ==", "[0]=="].iter().any(|probe| {
            lower
                .find(*probe)
                .is_some_and(|at| begins_with_slash_literal(&lower[at + probe.len()..]))
        })
    }

    let lower = text.to_ascii_lowercase();
    prefix_method_matches(&lower)
        || strpos_at_zero_matches(&lower)
        || subscript_zero_matches(&lower)
}
/// Decide whether `text` is an inline URL-parse + host-allowlist validation.
///
/// Two families of shapes are recognised, both requiring an `==` or `!=`
/// somewhere in the condition (which `===` / `!==` also satisfy):
///
/// **Parse call in the condition** (matched case-insensitively):
///
/// * `new URL(<X>).host === ALLOWED` / `.hostname` / `.origin` (JS/TS)
/// * `urlparse(<X>).netloc == ALLOWED` / `.hostname` (Python, including the
///   qualified `urllib.parse.urlparse(<X>)` form)
/// * any text containing `url.parse(` together with one of the accessors
///   above
///
/// **Parse on an earlier statement** (Rust/Go): the condition only holds the
/// accessor, so distinctive accessor names gate the match:
///
/// * `<parsed>.host_str() == ALLOWED` (Rust `url::Url::host_str()`)
/// * `<parsed>.Host == ALLOWED` / `<parsed>.Hostname() == ALLOWED`
///   (Go `*url.URL`, case-sensitive capital `H`)
///
/// A bare lowercase `u.host == X` without a parse call is intentionally not
/// matched; it is too generic and falls through to `Comparison`.
///
/// The right-hand side may be a string literal or an identifier
/// (`ALLOWED_HOST` / `cfg.allowed_origin`); it is not inspected. The
/// membership forms `ALLOWED_HOSTS.includes(new URL(<X>).host)` /
/// `urlparse(<X>).host in ALLOWED` are intentionally left to
/// `AllowlistCheck`.
///
/// Negation prefixes are not stripped; the caller's polarity-inversion
/// machinery handles `!`-wrapped forms.
fn is_host_allowlist_check(text: &str) -> bool {
    const PARSE_CALLS: [&str; 4] = [
        "new url(",
        "urlparse(",
        "url.parse(",
        "urllib.parse.urlparse(",
    ];
    const HOST_ACCESSORS: [&str; 4] = [".host", ".hostname", ".netloc", ".origin"];

    let lower = text.to_ascii_lowercase();

    // Without an equality operator the host is not being pinned to a value
    // (it could be assigned, indexed, used as a key, ...).
    let has_equality = lower.contains("==") || lower.contains("!=");
    if !has_equality {
        return false;
    }

    // Inline form: a parse call is present, so the verdict rests entirely on
    // whether a host-style accessor is applied somewhere in the text.
    if PARSE_CALLS.iter().any(|call| lower.contains(*call)) {
        return HOST_ACCESSORS
            .iter()
            .any(|accessor| lower.contains(*accessor));
    }

    // Multi-statement form: `parsed.host_str() == Some("x")` (Rust) or
    // `parsed.Host == "x"` / `parsed.Hostname() == "x"` (Go). The Go probe
    // runs on the original text because the capital `H` is what separates
    // it from a generic lowercase `.host` field.
    lower.contains(".host_str(") || has_capital_host_accessor(text)
}
/// Report whether `text` contains a Go-style capital-`H` URL host accessor.
///
/// Matches either of:
///
/// * `.Hostname(` anywhere in the text, or
/// * `.Host` that is not the prefix of a longer identifier (`.Hostess`,
///   `.Host_x`, ...) and is followed, after optional whitespace, by `==` or
///   `!=`.
///
/// Every `.Host` occurrence is examined, so an earlier non-matching one does
/// not hide a later comparison.
fn has_capital_host_accessor(text: &str) -> bool {
    const FIELD: &str = ".Host";

    if text.contains(".Hostname(") {
        return true;
    }

    text.match_indices(FIELD).any(|(at, _)| {
        let tail = &text[at + FIELD.len()..];
        // An identifier character right after `.Host` means this is a
        // different, longer field name.
        let continues_identifier = tail
            .chars()
            .next()
            .is_some_and(|c| c.is_ascii_alphanumeric() || c == '_');
        if continues_identifier {
            return false;
        }
        // Only a comparison counts; an assignment target or a call argument
        // does not.
        let tail = tail.trim_start();
        tail.starts_with("==") || tail.starts_with("!=")
    })
}
/// Pull the variable being validated out of a host-allowlist condition.
///
/// Inline form (parse and check in one expression, JS/TS/Python): looks for
/// `new URL(<X>)`, `urllib.parse.urlparse(<X>)`, `urlparse(<X>)` or
/// `URL.parse(<X>)` (matched case-insensitively) and returns `Some("X")`
/// when the first argument is a bare identifier. One leading `&` and one
/// leading PHP `$` sigil are stripped before the identifier test.
///
/// Multi-statement form (Rust/Go): when no inline parse call yields a
/// target, falls back to [`extract_host_accessor_receiver`], which returns
/// the receiver of `.host_str()` / `.Host` / `.Hostname()`, i.e. the
/// parsed-URL variable the downstream code redirects on.
///
/// Returns `None` when neither form yields a bare identifier, so branch
/// narrowing never targets a variable that does not exist. Mirrors the
/// conservative target shape used by [`extract_validation_target`].
fn extract_host_allowlist_target(text: &str) -> Option<String> {
    const PARSE_PROBES: [&str; 4] = [
        "new url(",
        "urllib.parse.urlparse(",
        "urlparse(",
        "url.parse(",
    ];

    // ASCII lower-casing keeps byte offsets aligned with `text`, so an
    // offset found in `lower` can slice the original (case-preserved) text.
    let lower = text.to_ascii_lowercase();

    let inline_target = PARSE_PROBES.iter().find_map(|probe| {
        let call_at = lower.find(*probe)?;
        let arg = first_call_arg(&text[call_at + probe.len()..])?;
        let arg = arg.strip_prefix('&').unwrap_or(arg).trim();
        let arg = arg.strip_prefix('$').unwrap_or(arg);
        (!arg.is_empty() && is_identifier(arg)).then(|| arg.to_string())
    });

    // Case-sensitive walk over the original text for the Go accessors.
    inline_target.or_else(|| extract_host_accessor_receiver(text))
}
/// Find the receiver of a host accessor in `text`: `<receiver>.host_str(`
/// (Rust), `<receiver>.Hostname(` (Go), or `<receiver>.Host` followed by
/// `==` / `!=` (Go).
///
/// The accessors are tried in that order and the first kind present in
/// `text` decides the result. For `.Host`, every occurrence is examined and
/// the first one that is a real comparison is used: it must not continue
/// into a longer identifier such as `.Hostess`, and must be followed by
/// `==` / `!=` after optional whitespace. This keeps the function in step
/// with `has_capital_host_accessor`, which also scans all occurrences.
///
/// Returns `Some(receiver)` only when the receiver is a bare identifier. A
/// leading `&` (e.g. Rust `&parsed.host_str()`) is tolerated. A receiver
/// that is itself qualified (`req.parsed.host_str()`, `a::b.Host == x`) is
/// rejected with `None`: its trailing segment is not known to be the
/// parsed-URL variable.
fn extract_host_accessor_receiver(text: &str) -> Option<String> {
    const GO_FIELD: &str = ".Host";

    // True when the `.Host` at byte offset `at` is a standalone field that
    // is being compared.
    let is_host_comparison = |at: usize| {
        let after = &text[at + GO_FIELD.len()..];
        let continues_identifier = after
            .chars()
            .next()
            .is_some_and(|c| c.is_ascii_alphanumeric() || c == '_');
        if continues_identifier {
            return false;
        }
        let after = after.trim_start();
        after.starts_with("==") || after.starts_with("!=")
    };

    let accessor_at = text
        .find(".host_str(") // Rust, case-stable
        .or_else(|| text.find(".Hostname(")) // Go method form
        .or_else(|| {
            // Go field form: skip occurrences that are not comparisons
            // instead of giving up on the first one.
            text.match_indices(GO_FIELD)
                .map(|(at, _)| at)
                .find(|&at| is_host_comparison(at))
        })?;

    let before = &text[..accessor_at];
    let recv = trailing_identifier(before)?;

    // `trailing_identifier` stops at the first non-identifier character, so
    // a qualified receiver shows up as a `.` or `:` immediately before the
    // identifier rather than inside it.
    let qualifier = &before[..before.len() - recv.len()];
    if qualifier.ends_with('.') || qualifier.ends_with(':') {
        return None;
    }
    Some(recv)
}
/// Return the identifier token that ends `s`, if there is one.
///
/// Walks back from the end of `s` over ASCII alphanumerics and `_`.
/// Returns `None` when `s` does not end in such a character or when the
/// collected token starts with a digit.
///
/// `&parsed` → `Some("parsed")`, `foo.bar` → `Some("bar")`, `()` → `None`.
/// Used by [`extract_host_accessor_receiver`] to pull the parsed-URL var out
/// of `parsed.host_str() == ...`.
fn trailing_identifier(s: &str) -> Option<String> {
    let ident_len = s
        .bytes()
        .rev()
        .take_while(|b| b.is_ascii_alphanumeric() || *b == b'_')
        .count();
    // Only ASCII bytes were counted, so this offset is a char boundary.
    let ident = &s[s.len() - ident_len..];
    match ident.as_bytes().first() {
        Some(first) if !first.is_ascii_digit() => Some(ident.to_string()),
        _ => None,
    }
}
/// Check whether `text` looks like a bounded-length rejection:
/// `x.len() > N`, `x.len() < N`, `x.length >= N`, etc. where `N` is an
/// integer literal >= 2. Excludes `> 0` / `>= 1` / `< 1`, those are
@ -330,6 +668,28 @@ pub fn classify_condition(text: &str) -> PredicateKind {
return PredicateKind::ShellMetaValidated;
}
// ── Inline relative-URL validation ──────────────────────────────────
//
// `x.startsWith("/")` (JS/TS/Java/Kotlin), `x.starts_with("/")` (Rust),
// `x.startswith("/")` (Python), `strpos($x, "/") === 0` (PHP).
// The TRUE branch constrains `x` to a leading-slash relative path —
// the canonical inline open-redirect sanitiser. Matched BEFORE
// AllowlistCheck (which would otherwise capture `.starts_with(`).
if is_leading_slash_check(text) {
return PredicateKind::RelativeUrlValidated;
}
// ── Host-allowlist URL-parse validation ─────────────────────────────
//
// `new URL(x).host === ALLOWED` (JS/TS), `urlparse(x).netloc == ALLOWED`
// (Python), etc. Matched BEFORE AllowlistCheck so the membership form
// `ALLOWED.includes(new URL(x).host)` doesn't fall through here, and
// BEFORE the generic Comparison branch so the equality operator
// doesn't classify generically.
if is_host_allowlist_check(text) {
return PredicateKind::HostAllowlistValidated;
}
// ── Allowlist / membership checks ────────────────────────────────────
if lower.contains(".includes(")
|| lower.contains(".include?(")
@ -552,6 +912,19 @@ pub fn classify_condition_with_target(text: &str) -> (PredicateKind, Option<Stri
let target = extract_validation_target(text);
(kind, target)
}
PredicateKind::RelativeUrlValidated => {
// Receiver of `.startsWith("/")` / `.startswith("/")` /
// `.starts_with("/")`, or first arg of `strpos($x, "/")`.
// Same machinery as ShellMetaValidated.
let target = extract_validation_target(text);
(kind, target)
}
PredicateKind::HostAllowlistValidated => {
// Argument of the parse call: `new URL(x).host` → `x`,
// `urlparse(x).netloc` → `x`.
let target = extract_host_allowlist_target(text);
(kind, target)
}
PredicateKind::Comparison => {
// `x === '/login'`, `x == 5`, `null != obj`, when exactly one
// side is a literal, extract the identifier side as the target.
@ -1731,6 +2104,150 @@ mod tests {
assert!(is_bounded_length_check("x.len() > 2"));
assert!(is_bounded_length_check("x.len() <= 256"));
}
// ── HostAllowlistValidated ────────────────────────────────────────────
#[test]
fn classify_host_allowlist_js_strict_eq() {
    // `.host`, `.hostname` and `.origin` on an inline `new URL(...)` are all
    // accepted, against an identifier or a string literal.
    let conditions = [
        "new URL(target).host === ALLOWED_HOST",
        "new URL(target).hostname === \"trusted.example.com\"",
        "new URL(target).origin === ALLOWED_ORIGIN",
    ];
    for condition in conditions {
        assert_eq!(
            classify_condition(condition),
            PredicateKind::HostAllowlistValidated
        );
    }
}
#[test]
fn classify_host_allowlist_python_urlparse() {
    // Bare and fully-qualified `urlparse` spellings.
    let conditions = [
        "urlparse(target).netloc == ALLOWED_HOST",
        "urllib.parse.urlparse(target).hostname == \"trusted.example.com\"",
    ];
    for condition in conditions {
        assert_eq!(
            classify_condition(condition),
            PredicateKind::HostAllowlistValidated
        );
    }
}
#[test]
fn target_host_allowlist_extracts_parse_arg_js() {
    // The target is the argument of the parse call, not the accessor.
    assert_eq!(
        classify_condition_with_target("new URL(target).host === ALLOWED_HOST"),
        (
            PredicateKind::HostAllowlistValidated,
            Some("target".to_string())
        )
    );
}
#[test]
fn target_host_allowlist_extracts_parse_arg_python() {
    assert_eq!(
        classify_condition_with_target("urlparse(target).netloc == ALLOWED_HOST"),
        (
            PredicateKind::HostAllowlistValidated,
            Some("target".to_string())
        )
    );
}
#[test]
fn host_allowlist_requires_parse_call() {
    // Bare `.host == X` without a parse call is not host-allowlist.
    assert_ne!(
        classify_condition("u.host == ALLOWED_HOST"),
        PredicateKind::HostAllowlistValidated
    );
}
#[test]
fn host_allowlist_requires_equality_op() {
    // `new URL(x)` without an equality op is not host-allowlist.
    assert_ne!(
        classify_condition("new URL(target).host"),
        PredicateKind::HostAllowlistValidated
    );
}
// ── Multi-statement form: Rust `.host_str()` ──────────────────────────
#[test]
fn classify_host_allowlist_rust_host_str() {
    // No parse call in the condition; `.host_str()` alone gates the match.
    let kind = classify_condition("parsed.host_str() == Some(\"trusted.example.com\")");
    assert_eq!(kind, PredicateKind::HostAllowlistValidated);
}
#[test]
fn target_host_allowlist_rust_host_str_extracts_receiver() {
    // The target is the accessor's receiver (the parsed-URL variable).
    assert_eq!(
        classify_condition_with_target("parsed.host_str() == Some(\"trusted.example.com\")"),
        (
            PredicateKind::HostAllowlistValidated,
            Some("parsed".to_string())
        )
    );
}
#[test]
fn target_host_allowlist_rust_host_str_strips_amp_deref() {
    // `&parsed.host_str()` is not idiomatic but we still pull out the
    // receiver via the trailing-identifier walk.
    assert_eq!(
        classify_condition_with_target("&parsed.host_str() == Some(\"trusted.com\")"),
        (
            PredicateKind::HostAllowlistValidated,
            Some("parsed".to_string())
        )
    );
}
// ── Multi-statement form: Go `.Host` / `.Hostname()` ──────────────────
#[test]
fn classify_host_allowlist_go_capital_host() {
    let kind = classify_condition("parsed.Host == \"trusted.example.com\"");
    assert_eq!(kind, PredicateKind::HostAllowlistValidated);
}
#[test]
fn classify_host_allowlist_go_hostname_method() {
    let kind = classify_condition("parsed.Hostname() == \"trusted.example.com\"");
    assert_eq!(kind, PredicateKind::HostAllowlistValidated);
}
#[test]
fn target_host_allowlist_go_extracts_receiver() {
    // Field form: the receiver of `.Host` is the target.
    assert_eq!(
        classify_condition_with_target("parsed.Host == \"trusted.example.com\""),
        (
            PredicateKind::HostAllowlistValidated,
            Some("parsed".to_string())
        )
    );
}
#[test]
fn target_host_allowlist_go_hostname_extracts_receiver() {
    // Method form: the receiver of `.Hostname()` is the target.
    assert_eq!(
        classify_condition_with_target("parsed.Hostname() == \"trusted.example.com\""),
        (
            PredicateKind::HostAllowlistValidated,
            Some("parsed".to_string())
        )
    );
}
#[test]
fn host_allowlist_rejects_lowercase_host_field() {
    // `.host` (lowercase) without a parse call must NOT match — that
    // shape is too generic (could be any struct field named `host`).
    assert_ne!(
        classify_condition("u.host == ALLOWED_HOST"),
        PredicateKind::HostAllowlistValidated
    );
}
#[test]
fn host_allowlist_rejects_capital_host_without_eq() {
    // `parsed.Host` used as a side-effect call argument, not a guard.
    assert_ne!(
        classify_condition("log(parsed.Host)"),
        PredicateKind::HostAllowlistValidated
    );
}
#[test]
fn host_allowlist_rejects_capital_host_substring_in_identifier() {
    // `.Hostess` is NOT `.Host` — must not match.
    assert_ne!(
        classify_condition("party.Hostess == \"alice\""),
        PredicateKind::HostAllowlistValidated
    );
}
}
#[cfg(test)]

View file

@ -277,7 +277,14 @@ pub fn ssa_events_to_findings(
ssa: &SsaBody,
cfg: &Cfg,
) -> Vec<crate::taint::Finding> {
type FindingDedupKey = (usize, usize, Option<(String, u32, u32)>);
// The dedup key includes `cap_bits` so the multi-gate dispatch can
// co-emit separate findings for distinct capabilities at the same
// (origin, sink) pair (e.g. PHP `header("Location: " . $url)` fires
// both HEADER_INJECTION and OPEN_REDIRECT, attributed by the gate
// filters' per-cap masks). Single-cap call sites are unaffected:
// every event in that case carries the same `sink_caps`, so the key
// collapses identically with or without the extra component.
type FindingDedupKey = (usize, usize, Option<(String, u32, u32)>, u32);
let mut findings = Vec::new();
let mut seen: HashSet<FindingDedupKey> = HashSet::new();
@ -345,12 +352,14 @@ pub fn ssa_events_to_findings(
.as_ref()
.map(|l| (l.file_rel.clone(), l.line, l.col));
for (val, caps, origins) in &event.tainted_values {
let cap_specificity = (*caps & event.sink_caps).bits().count_ones() as u8;
let effective_caps = event.sink_caps & *caps;
let cap_specificity = effective_caps.bits().count_ones() as u8;
for origin in origins {
if seen.insert((
origin.node.index(),
event.sink_node.index(),
loc_key.clone(),
effective_caps.bits(),
)) {
let hop_count = block_distance(ssa, origin.node, event.sink_node);
let flow_steps = reconstruct_flow_path(*val, origin, event.sink_node, ssa, cfg);

View file

@ -21,7 +21,7 @@ pub(super) const MAX_INLINE_BLOCKS: usize = 500;
/// Compact cache key: per-arg-position cap bits (sorted, non-empty
/// only). Origin identity is not part of the key.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) struct ArgTaintSig(pub(super) SmallVec<[(usize, u16); 4]>);
pub(crate) struct ArgTaintSig(pub(super) SmallVec<[(usize, u32); 4]>);
/// Call-site-adapted result of inline-analyzing a callee. Built fresh
/// per call site so origins point to the current caller's chain.
@ -79,7 +79,7 @@ pub(crate) struct ReturnShape {
impl CachedInlineShape {
/// Cap bits of the return value, or zero if this shape records "no
/// return taint". Used by [`inline_cache_fingerprint`].
fn return_caps_bits(&self) -> u16 {
fn return_caps_bits(&self) -> u32 {
self.0.as_ref().map(|s| s.caps.bits()).unwrap_or(0)
}
}
@ -101,7 +101,7 @@ pub(crate) fn inline_cache_clear_epoch(cache: &mut InlineCache) {
#[allow(dead_code)]
pub(crate) fn inline_cache_fingerprint(
cache: &InlineCache,
) -> HashMap<(FuncKey, ArgTaintSig), u16> {
) -> HashMap<(FuncKey, ArgTaintSig), u32> {
cache
.iter()
.map(|(k, v)| (k.clone(), v.return_caps_bits()))

View file

@ -105,6 +105,18 @@ pub struct SsaTaintTransfer<'a> {
/// Type facts from type analysis.
/// Used for type-aware sink filtering (e.g., suppress SQL injection for int-typed values).
pub type_facts: Option<&'a crate::ssa::type_facts::TypeFactResult>,
/// XML-parser config facts. Used to suppress XXE bits at parse-class
/// sinks whose receiver was provably hardened
/// (`setFeature(FEATURE_SECURE_PROCESSING, true)`, etc.). Strictly
/// additive: `None` falls back to the existing flat / gated XXE
/// classification.
pub xml_parser_config: Option<&'a crate::ssa::xml_config::XmlParserConfigResult>,
/// XPath-receiver config facts. Used to suppress XPATH_INJECTION at
/// `evaluate` / `compile` sinks whose receiver was provably bound to
/// an `XPathVariableResolver` (parameterised-XPath shape). Strictly
/// additive: `None` falls back to the existing flat / gated XPATH
/// classification.
pub xpath_config: Option<&'a crate::ssa::xpath_config::XPathConfigResult>,
/// Precise per-function SSA summaries for intra-file callee resolution.
/// Checked before legacy FuncSummary resolution.
///
@ -1207,6 +1219,85 @@ fn apply_branch_predicates(
}
}
// RelativeUrlValidated: TRUE branch is the validated path
// (`x.startsWith("/")` succeeded → `x` cannot redirect off-host).
// Cap-aware: clear `Cap::OPEN_REDIRECT` only; non-redirect sinks
// (XSS / SQLi / FILE_IO) downstream still fire on residual taint.
if kind == PredicateKind::RelativeUrlValidated && polarity {
for var in condition_vars {
let mut to_clear: SmallVec<[SsaValue; 4]> = SmallVec::new();
for (val, _) in state.values.iter() {
if let Some(name) = ssa
.value_defs
.get(val.0 as usize)
.and_then(|vd| vd.var_name.as_deref())
{
if name == var {
to_clear.push(*val);
}
}
}
for val in to_clear {
if let Some(taint) = state.get(val).cloned() {
let new_caps = taint.caps & !Cap::OPEN_REDIRECT;
if new_caps.is_empty() {
state.remove(val);
} else {
state.set(
val,
VarTaint {
caps: new_caps,
origins: taint.origins,
uses_summary: taint.uses_summary,
},
);
}
}
}
}
}
// HostAllowlistValidated: TRUE branch is the validated path
// (`new URL(x).host === ALLOWED` succeeded → `x` cannot redirect off-host).
// Cap-aware: clear `Cap::OPEN_REDIRECT` only; non-redirect sinks downstream
// still fire on the residual taint caps. Mirrors the
// `RelativeUrlValidated` handler exactly; the only difference is the
// recogniser shape (multi-statement parse + host comparison instead of
// inline leading-slash check).
if kind == PredicateKind::HostAllowlistValidated && polarity {
for var in condition_vars {
let mut to_clear: SmallVec<[SsaValue; 4]> = SmallVec::new();
for (val, _) in state.values.iter() {
if let Some(name) = ssa
.value_defs
.get(val.0 as usize)
.and_then(|vd| vd.var_name.as_deref())
{
if name == var {
to_clear.push(*val);
}
}
}
for val in to_clear {
if let Some(taint) = state.get(val).cloned() {
let new_caps = taint.caps & !Cap::OPEN_REDIRECT;
if new_caps.is_empty() {
state.remove(val);
} else {
state.set(
val,
VarTaint {
caps: new_caps,
origins: taint.origins,
uses_summary: taint.uses_summary,
},
);
}
}
}
}
}
// ShellMetaValidated: inverted polarity; the FALSE branch (no metachar
// found) is the validated path; the TRUE branch is the rejection path.
//
@ -2203,6 +2294,8 @@ fn inline_analyse_callee(
receiver_seed: receiver_seed.as_ref(),
const_values: Some(&callee_body.opt.const_values),
type_facts: Some(&callee_body.opt.type_facts),
xml_parser_config: Some(&callee_body.opt.xml_parser_config),
xpath_config: Some(&callee_body.opt.xpath_config),
ssa_summaries: transfer.ssa_summaries,
extra_labels: transfer.extra_labels,
base_aliases: Some(&callee_body.opt.alias_result),
@ -5891,6 +5984,34 @@ fn collect_block_events(
sink_caps &= !Cap::DATA_EXFIL;
}
// Receiver-type-incompatibility stripping. When the receiver's type
// proves a structurally-attached cap cannot apply (e.g. an
// `LdapClient` receiver carrying an `HTML_ESCAPE` Sink label that was
// attached to the CFG node by a `*.send`/`*.json`-style suffix
// matcher), drop the offending bits *before* the type-qualified-
// resolution branch below, so that branch is reachable on the
// remaining empty `sink_caps` and can re-anchor a precise sink class
// (`LdapClient.search` → `Cap::LDAP_INJECTION`). Both the
// flow-sensitive type from `path_env` and the static type from
// `type_facts` are consulted; the static path is what enables
// closure-captured receivers (parent body → child body via
// [`crate::taint::inject_external_type_facts`]) to participate.
if let SsaOp::Call {
receiver: Some(rv), ..
} = &inst.op
{
if let Some(ref env) = state.path_env {
if let Some(kind) = env.get(*rv).types.as_singleton() {
sink_caps &= !receiver_incompatible_sink_caps(&kind, sink_caps);
}
}
if let Some(tf) = transfer.type_facts {
if let Some(kind) = tf.get_type(*rv) {
sink_caps &= !receiver_incompatible_sink_caps(kind, sink_caps);
}
}
}
// Type-qualified sink resolution: when normal sink resolution found nothing,
// try using the receiver's inferred type to construct a qualified callee name.
if sink_caps.is_empty() {
@ -5954,6 +6075,39 @@ fn collect_block_events(
}
}
// ADD XXE on opt-in. When the receiver was constructed
// with an explicit external-entity opt-in
// (`new XMLParser({ processEntities: true })`,
// `lxml.etree.XMLParser(resolve_entities=True)`), the subsequent
// `parser.parse(xml)` is an XXE flow even though the callee
// carries no flat XXE rule (fast-xml-parser and lxml are
// XXE-safe by default). Runs BEFORE the empty check below so a
// previously-empty sink_caps becomes non-empty and downstream
// emission proceeds. The complementary `xxe_safe` suppress path
// still runs after this; a call where the receiver was both
// opt-in AND later hardened by a setter results in net-zero
// (suppress strips what we added).
if let SsaOp::Call {
receiver: Some(rv),
callee: callee_str,
..
} = &inst.op
{
if let Some(xc) = transfer.xml_parser_config {
if xc.is_unsafe_explicit(*rv) {
let suffix = callee_str
.rsplit(['.', ':'])
.next()
.unwrap_or(callee_str.as_str());
// `feed` covers Python lxml incremental parsing
// (`parser.feed(body); parser.close()`).
if matches!(suffix, "parse" | "parseString" | "parseFromString" | "feed") {
sink_caps |= Cap::XXE;
}
}
}
}
if sink_caps.is_empty() {
// Callback pattern: check if callee has source_to_callback and the
// actual callback argument has a matching param_to_sink.
@ -6055,17 +6209,89 @@ fn collect_block_events(
continue;
}
// Receiver type incompatibility check.
// If the receiver's flow-sensitive type proves it cannot be the kind
// of object the sink expects (e.g., Int receiver → not an HTTP response
// sink), strip those sink caps.
if let Some(ref env) = state.path_env {
if sink_caps.is_empty() {
continue;
}
// XXE config-fact suppression. A parse-class sink whose receiver
// was provably hardened (`setFeature(FEATURE_SECURE_PROCESSING,
// true)`, `setExpandEntityReferences(false)`, etc.) is not an XXE
// flow. Drop the bit before downstream sink emission. Runs after
// type-qualified resolution / module alias resolution so the XXE
// bit added by `XmlParser.parse` resolution is visible here.
if sink_caps.intersects(Cap::XXE) {
if let SsaOp::Call {
receiver: Some(rv), ..
} = &inst.op
{
if let Some(kind) = env.get(*rv).types.as_singleton() {
sink_caps &= !receiver_incompatible_sink_caps(&kind, sink_caps);
if let Some(xc) = transfer.xml_parser_config {
if crate::ssa::xml_config::xxe_safe(Some(*rv), xc) {
sink_caps &= !Cap::XXE;
}
}
}
}
if sink_caps.is_empty() {
continue;
}
// XPath resolver-binding suppression. An XPath `evaluate` /
// `compile` sink whose receiver was provably bound to an
// `XPathVariableResolver` is treated as parameterised and the
// XPATH_INJECTION bit is stripped. Mirrors the XXE config-fact
// shape above. Only fires when the receiver also carries
// `TypeKind::XPathClient` (gates the suppression behind
// type-fact disambiguation so a generic `obj.evaluate(...)`
// matched as XPATH_INJECTION via name-only labelling does not
// have the bit accidentally cleared).
if sink_caps.intersects(Cap::XPATH_INJECTION) {
if let SsaOp::Call {
receiver: Some(rv), ..
} = &inst.op
{
if let Some(xpc) = transfer.xpath_config {
let receiver_is_xpath = transfer
.type_facts
.and_then(|tf| tf.get_type(*rv))
.map(|kind| matches!(kind, crate::ssa::type_facts::TypeKind::XPathClient))
.unwrap_or(false);
if receiver_is_xpath && crate::ssa::xpath_config::xpath_safe(Some(*rv), xpc) {
sink_caps &= !Cap::XPATH_INJECTION;
}
}
}
}
if sink_caps.is_empty() {
continue;
}
// Prototype-pollution suppression (flow-sensitive).
// `Object.create(null)` produces a `NullPrototypeObject`-typed
// value; subscript writes to such an object cannot pollute
// `Object.prototype` because there is no prototype chain.
// Receiver SsaValue is read off the synthetic `__index_set__`
// Call op; phi joins downgrade to `Unknown` via `TypeFact::meet`
// so an if/else where only one branch initialises with
// `Object.create(null)` keeps the PROTOTYPE_POLLUTION bit on
// the unsafe path.
if sink_caps.intersects(Cap::PROTOTYPE_POLLUTION) {
if let SsaOp::Call {
callee,
receiver: Some(rv),
..
} = &inst.op
{
if callee == "__index_set__" {
let receiver_is_null_proto = transfer
.type_facts
.and_then(|tf| tf.get_type(*rv))
.map(|kind| {
matches!(kind, crate::ssa::type_facts::TypeKind::NullPrototypeObject)
})
.unwrap_or(false);
if receiver_is_null_proto {
sink_caps &= !Cap::PROTOTYPE_POLLUTION;
}
}
}
}
@ -6436,7 +6662,7 @@ fn pick_primary_sink_sites(
return Vec::new();
};
let mut out: Vec<SinkSite> = Vec::new();
let mut seen: HashSet<(String, u32, u32, u16)> = HashSet::new();
let mut seen: HashSet<(String, u32, u32, u32)> = HashSet::new();
for (param_idx, sites) in param_to_sink_sites {
let Some(arg_vals) = args.get(*param_idx) else {
continue;
@ -6475,7 +6701,7 @@ fn pick_primary_sink_sites_from_resolved(
return Vec::new();
}
let mut out: Vec<SinkSite> = Vec::new();
let mut seen: HashSet<(String, u32, u32, u16)> = HashSet::new();
let mut seen: HashSet<(String, u32, u32, u32)> = HashSet::new();
for (_, sites) in param_to_sink_sites {
for site in sites {
if site.line == 0 {
@ -8127,13 +8353,36 @@ fn type_safe_for_taint_sink(kind: &crate::ssa::type_facts::TypeKind, cap: Cap) -
fn receiver_incompatible_sink_caps(kind: &crate::ssa::type_facts::TypeKind, sink_caps: Cap) -> Cap {
use crate::ssa::type_facts::TypeKind;
let mut remove = Cap::empty();
// HTML_ESCAPE requires HTTP response-like receiver
if sink_caps.intersects(Cap::HTML_ESCAPE) {
// HTML_ESCAPE / OPEN_REDIRECT / HEADER_INJECTION all require an HTTP
// response-like receiver: each is a write-side rule that fires when
// attacker data is rendered into / written onto the response stream
// (`*.send` / `*.redirect` / `*.setHeader` / etc.). Receivers proven
// to be a different class — directory-service connections (LDAP),
// database connections, file handles, in-memory collections, query-
// builder objects, URL values, HTTP clients (request-side), and so on
// — cannot host these sinks even when a same-named matcher
// (`*.send`, `*.set`, `*.append`) attaches the label by suffix.
let response_like_caps = Cap::HTML_ESCAPE | Cap::OPEN_REDIRECT | Cap::HEADER_INJECTION;
if sink_caps.intersects(response_like_caps) {
match kind {
TypeKind::HttpResponse => {} // compatible
TypeKind::Unknown | TypeKind::Object => {} // could be response
_ => {
remove |= Cap::HTML_ESCAPE;
remove |= sink_caps & response_like_caps;
}
}
}
// LDAP_INJECTION strictly requires a directory-service receiver.
// Non-LdapClient receivers carrying the cap by accident (e.g. a
// generic `*.search` suffix matcher firing on a Vec/HashMap) get the
// bit stripped. Unknown/Object stay untouched so type-fact gaps
// don't silently drop real sinks.
if sink_caps.intersects(Cap::LDAP_INJECTION) {
match kind {
TypeKind::LdapClient => {} // compatible
TypeKind::Unknown | TypeKind::Object => {} // could be ldap
_ => {
remove |= Cap::LDAP_INJECTION;
}
}
}
@ -9364,7 +9613,7 @@ fn resolve_callee_full(
}
// 0.5) Cross-file SSA summaries (GlobalSummaries.ssa_by_key) with
// optional Phase-6 hierarchy fan-out.
// optional class-hierarchy fan-out.
//
// When the call has an authoritative receiver type AND
// `GlobalSummaries::install_hierarchy` has been called AND the
@ -9468,7 +9717,7 @@ fn resolve_callee_full(
}
}
// 2) Global same-language (FuncSummary path) with Phase-6 hierarchy
// 2) Global same-language (FuncSummary path) with class-hierarchy
// fan-out. Same semantics as step 0.5 but on coarse FuncSummary
// entries: the SSA path missed because no implementer had an SSA
// summary, so we widen the FuncSummary lookup symmetrically.

View file

@ -246,6 +246,8 @@ pub fn extract_ssa_func_summary_full(
receiver_seed: None,
const_values: None,
type_facts: local_type_facts_ref,
xml_parser_config: None,
xpath_config: None,
ssa_summaries,
extra_labels: None,
base_aliases: None,
@ -792,6 +794,8 @@ pub fn extract_ssa_func_summary_full(
receiver_seed: None,
const_values: None,
type_facts: local_type_facts_ref,
xml_parser_config: None,
xpath_config: None,
ssa_summaries,
extra_labels: None,
base_aliases: None,

View file

@ -93,6 +93,8 @@ mod cross_file_tests {
type_facts: crate::ssa::type_facts::TypeFactResult {
facts: std::collections::HashMap::new(),
},
xml_parser_config: crate::ssa::xml_config::XmlParserConfigResult::default(),
xpath_config: crate::ssa::xpath_config::XPathConfigResult::default(),
alias_result: crate::ssa::alias::BaseAliasResult::empty(),
points_to: crate::ssa::heap::PointsToResult::empty(),
module_aliases: std::collections::HashMap::new(),
@ -251,7 +253,7 @@ mod inline_cache_epoch_tests {
ArgTaintSig(SmallVec::new())
}
fn shape(caps_bits: u16) -> CachedInlineShape {
fn shape(caps_bits: u32) -> CachedInlineShape {
CachedInlineShape(Some(ReturnShape {
caps: Cap::from_bits_retain(caps_bits),
internal_origins: SmallVec::new(),
@ -448,7 +450,7 @@ mod binding_key_tests {
// ── seed_lookup ────────────────────────────────────────────────────
fn taint(caps: u16) -> VarTaint {
fn taint(caps: u32) -> VarTaint {
VarTaint {
caps: Cap::from_bits_truncate(caps),
origins: smallvec![],
@ -989,6 +991,8 @@ mod goto_succ_propagation_tests {
receiver_seed: None,
const_values: None,
type_facts: None,
xml_parser_config: None,
xpath_config: None,
ssa_summaries: None,
extra_labels: None,
base_aliases: None,
@ -1079,6 +1083,8 @@ mod goto_succ_propagation_tests {
receiver_seed: None,
const_values: None,
type_facts: None,
xml_parser_config: None,
xpath_config: None,
ssa_summaries: None,
extra_labels: None,
base_aliases: None,
@ -1516,10 +1522,10 @@ mod receiver_candidates_field_proj_tests {
#[test]
fn field_proj_receiver_walks_to_typed_root_in_go() {
// Go is not Rust, so pre-Phase-4 the candidate walk would have
// returned ONLY the immediate receiver (v2 = FieldProj). With
// We walk through FieldProj.receiver to recover v0 (the
// typed root `c`).
// Go is not Rust, so before the FieldProj walk fix the candidate
// walk would have returned ONLY the immediate receiver
// (v2 = FieldProj). We now walk through FieldProj.receiver to
// recover v0 (the typed root `c`).
let body = body_with_field_proj_chain();
let cands =
super::super::receiver_candidates_for_type_lookup(SsaValue(2), Some(&body), Lang::Go);
@ -1709,7 +1715,7 @@ mod fanout_merge_tests {
];
let m = merge_resolved_summaries_fanout(a, b);
let mut sorted: Vec<(usize, u16)> = m
let mut sorted: Vec<(usize, u32)> = m
.param_to_sink
.iter()
.map(|(i, c)| (*i, c.bits()))
@ -2032,6 +2038,8 @@ mod field_write_tests {
receiver_seed: None,
const_values: None,
type_facts: None,
xml_parser_config: None,
xpath_config: None,
ssa_summaries: None,
extra_labels: None,
base_aliases: None,
@ -2114,6 +2122,8 @@ mod field_write_tests {
receiver_seed: None,
const_values: None,
type_facts: None,
xml_parser_config: None,
xpath_config: None,
ssa_summaries: None,
extra_labels: None,
base_aliases: None,
@ -2180,6 +2190,8 @@ mod field_write_tests {
receiver_seed: None,
const_values: None,
type_facts: None,
xml_parser_config: None,
xpath_config: None,
ssa_summaries: None,
extra_labels: None,
base_aliases: None,
@ -2324,6 +2336,8 @@ mod field_write_tests {
receiver_seed: None,
const_values: None,
type_facts: None,
xml_parser_config: None,
xpath_config: None,
ssa_summaries: None,
extra_labels: None,
base_aliases: None,
@ -2420,6 +2434,8 @@ mod container_elem_tests {
receiver_seed: None,
const_values: None,
type_facts: None,
xml_parser_config: None,
xpath_config: None,
ssa_summaries: None,
extra_labels: None,
base_aliases: None,
@ -2697,6 +2713,8 @@ mod container_elem_tests {
receiver_seed: None,
const_values: None,
type_facts: None,
xml_parser_config: None,
xpath_config: None,
ssa_summaries: None,
extra_labels: None,
base_aliases: None,
@ -2833,6 +2851,8 @@ mod container_elem_tests {
receiver_seed: None,
const_values: None,
type_facts: None,
xml_parser_config: None,
xpath_config: None,
ssa_summaries: None,
extra_labels: None,
base_aliases: None,
@ -3387,6 +3407,8 @@ mod field_taint_origin_cap_tests {
receiver_seed: None,
const_values: None,
type_facts: None,
xml_parser_config: None,
xpath_config: None,
ssa_summaries: None,
extra_labels: None,
base_aliases: None,
@ -3673,6 +3695,8 @@ mod pointer_lattice_worklist_tests {
receiver_seed: None,
const_values: None,
type_facts: None,
xml_parser_config: None,
xpath_config: None,
ssa_summaries: None,
extra_labels: None,
base_aliases: None,

View file

@ -45,6 +45,8 @@ fn ssa_analyse_rust(src: &[u8]) -> Vec<Finding> {
receiver_seed: None,
const_values: None,
type_facts: None,
xml_parser_config: None,
xpath_config: None,
ssa_summaries: None,
extra_labels: None,
base_aliases: None,
@ -1669,10 +1671,10 @@ fn cpp_builder_chain_const_host_silent() {
/// inline member-function bodies inside a
/// `class_specifier` must be extracted as separate functions and
/// intra-file calls must resolve to their bodies. Pre-Phase-4, the
/// `class_specifier` AST kind was unmapped in cpp KINDS, so the CFG
/// walker treated the entire class as a leaf `Seq` node and never
/// descended into inline methods.
/// intra-file calls must resolve to their bodies. Before the cpp KINDS
/// fix the `class_specifier` AST kind was unmapped, so the CFG walker
/// treated the entire class as a leaf `Seq` node and never descended
/// into inline methods.
#[test]
fn cpp_inline_class_method_resolves() {
let src = b"#include <cstdlib>\nclass Inner {\npublic:\n void run(const char* arg) { std::system(arg); }\n};\nint main() {\n char* input = std::getenv(\"X\");\n Inner inner;\n inner.run(input);\n return 0;\n}\n";
@ -3768,6 +3770,8 @@ fn assert_ssa_integration(src: &[u8]) {
receiver_seed: None,
const_values: None,
type_facts: None,
xml_parser_config: None,
xpath_config: None,
ssa_summaries: None,
extra_labels: None,
base_aliases: None,
@ -3904,6 +3908,8 @@ fn integ_php_echo_simple_var() {
receiver_seed: None,
const_values: None,
type_facts: None,
xml_parser_config: None,
xpath_config: None,
ssa_summaries: None,
extra_labels: None,
base_aliases: None,
@ -3972,6 +3978,8 @@ fn integ_c_curl_handle_ssrf() {
receiver_seed: None,
const_values: None,
type_facts: None,
xml_parser_config: None,
xpath_config: None,
ssa_summaries: None,
extra_labels: None,
base_aliases: None,