feat(surface): make attack surface first-class in the finding pipeline

This commit is contained in:
elipeter 2026-06-10 13:01:48 -05:00
parent c9776a5caf
commit 1abcdedbfe
48 changed files with 1591 additions and 214 deletions

View file

@ -12,17 +12,33 @@
//! detection pass is added here; the surface layer just lifts the
//! cap-bit information out of the summary.
use super::{DangerousLocal, SourceLocation, SurfaceNode};
use super::{DangerousLocal, SourceLocation, SurfaceNode, cap_label_string, namespace_file};
use crate::labels::Cap;
use crate::summary::GlobalSummaries;
use crate::summary::{FuncSummary, GlobalSummaries};
/// Cap bits that indicate the function is a *local* sink — code exec,
/// unsafe deserialisation, server-side template injection, format
/// string injection. Other sink caps (SQL_QUERY → DataStore;
/// SSRF → ExternalService) live elsewhere in the surface layer so the
/// node taxonomy matches the chain composer's expectations.
/// Cap bits that indicate the function is a *local* sink — a sink with no
/// externally observable side effect that attacker data flows *into*.
/// Other sink caps live elsewhere in the surface layer so the node
/// taxonomy matches the chain composer's expectations: `SQL_QUERY` /
/// `FILE_IO` → DataStore (see [`super::datastore`]); `SSRF` / `DATA_EXFIL`
/// → ExternalService (see [`super::external`]).
///
/// The set was widened from the original four (code-exec, deserialize,
/// SSTI, format-string) to cover every injection-style local sink the
/// label registry can classify, so a function that only builds an LDAP
/// filter, parses XXE-vulnerable XML, or merges into a prototype is no
/// longer absent from the surface map.
fn dangerous_caps() -> Cap {
Cap::CODE_EXEC | Cap::DESERIALIZE | Cap::SSTI | Cap::FMT_STRING
Cap::CODE_EXEC
| Cap::DESERIALIZE
| Cap::SSTI
| Cap::FMT_STRING
| Cap::LDAP_INJECTION
| Cap::XPATH_INJECTION
| Cap::HEADER_INJECTION
| Cap::OPEN_REDIRECT
| Cap::XXE
| Cap::PROTOTYPE_POLLUTION
}
pub fn detect_dangerous_locals(summaries: &GlobalSummaries) -> Vec<SurfaceNode> {
@ -33,19 +49,46 @@ pub fn detect_dangerous_locals(summaries: &GlobalSummaries) -> Vec<SurfaceNode>
if caps.is_empty() {
continue;
}
// Project-relative POSIX file, keyed off the FuncKey namespace so
// a dangerous-local node and the entry-point that reaches it agree
// on file identity (FuncSummary.file_path is an absolute path and
// would never match an entry-point's relative handler file).
let file = namespace_file(&key.namespace).to_string();
let (line, col) = sink_line_col(summary, &file, caps);
out.push(SurfaceNode::DangerousLocal(DangerousLocal {
location: SourceLocation {
file: summary.file_path.clone(),
line: 0,
col: 0,
},
location: SourceLocation { file, line, col },
function_name: key.qualified_name(),
cap_bits: caps.bits(),
label: cap_label_string(caps.bits()),
}));
}
out
}
/// Resolve the `(line, col)` of the dangerous sink inside `summary` by
/// scanning its `param_to_sink` [`crate::summary::SinkSite`] records for a
/// site whose cap intersects the dangerous mask. Prefers a same-file,
/// non-chain-promoted site (the function's own sink) over a deeper
/// chain-hop site so the coordinates point at code in `file`. Falls back
/// to `(0, 0)` when the summary carries no located sink (pass-2 transient
/// summaries, or summaries extracted without tree access).
fn sink_line_col(summary: &FuncSummary, file: &str, mask: Cap) -> (u32, u32) {
let mut fallback: Option<(u32, u32)> = None;
for (_param, sites) in &summary.param_to_sink {
for site in sites {
if site.line == 0 || (site.cap & mask).is_empty() {
continue;
}
let same_file = site.file_rel.is_empty() || site.file_rel == file;
if same_file && !site.from_chain {
return (site.line, site.col);
}
fallback.get_or_insert((site.line, site.col));
}
}
fallback.unwrap_or((0, 0))
}
#[cfg(test)]
mod tests {
use super::*;
@ -64,6 +107,63 @@ mod tests {
(key, summary)
}
#[test]
fn carries_real_span_and_label_from_param_to_sink() {
use crate::summary::SinkSite;
let mut gs = GlobalSummaries::new();
let key = FuncKey::new_function(Lang::Python, "app.py", "render", None);
let site = SinkSite {
file_rel: "app.py".into(),
line: 17,
col: 9,
snippet: "Template(x).render()".into(),
cap: Cap::SSTI,
from_chain: false,
};
let summary = FuncSummary {
name: "render".into(),
file_path: "/abs/app.py".into(), // absolute on purpose
lang: "python".into(),
sink_caps: Cap::SSTI.bits(),
param_to_sink: vec![(0, vec![site].into())],
..Default::default()
};
gs.insert(key, summary);
let nodes = detect_dangerous_locals(&gs);
assert_eq!(nodes.len(), 1);
let SurfaceNode::DangerousLocal(d) = &nodes[0] else {
panic!()
};
// Project-relative file (from the namespace), not the absolute path.
assert_eq!(d.location.file, "app.py");
assert_eq!(d.location.line, 17);
assert_eq!(d.location.col, 9);
assert_eq!(d.label, "ssti");
}
#[test]
fn detects_widened_injection_caps() {
// The widened mask now covers XXE / LDAP / open-redirect etc., which
// the original four-cap mask missed entirely.
for cap in [
Cap::XXE,
Cap::LDAP_INJECTION,
Cap::XPATH_INJECTION,
Cap::OPEN_REDIRECT,
Cap::HEADER_INJECTION,
Cap::PROTOTYPE_POLLUTION,
] {
let mut gs = GlobalSummaries::new();
let (k, s) = summary_with_caps("h", "danger.py", cap);
gs.insert(k, s);
assert_eq!(
detect_dangerous_locals(&gs).len(),
1,
"cap {cap:?} should surface a dangerous-local node"
);
}
}
#[test]
fn detects_eval_sink() {
let mut gs = GlobalSummaries::new();