mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-12 19:55:14 +02:00
feat(surface): make attack surface first-class in the finding pipeline
This commit is contained in:
parent
c9776a5caf
commit
1abcdedbfe
48 changed files with 1591 additions and 214 deletions
|
|
@ -97,6 +97,7 @@ fn parse_timeout_diag(path: &Path, timeout_ms: u64) -> Diag {
|
|||
evidence: Some(evidence),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -711,6 +712,7 @@ fn build_taint_diag(
|
|||
}),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -1400,6 +1402,7 @@ impl<'a> ParsedSource<'a> {
|
|||
}),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -2046,6 +2049,7 @@ impl<'a> ParsedFile<'a> {
|
|||
}),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -2129,6 +2133,7 @@ impl<'a> ParsedFile<'a> {
|
|||
}),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -1044,6 +1044,7 @@ fn auth_finding_to_diag(finding: &checks::AuthFinding, tree: &Tree, file_path: &
|
|||
}),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -404,6 +404,7 @@ mod tests {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -192,35 +192,41 @@ pub fn pick_chain_cap(bits: u32) -> Option<Cap> {
|
|||
}
|
||||
|
||||
fn locate_reach(loc: &SourceLocation, surface: &SurfaceMap, reach: Option<&FileReachMap>) -> Reach {
|
||||
// Pass 1: file-local match (legacy behaviour, always applies).
|
||||
for node in &surface.nodes {
|
||||
if let SurfaceNode::EntryPoint(ep) = node
|
||||
&& ep.handler_location.file == loc.file
|
||||
{
|
||||
return Reach::Reachable {
|
||||
location: ep.location.clone(),
|
||||
method: ep.method,
|
||||
route: ep.route.clone(),
|
||||
auth_required: ep.auth_required,
|
||||
};
|
||||
// Within each pass, prefer an *unauthenticated* entry-point over an
|
||||
// auth-gated one: the chain composer scores worst-case exposure, and
|
||||
// taking the first match used to under-report whenever an auth-gated
|
||||
// route happened to sort first in the same file.
|
||||
let pick = |matches_entry: &dyn Fn(&crate::surface::EntryPoint) -> bool| -> Option<Reach> {
|
||||
let mut best: Option<&crate::surface::EntryPoint> = None;
|
||||
for node in &surface.nodes {
|
||||
if let SurfaceNode::EntryPoint(ep) = node
|
||||
&& matches_entry(ep)
|
||||
{
|
||||
if !ep.auth_required {
|
||||
best = Some(ep);
|
||||
break;
|
||||
}
|
||||
best.get_or_insert(ep);
|
||||
}
|
||||
}
|
||||
best.map(|ep| Reach::Reachable {
|
||||
location: ep.location.clone(),
|
||||
method: ep.method,
|
||||
route: ep.route.clone(),
|
||||
auth_required: ep.auth_required,
|
||||
})
|
||||
};
|
||||
// Pass 1: file-local match (legacy behaviour, always applies).
|
||||
if let Some(found) = pick(&|ep| ep.handler_location.file == loc.file) {
|
||||
return found;
|
||||
}
|
||||
// Pass 2: transitive caller match via the call graph. Only fires
|
||||
// when `reach` is supplied — keeps the legacy file-local behaviour
|
||||
// for callers that have not yet wired the call-graph reach map.
|
||||
if let Some(reach) = reach {
|
||||
for node in &surface.nodes {
|
||||
if let SurfaceNode::EntryPoint(ep) = node
|
||||
&& reach.reaches(&ep.handler_location.file, &loc.file)
|
||||
{
|
||||
return Reach::Reachable {
|
||||
location: ep.location.clone(),
|
||||
method: ep.method,
|
||||
route: ep.route.clone(),
|
||||
auth_required: ep.auth_required,
|
||||
};
|
||||
}
|
||||
}
|
||||
if let Some(reach) = reach
|
||||
&& let Some(found) = pick(&|ep| reach.reaches(&ep.handler_location.file, &loc.file))
|
||||
{
|
||||
return found;
|
||||
}
|
||||
Reach::Unreachable
|
||||
}
|
||||
|
|
|
|||
|
|
@ -463,6 +463,7 @@ mod tests {
|
|||
location: loc(file, line),
|
||||
function_name: fname.into(),
|
||||
cap_bits: caps.bits(),
|
||||
label: String::new(),
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -671,6 +672,8 @@ mod tests {
|
|||
location: loc("app.py", 5),
|
||||
kind: DataStoreKind::KeyValue,
|
||||
label: "redis://127.0.0.1:6379".into(),
|
||||
owner: String::new(),
|
||||
access: Default::default(),
|
||||
}));
|
||||
let boosted = find_chains(
|
||||
&[edge()],
|
||||
|
|
|
|||
|
|
@ -167,6 +167,14 @@ pub struct Diag {
|
|||
/// Breakdown of how the ranking score was computed.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub rank_reason: Option<Vec<(String, String)>>,
|
||||
/// Worst-case attack-surface exposure: the externally-reachable
|
||||
/// route that can drive this finding, when the surface map's
|
||||
/// entry-points reach the finding's file (directly or via the call
|
||||
/// graph). `None` when the project has no detected entry-points
|
||||
/// or no route reaches the file. Populated by
|
||||
/// [`crate::surface::exposure::annotate_exposure`] before ranking.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub exposure: Option<crate::surface::exposure::Exposure>,
|
||||
/// Whether this finding was suppressed by an inline `nyx:ignore` directive.
|
||||
#[serde(default, skip_serializing_if = "is_false")]
|
||||
pub suppressed: bool,
|
||||
|
|
@ -251,6 +259,7 @@ impl Default for Diag {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: default_triage_state(),
|
||||
|
|
@ -2547,6 +2556,15 @@ pub(crate) fn scan_filesystem_with_observer(
|
|||
if let Some(p) = progress {
|
||||
p.set_stage(ScanStage::PostProcessing);
|
||||
}
|
||||
// Surface exposure: tag each finding with the worst-case route that
|
||||
// reaches it before ranking, so `rank_diags` can weigh external
|
||||
// reachability.
|
||||
crate::surface::exposure::annotate_exposure(
|
||||
&mut diags,
|
||||
&surface_map,
|
||||
chain_reach_out.and_then(|s| s.get()),
|
||||
Some(root),
|
||||
);
|
||||
post_process_diags(&mut diags, cfg);
|
||||
if let Some(p) = progress {
|
||||
p.record_post_process_ms(pp_start.elapsed().as_millis() as u64);
|
||||
|
|
@ -3398,6 +3416,15 @@ pub fn scan_with_index_parallel_observer(
|
|||
None,
|
||||
);
|
||||
}
|
||||
// Surface exposure: tag each finding with the worst-case route
|
||||
// that reaches it before ranking, so `rank_diags` can weigh
|
||||
// external reachability.
|
||||
crate::surface::exposure::annotate_exposure(
|
||||
&mut diags,
|
||||
&surface_map,
|
||||
chain_reach_out.and_then(|s| s.get()),
|
||||
Some(scan_root),
|
||||
);
|
||||
}
|
||||
|
||||
// NOTE: Taint-mode output is *not* filtered here. `run_rules_on_bytes`
|
||||
|
|
@ -3603,6 +3630,7 @@ fn rollup_findings(
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -3837,6 +3865,7 @@ mod dedup_taint_flow_tests {
|
|||
}),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -4007,6 +4036,7 @@ mod scc_tagging_tests {
|
|||
evidence: Some(Evidence::default()),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -4301,6 +4331,7 @@ fn severity_filter_applied_at_output_stage() {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -4325,6 +4356,7 @@ fn severity_filter_applied_at_output_stage() {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -4376,6 +4408,7 @@ mod prioritize_tests {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -4809,6 +4842,7 @@ mod prioritize_tests {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -4901,6 +4935,7 @@ mod stable_hash_tests {
|
|||
}),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ use crate::errors::{NyxError, NyxResult};
|
|||
use crate::summary::GlobalSummaries;
|
||||
use crate::surface::{
|
||||
DataStoreKind, EdgeKind, EntryPoint, ExternalServiceKind, SurfaceMap, SurfaceNode,
|
||||
build::{SurfaceBuildInputs, build_surface_map},
|
||||
build::{SurfaceBuildInputs, SurfaceCoverage, build_surface_map_with_coverage},
|
||||
};
|
||||
use crate::utils::Config;
|
||||
use crate::utils::project::get_project_info;
|
||||
|
|
@ -60,11 +60,18 @@ pub fn handle(
|
|||
config: &Config,
|
||||
) -> NyxResult<()> {
|
||||
let scan_root = Path::new(path).canonicalize()?;
|
||||
let map = if build_inline {
|
||||
build_full_from_filesystem(&scan_root, config)?
|
||||
let (map, coverage) = if build_inline {
|
||||
let (m, c) = build_full_from_filesystem(&scan_root, config)?;
|
||||
(m, Some(c))
|
||||
} else {
|
||||
load_or_build(&scan_root, database_dir, config)?
|
||||
};
|
||||
// Coverage goes to stderr so stdout stays clean for json / dot / svg
|
||||
// consumers. Only available when the map was built this run (a
|
||||
// persisted map carries no coverage).
|
||||
if let Some(cov) = &coverage {
|
||||
eprint!("{}", render_coverage(cov));
|
||||
}
|
||||
let stdout = std::io::stdout();
|
||||
let mut out = stdout.lock();
|
||||
match format {
|
||||
|
|
@ -97,7 +104,7 @@ pub fn load_or_build(
|
|||
scan_root: &Path,
|
||||
database_dir: &Path,
|
||||
config: &Config,
|
||||
) -> NyxResult<SurfaceMap> {
|
||||
) -> NyxResult<(SurfaceMap, Option<SurfaceCoverage>)> {
|
||||
if let Ok((project, db_path)) = get_project_info(scan_root, database_dir)
|
||||
&& db_path.exists()
|
||||
&& let Ok(pool) = Indexer::init(&db_path)
|
||||
|
|
@ -105,12 +112,25 @@ pub fn load_or_build(
|
|||
&& let Ok(Some(map)) = idx.load_surface_map()
|
||||
&& !map.nodes.is_empty()
|
||||
{
|
||||
return Ok(map);
|
||||
// Persisted map: no coverage to report. Say where the data came
|
||||
// from on stderr — a reviewer comparing the tree against freshly
|
||||
// edited source needs to know it reflects the last indexed scan,
|
||||
// not the working tree.
|
||||
eprintln!(
|
||||
"Surface map: {} nodes, {} edges from the last indexed scan (pass --build to rebuild from source)",
|
||||
map.node_count(),
|
||||
map.edge_count()
|
||||
);
|
||||
return Ok((map, None));
|
||||
}
|
||||
build_from_filesystem(scan_root, config)
|
||||
let (map, cov) = build_from_filesystem(scan_root, config)?;
|
||||
Ok((map, Some(cov)))
|
||||
}
|
||||
|
||||
fn build_from_filesystem(scan_root: &Path, config: &Config) -> NyxResult<SurfaceMap> {
|
||||
fn build_from_filesystem(
|
||||
scan_root: &Path,
|
||||
config: &Config,
|
||||
) -> NyxResult<(SurfaceMap, SurfaceCoverage)> {
|
||||
let files = collect_files(scan_root, config)?;
|
||||
let summaries = GlobalSummaries::new();
|
||||
let call_graph = callgraph::build_call_graph(&summaries, &[]);
|
||||
|
|
@ -121,7 +141,7 @@ fn build_from_filesystem(scan_root: &Path, config: &Config) -> NyxResult<Surface
|
|||
call_graph: &call_graph,
|
||||
config,
|
||||
};
|
||||
Ok(build_surface_map(&inputs))
|
||||
Ok(build_surface_map_with_coverage(&inputs))
|
||||
}
|
||||
|
||||
/// Build a full SurfaceMap from source by running pass-1 summary
|
||||
|
|
@ -129,7 +149,10 @@ fn build_from_filesystem(scan_root: &Path, config: &Config) -> NyxResult<Surface
|
|||
/// resulting [`GlobalSummaries`] + [`CallGraph`] to
|
||||
/// [`build_surface_map_with_coverage`]. Same cost as `nyx index build` pass 1 but
|
||||
/// holds nothing in SQLite.
|
||||
fn build_full_from_filesystem(scan_root: &Path, config: &Config) -> NyxResult<SurfaceMap> {
|
||||
fn build_full_from_filesystem(
|
||||
scan_root: &Path,
|
||||
config: &Config,
|
||||
) -> NyxResult<(SurfaceMap, SurfaceCoverage)> {
|
||||
let files = collect_files(scan_root, config)?;
|
||||
let mut summaries = build_summaries_inline(&files, scan_root, config);
|
||||
summaries.install_hierarchy();
|
||||
|
|
@ -141,7 +164,26 @@ fn build_full_from_filesystem(scan_root: &Path, config: &Config) -> NyxResult<Su
|
|||
call_graph: &call_graph,
|
||||
config,
|
||||
};
|
||||
Ok(build_surface_map(&inputs))
|
||||
Ok(build_surface_map_with_coverage(&inputs))
|
||||
}
|
||||
|
||||
/// One-line coverage summary printed to stderr after a fresh build, so an
|
||||
/// operator can tell a genuinely small attack surface apart from "our
|
||||
/// probes did not understand this project". Parse failures and
|
||||
/// unsupported-language skips were previously swallowed silently.
|
||||
fn render_coverage(cov: &SurfaceCoverage) -> String {
|
||||
let mut s = format!(
|
||||
"Coverage: {} files, {} in a supported language ({} parsed, {} with routes)",
|
||||
cov.files_total, cov.files_supported, cov.files_parsed, cov.files_with_entry_points,
|
||||
);
|
||||
if cov.files_parse_failed > 0 {
|
||||
s.push_str(&format!(", {} unparsed", cov.files_parse_failed));
|
||||
}
|
||||
if cov.files_unreadable > 0 {
|
||||
s.push_str(&format!(", {} unreadable", cov.files_unreadable));
|
||||
}
|
||||
s.push('\n');
|
||||
s
|
||||
}
|
||||
|
||||
/// Run pass-1 summary extraction across `files` in parallel and merge
|
||||
|
|
@ -242,6 +284,36 @@ pub fn render_text(map: &SurfaceMap, scan_root: Option<&Path>) -> String {
|
|||
return out;
|
||||
}
|
||||
|
||||
// Risk banner: the highest-risk entry-points first, so a reviewer
|
||||
// sees "what should I look at" before the per-file inventory.
|
||||
let risks = crate::surface::risk::assess_entry_risks(map);
|
||||
let risk_by_idx: std::collections::HashMap<usize, &crate::surface::risk::EntryRisk> =
|
||||
risks.iter().map(|r| (r.entry_idx, r)).collect();
|
||||
let top: Vec<&crate::surface::risk::EntryRisk> = risks
|
||||
.iter()
|
||||
.filter(|r| r.tier >= crate::surface::risk::RiskTier::Medium)
|
||||
.take(10)
|
||||
.collect();
|
||||
if !top.is_empty() {
|
||||
out.push_str("Top risk entry-points\n");
|
||||
for r in &top {
|
||||
let Some(SurfaceNode::EntryPoint(ep)) = map.nodes.get(r.entry_idx) else {
|
||||
continue;
|
||||
};
|
||||
out.push_str(&format!(
|
||||
" [{}] {} {} ({:?}) — {} [{}:{}]\n",
|
||||
r.tier.tag(),
|
||||
method_str(ep.method),
|
||||
ep.route,
|
||||
ep.framework,
|
||||
r.factors.join(", "),
|
||||
ep.location.file,
|
||||
ep.location.line
|
||||
));
|
||||
}
|
||||
out.push('\n');
|
||||
}
|
||||
|
||||
let mut by_file: BTreeMap<&str, Vec<usize>> = BTreeMap::new();
|
||||
for (idx, node) in map.nodes.iter().enumerate() {
|
||||
by_file
|
||||
|
|
@ -252,7 +324,7 @@ pub fn render_text(map: &SurfaceMap, scan_root: Option<&Path>) -> String {
|
|||
|
||||
let mut reached: std::collections::HashSet<u32> = std::collections::HashSet::new();
|
||||
for edge in &map.edges {
|
||||
if matches!(edge.kind, EdgeKind::Reaches) {
|
||||
if edge.kind.is_reach_like() {
|
||||
reached.insert(edge.to);
|
||||
}
|
||||
}
|
||||
|
|
@ -269,7 +341,7 @@ pub fn render_text(map: &SurfaceMap, scan_root: Option<&Path>) -> String {
|
|||
let SurfaceNode::EntryPoint(ep) = &map.nodes[ei] else {
|
||||
continue;
|
||||
};
|
||||
render_entry_point(&mut out, ep, ei as u32, map);
|
||||
render_entry_point(&mut out, ep, ei as u32, map, risk_by_idx.get(&ei).copied());
|
||||
}
|
||||
}
|
||||
for &i in indices {
|
||||
|
|
@ -323,24 +395,46 @@ pub fn render_text(map: &SurfaceMap, scan_root: Option<&Path>) -> String {
|
|||
out
|
||||
}
|
||||
|
||||
fn render_entry_point(out: &mut String, ep: &EntryPoint, ep_idx: u32, map: &SurfaceMap) {
|
||||
fn render_entry_point(
|
||||
out: &mut String,
|
||||
ep: &EntryPoint,
|
||||
ep_idx: u32,
|
||||
map: &SurfaceMap,
|
||||
risk: Option<&crate::surface::risk::EntryRisk>,
|
||||
) {
|
||||
let auth = if ep.auth_required { " [auth]" } else { "" };
|
||||
// Only Medium and above gets a tag — every line reading `[low]`
|
||||
// would be noise, absence of a tag *is* the low signal.
|
||||
let risk_tag = risk
|
||||
.filter(|r| r.tier >= crate::surface::risk::RiskTier::Medium)
|
||||
.map(|r| format!(" [risk: {}]", r.tier.tag()))
|
||||
.unwrap_or_default();
|
||||
out.push_str(&format!(
|
||||
" {} {} ({:?}){}\n",
|
||||
" {} {} ({:?}){}{}\n",
|
||||
method_str(ep.method),
|
||||
ep.route,
|
||||
ep.framework,
|
||||
auth
|
||||
auth,
|
||||
risk_tag
|
||||
));
|
||||
out.push_str(&format!(
|
||||
" handler: {} at {}:{}\n",
|
||||
ep.handler_name, ep.handler_location.file, ep.handler_location.line
|
||||
));
|
||||
let mut reached: Vec<&SurfaceNode> = map
|
||||
// Dedupe destinations: a read-write data store carries both a
|
||||
// ReadsFrom and a WritesTo edge to the same node — one line each
|
||||
// would print the store twice.
|
||||
let mut to_indices: Vec<u32> = map
|
||||
.edges
|
||||
.iter()
|
||||
.filter(|e| e.from == ep_idx && matches!(e.kind, EdgeKind::Reaches))
|
||||
.filter_map(|e| map.nodes.get(e.to as usize))
|
||||
.filter(|e| e.from == ep_idx && e.kind.is_reach_like())
|
||||
.map(|e| e.to)
|
||||
.collect();
|
||||
to_indices.sort_unstable();
|
||||
to_indices.dedup();
|
||||
let mut reached: Vec<&SurfaceNode> = to_indices
|
||||
.iter()
|
||||
.filter_map(|&i| map.nodes.get(i as usize))
|
||||
.collect();
|
||||
reached.sort_by(|a, b| a.location().cmp(b.location()));
|
||||
if reached.is_empty() {
|
||||
|
|
@ -364,9 +458,16 @@ fn render_node_line(out: &mut String, node: &SurfaceNode, prefix: &str) {
|
|||
));
|
||||
}
|
||||
SurfaceNode::DataStore(ds) => {
|
||||
let access = match ds.access {
|
||||
crate::surface::AccessMode::Read => ", read",
|
||||
crate::surface::AccessMode::Write => ", write",
|
||||
crate::surface::AccessMode::ReadWrite => ", read-write",
|
||||
crate::surface::AccessMode::Unknown => "",
|
||||
};
|
||||
out.push_str(&format!(
|
||||
"{prefix}data-store ({}): {} [{}:{}]\n",
|
||||
"{prefix}data-store ({}{}): {} [{}:{}]\n",
|
||||
ds_kind_str(ds.kind),
|
||||
access,
|
||||
ds.label,
|
||||
ds.location.file,
|
||||
ds.location.line
|
||||
|
|
@ -382,9 +483,14 @@ fn render_node_line(out: &mut String, node: &SurfaceNode, prefix: &str) {
|
|||
));
|
||||
}
|
||||
SurfaceNode::DangerousLocal(dl) => {
|
||||
let caps = if dl.label.is_empty() {
|
||||
crate::surface::cap_label_string(dl.cap_bits)
|
||||
} else {
|
||||
dl.label.clone()
|
||||
};
|
||||
out.push_str(&format!(
|
||||
"{prefix}dangerous: {} (cap=0x{:x}) [{}:{}]\n",
|
||||
dl.function_name, dl.cap_bits, dl.location.file, dl.location.line
|
||||
"{prefix}dangerous ({}): {} [{}:{}]\n",
|
||||
caps, dl.function_name, dl.location.file, dl.location.line
|
||||
));
|
||||
}
|
||||
}
|
||||
|
|
@ -474,15 +580,22 @@ pub fn render_dot(map: &SurfaceMap) -> String {
|
|||
"component",
|
||||
"#8b3aa5",
|
||||
),
|
||||
SurfaceNode::DangerousLocal(dl) => (
|
||||
format!(
|
||||
"Dangerous\\n{}\\ncap=0x{:x}",
|
||||
escape_dot(&dl.function_name),
|
||||
dl.cap_bits
|
||||
),
|
||||
"octagon",
|
||||
"#c44141",
|
||||
),
|
||||
SurfaceNode::DangerousLocal(dl) => {
|
||||
let caps = if dl.label.is_empty() {
|
||||
crate::surface::cap_label_string(dl.cap_bits)
|
||||
} else {
|
||||
dl.label.clone()
|
||||
};
|
||||
(
|
||||
format!(
|
||||
"Dangerous ({})\\n{}",
|
||||
escape_dot(&caps),
|
||||
escape_dot(&dl.function_name),
|
||||
),
|
||||
"octagon",
|
||||
"#c44141",
|
||||
)
|
||||
}
|
||||
};
|
||||
out.push_str(&format!(
|
||||
" n{i} [label=\"{label}\", shape={shape}, color=\"{color}\", fontcolor=\"{color}\"];\n",
|
||||
|
|
@ -603,6 +716,7 @@ mod tests {
|
|||
location: SourceLocation::new("app.py", 12, 1),
|
||||
function_name: "eval".into(),
|
||||
cap_bits: crate::labels::Cap::CODE_EXEC.bits(),
|
||||
label: "code-exec".into(),
|
||||
},
|
||||
));
|
||||
// Build edge after canonicalize so indices are stable.
|
||||
|
|
@ -625,7 +739,7 @@ mod tests {
|
|||
m.canonicalize();
|
||||
let text = render_text(&m, None);
|
||||
assert!(text.contains("reaches:"));
|
||||
assert!(text.contains("dangerous: eval"));
|
||||
assert!(text.contains("dangerous (code-exec): eval"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -691,7 +805,7 @@ mod tests {
|
|||
|
||||
let cfg = Config::default();
|
||||
let canon = project_dir.canonicalize().unwrap();
|
||||
let map = build_full_from_filesystem(&canon, &cfg).expect("inline build succeeds");
|
||||
let (map, _cov) = build_full_from_filesystem(&canon, &cfg).expect("inline build succeeds");
|
||||
|
||||
let has_entry = map
|
||||
.nodes
|
||||
|
|
@ -722,7 +836,7 @@ mod tests {
|
|||
|
||||
let cfg = Config::default();
|
||||
let canon = project_dir.canonicalize().unwrap();
|
||||
let map = build_from_filesystem(&canon, &cfg).expect("fallback build succeeds");
|
||||
let (map, _cov) = build_from_filesystem(&canon, &cfg).expect("fallback build succeeds");
|
||||
|
||||
// Entry point should still appear (framework probes run in the
|
||||
// fallback path too).
|
||||
|
|
|
|||
|
|
@ -1091,6 +1091,7 @@ pub mod index {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -1600,6 +1600,7 @@ mod tests {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
14
src/fmt.rs
14
src/fmt.rs
|
|
@ -985,6 +985,7 @@ mod tests {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -1009,6 +1010,7 @@ mod tests {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -1047,6 +1049,7 @@ mod tests {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -1085,6 +1088,7 @@ mod tests {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -1109,6 +1113,7 @@ mod tests {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -1145,6 +1150,7 @@ mod tests {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -1178,6 +1184,7 @@ mod tests {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -1215,6 +1222,7 @@ mod tests {
|
|||
evidence: None,
|
||||
rank_score: Some(120.0),
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -1311,6 +1319,7 @@ mod tests {
|
|||
evidence: None,
|
||||
rank_score: Some(36.0),
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -1360,6 +1369,7 @@ mod tests {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -1395,6 +1405,7 @@ mod tests {
|
|||
evidence: None,
|
||||
rank_score: Some(42.0),
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -1434,6 +1445,7 @@ mod tests {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -1469,6 +1481,7 @@ mod tests {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -1518,6 +1531,7 @@ mod tests {
|
|||
}),
|
||||
rank_score: Some(47.0),
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -241,6 +241,23 @@ pub fn build_sarif_with_chains(diags: &[Diag], chains: &[ChainFinding], scan_roo
|
|||
props.insert("data_exfil_field".into(), json!(field));
|
||||
}
|
||||
|
||||
// Attack-surface exposure: the externally-reachable route
|
||||
// that drives this finding. Lets a SARIF consumer (CI gate,
|
||||
// dashboard) filter on "reachable from an unauthenticated
|
||||
// route" without re-running the surface build.
|
||||
if let Some(exp) = &d.exposure {
|
||||
props.insert(
|
||||
"exposure".into(),
|
||||
json!({
|
||||
"route": exp.route,
|
||||
"method": format!("{:?}", exp.method),
|
||||
"framework": format!("{:?}", exp.framework),
|
||||
"auth_required": exp.auth_required,
|
||||
"transitive": exp.transitive,
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
if !d.finding_id.is_empty() {
|
||||
props.insert("finding_id".into(), json!(d.finding_id));
|
||||
}
|
||||
|
|
@ -395,6 +412,7 @@ mod tests {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -80,6 +80,7 @@ pub fn scan_ejs_file(path: &Path, bytes: &[u8]) -> Vec<Diag> {
|
|||
}),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
27
src/rank.rs
27
src/rank.rs
|
|
@ -55,6 +55,32 @@ pub fn compute_attack_rank(diag: &Diag) -> AttackRank {
|
|||
components.push(("evidence".into(), format!("{evidence_bonus}")));
|
||||
}
|
||||
|
||||
// ── 3b. Surface exposure ────────────────────────────────────────────
|
||||
//
|
||||
// A finding reachable from a surface entry-point is more exploitable
|
||||
// than an internal one; reachable *without auth* more so. Transitive
|
||||
// reach (through the call graph rather than in the handler's own
|
||||
// file) is slightly discounted because the file-level reach map can
|
||||
// over-approximate. Magnitudes keep the severity tier ordering: the
|
||||
// maximum exposure bonus (+10) plus all other Medium-tier bonuses
|
||||
// stays below the High severity base (see tier tests).
|
||||
if let Some(exp) = &diag.exposure {
|
||||
let mut exposure_bonus = if exp.auth_required { 4.0 } else { 10.0 };
|
||||
if exp.transitive {
|
||||
exposure_bonus -= 2.0;
|
||||
}
|
||||
score += exposure_bonus;
|
||||
let auth_tag = if exp.auth_required {
|
||||
"auth-gated"
|
||||
} else {
|
||||
"unauthenticated"
|
||||
};
|
||||
components.push((
|
||||
"exposure".into(),
|
||||
format!("{exposure_bonus:+} ({auth_tag})"),
|
||||
));
|
||||
}
|
||||
|
||||
// ── 4. State finding sub-ranking ────────────────────────────────────
|
||||
let state_bonus = state_finding_bonus(&diag.id);
|
||||
score += state_bonus;
|
||||
|
|
@ -421,6 +447,7 @@ mod tests {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -610,6 +610,7 @@ mod tests {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -78,6 +78,10 @@ pub struct FindingView {
|
|||
pub guard_kind: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub rank_reason: Option<Vec<(String, String)>>,
|
||||
/// Worst-case attack-surface exposure (route, method, auth) when a
|
||||
/// surface entry-point reaches this finding.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub exposure: Option<crate::surface::exposure::Exposure>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub sanitizer_status: Option<String>,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
|
|
@ -345,6 +349,7 @@ pub fn finding_from_diag(index: usize, d: &Diag) -> FindingView {
|
|||
.and_then(|ev| ev.dynamic_verdict.clone()),
|
||||
guard_kind: None,
|
||||
rank_reason: None,
|
||||
exposure: d.exposure.clone(),
|
||||
sanitizer_status: None,
|
||||
related_findings: vec![],
|
||||
}
|
||||
|
|
@ -937,6 +942,7 @@ mod tests {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -31,12 +31,24 @@ async fn get_surface(State(state): State<AppState>) -> ApiResult<Json<Value>> {
|
|||
.await
|
||||
.map_err(|e| ApiError::internal(format!("surface map task failed: {e}")))?;
|
||||
|
||||
let mut map =
|
||||
let (mut map, _coverage) =
|
||||
join_result.map_err(|e| ApiError::internal(format!("failed to build surface map: {e}")))?;
|
||||
// Risk is derived from the canonicalised map, so canonicalise (via
|
||||
// `to_json`) first to lock node indices, then assess.
|
||||
let bytes = map
|
||||
.to_json()
|
||||
.map_err(|e| ApiError::internal(format!("encode surface map: {e}")))?;
|
||||
let value: Value = serde_json::from_slice(&bytes)
|
||||
let mut value: Value = serde_json::from_slice(&bytes)
|
||||
.map_err(|e| ApiError::internal(format!("re-parse surface map JSON: {e}")))?;
|
||||
// Attach per-entry-point risk assessment alongside the raw map so the
|
||||
// frontend can render a risk-sorted view without re-deriving scores.
|
||||
let risks = crate::surface::risk::assess_entry_risks(&map);
|
||||
if let Value::Object(obj) = &mut value {
|
||||
obj.insert(
|
||||
"entry_risks".into(),
|
||||
serde_json::to_value(&risks)
|
||||
.map_err(|e| ApiError::internal(format!("encode entry risks: {e}")))?,
|
||||
);
|
||||
}
|
||||
Ok(Json(value))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,22 +12,30 @@
|
|||
//! SSRF caps and emits [`SurfaceNode::ExternalService`](crate::surface::SurfaceNode::ExternalService) nodes.
|
||||
//! 4. [`super::dangerous::detect_dangerous_locals`] walks summaries
|
||||
//! and emits [`SurfaceNode::DangerousLocal`](crate::surface::SurfaceNode::DangerousLocal) nodes for every
|
||||
//! function whose `sink_caps` include CODE_EXEC / DESERIALIZE /
|
||||
//! SSTI / FMT_STRING.
|
||||
//! 5. [`super::reachability::populate_reaches_edges`] runs a BFS over
|
||||
//! the [`CallGraph`] from each entry-point handler, emitting
|
||||
//! [`super::EdgeKind::Reaches`] edges to every reachable
|
||||
//! DataStore / ExternalService / DangerousLocal.
|
||||
//! function whose `sink_caps` include a local-sink class (code-exec,
|
||||
//! deserialize, SSTI, format-string, LDAP / XPath / header /
|
||||
//! open-redirect injection, XXE, prototype pollution), located at the
|
||||
//! real sink span and labelled with the decoded cap class.
|
||||
//! 5. [`super::reachability::populate_reaches_edges`] runs a forward,
|
||||
//! function-level BFS over the [`CallGraph`] from each entry-point
|
||||
//! handler, emitting [`super::EdgeKind::ReadsFrom`] (→ data store),
|
||||
//! [`super::EdgeKind::TalksTo`] (→ external service), and
|
||||
//! [`super::EdgeKind::Reaches`] (→ dangerous local) edges to every
|
||||
//! reachable destination.
|
||||
//! 6. [`SurfaceMap::canonicalize`] sorts nodes + edges so the
|
||||
//! serialised JSON is byte-deterministic across rescans.
|
||||
//!
|
||||
//! Per-file errors (parse failure, unsupported language) are
|
||||
//! swallowed so a single bad file does not kill the whole map.
|
||||
//! Per-file errors (parse failure, unsupported language, unreadable file)
|
||||
//! are swallowed so a single bad file does not kill the whole map, but are
|
||||
//! counted into [`SurfaceCoverage`] so the skip is observable rather than
|
||||
//! silent.
|
||||
|
||||
use crate::auth_analysis::auth_markers::router_auth_markers_for_lang;
|
||||
use crate::callgraph::CallGraph;
|
||||
use crate::entry_points::{EntryKind, HttpMethod};
|
||||
use crate::summary::GlobalSummaries;
|
||||
use crate::surface::{
|
||||
SurfaceMap, dangerous, datastore, external,
|
||||
EntryPoint, Framework, SourceLocation, SurfaceMap, SurfaceNode, dangerous, datastore, external,
|
||||
lang::{
|
||||
go_gin, go_http, java_quarkus, java_servlet, java_spring, js_express, js_koa, php_laravel,
|
||||
php_slim, python_django, python_fastapi, python_flask, ruby_rails, ruby_sinatra,
|
||||
|
|
@ -47,17 +55,63 @@ pub struct SurfaceBuildInputs<'a> {
|
|||
pub config: &'a Config,
|
||||
}
|
||||
|
||||
/// Coverage telemetry gathered while a surface map is being built.
///
/// Files the builder cannot use are skipped instead of aborting the build.
/// These counters make each kind of skip visible, so an operator can tell
/// a genuinely small attack surface apart from one the framework probes
/// simply did not understand.
///
/// As populated by [`build_surface_map_with_coverage`], every input file
/// is counted in exactly one of `files_unreadable`, `files_unsupported`
/// or `files_supported`, and every supported file in exactly one of
/// `files_parsed` or `files_parse_failed`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct SurfaceCoverage {
    /// Number of files passed to the builder, whatever happened to them.
    pub files_total: usize,
    /// Readable files whose language has a framework probe.
    pub files_supported: usize,
    /// Supported files for which probing produced a node list (the list
    /// may be empty).
    pub files_parsed: usize,
    /// Supported files for which probing produced no result; the builder
    /// treats this as a failed tree-sitter parse.
    pub files_parse_failed: usize,
    /// Readable files in a language without a framework probe
    /// (`.md`, `.toml`, …).
    pub files_unsupported: usize,
    /// Files whose bytes could not be read from disk.
    pub files_unreadable: usize,
    /// Parsed files that contributed at least one entry-point node.
    pub files_with_entry_points: usize,
}
|
||||
|
||||
/// Build a [`SurfaceMap`], discarding coverage. Thin wrapper over
|
||||
/// [`build_surface_map_with_coverage`] for callers (the indexed scan
|
||||
/// path, persistence) that do not surface telemetry.
|
||||
pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap {
|
||||
build_surface_map_with_coverage(inputs).0
|
||||
}
|
||||
|
||||
/// Build a [`SurfaceMap`] and report [`SurfaceCoverage`]. The `nyx
|
||||
/// surface` CLI uses this variant so parse / unsupported skips become a
|
||||
/// visible number instead of being silently swallowed.
|
||||
pub fn build_surface_map_with_coverage(
|
||||
inputs: &SurfaceBuildInputs<'_>,
|
||||
) -> (SurfaceMap, SurfaceCoverage) {
|
||||
let mut map = SurfaceMap::new();
|
||||
let _ = inputs.config;
|
||||
let mut cov = SurfaceCoverage {
|
||||
files_total: inputs.files.len(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut parsers = Parsers::new();
|
||||
for path in inputs.files {
|
||||
let Ok(bytes) = std::fs::read(path) else {
|
||||
cov.files_unreadable += 1;
|
||||
continue;
|
||||
};
|
||||
let kind = classify_file(path);
|
||||
let nodes = match kind {
|
||||
if kind == FileKind::Other {
|
||||
cov.files_unsupported += 1;
|
||||
continue;
|
||||
}
|
||||
cov.files_supported += 1;
|
||||
// `Some(nodes)` on a clean parse (possibly empty), `None` when the
|
||||
// tree-sitter parse failed — lets coverage distinguish the two.
|
||||
let parsed: Option<Vec<SurfaceNode>> = match kind {
|
||||
FileKind::Python => parsers
|
||||
.python
|
||||
.as_mut()
|
||||
|
|
@ -78,8 +132,7 @@ pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap {
|
|||
inputs.scan_root,
|
||||
));
|
||||
all
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
}),
|
||||
FileKind::JavaScript => parsers
|
||||
.javascript
|
||||
.as_mut()
|
||||
|
|
@ -94,8 +147,7 @@ pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap {
|
|||
inputs.scan_root,
|
||||
));
|
||||
all
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
}),
|
||||
FileKind::TypeScript => parsers
|
||||
.typescript
|
||||
.as_mut()
|
||||
|
|
@ -116,8 +168,7 @@ pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap {
|
|||
inputs.scan_root,
|
||||
));
|
||||
all
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
}),
|
||||
FileKind::Java => parsers
|
||||
.java
|
||||
.as_mut()
|
||||
|
|
@ -138,8 +189,7 @@ pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap {
|
|||
inputs.scan_root,
|
||||
));
|
||||
all
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
}),
|
||||
FileKind::Go => parsers
|
||||
.go
|
||||
.as_mut()
|
||||
|
|
@ -154,8 +204,7 @@ pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap {
|
|||
inputs.scan_root,
|
||||
));
|
||||
all
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
}),
|
||||
FileKind::Php => parsers
|
||||
.php
|
||||
.as_mut()
|
||||
|
|
@ -170,8 +219,7 @@ pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap {
|
|||
inputs.scan_root,
|
||||
));
|
||||
all
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
}),
|
||||
FileKind::Ruby => parsers
|
||||
.ruby
|
||||
.as_mut()
|
||||
|
|
@ -186,8 +234,7 @@ pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap {
|
|||
inputs.scan_root,
|
||||
));
|
||||
all
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
}),
|
||||
FileKind::Rust => parsers
|
||||
.rust
|
||||
.as_mut()
|
||||
|
|
@ -202,15 +249,38 @@ pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap {
|
|||
inputs.scan_root,
|
||||
));
|
||||
all
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
FileKind::Other => Vec::new(),
|
||||
}),
|
||||
// Unreachable: `Other` is filtered out before this match, but
|
||||
// the arm keeps the match exhaustive.
|
||||
FileKind::Other => None,
|
||||
};
|
||||
for n in nodes {
|
||||
map.nodes.push(n);
|
||||
match parsed {
|
||||
Some(nodes) => {
|
||||
cov.files_parsed += 1;
|
||||
if nodes
|
||||
.iter()
|
||||
.any(|n| matches!(n, SurfaceNode::EntryPoint(_)))
|
||||
{
|
||||
cov.files_with_entry_points += 1;
|
||||
}
|
||||
for n in nodes {
|
||||
map.nodes.push(n);
|
||||
}
|
||||
}
|
||||
None => cov.files_parse_failed += 1,
|
||||
}
|
||||
}
|
||||
|
||||
// Entry-point recall fallback: the pass-1 summary extractor tags
|
||||
// handler functions with `FuncSummary::entry_kind` using its own
|
||||
// (independent) framework detection. Any handler it recognised
|
||||
// that the AST probes above missed is synthesised here so the
|
||||
// surface map's entry-point set is always a superset of what the
|
||||
// taint engine treats as adversary-driven. Route strings are not
|
||||
// recoverable from summaries, so these carry `"(unrouted)"`.
|
||||
let synthesised = synth_entry_points_from_summaries(&map.nodes, inputs.global_summaries);
|
||||
map.nodes.extend(synthesised);
|
||||
|
||||
// Phase 22 — Track F.3: data-store / external-service /
|
||||
// dangerous-local detection from summaries.
|
||||
map.nodes
|
||||
|
|
@ -220,6 +290,13 @@ pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap {
|
|||
map.nodes
|
||||
.extend(dangerous::detect_dangerous_locals(inputs.global_summaries));
|
||||
|
||||
// Auth-detection upgrade: the probes only see router-level evidence
|
||||
// (decorators, annotations, middleware arguments). A handler that
|
||||
// guards itself in its body (`requireAuth(req)` as the first call,
|
||||
// Go-style `if !VerifyToken(...)`) is still auth-gated; lift that
|
||||
// from the handler summary's callee list.
|
||||
upgrade_auth_required_from_summaries(&mut map, inputs.global_summaries);
|
||||
|
||||
// Canonicalise so node indices are stable before reachability
|
||||
// builds edges referring to those indices.
|
||||
map.canonicalize();
|
||||
|
|
@ -230,7 +307,160 @@ pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap {
|
|||
// Re-canonicalise: edges added by reachability need to be sorted
|
||||
// so the serialised JSON stays byte-deterministic.
|
||||
map.canonicalize();
|
||||
map
|
||||
(map, cov)
|
||||
}
|
||||
|
||||
/// Value stored in an entry point's `route` when the node was synthesised
/// from a function summary instead of being found by an AST probe. The
/// summary only says that the function is a handler; the route string the
/// framework binds to it is not available there.
pub const UNROUTED: &str = "(unrouted)";
|
||||
|
||||
/// Map a pass-1 [`EntryKind`] tag to the surface [`Framework`] +
|
||||
/// [`HttpMethod`] pair. Kinds with no verb evidence default to `GET`
|
||||
/// except Next.js server actions, which the framework only ever
|
||||
/// invokes via `POST`.
|
||||
fn entry_kind_to_framework(kind: &EntryKind) -> (Framework, HttpMethod) {
|
||||
match kind {
|
||||
EntryKind::UseServerDirective | EntryKind::FormAction => {
|
||||
(Framework::NextServerAction, HttpMethod::POST)
|
||||
}
|
||||
EntryKind::AppRouteHandler { method } => (Framework::NextAppRouter, *method),
|
||||
EntryKind::ExpressRoute { method } => (Framework::Express, *method),
|
||||
EntryKind::DjangoView { method } => (Framework::Django, *method),
|
||||
EntryKind::FastApiRoute { method } => (Framework::FastApi, *method),
|
||||
EntryKind::FlaskRoute { method } => (Framework::Flask, *method),
|
||||
EntryKind::SpringMapping { method } => (Framework::Spring, *method),
|
||||
EntryKind::JaxRsResource => (Framework::JaxRs, HttpMethod::GET),
|
||||
EntryKind::RailsAction => (Framework::Rails, HttpMethod::GET),
|
||||
EntryKind::SinatraRoute { method } => (Framework::Sinatra, *method),
|
||||
EntryKind::AxumHandler => (Framework::Axum, HttpMethod::GET),
|
||||
EntryKind::ActixHandler => (Framework::Actix, HttpMethod::GET),
|
||||
EntryKind::RocketRoute => (Framework::Rocket, HttpMethod::GET),
|
||||
EntryKind::GoNetHttp => (Framework::NetHttp, HttpMethod::GET),
|
||||
EntryKind::GinRoute => (Framework::Gin, HttpMethod::GET),
|
||||
}
|
||||
}
|
||||
|
||||
/// Synthesise [`SurfaceNode::EntryPoint`] nodes for handlers the pass-1
|
||||
/// summary extractor tagged with [`FuncSummary::entry_kind`](crate::summary::FuncSummary::entry_kind)
|
||||
/// but no AST probe emitted. De-duped against existing probe output on
|
||||
/// `(handler file, handler name)` so a probe-detected route always wins
|
||||
/// (it carries the real route string and span). Summaries carry no
|
||||
/// definition span, so synthesised nodes sit at line 0 of the handler
|
||||
/// file; reachability matches on `(file, name)` and is unaffected.
|
||||
fn synth_entry_points_from_summaries(
|
||||
existing: &[SurfaceNode],
|
||||
summaries: &GlobalSummaries,
|
||||
) -> Vec<SurfaceNode> {
|
||||
let mut seen: std::collections::HashSet<(String, String)> = existing
|
||||
.iter()
|
||||
.filter_map(|n| match n {
|
||||
SurfaceNode::EntryPoint(ep) => {
|
||||
Some((ep.handler_location.file.clone(), ep.handler_name.clone()))
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
let mut out: Vec<SurfaceNode> = Vec::new();
|
||||
for (key, summary) in summaries.iter() {
|
||||
let Some(kind) = &summary.entry_kind else {
|
||||
continue;
|
||||
};
|
||||
if key.name.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let file = crate::surface::namespace_file(&key.namespace).to_string();
|
||||
if !seen.insert((file.clone(), key.name.clone())) {
|
||||
continue;
|
||||
}
|
||||
let (framework, method) = entry_kind_to_framework(kind);
|
||||
let loc = SourceLocation {
|
||||
file,
|
||||
line: 0,
|
||||
col: 0,
|
||||
};
|
||||
out.push(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc.clone(),
|
||||
framework,
|
||||
method,
|
||||
route: UNROUTED.to_string(),
|
||||
handler_name: key.name.clone(),
|
||||
handler_location: loc,
|
||||
auth_required: false,
|
||||
}));
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Set `auth_required = true` on entry points whose handler *body*
|
||||
/// calls a known auth guard, complementing the probes' router-level
|
||||
/// (decorator / annotation / middleware-argument) detection.
|
||||
///
|
||||
/// The handler summary is located by `(handler file, handler name)`;
|
||||
/// its direct callees' leaf names are matched case-insensitively
|
||||
/// against the per-language router-auth marker registry
|
||||
/// ([`router_auth_markers_for_lang`]). Depth is deliberately 1 — a
|
||||
/// guard buried two helpers deep is a router concern the call graph
|
||||
/// models better than a name list.
|
||||
fn upgrade_auth_required_from_summaries(map: &mut SurfaceMap, summaries: &GlobalSummaries) {
|
||||
use std::collections::HashMap;
|
||||
let needs_upgrade: Vec<usize> = map
|
||||
.nodes
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, n)| match n {
|
||||
SurfaceNode::EntryPoint(ep) if !ep.auth_required && !ep.handler_name.is_empty() => {
|
||||
Some(i)
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
if needs_upgrade.is_empty() {
|
||||
return;
|
||||
}
|
||||
// (file, name) → summaries defining that function. Built once; the
|
||||
// map is small relative to the summary count.
|
||||
let mut by_fn: HashMap<
|
||||
(&str, &str),
|
||||
Vec<(&crate::symbol::FuncKey, &crate::summary::FuncSummary)>,
|
||||
> = HashMap::new();
|
||||
for (key, summary) in summaries.iter() {
|
||||
by_fn
|
||||
.entry((crate::surface::namespace_file(&key.namespace), &key.name))
|
||||
.or_default()
|
||||
.push((key, summary));
|
||||
}
|
||||
let mut marker_cache: HashMap<crate::symbol::Lang, Vec<&'static str>> = HashMap::new();
|
||||
let mut to_set: Vec<usize> = Vec::new();
|
||||
for idx in needs_upgrade {
|
||||
let SurfaceNode::EntryPoint(ep) = &map.nodes[idx] else {
|
||||
continue;
|
||||
};
|
||||
let Some(cands) = by_fn.get(&(ep.handler_location.file.as_str(), ep.handler_name.as_str()))
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
let guarded = cands.iter().any(|(key, summary)| {
|
||||
let markers = marker_cache
|
||||
.entry(key.lang)
|
||||
.or_insert_with(|| router_auth_markers_for_lang(key.lang));
|
||||
if markers.is_empty() {
|
||||
return false;
|
||||
}
|
||||
summary.callees.iter().any(|c| {
|
||||
let leaf = crate::callgraph::normalize_callee_name(&c.name);
|
||||
markers.iter().any(|m| m.eq_ignore_ascii_case(leaf))
|
||||
})
|
||||
});
|
||||
if guarded {
|
||||
to_set.push(idx);
|
||||
}
|
||||
}
|
||||
for idx in to_set {
|
||||
if let SurfaceNode::EntryPoint(ep) = &mut map.nodes[idx] {
|
||||
ep.auth_required = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, PartialEq, Eq)]
|
||||
|
|
@ -325,6 +555,139 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
/// With no files to probe, a summary tagged as a Gin route must still
/// yield exactly one entry point, carrying the placeholder route and the
/// file named by the summary key.
#[test]
fn synthesises_entry_point_from_summary_entry_kind() {
    use crate::summary::FuncSummary;
    use crate::symbol::{FuncKey, Lang};

    let scan_root = tempdir().unwrap();
    let config = Config::default();

    // The only evidence of the handler is the summary tag; no source file
    // is handed to the builder.
    let mut summaries = GlobalSummaries::new();
    summaries.insert(
        FuncKey::new_function(Lang::Go, "routes.go", "ListUsers", None),
        FuncSummary {
            name: "ListUsers".into(),
            file_path: "routes.go".into(),
            lang: "go".into(),
            entry_kind: Some(EntryKind::GinRoute),
            ..Default::default()
        },
    );

    let call_graph = empty_call_graph();
    let no_files: Vec<PathBuf> = Vec::new();
    let inputs = empty_inputs(
        &no_files,
        Some(scan_root.path()),
        &summaries,
        &call_graph,
        &config,
    );
    let map = build_surface_map(&inputs);

    let found: Vec<_> = map.entry_points().collect();
    assert_eq!(found.len(), 1, "fallback entry-point expected");
    let ep = &found[0];
    assert_eq!(ep.handler_name, "ListUsers");
    assert_eq!(ep.framework, Framework::Gin);
    assert_eq!(ep.route, UNROUTED);
    assert_eq!(ep.handler_location.file, "routes.go");
}
|
||||
|
||||
/// When the probe and the summary both see the same Flask handler, the
/// map must contain a single entry point and it must carry the real
/// route string rather than the placeholder.
#[test]
fn probe_entry_point_suppresses_summary_fallback() {
    use crate::summary::FuncSummary;
    use crate::symbol::{FuncKey, Lang};

    let scan_root = tempdir().unwrap();
    let app_py = scan_root.path().join("app.py");
    fs::write(
        &app_py,
        "from flask import Flask\napp = Flask(__name__)\n@app.get('/u')\ndef u(): pass\n",
    )
    .unwrap();
    let config = Config::default();

    // The summary tags the very handler the probe will also find.
    let mut summaries = GlobalSummaries::new();
    summaries.insert(
        FuncKey::new_function(Lang::Python, "app.py", "u", None),
        FuncSummary {
            name: "u".into(),
            file_path: "app.py".into(),
            lang: "python".into(),
            entry_kind: Some(EntryKind::FlaskRoute {
                method: HttpMethod::GET,
            }),
            ..Default::default()
        },
    );

    let call_graph = empty_call_graph();
    let files = vec![app_py];
    let inputs = empty_inputs(
        &files,
        Some(scan_root.path()),
        &summaries,
        &call_graph,
        &config,
    );
    let map = build_surface_map(&inputs);

    let found: Vec<_> = map.entry_points().collect();
    assert_eq!(found.len(), 1, "no duplicate from the fallback");
    assert_eq!(found[0].route, "/u", "probe route (with real path) wins");
}
|
||||
|
||||
/// An Express route registered without any middleware argument must
/// still end up `auth_required` when the handler's summary lists a
/// `requireAuth` callee.
#[test]
fn body_level_auth_guard_upgrades_auth_required() {
    use crate::summary::{CalleeSite, FuncSummary};
    use crate::symbol::{FuncKey, Lang};

    let scan_root = tempdir().unwrap();
    let routes_js = scan_root.path().join("routes.js");
    // No middleware between the path and the handler.
    fs::write(
        &routes_js,
        "const express = require('express');\nconst app = express();\napp.get('/admin', function admin(req, res) { requireAuth(req); res.send('x'); });\n",
    )
    .unwrap();
    let config = Config::default();

    // Summary for the handler, recording the guard call made in its body.
    let mut summaries = GlobalSummaries::new();
    summaries.insert(
        FuncKey::new_function(Lang::JavaScript, "routes.js", "admin", None),
        FuncSummary {
            name: "admin".into(),
            file_path: "routes.js".into(),
            lang: "javascript".into(),
            callees: vec![CalleeSite::bare("requireAuth")],
            ..Default::default()
        },
    );

    let call_graph = empty_call_graph();
    let files = vec![routes_js];
    let inputs = empty_inputs(
        &files,
        Some(scan_root.path()),
        &summaries,
        &call_graph,
        &config,
    );
    let map = build_surface_map(&inputs);

    let admin = map
        .entry_points()
        .find(|candidate| candidate.handler_name == "admin")
        .expect("express probe finds the named handler");
    assert!(
        admin.auth_required,
        "body-level requireAuth call should upgrade auth_required"
    );
}
|
||||
|
||||
/// Callees that are not auth markers must leave `auth_required` false.
#[test]
fn unrelated_callee_does_not_upgrade_auth() {
    use crate::summary::{CalleeSite, FuncSummary};
    use crate::symbol::{FuncKey, Lang};

    let scan_root = tempdir().unwrap();
    let app_py = scan_root.path().join("app.py");
    fs::write(
        &app_py,
        "from flask import Flask\napp = Flask(__name__)\n@app.get('/x')\ndef x(): pass\n",
    )
    .unwrap();
    let config = Config::default();

    // Neither callee is an auth guard; `settings` in particular must not
    // be accepted through a partial match against a marker.
    let unrelated = vec![CalleeSite::bare("settings"), CalleeSite::bare("render")];
    let mut summaries = GlobalSummaries::new();
    summaries.insert(
        FuncKey::new_function(Lang::Python, "app.py", "x", None),
        FuncSummary {
            name: "x".into(),
            file_path: "app.py".into(),
            lang: "python".into(),
            callees: unrelated,
            ..Default::default()
        },
    );

    let call_graph = empty_call_graph();
    let files = vec![app_py];
    let inputs = empty_inputs(
        &files,
        Some(scan_root.path()),
        &summaries,
        &call_graph,
        &config,
    );
    let map = build_surface_map(&inputs);

    let first = map.entry_points().next().expect("entry point");
    assert!(!first.auth_required);
}
|
||||
|
||||
#[test]
|
||||
fn empty_inputs_produce_empty_map() {
|
||||
let dir = tempdir().unwrap();
|
||||
|
|
|
|||
|
|
@ -12,17 +12,33 @@
|
|||
//! detection pass is added here; the surface layer just lifts the
|
||||
//! cap-bit information out of the summary.
|
||||
|
||||
use super::{DangerousLocal, SourceLocation, SurfaceNode};
|
||||
use super::{DangerousLocal, SourceLocation, SurfaceNode, cap_label_string, namespace_file};
|
||||
use crate::labels::Cap;
|
||||
use crate::summary::GlobalSummaries;
|
||||
use crate::summary::{FuncSummary, GlobalSummaries};
|
||||
|
||||
/// Cap bits that indicate the function is a *local* sink — code exec,
|
||||
/// unsafe deserialisation, server-side template injection, format
|
||||
/// string injection. Other sink caps (SQL_QUERY → DataStore;
|
||||
/// SSRF → ExternalService) live elsewhere in the surface layer so the
|
||||
/// node taxonomy matches the chain composer's expectations.
|
||||
/// Cap bits that indicate the function is a *local* sink — a sink with no
|
||||
/// externally observable side effect that attacker data flows *into*.
|
||||
/// Other sink caps live elsewhere in the surface layer so the node
|
||||
/// taxonomy matches the chain composer's expectations: `SQL_QUERY` /
|
||||
/// `FILE_IO` → DataStore (see [`super::datastore`]); `SSRF` / `DATA_EXFIL`
|
||||
/// → ExternalService (see [`super::external`]).
|
||||
///
|
||||
/// The set was widened from the original four (code-exec, deserialize,
|
||||
/// SSTI, format-string) to cover every injection-style local sink the
|
||||
/// label registry can classify, so a function that only builds an LDAP
|
||||
/// filter, parses XXE-vulnerable XML, or merges into a prototype is no
|
||||
/// longer absent from the surface map.
|
||||
fn dangerous_caps() -> Cap {
|
||||
Cap::CODE_EXEC | Cap::DESERIALIZE | Cap::SSTI | Cap::FMT_STRING
|
||||
Cap::CODE_EXEC
|
||||
| Cap::DESERIALIZE
|
||||
| Cap::SSTI
|
||||
| Cap::FMT_STRING
|
||||
| Cap::LDAP_INJECTION
|
||||
| Cap::XPATH_INJECTION
|
||||
| Cap::HEADER_INJECTION
|
||||
| Cap::OPEN_REDIRECT
|
||||
| Cap::XXE
|
||||
| Cap::PROTOTYPE_POLLUTION
|
||||
}
|
||||
|
||||
pub fn detect_dangerous_locals(summaries: &GlobalSummaries) -> Vec<SurfaceNode> {
|
||||
|
|
@ -33,19 +49,46 @@ pub fn detect_dangerous_locals(summaries: &GlobalSummaries) -> Vec<SurfaceNode>
|
|||
if caps.is_empty() {
|
||||
continue;
|
||||
}
|
||||
// Project-relative POSIX file, keyed off the FuncKey namespace so
|
||||
// a dangerous-local node and the entry-point that reaches it agree
|
||||
// on file identity (FuncSummary.file_path is an absolute path and
|
||||
// would never match an entry-point's relative handler file).
|
||||
let file = namespace_file(&key.namespace).to_string();
|
||||
let (line, col) = sink_line_col(summary, &file, caps);
|
||||
out.push(SurfaceNode::DangerousLocal(DangerousLocal {
|
||||
location: SourceLocation {
|
||||
file: summary.file_path.clone(),
|
||||
line: 0,
|
||||
col: 0,
|
||||
},
|
||||
location: SourceLocation { file, line, col },
|
||||
function_name: key.qualified_name(),
|
||||
cap_bits: caps.bits(),
|
||||
label: cap_label_string(caps.bits()),
|
||||
}));
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Resolve the `(line, col)` of the dangerous sink inside `summary` by
|
||||
/// scanning its `param_to_sink` [`crate::summary::SinkSite`] records for a
|
||||
/// site whose cap intersects the dangerous mask. Prefers a same-file,
|
||||
/// non-chain-promoted site (the function's own sink) over a deeper
|
||||
/// chain-hop site so the coordinates point at code in `file`. Falls back
|
||||
/// to `(0, 0)` when the summary carries no located sink (pass-2 transient
|
||||
/// summaries, or summaries extracted without tree access).
|
||||
fn sink_line_col(summary: &FuncSummary, file: &str, mask: Cap) -> (u32, u32) {
|
||||
let mut fallback: Option<(u32, u32)> = None;
|
||||
for (_param, sites) in &summary.param_to_sink {
|
||||
for site in sites {
|
||||
if site.line == 0 || (site.cap & mask).is_empty() {
|
||||
continue;
|
||||
}
|
||||
let same_file = site.file_rel.is_empty() || site.file_rel == file;
|
||||
if same_file && !site.from_chain {
|
||||
return (site.line, site.col);
|
||||
}
|
||||
fallback.get_or_insert((site.line, site.col));
|
||||
}
|
||||
}
|
||||
fallback.unwrap_or((0, 0))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
@ -64,6 +107,63 @@ mod tests {
|
|||
(key, summary)
|
||||
}
|
||||
|
||||
/// A dangerous-local node must take its file from the summary key's
/// namespace, its line/column from the recorded sink site, and its label
/// from the capability bits.
#[test]
fn carries_real_span_and_label_from_param_to_sink() {
    use crate::summary::SinkSite;

    let own_sink = SinkSite {
        file_rel: "app.py".into(),
        line: 17,
        col: 9,
        snippet: "Template(x).render()".into(),
        cap: Cap::SSTI,
        from_chain: false,
    };

    let mut summaries = GlobalSummaries::new();
    summaries.insert(
        FuncKey::new_function(Lang::Python, "app.py", "render", None),
        FuncSummary {
            name: "render".into(),
            // Deliberately absolute: the node must not copy this path.
            file_path: "/abs/app.py".into(),
            lang: "python".into(),
            sink_caps: Cap::SSTI.bits(),
            param_to_sink: vec![(0, vec![own_sink].into())],
            ..Default::default()
        },
    );

    let nodes = detect_dangerous_locals(&summaries);
    assert_eq!(nodes.len(), 1);
    let local = match &nodes[0] {
        SurfaceNode::DangerousLocal(local) => local,
        _ => panic!(),
    };
    // Relative file from the key's namespace, not the absolute summary path.
    assert_eq!(local.location.file, "app.py");
    assert_eq!(local.location.line, 17);
    assert_eq!(local.location.col, 9);
    assert_eq!(local.label, "ssti");
}
|
||||
|
||||
/// Each capability added to the dangerous mask beyond the original four
/// must, on its own, produce exactly one dangerous-local node.
#[test]
fn detects_widened_injection_caps() {
    let widened = [
        Cap::XXE,
        Cap::LDAP_INJECTION,
        Cap::XPATH_INJECTION,
        Cap::OPEN_REDIRECT,
        Cap::HEADER_INJECTION,
        Cap::PROTOTYPE_POLLUTION,
    ];
    for cap in widened {
        // Fresh summary set per capability so the nodes cannot interact.
        let (key, summary) = summary_with_caps("h", "danger.py", cap);
        let mut summaries = GlobalSummaries::new();
        summaries.insert(key, summary);
        let node_count = detect_dangerous_locals(&summaries).len();
        assert_eq!(
            node_count, 1,
            "cap {cap:?} should surface a dangerous-local node"
        );
    }
}
|
||||
|
||||
#[test]
|
||||
fn detects_eval_sink() {
|
||||
let mut gs = GlobalSummaries::new();
|
||||
|
|
|
|||
|
|
@ -12,8 +12,9 @@
|
|||
//! are forgiving — the surface map is informational, not a finding
|
||||
//! that fires on its own.
|
||||
|
||||
use super::{DataStore, DataStoreKind, SourceLocation, SurfaceNode};
|
||||
use crate::summary::{CalleeSite, FuncSummary, GlobalSummaries};
|
||||
use super::{AccessMode, DataStore, DataStoreKind, SourceLocation, SurfaceNode, namespace_file};
|
||||
use crate::labels::Cap;
|
||||
use crate::summary::GlobalSummaries;
|
||||
|
||||
/// One detection rule: leaf-name pattern → store kind + label. Stored
|
||||
/// as a flat list so adding a new ORM / driver is a one-line edit.
|
||||
|
|
@ -355,9 +356,15 @@ pub fn detect_data_stores(summaries: &GlobalSummaries) -> Vec<SurfaceNode> {
|
|||
let mut seen: std::collections::HashSet<(String, u32, String)> =
|
||||
std::collections::HashSet::new();
|
||||
for (key, summary) in summaries.iter() {
|
||||
// Project-relative POSIX file, keyed off the FuncKey namespace so a
|
||||
// data-store node and the entry-point that reaches it agree on file
|
||||
// identity (FuncSummary.file_path is an absolute path).
|
||||
let file = namespace_file(&key.namespace).to_string();
|
||||
let owner = key.qualified_name();
|
||||
let typed = summaries
|
||||
.get_ssa(key)
|
||||
.map(|s| s.typed_call_receivers.as_slice());
|
||||
let mut matched_for_fn = false;
|
||||
for callee in &summary.callees {
|
||||
let rule = match_rule(&callee.name).or_else(|| {
|
||||
typed
|
||||
|
|
@ -365,7 +372,8 @@ pub fn detect_data_stores(summaries: &GlobalSummaries) -> Vec<SurfaceNode> {
|
|||
.and_then(|c| match_rule(&qualify(c, &callee.name)))
|
||||
});
|
||||
let Some(rule) = rule else { continue };
|
||||
let location = call_site_location(summary, callee);
|
||||
matched_for_fn = true;
|
||||
let location = call_site_location(&file, callee.span);
|
||||
let dedup = (location.file.clone(), location.line, rule.label.to_string());
|
||||
if !seen.insert(dedup) {
|
||||
continue;
|
||||
|
|
@ -374,12 +382,117 @@ pub fn detect_data_stores(summaries: &GlobalSummaries) -> Vec<SurfaceNode> {
|
|||
location,
|
||||
kind: rule.kind,
|
||||
label: rule.label.to_string(),
|
||||
owner: owner.clone(),
|
||||
access: classify_access(leaf_segment(&callee.name)),
|
||||
}));
|
||||
}
|
||||
|
||||
// Cap-driven fallback: a function whose own `sink_caps` include
|
||||
// SQL_QUERY / FILE_IO is a data-store access site even when no
|
||||
// direct callee matched the driver table (custom DAO wrapper,
|
||||
// cross-file-resolved execute). Mirrors external.rs's SSRF
|
||||
// fallback. Skipped when a named driver already fired so the
|
||||
// precise label wins.
|
||||
if !matched_for_fn {
|
||||
let caps = summary.sink_caps();
|
||||
let fallback = if caps.contains(Cap::SQL_QUERY) {
|
||||
Some((DataStoreKind::Sql, "SQL query"))
|
||||
} else if caps.contains(Cap::FILE_IO) {
|
||||
Some((DataStoreKind::Filesystem, "File access"))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
if let Some((kind, label)) = fallback {
|
||||
let dedup = (file.clone(), 0, label.to_string());
|
||||
if seen.insert(dedup) {
|
||||
out.push(SurfaceNode::DataStore(DataStore {
|
||||
location: call_site_location(&file, None),
|
||||
kind,
|
||||
label: label.to_string(),
|
||||
owner: owner.clone(),
|
||||
// Cap bits carry no operation direction; a raw
|
||||
// SQL_QUERY / FILE_IO sink can be either.
|
||||
access: AccessMode::ReadWrite,
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Classify the operation direction of a data-store access from the
|
||||
/// callee's leaf name. Whole-prefix match on a lowercase verb table —
|
||||
/// `findOne` / `find_by_id` / `findAll` all classify as reads via the
|
||||
/// `find` prefix. Connect-/client-construction sites and unrecognised
|
||||
/// verbs stay [`AccessMode::Unknown`] so reachability keeps emitting
|
||||
/// the conservative `ReadsFrom` edge for them.
|
||||
fn classify_access(leaf: &str) -> AccessMode {
|
||||
const READ: &[&str] = &[
|
||||
"find",
|
||||
"get",
|
||||
"query",
|
||||
"select",
|
||||
"read",
|
||||
"fetch",
|
||||
"scan",
|
||||
"count",
|
||||
"exists",
|
||||
"aggregate",
|
||||
"lrange",
|
||||
"smembers",
|
||||
"hget",
|
||||
"mget",
|
||||
"keys",
|
||||
"first",
|
||||
"pluck",
|
||||
"all",
|
||||
];
|
||||
const WRITE: &[&str] = &[
|
||||
"insert", "update", "delete", "save", "create", "set", "put", "write", "remove", "drop",
|
||||
"truncate", "upsert", "persist", "destroy", "del", "hset", "lpush", "rpush", "sadd",
|
||||
"zadd", "append", "rename", "unlink", "mkdir", "rmdir", "incr", "decr", "expire",
|
||||
];
|
||||
const READ_WRITE: &[&str] = &[
|
||||
"execute",
|
||||
"executemany",
|
||||
"executescript",
|
||||
"exec",
|
||||
"run",
|
||||
"batch",
|
||||
"transaction",
|
||||
"pipeline",
|
||||
];
|
||||
let l = leaf.trim();
|
||||
// Verb-prefix match with a word boundary: the verb must be the whole
|
||||
// leaf, or be followed by `_` (snake_case), an uppercase letter
|
||||
// (camelCase), or a digit. `findOne` / `find_by_id` → read;
|
||||
// `settings` does NOT match `set`.
|
||||
let has_prefix = |verbs: &[&str]| {
|
||||
verbs.iter().any(|v| {
|
||||
l.get(..v.len())
|
||||
.is_some_and(|head| head.eq_ignore_ascii_case(v))
|
||||
&& l.get(v.len()..)
|
||||
.is_some_and(|rest| match rest.chars().next() {
|
||||
None => true,
|
||||
Some(c) => c == '_' || c.is_ascii_uppercase() || c.is_ascii_digit(),
|
||||
})
|
||||
})
|
||||
};
|
||||
// Order matters: WRITE before READ so `setex`-style verbs with a
|
||||
// read-looking suffix do not misclassify; READ_WRITE checked first
|
||||
// because `execute` would otherwise never match.
|
||||
if has_prefix(READ_WRITE) {
|
||||
AccessMode::ReadWrite
|
||||
} else if has_prefix(WRITE) {
|
||||
AccessMode::Write
|
||||
} else if has_prefix(READ) {
|
||||
AccessMode::Read
|
||||
} else {
|
||||
AccessMode::Unknown
|
||||
}
|
||||
}
|
||||
|
||||
/// Last segment of a callee text after the final `.` or `::`.
|
||||
fn leaf_segment(name: &str) -> &str {
|
||||
let after_colon = name.rsplit("::").next().unwrap_or(name);
|
||||
|
|
@ -422,15 +535,14 @@ fn match_rule(callee: &str) -> Option<&'static DriverRule> {
|
|||
})
|
||||
}
|
||||
|
||||
/// Source location of a call site. Reads the 1-based `(line, col)`
|
||||
/// recorded on the [`CalleeSite`] at CFG-build time (populated for every
|
||||
/// summary produced after the span field landed); for legacy summaries
|
||||
/// loaded from SQLite with no span, falls back to the function's host
|
||||
/// file with line 0.
|
||||
fn call_site_location(summary: &FuncSummary, callee: &CalleeSite) -> SourceLocation {
|
||||
let (line, col) = callee.span.unwrap_or((0, 0));
|
||||
/// Source location of a call site in the project-relative `file`. Reads
|
||||
/// the 1-based `(line, col)` recorded on the [`CalleeSite`] at CFG-build
|
||||
/// time when `span` is `Some`; for legacy summaries loaded from SQLite
|
||||
/// with no span (and the cap-driven fallback path) falls back to line 0.
|
||||
fn call_site_location(file: &str, span: Option<(u32, u32)>) -> SourceLocation {
|
||||
let (line, col) = span.unwrap_or((0, 0));
|
||||
SourceLocation {
|
||||
file: summary.file_path.clone(),
|
||||
file: file.to_string(),
|
||||
line,
|
||||
col,
|
||||
}
|
||||
|
|
@ -439,6 +551,7 @@ fn call_site_location(summary: &FuncSummary, callee: &CalleeSite) -> SourceLocat
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::summary::{CalleeSite, FuncSummary};
|
||||
use crate::symbol::{FuncKey, Lang};
|
||||
|
||||
fn summary_with_callees(name: &str, file: &str, callees: &[&str]) -> (FuncKey, FuncSummary) {
|
||||
|
|
@ -457,6 +570,49 @@ mod tests {
|
|||
(key, summary)
|
||||
}
|
||||
|
||||
#[test]
fn classify_access_verb_boundaries() {
    // Each pair is (callee leaf, expected classification). The last three
    // guard the word-boundary rule: a lowercase continuation after the
    // verb (`set|tings`, `all|ocate`) must not count as a verb match, and
    // `connect` is in no verb table at all.
    let cases = [
        ("findOne", AccessMode::Read),
        ("find_by_id", AccessMode::Read),
        ("get", AccessMode::Read),
        ("insertMany", AccessMode::Write),
        ("save", AccessMode::Write),
        ("deleteOne", AccessMode::Write),
        ("execute", AccessMode::ReadWrite),
        ("executemany", AccessMode::ReadWrite),
        ("Exec", AccessMode::ReadWrite),
        ("settings", AccessMode::Unknown),
        ("allocate", AccessMode::Unknown),
        ("connect", AccessMode::Unknown),
    ];
    for (leaf, expected) in cases {
        assert_eq!(classify_access(leaf), expected);
    }
}
|
||||
|
||||
#[test]
fn detected_store_carries_access_mode() {
    // Runs detection over a single function with a single callee and
    // returns the access mode of the one data-store node it must produce.
    let access_of = |owner: &str, file: &str, callee: &str| -> AccessMode {
        let mut gs = GlobalSummaries::new();
        let (key, summary) = summary_with_callees(owner, file, &[callee]);
        gs.insert(key, summary);
        let nodes = detect_data_stores(&gs);
        assert_eq!(nodes.len(), 1);
        match &nodes[0] {
            SurfaceNode::DataStore(ds) => ds.access,
            _ => panic!(),
        }
    };

    // `connect`-style driver match → Unknown access; the node still
    // surfaces and reachability treats it as a conservative read.
    assert_eq!(
        access_of("init", "db.py", "psycopg2.connect"),
        AccessMode::Unknown
    );

    // `pool.query` driver match → leaf `query` classifies as Read.
    assert_eq!(access_of("run", "db.js", "pool.query"), AccessMode::Read);
}
|
||||
|
||||
#[test]
|
||||
fn datastore_carries_callee_span_when_present() {
|
||||
// When the CFG populates `CalleeSite.span`, the detected datastore
|
||||
|
|
@ -484,6 +640,56 @@ mod tests {
|
|||
assert_eq!(ds.location.col, 13);
|
||||
}
|
||||
|
||||
#[test]
fn cap_fallback_emits_sql_store_with_owner() {
    // Custom DAO wrapper scenario: the only callee (`self._exec`) is not a
    // known driver, but the function's own sink caps include SQL_QUERY.
    // Detection should fall back to one generic SQL node that names the
    // owning function.
    let dao_wrapper = FuncSummary {
        name: "run_query".into(),
        file_path: "dao.py".into(),
        lang: "python".into(),
        sink_caps: Cap::SQL_QUERY.bits(),
        callees: vec![CalleeSite::bare("self._exec")],
        ..Default::default()
    };
    let mut gs = GlobalSummaries::new();
    gs.insert(
        FuncKey::new_function(Lang::Python, "dao.py", "run_query", None),
        dao_wrapper,
    );

    let nodes = detect_data_stores(&gs);
    assert_eq!(nodes.len(), 1, "got {nodes:?}");
    match nodes.first() {
        Some(SurfaceNode::DataStore(store)) => {
            assert_eq!(store.kind, DataStoreKind::Sql);
            assert_eq!(store.label, "SQL query");
            assert_eq!(store.owner, "run_query");
            assert_eq!(store.location.file, "dao.py");
        }
        _ => panic!(),
    }
}
|
||||
|
||||
#[test]
fn named_driver_suppresses_cap_fallback() {
    // The function both calls a recognised driver (`psycopg2.connect`) and
    // carries the SQL_QUERY sink cap. Only the driver-specific node should
    // be emitted; the generic cap fallback must stay silent.
    let init_fn = FuncSummary {
        name: "init".into(),
        file_path: "dao.py".into(),
        lang: "python".into(),
        sink_caps: Cap::SQL_QUERY.bits(),
        callees: vec![CalleeSite::bare("psycopg2.connect")],
        ..Default::default()
    };
    let mut gs = GlobalSummaries::new();
    gs.insert(
        FuncKey::new_function(Lang::Python, "dao.py", "init", None),
        init_fn,
    );

    let nodes = detect_data_stores(&gs);
    assert_eq!(nodes.len(), 1);
    match nodes.first() {
        Some(SurfaceNode::DataStore(store)) => {
            assert_eq!(store.label, "PostgreSQL (psycopg2)");
        }
        _ => panic!(),
    }
}
|
||||
|
||||
#[test]
|
||||
fn detects_psycopg2_connect() {
|
||||
let mut gs = GlobalSummaries::new();
|
||||
|
|
|
|||
|
|
@ -7,9 +7,9 @@
|
|||
//! consulted so a probe with no SSRF cap (DNS resolver, SMTP sender)
|
||||
//! still surfaces as an external service.
|
||||
|
||||
use super::{ExternalService, ExternalServiceKind, SourceLocation, SurfaceNode};
|
||||
use super::{ExternalService, ExternalServiceKind, SourceLocation, SurfaceNode, namespace_file};
|
||||
use crate::labels::Cap;
|
||||
use crate::summary::{CalleeSite, FuncSummary, GlobalSummaries};
|
||||
use crate::summary::GlobalSummaries;
|
||||
|
||||
struct ClientRule {
|
||||
leaf: &'static str,
|
||||
|
|
@ -337,9 +337,15 @@ pub fn detect_external_services(summaries: &GlobalSummaries) -> Vec<SurfaceNode>
|
|||
let mut out: Vec<SurfaceNode> = Vec::new();
|
||||
let mut seen: std::collections::HashSet<(String, String)> = std::collections::HashSet::new();
|
||||
for (key, summary) in summaries.iter() {
|
||||
// Project-relative POSIX file, keyed off the FuncKey namespace so an
|
||||
// external-service node and the entry-point that reaches it agree on
|
||||
// file identity (FuncSummary.file_path is an absolute path).
|
||||
let file = namespace_file(&key.namespace).to_string();
|
||||
let owner = key.qualified_name();
|
||||
let typed = summaries
|
||||
.get_ssa(key)
|
||||
.map(|s| s.typed_call_receivers.as_slice());
|
||||
let mut matched_for_fn = false;
|
||||
for callee in &summary.callees {
|
||||
let rule = match_rule(&callee.name).or_else(|| {
|
||||
typed
|
||||
|
|
@ -347,7 +353,8 @@ pub fn detect_external_services(summaries: &GlobalSummaries) -> Vec<SurfaceNode>
|
|||
.and_then(|c| match_rule(&qualify(c, &callee.name)))
|
||||
});
|
||||
let Some(rule) = rule else { continue };
|
||||
let location = call_site_location(summary, Some(callee));
|
||||
matched_for_fn = true;
|
||||
let location = call_site_location(&file, callee.span);
|
||||
if !seen.insert((location.file.clone(), rule.label.to_string())) {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -355,22 +362,35 @@ pub fn detect_external_services(summaries: &GlobalSummaries) -> Vec<SurfaceNode>
|
|||
location,
|
||||
kind: rule.kind,
|
||||
label: rule.label.to_string(),
|
||||
owner: owner.clone(),
|
||||
}));
|
||||
}
|
||||
}
|
||||
// Also surface any function whose own sink_caps include SSRF — the
|
||||
// function itself is an outbound network call site even if the
|
||||
// direct callee did not match the rule list. Use the function's
|
||||
// file as the location and synthesise a generic label.
|
||||
for (_key, summary) in summaries.iter() {
|
||||
if summary.sink_caps().contains(Cap::SSRF) {
|
||||
let loc = call_site_location(summary, None);
|
||||
let dedup = (loc.file.clone(), "Outbound HTTP".to_string());
|
||||
|
||||
// Cap-driven fallback: a function whose own sink_caps include SSRF
|
||||
// (outbound request) or DATA_EXFIL (data leaving the system) is an
|
||||
// egress site even when the direct callee did not match the rule
|
||||
// list. Skipped when a named client already fired for this function
|
||||
// so the precise label wins and the generic node does not
|
||||
// double-count the same egress.
|
||||
if matched_for_fn {
|
||||
continue;
|
||||
}
|
||||
let caps = summary.sink_caps();
|
||||
let fallback = if caps.contains(Cap::SSRF) {
|
||||
Some(("Outbound HTTP", ExternalServiceKind::HttpApi))
|
||||
} else if caps.contains(Cap::DATA_EXFIL) {
|
||||
Some(("Data egress", ExternalServiceKind::Unknown))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
if let Some((label, kind)) = fallback {
|
||||
let dedup = (file.clone(), label.to_string());
|
||||
if seen.insert(dedup) {
|
||||
out.push(SurfaceNode::ExternalService(ExternalService {
|
||||
location: loc,
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "Outbound HTTP".to_string(),
|
||||
location: call_site_location(&file, None),
|
||||
kind,
|
||||
label: label.to_string(),
|
||||
owner: owner.clone(),
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
|
@ -410,14 +430,15 @@ fn match_rule(callee: &str) -> Option<&'static ClientRule> {
|
|||
})
|
||||
}
|
||||
|
||||
/// Source location of an external-service call site. Reads the 1-based
|
||||
/// `(line, col)` recorded on the [`CalleeSite`] at CFG-build time when
|
||||
/// available; otherwise (sink-cap–only fallback path, or legacy summaries
|
||||
/// loaded from SQLite) returns the function's host file with line 0.
|
||||
fn call_site_location(summary: &FuncSummary, callee: Option<&CalleeSite>) -> SourceLocation {
|
||||
let (line, col) = callee.and_then(|c| c.span).unwrap_or((0, 0));
|
||||
/// Source location of an external-service call site in the
|
||||
/// project-relative `file`. Reads the 1-based `(line, col)` recorded on
|
||||
/// the [`crate::summary::CalleeSite`] at CFG-build time when `span` is
|
||||
/// `Some`; otherwise (sink-cap–only fallback path, or legacy summaries
|
||||
/// loaded from SQLite) returns the file with line 0.
|
||||
fn call_site_location(file: &str, span: Option<(u32, u32)>) -> SourceLocation {
|
||||
let (line, col) = span.unwrap_or((0, 0));
|
||||
SourceLocation {
|
||||
file: summary.file_path.clone(),
|
||||
file: file.to_string(),
|
||||
line,
|
||||
col,
|
||||
}
|
||||
|
|
@ -426,7 +447,7 @@ fn call_site_location(summary: &FuncSummary, callee: Option<&CalleeSite>) -> Sou
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::summary::CalleeSite;
|
||||
use crate::summary::{CalleeSite, FuncSummary};
|
||||
use crate::symbol::{FuncKey, Lang};
|
||||
|
||||
#[test]
|
||||
|
|
@ -450,6 +471,48 @@ mod tests {
|
|||
assert_eq!(es.label, "requests (Python)");
|
||||
}
|
||||
|
||||
#[test]
fn ssrf_cap_fallback_carries_owner() {
    // No callees at all, only the SSRF sink cap. The summary's `file_path`
    // is absolute while the key's file is `proxy.py`; the emitted node is
    // expected to report the latter.
    let forwarder = FuncSummary {
        name: "forward".into(),
        file_path: "/abs/proxy.py".into(),
        lang: "python".into(),
        sink_caps: Cap::SSRF.bits(),
        ..Default::default()
    };
    let mut gs = GlobalSummaries::new();
    gs.insert(
        FuncKey::new_function(Lang::Python, "proxy.py", "forward", None),
        forwarder,
    );

    let nodes = detect_external_services(&gs);
    assert_eq!(nodes.len(), 1);
    match nodes.first() {
        Some(SurfaceNode::ExternalService(service)) => {
            assert_eq!(service.label, "Outbound HTTP");
            assert_eq!(service.owner, "forward");
            assert_eq!(service.location.file, "proxy.py");
        }
        _ => panic!(),
    }
}
|
||||
|
||||
#[test]
fn data_exfil_cap_emits_egress_node() {
    // A function whose only signal is the DATA_EXFIL sink cap should
    // surface as a single generic "Data egress" external-service node.
    let dumper = FuncSummary {
        name: "dump".into(),
        file_path: "leak.py".into(),
        lang: "python".into(),
        sink_caps: Cap::DATA_EXFIL.bits(),
        ..Default::default()
    };
    let mut gs = GlobalSummaries::new();
    gs.insert(
        FuncKey::new_function(Lang::Python, "leak.py", "dump", None),
        dumper,
    );

    let nodes = detect_external_services(&gs);
    assert_eq!(nodes.len(), 1);
    match nodes.first() {
        Some(SurfaceNode::ExternalService(service)) => {
            assert_eq!(service.label, "Data egress");
        }
        _ => panic!(),
    }
}
|
||||
|
||||
#[test]
|
||||
fn bare_fetch_rule_does_not_match_prefetch_or_cachekey() {
|
||||
let mut gs = GlobalSummaries::new();
|
||||
|
|
|
|||
|
|
@ -26,10 +26,12 @@ use std::path::Path;
|
|||
pub mod build;
|
||||
pub mod dangerous;
|
||||
pub mod datastore;
|
||||
pub mod exposure;
|
||||
pub mod external;
|
||||
pub mod graph;
|
||||
pub mod lang;
|
||||
pub mod reachability;
|
||||
pub mod risk;
|
||||
|
||||
/// Stable source location used as the primary key for every
|
||||
/// [`SurfaceNode`]. `file` is a project-relative POSIX path so the
|
||||
|
|
@ -109,6 +111,53 @@ pub struct DataStore {
|
|||
pub location: SourceLocation,
|
||||
pub kind: DataStoreKind,
|
||||
pub label: String,
|
||||
/// Qualified name of the function that owns this access site
|
||||
/// (`Class::method` or a free function name). Used by reachability
|
||||
/// to connect an entry-point to this store only when the owning
|
||||
/// function is actually on the call-graph frontier, rather than the
|
||||
/// coarse "any node in the same file" match. Empty for legacy maps
|
||||
/// loaded from SQLite before the field landed.
|
||||
#[serde(default, skip_serializing_if = "String::is_empty")]
|
||||
pub owner: String,
|
||||
/// Whether the access site reads, writes, or does both, classified
|
||||
/// from the callee name at detection time (`find`/`get`/`select` →
|
||||
/// read, `insert`/`save`/`delete` → write, `execute`/`exec` →
|
||||
/// read-write). Drives the [`EdgeKind::ReadsFrom`] /
|
||||
/// [`EdgeKind::WritesTo`] split in reachability. `Unknown` for
|
||||
/// connect-style sites and legacy maps loaded from SQLite before
|
||||
/// the field landed.
|
||||
#[serde(default, skip_serializing_if = "AccessMode::is_unknown")]
|
||||
pub access: AccessMode,
|
||||
}
|
||||
|
||||
/// Direction of a data-store access site.
///
/// Serialised in `snake_case`. [`AccessMode::Unknown`] is the
/// [`Default`] value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AccessMode {
    /// The site reads from the store.
    Read,
    /// The site writes to the store.
    Write,
    /// The site can both read from and write to the store.
    ReadWrite,
    /// The direction could not be classified. [`AccessMode::reads`]
    /// conservatively reports such a site as a read.
    #[default]
    Unknown,
}
|
||||
|
||||
impl AccessMode {
|
||||
/// Serde helper: `Unknown` is the default and is omitted from the
|
||||
/// canonical JSON so legacy payloads stay byte-identical.
|
||||
pub fn is_unknown(&self) -> bool {
|
||||
matches!(self, AccessMode::Unknown)
|
||||
}
|
||||
|
||||
/// True when the site can write (Write or ReadWrite).
|
||||
pub fn writes(self) -> bool {
|
||||
matches!(self, AccessMode::Write | AccessMode::ReadWrite)
|
||||
}
|
||||
|
||||
/// True when the site can read (Read, ReadWrite, or Unknown — an
|
||||
/// unclassified site is conservatively treated as a read).
|
||||
pub fn reads(self) -> bool {
|
||||
!matches!(self, AccessMode::Write)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
|
|
@ -130,6 +179,10 @@ pub struct ExternalService {
|
|||
pub location: SourceLocation,
|
||||
pub kind: ExternalServiceKind,
|
||||
pub label: String,
|
||||
/// Qualified name of the function that owns this egress site. See
|
||||
/// [`DataStore::owner`] for why reachability needs it.
|
||||
#[serde(default, skip_serializing_if = "String::is_empty")]
|
||||
pub owner: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
|
|
@ -151,6 +204,13 @@ pub struct DangerousLocal {
|
|||
pub location: SourceLocation,
|
||||
pub function_name: String,
|
||||
pub cap_bits: u32,
|
||||
/// Human-readable sink-class label decoded from `cap_bits`
|
||||
/// (e.g. `"code-exec"`, `"deserialize, ssti"`). Lets the CLI and
|
||||
/// the chain composer name the danger without re-deriving it from
|
||||
/// the raw bitfield. Empty for legacy maps loaded from SQLite
|
||||
/// before the field landed.
|
||||
#[serde(default, skip_serializing_if = "String::is_empty")]
|
||||
pub label: String,
|
||||
}
|
||||
|
||||
/// A node in the [`SurfaceMap`]. Every variant carries a
|
||||
|
|
@ -201,36 +261,109 @@ impl SurfaceNode {
|
|||
}
|
||||
}
|
||||
|
||||
/// Semantic kind of an edge in the [`SurfaceMap`].
///
/// Persistence is via JSON so adding a variant is a non-breaking schema
/// change as long as the SQLite-level migration drops the old
/// surface_map rows.
///
/// Emission status (kept honest so the next maintainer does not inherit
/// a false mental model):
///
/// * **Emitted today** by [`reachability::populate_reaches_edges`]:
///   [`EdgeKind::ReadsFrom`] (entry → data store the entry reads),
///   [`EdgeKind::WritesTo`] (entry → data store the entry writes,
///   from [`DataStore::access`]), [`EdgeKind::TalksTo`] (entry →
///   external service), and [`EdgeKind::Reaches`] (entry →
///   dangerous-local sink). These four are [`EdgeKind::is_reach_like`].
/// * **Reserved** (no production construction site yet):
///   [`EdgeKind::Calls`] (would lift call-graph edges, currently
///   redundant with the [`crate::callgraph::CallGraph`] itself),
///   [`EdgeKind::Triggers`] (needs job/webhook entry modelling), and
///   [`EdgeKind::AuthRequiredOn`] (needs a dedicated auth-check node
///   to originate from — today the auth signal rides on
///   [`EntryPoint::auth_required`] instead).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EdgeKind {
    /// Caller → callee. Wraps the call-graph edge so consumers do
    /// not have to consult [`crate::callgraph::CallGraph`] directly.
    /// Reserved — not emitted.
    Calls,
    /// Entry-point reads from a data store. Emitted by reachability.
    ReadsFrom,
    /// Entry-point writes to a data store. Emitted by reachability
    /// when [`DataStore::access`] classifies the site as writing.
    WritesTo,
    /// Entry-point sends a request to an external service. Emitted by
    /// reachability.
    TalksTo,
    /// Entry-point reaches a dangerous-local sink through some
    /// transitive call chain. Emitted by reachability.
    Reaches,
    /// Entry-point triggers a side-effecting action (job, email,
    /// webhook) other than a direct call. Reserved.
    Triggers,
    /// Entry-point gates downstream access on a successful auth
    /// check. The `from` is the auth-check node, the `to` is the
    /// entry-point. Reserved — needs an auth-check node.
    AuthRequiredOn,
}
|
||||
|
||||
impl EdgeKind {
|
||||
/// True for the edge classes that connect an entry-point to a
|
||||
/// reachable sink / store / external service. The CLI tree and any
|
||||
/// "what does this entry reach" query treat all three uniformly.
|
||||
pub fn is_reach_like(self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
EdgeKind::Reaches | EdgeKind::ReadsFrom | EdgeKind::TalksTo | EdgeKind::WritesTo
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Decode a [`crate::labels::Cap`] bitfield into a stable, human-readable
|
||||
/// list of sink-class slugs (e.g. `0x400` → `["code-exec"]`). Order is
|
||||
/// fixed (low bit first) so two equal bitfields render identically.
|
||||
/// Used for [`DangerousLocal::label`] and the `nyx surface` CLI so the
|
||||
/// raw `0x{:x}` debug dump never reaches a user.
|
||||
pub fn cap_labels(bits: u32) -> Vec<&'static str> {
|
||||
use crate::labels::Cap;
|
||||
const TABLE: &[(Cap, &str)] = &[
|
||||
(Cap::CODE_EXEC, "code-exec"),
|
||||
(Cap::DESERIALIZE, "deserialize"),
|
||||
(Cap::SSTI, "ssti"),
|
||||
(Cap::FMT_STRING, "format-string"),
|
||||
(Cap::SQL_QUERY, "sql"),
|
||||
(Cap::SSRF, "ssrf"),
|
||||
(Cap::FILE_IO, "file-io"),
|
||||
(Cap::LDAP_INJECTION, "ldap-injection"),
|
||||
(Cap::XPATH_INJECTION, "xpath-injection"),
|
||||
(Cap::HEADER_INJECTION, "header-injection"),
|
||||
(Cap::OPEN_REDIRECT, "open-redirect"),
|
||||
(Cap::XXE, "xxe"),
|
||||
(Cap::PROTOTYPE_POLLUTION, "prototype-pollution"),
|
||||
(Cap::CRYPTO, "weak-crypto"),
|
||||
(Cap::DATA_EXFIL, "data-exfil"),
|
||||
(Cap::UNAUTHORIZED_ID, "unauthorized-id"),
|
||||
];
|
||||
let caps = Cap::from_bits_truncate(bits);
|
||||
let mut out: Vec<&'static str> = TABLE
|
||||
.iter()
|
||||
.filter(|(c, _)| caps.contains(*c))
|
||||
.map(|(_, s)| *s)
|
||||
.collect();
|
||||
if out.is_empty() {
|
||||
out.push("sink");
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Comma-joined form of [`cap_labels`].
|
||||
pub fn cap_label_string(bits: u32) -> String {
|
||||
cap_labels(bits).join(", ")
|
||||
}
|
||||
|
||||
/// A single edge in the [`SurfaceMap`]. `from` and `to` are indices
|
||||
/// into [`SurfaceMap::nodes`]; the surface ordering keeps these
|
||||
/// stable across rescans.
|
||||
|
|
@ -337,6 +470,21 @@ impl SurfaceMap {
|
|||
}
|
||||
}
|
||||
|
||||
/// Return the file-path part of a [`crate::symbol::FuncKey`] namespace,
/// dropping the optional `@pkg/name::` package prefix.
///
/// Everything up to and including the last `::` is removed; a namespace
/// without `::` is returned unchanged. For example
/// `"@scope/name::src/file.ts"` (the shape `namespace_with_package`
/// produces for JS/TS files inside resolved packages) becomes
/// `"src/file.ts"`.
///
/// The result is the project-relative POSIX path that matches an
/// [`EntryPoint`]'s `handler_location.file`. The detectors and the
/// reachability pass both key on this function, so a data-store /
/// external / dangerous-local node and the entry-point that reaches it
/// agree on file identity even though `FuncSummary.file_path` is stored
/// as an absolute path.
pub fn namespace_file(ns: &str) -> &str {
    match ns.rsplit_once("::") {
        Some((_package, file)) => file,
        None => ns,
    }
}
|
||||
|
||||
/// Convert an absolute path to a project-relative POSIX path string.
|
||||
/// Returns the absolute path verbatim when the file is outside the
|
||||
/// scan root or when path stripping fails.
|
||||
|
|
|
|||
|
|
@ -19,21 +19,49 @@
|
|||
//! calls `eval()` will surface the eval as a `Reaches` of the entry
|
||||
//! point as long as the eval's host file is on the BFS frontier.
|
||||
|
||||
use super::{EdgeKind, SurfaceEdge, SurfaceMap, SurfaceNode};
|
||||
use super::{EdgeKind, SurfaceEdge, SurfaceMap, SurfaceNode, namespace_file};
|
||||
use crate::callgraph::CallGraph;
|
||||
use crate::summary::GlobalSummaries;
|
||||
use petgraph::Direction;
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
|
||||
/// Maximum BFS depth from an entry-point node. Surface chains beyond
/// eight call-graph hops are rare in practice and the cost of a deeper
/// walk is paid per entry-point per scan. A depth-bounded traversal
/// also prevents recursive cycles from blowing up.
const MAX_BFS_DEPTH: usize = 8;
|
||||
|
||||
/// One reachable destination node, keyed for **function-level** matching.
struct Dest {
    /// Index of the destination node; becomes the `to` endpoint of every
    /// edge emitted for this destination.
    idx: usize,
    /// Project-relative POSIX file the destination lives in.
    file: String,
    /// Qualified name (`Class::method` / free function) of the function
    /// that owns this destination. Empty only for legacy maps loaded
    /// from SQLite before the `owner` field landed — those fall back to
    /// file-level matching.
    owner: String,
    /// Edge classes to emit when an entry-point reaches this destination:
    /// [`EdgeKind::ReadsFrom`] / [`EdgeKind::WritesTo`] for a data store
    /// (driven by [`crate::surface::DataStore::access`]; a read-write
    /// site emits both), [`EdgeKind::TalksTo`] for an external service,
    /// [`EdgeKind::Reaches`] for a dangerous local sink.
    edges: smallvec::SmallVec<[EdgeKind; 2]>,
}
|
||||
|
||||
/// Populate entry-point → sink reachability edges on `map`
/// ([`EdgeKind::ReadsFrom`] / [`EdgeKind::WritesTo`] /
/// [`EdgeKind::TalksTo`] / [`EdgeKind::Reaches`]).
|
||||
/// Mutates the edge list in place; the caller is expected to follow up
|
||||
/// with [`SurfaceMap::canonicalize`] before serialisation.
|
||||
///
|
||||
/// Matching is **function-level** when the entry-point's handler resolves
|
||||
/// to a call-graph node: a destination is connected only when the
|
||||
/// function that owns it is actually on the forward BFS frontier from the
|
||||
/// handler, so two unrelated handlers in the same file no longer both
|
||||
/// "reach" a co-located `eval()`. When the handler cannot be resolved in
|
||||
/// the call graph (anonymous closure handler, unresolved seed) the pass
|
||||
/// falls back to the conservative same-file heuristic so connectivity is
|
||||
/// not silently lost.
|
||||
pub fn populate_reaches_edges(
|
||||
map: &mut SurfaceMap,
|
||||
summaries: &GlobalSummaries,
|
||||
|
|
@ -53,40 +81,42 @@ pub fn populate_reaches_edges(
|
|||
let SurfaceNode::EntryPoint(ep) = node else {
|
||||
continue;
|
||||
};
|
||||
let mut reachable_files: HashSet<String> = HashSet::new();
|
||||
// Seed with the handler's host file — the entry-point itself
|
||||
// counts as reachable, so any DataStore / ExternalService /
|
||||
// DangerousLocal in the same file is connected even when the
|
||||
// call graph cannot resolve the seed FuncKey.
|
||||
reachable_files.insert(ep.handler_location.file.clone());
|
||||
|
||||
// Locate seed FuncKeys whose `namespace` (project-relative
|
||||
// POSIX path, optionally prefixed with `@pkg/name::`) matches
|
||||
// the entry's file and whose `name` matches the handler. More
|
||||
// than one seed is possible (overloaded methods, duplicate
|
||||
// definitions).
|
||||
//
|
||||
// Phase 23 follow-up: this used to be an `ends_with` substring
|
||||
// check on both sides, which silently aliased same-basename
|
||||
// files in sibling directories — `subdir/app.py` and
|
||||
// `other/app.py` would both seed when the entry-point pointed
|
||||
// at `app.py`. We now compare the file part exactly so a
|
||||
// handler in `subdir/app.py` only seeds the FuncKey whose
|
||||
// namespace strips to `subdir/app.py`.
|
||||
let seeds = call_graph
|
||||
.index
|
||||
.iter()
|
||||
.filter(|(k, _)| k.name == ep.handler_name)
|
||||
.filter(|(k, _)| file_part_of_namespace(&k.namespace) == ep.handler_location.file)
|
||||
.map(|(_, idx)| *idx)
|
||||
.collect::<Vec<_>>();
|
||||
// Locate seed FuncKeys whose namespace file-part matches the
|
||||
// entry's handler file and whose `name` matches the handler.
|
||||
// More than one seed is possible (overloads, duplicate defs).
|
||||
// Anonymous handlers (empty name) match nothing — handled by the
|
||||
// unresolved fallback below.
|
||||
let seeds = if ep.handler_name.is_empty() {
|
||||
Vec::new()
|
||||
} else {
|
||||
call_graph
|
||||
.index
|
||||
.iter()
|
||||
.filter(|(k, _)| k.name == ep.handler_name)
|
||||
.filter(|(k, _)| namespace_file(&k.namespace) == ep.handler_location.file)
|
||||
.map(|(_, idx)| *idx)
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
let seed_found = !seeds.is_empty();
|
||||
|
||||
// Forward BFS over the call graph, collecting the set of reachable
|
||||
// owner functions as `(file, qualified_name)` keys. Inserting the
|
||||
// *file part* of the namespace (not the raw `@pkg::path` namespace)
|
||||
// fixes the prior bug where packaged JS/TS namespaces never matched
|
||||
// a destination's bare file, silently killing all transitive reach.
|
||||
let mut reachable_fns: HashSet<(String, String)> = HashSet::new();
|
||||
let mut reachable_files: HashSet<String> = HashSet::new();
|
||||
reachable_files.insert(ep.handler_location.file.clone());
|
||||
|
||||
let mut visited: HashSet<_> = seeds.iter().copied().collect();
|
||||
let mut queue: VecDeque<(petgraph::graph::NodeIndex, usize)> =
|
||||
seeds.iter().map(|n| (*n, 0)).collect();
|
||||
while let Some((node_idx, depth)) = queue.pop_front() {
|
||||
if let Some(key) = call_graph.graph.node_weight(node_idx) {
|
||||
reachable_files.insert(key.namespace.clone());
|
||||
let file = namespace_file(&key.namespace).to_string();
|
||||
reachable_fns.insert((file.clone(), key.qualified_name()));
|
||||
reachable_files.insert(file);
|
||||
}
|
||||
if depth >= MAX_BFS_DEPTH {
|
||||
continue;
|
||||
|
|
@ -101,13 +131,24 @@ pub fn populate_reaches_edges(
|
|||
}
|
||||
}
|
||||
|
||||
for (dst_idx, dst_file) in &dst_index {
|
||||
if reachable_files.contains(dst_file) {
|
||||
new_edges.insert(SurfaceEdge {
|
||||
from: entry_idx as u32,
|
||||
to: *dst_idx as u32,
|
||||
kind: EdgeKind::Reaches,
|
||||
});
|
||||
for d in &dst_index {
|
||||
let reached = if seed_found && !d.owner.is_empty() {
|
||||
// Precise: the owning function must be in the BFS-reachable set.
|
||||
reachable_fns.contains(&(d.file.clone(), d.owner.clone()))
|
||||
} else {
|
||||
// Unresolved seed, or a legacy destination with no owner:
|
||||
// conservative same-file fallback (preserves connectivity
|
||||
// when the call graph cannot resolve the handler).
|
||||
reachable_files.contains(&d.file)
|
||||
};
|
||||
if reached {
|
||||
for kind in &d.edges {
|
||||
new_edges.insert(SurfaceEdge {
|
||||
from: entry_idx as u32,
|
||||
to: d.idx as u32,
|
||||
kind: *kind,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -115,27 +156,40 @@ pub fn populate_reaches_edges(
|
|||
map.edges.extend(new_edges);
|
||||
}
|
||||
|
||||
/// Return the file portion of a `FuncKey` namespace.
///
/// For JS/TS files inside resolved packages, `namespace_with_package`
/// yields namespaces shaped like `"@scope/name::src/file.ts"`. Everything
/// up to and including the last `::` is dropped, leaving the
/// project-relative POSIX path that an entry-point's
/// `handler_location.file` is compared against. A namespace that has no
/// `::` separator is returned unchanged.
fn file_part_of_namespace(ns: &str) -> &str {
    const SEPARATOR: &str = "::";
    match ns.rfind(SEPARATOR) {
        // Keep only what follows the final separator.
        Some(start) => &ns[start + SEPARATOR.len()..],
        None => ns,
    }
}
|
||||
|
||||
/// Build a lookup from destination node index → destination file.
|
||||
/// Restricted to the three reachable-from-entry-point variants.
|
||||
fn build_destination_index(map: &SurfaceMap) -> Vec<(usize, String)> {
|
||||
let mut out: Vec<(usize, String)> = Vec::new();
|
||||
/// Build the destination index: every non-entry-point node tagged with
|
||||
/// its file, owning function, and the edge class to emit.
|
||||
fn build_destination_index(map: &SurfaceMap) -> Vec<Dest> {
|
||||
let mut out: Vec<Dest> = Vec::new();
|
||||
for (idx, node) in map.nodes.iter().enumerate() {
|
||||
let file = match node {
|
||||
SurfaceNode::DataStore(n) => n.location.file.clone(),
|
||||
SurfaceNode::ExternalService(n) => n.location.file.clone(),
|
||||
SurfaceNode::DangerousLocal(n) => n.location.file.clone(),
|
||||
let (file, owner, edges) = match node {
|
||||
SurfaceNode::DataStore(n) => {
|
||||
let mut edges: smallvec::SmallVec<[EdgeKind; 2]> = smallvec::SmallVec::new();
|
||||
if n.access.reads() {
|
||||
edges.push(EdgeKind::ReadsFrom);
|
||||
}
|
||||
if n.access.writes() {
|
||||
edges.push(EdgeKind::WritesTo);
|
||||
}
|
||||
(n.location.file.clone(), n.owner.clone(), edges)
|
||||
}
|
||||
SurfaceNode::ExternalService(n) => (
|
||||
n.location.file.clone(),
|
||||
n.owner.clone(),
|
||||
smallvec::smallvec![EdgeKind::TalksTo],
|
||||
),
|
||||
SurfaceNode::DangerousLocal(n) => (
|
||||
n.location.file.clone(),
|
||||
n.function_name.clone(),
|
||||
smallvec::smallvec![EdgeKind::Reaches],
|
||||
),
|
||||
SurfaceNode::EntryPoint(_) => continue,
|
||||
};
|
||||
out.push((idx, file));
|
||||
out.push(Dest {
|
||||
idx,
|
||||
file,
|
||||
owner,
|
||||
edges,
|
||||
});
|
||||
}
|
||||
out
|
||||
}
|
||||
|
|
@ -164,7 +218,8 @@ mod tests {
|
|||
use super::*;
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::{
|
||||
DangerousLocal, EntryPoint, Framework, SourceLocation, SurfaceMap, SurfaceNode,
|
||||
DangerousLocal, DataStore, DataStoreKind, EntryPoint, ExternalService, ExternalServiceKind,
|
||||
Framework, SourceLocation, SurfaceMap, SurfaceNode,
|
||||
};
|
||||
|
||||
fn ep(file: &str, handler: &str) -> SurfaceNode {
|
||||
|
|
@ -184,6 +239,7 @@ mod tests {
|
|||
location: SourceLocation::new(file, 0, 0),
|
||||
function_name: name.into(),
|
||||
cap_bits: 0x1,
|
||||
label: String::new(),
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -207,14 +263,179 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn file_part_of_namespace_strips_package_prefix() {
|
||||
assert_eq!(file_part_of_namespace("app.py"), "app.py");
|
||||
assert_eq!(file_part_of_namespace("src/main.rs"), "src/main.rs");
|
||||
assert_eq!(
|
||||
file_part_of_namespace("@scope/name::src/file.ts"),
|
||||
"src/file.ts"
|
||||
fn emits_typed_edges_for_store_and_external() {
|
||||
// A data store yields ReadsFrom, an external service yields TalksTo
|
||||
// (Reaches is reserved for dangerous-local sinks). Uses the
|
||||
// unresolved-seed same-file fallback (empty call graph).
|
||||
let mut map = SurfaceMap::new();
|
||||
map.nodes.push(ep("app.py", "handler")); // 0
|
||||
map.nodes.push(SurfaceNode::DataStore(DataStore {
|
||||
location: SourceLocation::new("app.py", 4, 1),
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "PostgreSQL".into(),
|
||||
owner: "handler".into(),
|
||||
access: Default::default(),
|
||||
})); // 1
|
||||
map.nodes
|
||||
.push(SurfaceNode::ExternalService(ExternalService {
|
||||
location: SourceLocation::new("app.py", 6, 1),
|
||||
kind: ExternalServiceKind::HttpApi,
|
||||
label: "requests".into(),
|
||||
owner: "handler".into(),
|
||||
})); // 2
|
||||
let gs = GlobalSummaries::new();
|
||||
let cg = CallGraph {
|
||||
graph: petgraph::graph::DiGraph::new(),
|
||||
index: Default::default(),
|
||||
unresolved_not_found: vec![],
|
||||
unresolved_ambiguous: vec![],
|
||||
};
|
||||
populate_reaches_edges(&mut map, &gs, &cg);
|
||||
assert!(
|
||||
map.edges
|
||||
.iter()
|
||||
.any(|e| e.kind == EdgeKind::ReadsFrom && e.to == 1)
|
||||
);
|
||||
assert!(
|
||||
map.edges
|
||||
.iter()
|
||||
.any(|e| e.kind == EdgeKind::TalksTo && e.to == 2)
|
||||
);
|
||||
assert!(map.edges.iter().all(|e| e.kind != EdgeKind::Reaches));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn write_access_emits_writes_to_edge() {
|
||||
use crate::surface::AccessMode;
|
||||
let mut map = SurfaceMap::new();
|
||||
map.nodes.push(ep("app.py", "handler")); // 0
|
||||
map.nodes.push(SurfaceNode::DataStore(DataStore {
|
||||
location: SourceLocation::new("app.py", 4, 1),
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "PostgreSQL".into(),
|
||||
owner: "handler".into(),
|
||||
access: AccessMode::Write,
|
||||
})); // 1
|
||||
map.nodes.push(SurfaceNode::DataStore(DataStore {
|
||||
location: SourceLocation::new("app.py", 6, 1),
|
||||
kind: DataStoreKind::Sql,
|
||||
label: "PostgreSQL exec".into(),
|
||||
owner: "handler".into(),
|
||||
access: AccessMode::ReadWrite,
|
||||
})); // 2
|
||||
let gs = GlobalSummaries::new();
|
||||
let cg = CallGraph {
|
||||
graph: petgraph::graph::DiGraph::new(),
|
||||
index: Default::default(),
|
||||
unresolved_not_found: vec![],
|
||||
unresolved_ambiguous: vec![],
|
||||
};
|
||||
populate_reaches_edges(&mut map, &gs, &cg);
|
||||
// Write-only store: WritesTo, no ReadsFrom.
|
||||
assert!(
|
||||
map.edges
|
||||
.iter()
|
||||
.any(|e| e.kind == EdgeKind::WritesTo && e.to == 1)
|
||||
);
|
||||
assert!(
|
||||
!map.edges
|
||||
.iter()
|
||||
.any(|e| e.kind == EdgeKind::ReadsFrom && e.to == 1)
|
||||
);
|
||||
// Read-write store: both edges.
|
||||
assert!(
|
||||
map.edges
|
||||
.iter()
|
||||
.any(|e| e.kind == EdgeKind::WritesTo && e.to == 2)
|
||||
);
|
||||
assert!(
|
||||
map.edges
|
||||
.iter()
|
||||
.any(|e| e.kind == EdgeKind::ReadsFrom && e.to == 2)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn namespace_file_strips_package_prefix() {
|
||||
use crate::surface::namespace_file;
|
||||
assert_eq!(namespace_file("app.py"), "app.py");
|
||||
assert_eq!(namespace_file("src/main.rs"), "src/main.rs");
|
||||
assert_eq!(namespace_file("@scope/name::src/file.ts"), "src/file.ts");
|
||||
// Last `::` wins, matching `namespace_with_package`'s shape.
|
||||
assert_eq!(file_part_of_namespace("@a/b::@c/d::lib/x.ts"), "lib/x.ts");
|
||||
assert_eq!(namespace_file("@a/b::@c/d::lib/x.ts"), "lib/x.ts");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn function_level_match_skips_unrelated_same_file_sink() {
|
||||
// Two handlers and one dangerous sink live in the same file, but
|
||||
// only `caller` calls `do_eval`. With a resolvable call graph the
|
||||
// unrelated `other` handler must NOT get a Reaches edge — the
|
||||
// file-level heuristic used to connect both.
|
||||
use crate::symbol::{FuncKey, Lang};
|
||||
let mut map = SurfaceMap::new();
|
||||
map.nodes.push(ep("app.py", "caller")); // idx 0
|
||||
map.nodes.push(ep("app.py", "other")); // idx 1
|
||||
// Dangerous sink owned by `do_eval`.
|
||||
map.nodes.push(SurfaceNode::DangerousLocal(DangerousLocal {
|
||||
location: SourceLocation::new("app.py", 12, 1),
|
||||
function_name: "do_eval".into(),
|
||||
cap_bits: 0x1,
|
||||
label: "code-exec".into(),
|
||||
})); // idx 2
|
||||
|
||||
// Call graph: caller -> do_eval ; other is isolated.
|
||||
let mut cg = CallGraph {
|
||||
graph: petgraph::graph::DiGraph::new(),
|
||||
index: Default::default(),
|
||||
unresolved_not_found: vec![],
|
||||
unresolved_ambiguous: vec![],
|
||||
};
|
||||
let caller = cg.graph.add_node(FuncKey::new_function(
|
||||
Lang::Python,
|
||||
"app.py",
|
||||
"caller",
|
||||
None,
|
||||
));
|
||||
let other = cg
|
||||
.graph
|
||||
.add_node(FuncKey::new_function(Lang::Python, "app.py", "other", None));
|
||||
let do_eval = cg.graph.add_node(FuncKey::new_function(
|
||||
Lang::Python,
|
||||
"app.py",
|
||||
"do_eval",
|
||||
None,
|
||||
));
|
||||
cg.graph.add_edge(
|
||||
caller,
|
||||
do_eval,
|
||||
crate::callgraph::CallEdge {
|
||||
call_site: "do_eval".into(),
|
||||
},
|
||||
);
|
||||
cg.index.insert(
|
||||
FuncKey::new_function(Lang::Python, "app.py", "caller", None),
|
||||
caller,
|
||||
);
|
||||
cg.index.insert(
|
||||
FuncKey::new_function(Lang::Python, "app.py", "other", None),
|
||||
other,
|
||||
);
|
||||
cg.index.insert(
|
||||
FuncKey::new_function(Lang::Python, "app.py", "do_eval", None),
|
||||
do_eval,
|
||||
);
|
||||
|
||||
let gs = GlobalSummaries::new();
|
||||
populate_reaches_edges(&mut map, &gs, &cg);
|
||||
// Exactly one Reaches edge: caller(0) -> sink(2). `other`(1) is
|
||||
// excluded by function-level matching.
|
||||
let reaches: Vec<_> = map
|
||||
.edges
|
||||
.iter()
|
||||
.filter(|e| e.kind == EdgeKind::Reaches)
|
||||
.collect();
|
||||
assert_eq!(reaches.len(), 1, "got {reaches:?}");
|
||||
assert_eq!(reaches[0].from, 0);
|
||||
assert_eq!(reaches[0].to, 2);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -97,6 +97,7 @@ fn make_diag(
|
|||
evidence: Some(make_evidence(source_kind, verdict)),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ fn diag_with_caps(path: &str, line: usize, caps: Cap) -> Diag {
|
|||
}),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ fn fixture_surface_map() -> SurfaceMap {
|
|||
location: loc("app.py", 30),
|
||||
function_name: "shell.exec".into(),
|
||||
cap_bits: Cap::CODE_EXEC.bits(),
|
||||
label: String::new(),
|
||||
}));
|
||||
m
|
||||
}
|
||||
|
|
@ -77,6 +78,7 @@ fn fixture_findings() -> Vec<Diag> {
|
|||
evidence: Some(ev),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -968,6 +968,7 @@ fn make_diag(path: &Path, func: &str, cap: Cap, sink_line: u32) -> Diag {
|
|||
evidence: Some(evidence),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ fn base_diag() -> Diag {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ fn deny_diag(stable_hash: u64) -> Diag {
|
|||
evidence: Some(ev),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -312,6 +313,7 @@ fn confirmed_run_is_byte_identical_across_runs() {
|
|||
evidence: Some(evidence),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -86,6 +86,7 @@ mod parity_tests {
|
|||
}),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -78,6 +78,7 @@ mod verify_e2e {
|
|||
}),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -111,6 +112,7 @@ mod verify_e2e {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -64,6 +64,7 @@ fn high_confidence_taint_diag(path: &str, line: u32) -> Diag {
|
|||
}),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -452,6 +452,7 @@ mod go_fixture_tests {
|
|||
evidence: Some(evidence),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ fn diag(severity: Severity, id: &str, conf: Option<Confidence>) -> Diag {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -450,6 +450,7 @@ mod java_fixture_tests {
|
|||
evidence: Some(evidence),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -445,6 +445,7 @@ mod js_fixture_tests {
|
|||
evidence: Some(evidence),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ fn base_diag() -> Diag {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -55,6 +55,7 @@ mod lang_detect {
|
|||
}),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -440,6 +440,7 @@ mod php_fixture_tests {
|
|||
evidence: Some(evidence),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ fn empty_diag() -> Diag {
|
|||
evidence: Some(Evidence::default()),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -928,6 +928,7 @@ mod python_fixture_tests {
|
|||
evidence: Some(evidence),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -279,6 +279,7 @@ mod rust_fixture_tests {
|
|||
evidence: Some(evidence),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -752,6 +752,7 @@ mod hardening_tests {
|
|||
evidence: Some(evidence),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -947,6 +948,7 @@ mod hardening_tests {
|
|||
evidence: Some(evidence),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -647,6 +647,7 @@ finally:
|
|||
evidence: Some(evidence),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
@ -787,6 +788,7 @@ finally:
|
|||
evidence: Some(evidence),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ fn base_diag() -> Diag {
|
|||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -78,6 +78,7 @@ fn make_diag(id: &str, path: &str, line: usize) -> Diag {
|
|||
evidence: Some(Evidence::default()),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ mod spec_strategies {
|
|||
evidence: Some(Evidence::default()),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -73,6 +73,7 @@ fn make_diag(path: &str, handler: &str, line: usize, cap: Cap, rule_id: &str) ->
|
|||
evidence: Some(ev),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
exposure: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
triage_state: "open".to_string(),
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ fn load_or_build_falls_back_to_filesystem_when_no_db() {
|
|||
.unwrap();
|
||||
let db_dir = tempfile::tempdir().unwrap();
|
||||
let cfg = Config::default();
|
||||
let map = load_or_build(tmp.path(), db_dir.path(), &cfg).expect("load_or_build");
|
||||
let (map, _cov) = load_or_build(tmp.path(), db_dir.path(), &cfg).expect("load_or_build");
|
||||
assert!(
|
||||
map.entry_points().next().is_some(),
|
||||
"expected at least one entry-point in fallback path"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue