diff --git a/src/ast.rs b/src/ast.rs index f9fe33a6..d1238f58 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -97,6 +97,7 @@ fn parse_timeout_diag(path: &Path, timeout_ms: u64) -> Diag { evidence: Some(evidence), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -711,6 +712,7 @@ fn build_taint_diag( }), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -1400,6 +1402,7 @@ impl<'a> ParsedSource<'a> { }), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -2046,6 +2049,7 @@ impl<'a> ParsedFile<'a> { }), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -2129,6 +2133,7 @@ impl<'a> ParsedFile<'a> { }), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/src/auth_analysis/mod.rs b/src/auth_analysis/mod.rs index 47a243ef..89eb4ad3 100644 --- a/src/auth_analysis/mod.rs +++ b/src/auth_analysis/mod.rs @@ -1044,6 +1044,7 @@ fn auth_finding_to_diag(finding: &checks::AuthFinding, tree: &Tree, file_path: & }), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/src/baseline.rs b/src/baseline.rs index 6d529a62..0c9afc04 100644 --- a/src/baseline.rs +++ b/src/baseline.rs @@ -404,6 +404,7 @@ mod tests { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/src/chain/edges.rs b/src/chain/edges.rs index 353f7d3b..4dbf5d56 100644 --- a/src/chain/edges.rs +++ b/src/chain/edges.rs @@ -192,35 +192,41 @@ pub fn pick_chain_cap(bits: u32) -> Option { } fn locate_reach(loc: &SourceLocation, surface: 
&SurfaceMap, reach: Option<&FileReachMap>) -> Reach { - // Pass 1: file-local match (legacy behaviour, always applies). - for node in &surface.nodes { - if let SurfaceNode::EntryPoint(ep) = node - && ep.handler_location.file == loc.file - { - return Reach::Reachable { - location: ep.location.clone(), - method: ep.method, - route: ep.route.clone(), - auth_required: ep.auth_required, - }; + // Within each pass, prefer an *unauthenticated* entry-point over an + // auth-gated one: the chain composer scores worst-case exposure, and + // taking the first match used to under-report whenever an auth-gated + // route happened to sort first in the same file. + let pick = |matches_entry: &dyn Fn(&crate::surface::EntryPoint) -> bool| -> Option { + let mut best: Option<&crate::surface::EntryPoint> = None; + for node in &surface.nodes { + if let SurfaceNode::EntryPoint(ep) = node + && matches_entry(ep) + { + if !ep.auth_required { + best = Some(ep); + break; + } + best.get_or_insert(ep); + } } + best.map(|ep| Reach::Reachable { + location: ep.location.clone(), + method: ep.method, + route: ep.route.clone(), + auth_required: ep.auth_required, + }) + }; + // Pass 1: file-local match (legacy behaviour, always applies). + if let Some(found) = pick(&|ep| ep.handler_location.file == loc.file) { + return found; } // Pass 2: transitive caller match via the call graph. Only fires // when `reach` is supplied — keeps the legacy file-local behaviour // for callers that have not yet wired the call-graph reach map. 
- if let Some(reach) = reach { - for node in &surface.nodes { - if let SurfaceNode::EntryPoint(ep) = node - && reach.reaches(&ep.handler_location.file, &loc.file) - { - return Reach::Reachable { - location: ep.location.clone(), - method: ep.method, - route: ep.route.clone(), - auth_required: ep.auth_required, - }; - } - } + if let Some(reach) = reach + && let Some(found) = pick(&|ep| reach.reaches(&ep.handler_location.file, &loc.file)) + { + return found; } Reach::Unreachable } diff --git a/src/chain/search.rs b/src/chain/search.rs index 30bb1d2e..a723e278 100644 --- a/src/chain/search.rs +++ b/src/chain/search.rs @@ -463,6 +463,7 @@ mod tests { location: loc(file, line), function_name: fname.into(), cap_bits: caps.bits(), + label: String::new(), }) } @@ -671,6 +672,8 @@ mod tests { location: loc("app.py", 5), kind: DataStoreKind::KeyValue, label: "redis://127.0.0.1:6379".into(), + owner: String::new(), + access: Default::default(), })); let boosted = find_chains( &[edge()], diff --git a/src/commands/scan.rs b/src/commands/scan.rs index c46d2709..c2d6ebd3 100644 --- a/src/commands/scan.rs +++ b/src/commands/scan.rs @@ -167,6 +167,14 @@ pub struct Diag { /// Breakdown of how the ranking score was computed. #[serde(default, skip_serializing_if = "Option::is_none")] pub rank_reason: Option>, + /// Worst-case attack-surface exposure: the externally-reachable + /// route that can drive this finding, when the surface map's + /// entry-points reach the finding's file (directly or via the call + /// graph). `None` when the project has no detected entry-points + /// or no route reaches the file. Populated by + /// [`crate::surface::exposure::annotate_exposure`] before ranking. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub exposure: Option, /// Whether this finding was suppressed by an inline `nyx:ignore` directive. 
#[serde(default, skip_serializing_if = "is_false")] pub suppressed: bool, @@ -251,6 +259,7 @@ impl Default for Diag { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: default_triage_state(), @@ -2547,6 +2556,15 @@ pub(crate) fn scan_filesystem_with_observer( if let Some(p) = progress { p.set_stage(ScanStage::PostProcessing); } + // Surface exposure: tag each finding with the worst-case route that + // reaches it before ranking, so `rank_diags` can weigh external + // reachability. + crate::surface::exposure::annotate_exposure( + &mut diags, + &surface_map, + chain_reach_out.and_then(|s| s.get()), + Some(root), + ); post_process_diags(&mut diags, cfg); if let Some(p) = progress { p.record_post_process_ms(pp_start.elapsed().as_millis() as u64); @@ -3398,6 +3416,15 @@ pub fn scan_with_index_parallel_observer( None, ); } + // Surface exposure: tag each finding with the worst-case route + // that reaches it before ranking, so `rank_diags` can weigh + // external reachability. + crate::surface::exposure::annotate_exposure( + &mut diags, + &surface_map, + chain_reach_out.and_then(|s| s.get()), + Some(scan_root), + ); } // NOTE: Taint-mode output is *not* filtered here. 
`run_rules_on_bytes` @@ -3603,6 +3630,7 @@ fn rollup_findings( evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -3837,6 +3865,7 @@ mod dedup_taint_flow_tests { }), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -4007,6 +4036,7 @@ mod scc_tagging_tests { evidence: Some(Evidence::default()), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -4301,6 +4331,7 @@ fn severity_filter_applied_at_output_stage() { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -4325,6 +4356,7 @@ fn severity_filter_applied_at_output_stage() { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -4376,6 +4408,7 @@ mod prioritize_tests { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -4809,6 +4842,7 @@ mod prioritize_tests { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -4901,6 +4935,7 @@ mod stable_hash_tests { }), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/src/commands/surface.rs b/src/commands/surface.rs index 7c08aad5..b5df8554 100644 --- a/src/commands/surface.rs +++ b/src/commands/surface.rs @@ -32,7 +32,7 @@ use crate::errors::{NyxError, NyxResult}; use crate::summary::GlobalSummaries; use crate::surface::{ DataStoreKind, EdgeKind, EntryPoint, ExternalServiceKind, SurfaceMap, SurfaceNode, - build::{SurfaceBuildInputs, build_surface_map}, + 
build::{SurfaceBuildInputs, SurfaceCoverage, build_surface_map_with_coverage}, }; use crate::utils::Config; use crate::utils::project::get_project_info; @@ -60,11 +60,18 @@ pub fn handle( config: &Config, ) -> NyxResult<()> { let scan_root = Path::new(path).canonicalize()?; - let map = if build_inline { - build_full_from_filesystem(&scan_root, config)? + let (map, coverage) = if build_inline { + let (m, c) = build_full_from_filesystem(&scan_root, config)?; + (m, Some(c)) } else { load_or_build(&scan_root, database_dir, config)? }; + // Coverage goes to stderr so stdout stays clean for json / dot / svg + // consumers. Only available when the map was built this run (a + // persisted map carries no coverage). + if let Some(cov) = &coverage { + eprint!("{}", render_coverage(cov)); + } let stdout = std::io::stdout(); let mut out = stdout.lock(); match format { @@ -97,7 +104,7 @@ pub fn load_or_build( scan_root: &Path, database_dir: &Path, config: &Config, -) -> NyxResult { +) -> NyxResult<(SurfaceMap, Option)> { if let Ok((project, db_path)) = get_project_info(scan_root, database_dir) && db_path.exists() && let Ok(pool) = Indexer::init(&db_path) @@ -105,12 +112,25 @@ pub fn load_or_build( && let Ok(Some(map)) = idx.load_surface_map() && !map.nodes.is_empty() { - return Ok(map); + // Persisted map: no coverage to report. Say where the data came + // from on stderr — a reviewer comparing the tree against freshly + // edited source needs to know it reflects the last indexed scan, + // not the working tree. 
+ eprintln!( + "Surface map: {} nodes, {} edges from the last indexed scan (pass --build to rebuild from source)", + map.node_count(), + map.edge_count() + ); + return Ok((map, None)); } - build_from_filesystem(scan_root, config) + let (map, cov) = build_from_filesystem(scan_root, config)?; + Ok((map, Some(cov))) } -fn build_from_filesystem(scan_root: &Path, config: &Config) -> NyxResult { +fn build_from_filesystem( + scan_root: &Path, + config: &Config, +) -> NyxResult<(SurfaceMap, SurfaceCoverage)> { let files = collect_files(scan_root, config)?; let summaries = GlobalSummaries::new(); let call_graph = callgraph::build_call_graph(&summaries, &[]); @@ -121,7 +141,7 @@ fn build_from_filesystem(scan_root: &Path, config: &Config) -> NyxResult NyxResult NyxResult { +fn build_full_from_filesystem( + scan_root: &Path, + config: &Config, +) -> NyxResult<(SurfaceMap, SurfaceCoverage)> { let files = collect_files(scan_root, config)?; let mut summaries = build_summaries_inline(&files, scan_root, config); summaries.install_hierarchy(); @@ -141,7 +164,26 @@ fn build_full_from_filesystem(scan_root: &Path, config: &Config) -> NyxResult String { + let mut s = format!( + "Coverage: {} files, {} in a supported language ({} parsed, {} with routes)", + cov.files_total, cov.files_supported, cov.files_parsed, cov.files_with_entry_points, + ); + if cov.files_parse_failed > 0 { + s.push_str(&format!(", {} unparsed", cov.files_parse_failed)); + } + if cov.files_unreadable > 0 { + s.push_str(&format!(", {} unreadable", cov.files_unreadable)); + } + s.push('\n'); + s } /// Run pass-1 summary extraction across `files` in parallel and merge @@ -242,6 +284,36 @@ pub fn render_text(map: &SurfaceMap, scan_root: Option<&Path>) -> String { return out; } + // Risk banner: the highest-risk entry-points first, so a reviewer + // sees "what should I look at" before the per-file inventory. 
+ let risks = crate::surface::risk::assess_entry_risks(map); + let risk_by_idx: std::collections::HashMap = + risks.iter().map(|r| (r.entry_idx, r)).collect(); + let top: Vec<&crate::surface::risk::EntryRisk> = risks + .iter() + .filter(|r| r.tier >= crate::surface::risk::RiskTier::Medium) + .take(10) + .collect(); + if !top.is_empty() { + out.push_str("Top risk entry-points\n"); + for r in &top { + let Some(SurfaceNode::EntryPoint(ep)) = map.nodes.get(r.entry_idx) else { + continue; + }; + out.push_str(&format!( + " [{}] {} {} ({:?}) — {} [{}:{}]\n", + r.tier.tag(), + method_str(ep.method), + ep.route, + ep.framework, + r.factors.join(", "), + ep.location.file, + ep.location.line + )); + } + out.push('\n'); + } + let mut by_file: BTreeMap<&str, Vec> = BTreeMap::new(); for (idx, node) in map.nodes.iter().enumerate() { by_file @@ -252,7 +324,7 @@ pub fn render_text(map: &SurfaceMap, scan_root: Option<&Path>) -> String { let mut reached: std::collections::HashSet = std::collections::HashSet::new(); for edge in &map.edges { - if matches!(edge.kind, EdgeKind::Reaches) { + if edge.kind.is_reach_like() { reached.insert(edge.to); } } @@ -269,7 +341,7 @@ pub fn render_text(map: &SurfaceMap, scan_root: Option<&Path>) -> String { let SurfaceNode::EntryPoint(ep) = &map.nodes[ei] else { continue; }; - render_entry_point(&mut out, ep, ei as u32, map); + render_entry_point(&mut out, ep, ei as u32, map, risk_by_idx.get(&ei).copied()); } } for &i in indices { @@ -323,24 +395,46 @@ pub fn render_text(map: &SurfaceMap, scan_root: Option<&Path>) -> String { out } -fn render_entry_point(out: &mut String, ep: &EntryPoint, ep_idx: u32, map: &SurfaceMap) { +fn render_entry_point( + out: &mut String, + ep: &EntryPoint, + ep_idx: u32, + map: &SurfaceMap, + risk: Option<&crate::surface::risk::EntryRisk>, +) { let auth = if ep.auth_required { " [auth]" } else { "" }; + // Only Medium and above gets a tag — every line reading `[low]` + // would be noise, absence of a tag *is* the low signal. 
+ let risk_tag = risk + .filter(|r| r.tier >= crate::surface::risk::RiskTier::Medium) + .map(|r| format!(" [risk: {}]", r.tier.tag())) + .unwrap_or_default(); out.push_str(&format!( - " {} {} ({:?}){}\n", + " {} {} ({:?}){}{}\n", method_str(ep.method), ep.route, ep.framework, - auth + auth, + risk_tag )); out.push_str(&format!( " handler: {} at {}:{}\n", ep.handler_name, ep.handler_location.file, ep.handler_location.line )); - let mut reached: Vec<&SurfaceNode> = map + // Dedupe destinations: a read-write data store carries both a + // ReadsFrom and a WritesTo edge to the same node — one line each + // would print the store twice. + let mut to_indices: Vec = map .edges .iter() - .filter(|e| e.from == ep_idx && matches!(e.kind, EdgeKind::Reaches)) - .filter_map(|e| map.nodes.get(e.to as usize)) + .filter(|e| e.from == ep_idx && e.kind.is_reach_like()) + .map(|e| e.to) + .collect(); + to_indices.sort_unstable(); + to_indices.dedup(); + let mut reached: Vec<&SurfaceNode> = to_indices + .iter() + .filter_map(|&i| map.nodes.get(i as usize)) .collect(); reached.sort_by(|a, b| a.location().cmp(b.location())); if reached.is_empty() { @@ -364,9 +458,16 @@ fn render_node_line(out: &mut String, node: &SurfaceNode, prefix: &str) { )); } SurfaceNode::DataStore(ds) => { + let access = match ds.access { + crate::surface::AccessMode::Read => ", read", + crate::surface::AccessMode::Write => ", write", + crate::surface::AccessMode::ReadWrite => ", read-write", + crate::surface::AccessMode::Unknown => "", + }; out.push_str(&format!( - "{prefix}data-store ({}): {} [{}:{}]\n", + "{prefix}data-store ({}{}): {} [{}:{}]\n", ds_kind_str(ds.kind), + access, ds.label, ds.location.file, ds.location.line @@ -382,9 +483,14 @@ fn render_node_line(out: &mut String, node: &SurfaceNode, prefix: &str) { )); } SurfaceNode::DangerousLocal(dl) => { + let caps = if dl.label.is_empty() { + crate::surface::cap_label_string(dl.cap_bits) + } else { + dl.label.clone() + }; out.push_str(&format!( - 
"{prefix}dangerous: {} (cap=0x{:x}) [{}:{}]\n", - dl.function_name, dl.cap_bits, dl.location.file, dl.location.line + "{prefix}dangerous ({}): {} [{}:{}]\n", + caps, dl.function_name, dl.location.file, dl.location.line )); } } @@ -474,15 +580,22 @@ pub fn render_dot(map: &SurfaceMap) -> String { "component", "#8b3aa5", ), - SurfaceNode::DangerousLocal(dl) => ( - format!( - "Dangerous\\n{}\\ncap=0x{:x}", - escape_dot(&dl.function_name), - dl.cap_bits - ), - "octagon", - "#c44141", - ), + SurfaceNode::DangerousLocal(dl) => { + let caps = if dl.label.is_empty() { + crate::surface::cap_label_string(dl.cap_bits) + } else { + dl.label.clone() + }; + ( + format!( + "Dangerous ({})\\n{}", + escape_dot(&caps), + escape_dot(&dl.function_name), + ), + "octagon", + "#c44141", + ) + } }; out.push_str(&format!( " n{i} [label=\"{label}\", shape={shape}, color=\"{color}\", fontcolor=\"{color}\"];\n", @@ -603,6 +716,7 @@ mod tests { location: SourceLocation::new("app.py", 12, 1), function_name: "eval".into(), cap_bits: crate::labels::Cap::CODE_EXEC.bits(), + label: "code-exec".into(), }, )); // Build edge after canonicalize so indices are stable. 
@@ -625,7 +739,7 @@ mod tests { m.canonicalize(); let text = render_text(&m, None); assert!(text.contains("reaches:")); - assert!(text.contains("dangerous: eval")); + assert!(text.contains("dangerous (code-exec): eval")); } #[test] @@ -691,7 +805,7 @@ mod tests { let cfg = Config::default(); let canon = project_dir.canonicalize().unwrap(); - let map = build_full_from_filesystem(&canon, &cfg).expect("inline build succeeds"); + let (map, _cov) = build_full_from_filesystem(&canon, &cfg).expect("inline build succeeds"); let has_entry = map .nodes @@ -722,7 +836,7 @@ mod tests { let cfg = Config::default(); let canon = project_dir.canonicalize().unwrap(); - let map = build_from_filesystem(&canon, &cfg).expect("fallback build succeeds"); + let (map, _cov) = build_from_filesystem(&canon, &cfg).expect("fallback build succeeds"); // Entry point should still appear (framework probes run in the // fallback path too). diff --git a/src/database.rs b/src/database.rs index 1c52694f..99664137 100644 --- a/src/database.rs +++ b/src/database.rs @@ -1091,6 +1091,7 @@ pub mod index { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/src/evidence.rs b/src/evidence.rs index 9297031f..8728e683 100644 --- a/src/evidence.rs +++ b/src/evidence.rs @@ -1600,6 +1600,7 @@ mod tests { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/src/fmt.rs b/src/fmt.rs index 675cea3c..8b2cf351 100644 --- a/src/fmt.rs +++ b/src/fmt.rs @@ -985,6 +985,7 @@ mod tests { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -1009,6 +1010,7 @@ mod tests { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -1047,6 +1049,7 
@@ mod tests { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -1085,6 +1088,7 @@ mod tests { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -1109,6 +1113,7 @@ mod tests { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -1145,6 +1150,7 @@ mod tests { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -1178,6 +1184,7 @@ mod tests { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -1215,6 +1222,7 @@ mod tests { evidence: None, rank_score: Some(120.0), rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -1311,6 +1319,7 @@ mod tests { evidence: None, rank_score: Some(36.0), rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -1360,6 +1369,7 @@ mod tests { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -1395,6 +1405,7 @@ mod tests { evidence: None, rank_score: Some(42.0), rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -1434,6 +1445,7 @@ mod tests { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -1469,6 +1481,7 @@ mod tests { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -1518,6 +1531,7 @@ mod tests { }), rank_score: 
Some(47.0), rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/src/output/sarif.rs b/src/output/sarif.rs index 5c612e56..3628fe07 100644 --- a/src/output/sarif.rs +++ b/src/output/sarif.rs @@ -241,6 +241,23 @@ pub fn build_sarif_with_chains(diags: &[Diag], chains: &[ChainFinding], scan_roo props.insert("data_exfil_field".into(), json!(field)); } + // Attack-surface exposure: the externally-reachable route + // that drives this finding. Lets a SARIF consumer (CI gate, + // dashboard) filter on "reachable from an unauthenticated + // route" without re-running the surface build. + if let Some(exp) = &d.exposure { + props.insert( + "exposure".into(), + json!({ + "route": exp.route, + "method": format!("{:?}", exp.method), + "framework": format!("{:?}", exp.framework), + "auth_required": exp.auth_required, + "transitive": exp.transitive, + }), + ); + } + if !d.finding_id.is_empty() { props.insert("finding_id".into(), json!(d.finding_id)); } @@ -395,6 +412,7 @@ mod tests { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/src/patterns/ejs.rs b/src/patterns/ejs.rs index de2f6ee7..5dc3196e 100644 --- a/src/patterns/ejs.rs +++ b/src/patterns/ejs.rs @@ -80,6 +80,7 @@ pub fn scan_ejs_file(path: &Path, bytes: &[u8]) -> Vec { }), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/src/rank.rs b/src/rank.rs index d3a87c18..4af8162f 100644 --- a/src/rank.rs +++ b/src/rank.rs @@ -55,6 +55,32 @@ pub fn compute_attack_rank(diag: &Diag) -> AttackRank { components.push(("evidence".into(), format!("{evidence_bonus}"))); } + // ── 3b. 
Surface exposure ──────────────────────────────────────────── + // + // A finding reachable from a surface entry-point is more exploitable + // than an internal one; reachable *without auth* more so. Transitive + // reach (through the call graph rather than in the handler's own + // file) is slightly discounted because the file-level reach map can + // over-approximate. Magnitudes keep the severity tier ordering: the + // maximum exposure bonus (+10) plus all other Medium-tier bonuses + // stays below the High severity base (see tier tests). + if let Some(exp) = &diag.exposure { + let mut exposure_bonus = if exp.auth_required { 4.0 } else { 10.0 }; + if exp.transitive { + exposure_bonus -= 2.0; + } + score += exposure_bonus; + let auth_tag = if exp.auth_required { + "auth-gated" + } else { + "unauthenticated" + }; + components.push(( + "exposure".into(), + format!("{exposure_bonus:+} ({auth_tag})"), + )); + } + // ── 4. State finding sub-ranking ──────────────────────────────────── let state_bonus = state_finding_bonus(&diag.id); score += state_bonus; @@ -421,6 +447,7 @@ mod tests { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/src/server/health.rs b/src/server/health.rs index 8054d569..28a55bc4 100644 --- a/src/server/health.rs +++ b/src/server/health.rs @@ -610,6 +610,7 @@ mod tests { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/src/server/models.rs b/src/server/models.rs index 6148f9df..123f7893 100644 --- a/src/server/models.rs +++ b/src/server/models.rs @@ -78,6 +78,10 @@ pub struct FindingView { pub guard_kind: Option, #[serde(skip_serializing_if = "Option::is_none")] pub rank_reason: Option>, + /// Worst-case attack-surface exposure (route, method, auth) when a + /// surface entry-point reaches this finding. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub exposure: Option, #[serde(skip_serializing_if = "Option::is_none")] pub sanitizer_status: Option, #[serde(skip_serializing_if = "Vec::is_empty")] @@ -345,6 +349,7 @@ pub fn finding_from_diag(index: usize, d: &Diag) -> FindingView { .and_then(|ev| ev.dynamic_verdict.clone()), guard_kind: None, rank_reason: None, + exposure: d.exposure.clone(), sanitizer_status: None, related_findings: vec![], } @@ -937,6 +942,7 @@ mod tests { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/src/server/routes/surface.rs b/src/server/routes/surface.rs index e91661d8..13e61039 100644 --- a/src/server/routes/surface.rs +++ b/src/server/routes/surface.rs @@ -31,12 +31,24 @@ async fn get_surface(State(state): State) -> ApiResult> { .await .map_err(|e| ApiError::internal(format!("surface map task failed: {e}")))?; - let mut map = + let (mut map, _coverage) = join_result.map_err(|e| ApiError::internal(format!("failed to build surface map: {e}")))?; + // Risk is derived from the canonicalised map, so canonicalise (via + // `to_json`) first to lock node indices, then assess. let bytes = map .to_json() .map_err(|e| ApiError::internal(format!("encode surface map: {e}")))?; - let value: Value = serde_json::from_slice(&bytes) + let mut value: Value = serde_json::from_slice(&bytes) .map_err(|e| ApiError::internal(format!("re-parse surface map JSON: {e}")))?; + // Attach per-entry-point risk assessment alongside the raw map so the + // frontend can render a risk-sorted view without re-deriving scores. 
+ let risks = crate::surface::risk::assess_entry_risks(&map); + if let Value::Object(obj) = &mut value { + obj.insert( + "entry_risks".into(), + serde_json::to_value(&risks) + .map_err(|e| ApiError::internal(format!("encode entry risks: {e}")))?, + ); + } Ok(Json(value)) } diff --git a/src/surface/build.rs b/src/surface/build.rs index 02eab398..e57f911f 100644 --- a/src/surface/build.rs +++ b/src/surface/build.rs @@ -12,22 +12,30 @@ //! SSRF caps and emits [`SurfaceNode::ExternalService`](crate::surface::SurfaceNode::ExternalService) nodes. //! 4. [`super::dangerous::detect_dangerous_locals`] walks summaries //! and emits [`SurfaceNode::DangerousLocal`](crate::surface::SurfaceNode::DangerousLocal) nodes for every -//! function whose `sink_caps` include CODE_EXEC / DESERIALIZE / -//! SSTI / FMT_STRING. -//! 5. [`super::reachability::populate_reaches_edges`] runs a BFS over -//! the [`CallGraph`] from each entry-point handler, emitting -//! [`super::EdgeKind::Reaches`] edges to every reachable -//! DataStore / ExternalService / DangerousLocal. +//! function whose `sink_caps` include a local-sink class (code-exec, +//! deserialize, SSTI, format-string, LDAP / XPath / header / +//! open-redirect injection, XXE, prototype pollution), located at the +//! real sink span and labelled with the decoded cap class. +//! 5. [`super::reachability::populate_reaches_edges`] runs a forward, +//! function-level BFS over the [`CallGraph`] from each entry-point +//! handler, emitting [`super::EdgeKind::ReadsFrom`] (→ data store), +//! [`super::EdgeKind::TalksTo`] (→ external service), and +//! [`super::EdgeKind::Reaches`] (→ dangerous local) edges to every +//! reachable destination. //! 6. [`SurfaceMap::canonicalize`] sorts nodes + edges so the //! serialised JSON is byte-deterministic across rescans. //! -//! Per-file errors (parse failure, unsupported language) are -//! swallowed so a single bad file does not kill the whole map. +//! 
Per-file errors (parse failure, unsupported language, unreadable file) +//! are swallowed so a single bad file does not kill the whole map, but are +//! counted into [`SurfaceCoverage`] so the skip is observable rather than +//! silent. +use crate::auth_analysis::auth_markers::router_auth_markers_for_lang; use crate::callgraph::CallGraph; +use crate::entry_points::{EntryKind, HttpMethod}; use crate::summary::GlobalSummaries; use crate::surface::{ - SurfaceMap, dangerous, datastore, external, + EntryPoint, Framework, SourceLocation, SurfaceMap, SurfaceNode, dangerous, datastore, external, lang::{ go_gin, go_http, java_quarkus, java_servlet, java_spring, js_express, js_koa, php_laravel, php_slim, python_django, python_fastapi, python_flask, ruby_rails, ruby_sinatra, @@ -47,17 +55,63 @@ pub struct SurfaceBuildInputs<'a> { pub config: &'a Config, } +/// Per-build coverage counters. Turns the previously-silent +/// "single bad file is swallowed" behaviour into a number an operator can +/// read, so a small attack-surface map can be told apart from "our probes +/// did not understand this project's framework / language". +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct SurfaceCoverage { + /// Total files handed to the builder. + pub files_total: usize, + /// Files in a language a framework probe exists for. + pub files_supported: usize, + /// Supported-language files that parsed cleanly. + pub files_parsed: usize, + /// Supported-language files whose tree-sitter parse failed. + pub files_parse_failed: usize, + /// Files in a language with no framework probe (`.md`, `.toml`, …). + pub files_unsupported: usize, + /// Files that could not be read off disk. + pub files_unreadable: usize, + /// Supported-language files that yielded at least one entry-point node. + pub files_with_entry_points: usize, +} + +/// Build a [`SurfaceMap`], discarding coverage. 
Thin wrapper over +/// [`build_surface_map_with_coverage`] for callers (the indexed scan +/// path, persistence) that do not surface telemetry. pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap { + build_surface_map_with_coverage(inputs).0 +} + +/// Build a [`SurfaceMap`] and report [`SurfaceCoverage`]. The `nyx +/// surface` CLI uses this variant so parse / unsupported skips become a +/// visible number instead of being silently swallowed. +pub fn build_surface_map_with_coverage( + inputs: &SurfaceBuildInputs<'_>, +) -> (SurfaceMap, SurfaceCoverage) { let mut map = SurfaceMap::new(); let _ = inputs.config; + let mut cov = SurfaceCoverage { + files_total: inputs.files.len(), + ..Default::default() + }; let mut parsers = Parsers::new(); for path in inputs.files { let Ok(bytes) = std::fs::read(path) else { + cov.files_unreadable += 1; continue; }; let kind = classify_file(path); - let nodes = match kind { + if kind == FileKind::Other { + cov.files_unsupported += 1; + continue; + } + cov.files_supported += 1; + // `Some(nodes)` on a clean parse (possibly empty), `None` when the + // tree-sitter parse failed — lets coverage distinguish the two. 
+ let parsed: Option> = match kind { FileKind::Python => parsers .python .as_mut() @@ -78,8 +132,7 @@ pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap { inputs.scan_root, )); all - }) - .unwrap_or_default(), + }), FileKind::JavaScript => parsers .javascript .as_mut() @@ -94,8 +147,7 @@ pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap { inputs.scan_root, )); all - }) - .unwrap_or_default(), + }), FileKind::TypeScript => parsers .typescript .as_mut() @@ -116,8 +168,7 @@ pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap { inputs.scan_root, )); all - }) - .unwrap_or_default(), + }), FileKind::Java => parsers .java .as_mut() @@ -138,8 +189,7 @@ pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap { inputs.scan_root, )); all - }) - .unwrap_or_default(), + }), FileKind::Go => parsers .go .as_mut() @@ -154,8 +204,7 @@ pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap { inputs.scan_root, )); all - }) - .unwrap_or_default(), + }), FileKind::Php => parsers .php .as_mut() @@ -170,8 +219,7 @@ pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap { inputs.scan_root, )); all - }) - .unwrap_or_default(), + }), FileKind::Ruby => parsers .ruby .as_mut() @@ -186,8 +234,7 @@ pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap { inputs.scan_root, )); all - }) - .unwrap_or_default(), + }), FileKind::Rust => parsers .rust .as_mut() @@ -202,15 +249,38 @@ pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap { inputs.scan_root, )); all - }) - .unwrap_or_default(), - FileKind::Other => Vec::new(), + }), + // Unreachable: `Other` is filtered out before this match, but + // the arm keeps the match exhaustive. 
+ FileKind::Other => None, }; - for n in nodes { - map.nodes.push(n); + match parsed { + Some(nodes) => { + cov.files_parsed += 1; + if nodes + .iter() + .any(|n| matches!(n, SurfaceNode::EntryPoint(_))) + { + cov.files_with_entry_points += 1; + } + for n in nodes { + map.nodes.push(n); + } + } + None => cov.files_parse_failed += 1, } } + // Entry-point recall fallback: the pass-1 summary extractor tags + // handler functions with `FuncSummary::entry_kind` using its own + // (independent) framework detection. Any handler it recognised + // that the AST probes above missed is synthesised here so the + // surface map's entry-point set is always a superset of what the + // taint engine treats as adversary-driven. Route strings are not + // recoverable from summaries, so these carry `"(unrouted)"`. + let synthesised = synth_entry_points_from_summaries(&map.nodes, inputs.global_summaries); + map.nodes.extend(synthesised); + // Phase 22 — Track F.3: data-store / external-service / // dangerous-local detection from summaries. map.nodes @@ -220,6 +290,13 @@ pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap { map.nodes .extend(dangerous::detect_dangerous_locals(inputs.global_summaries)); + // Auth-detection upgrade: the probes only see router-level evidence + // (decorators, annotations, middleware arguments). A handler that + // guards itself in its body (`requireAuth(req)` as the first call, + // Go-style `if !VerifyToken(...)`) is still auth-gated; lift that + // from the handler summary's callee list. + upgrade_auth_required_from_summaries(&mut map, inputs.global_summaries); + // Canonicalise so node indices are stable before reachability // builds edges referring to those indices. map.canonicalize(); @@ -230,7 +307,160 @@ pub fn build_surface_map(inputs: &SurfaceBuildInputs<'_>) -> SurfaceMap { // Re-canonicalise: edges added by reachability need to be sorted // so the serialised JSON stays byte-deterministic. 
map.canonicalize(); - map + (map, cov) +} + +/// Route placeholder for entry points synthesised from summaries: the +/// pass-1 extractor records *that* a function is a handler but not the +/// route string the framework maps to it. +pub const UNROUTED: &str = "(unrouted)"; + +/// Map a pass-1 [`EntryKind`] tag to the surface [`Framework`] + +/// [`HttpMethod`] pair. Kinds with no verb evidence default to `GET` +/// except Next.js server actions, which the framework only ever +/// invokes via `POST`. +fn entry_kind_to_framework(kind: &EntryKind) -> (Framework, HttpMethod) { + match kind { + EntryKind::UseServerDirective | EntryKind::FormAction => { + (Framework::NextServerAction, HttpMethod::POST) + } + EntryKind::AppRouteHandler { method } => (Framework::NextAppRouter, *method), + EntryKind::ExpressRoute { method } => (Framework::Express, *method), + EntryKind::DjangoView { method } => (Framework::Django, *method), + EntryKind::FastApiRoute { method } => (Framework::FastApi, *method), + EntryKind::FlaskRoute { method } => (Framework::Flask, *method), + EntryKind::SpringMapping { method } => (Framework::Spring, *method), + EntryKind::JaxRsResource => (Framework::JaxRs, HttpMethod::GET), + EntryKind::RailsAction => (Framework::Rails, HttpMethod::GET), + EntryKind::SinatraRoute { method } => (Framework::Sinatra, *method), + EntryKind::AxumHandler => (Framework::Axum, HttpMethod::GET), + EntryKind::ActixHandler => (Framework::Actix, HttpMethod::GET), + EntryKind::RocketRoute => (Framework::Rocket, HttpMethod::GET), + EntryKind::GoNetHttp => (Framework::NetHttp, HttpMethod::GET), + EntryKind::GinRoute => (Framework::Gin, HttpMethod::GET), + } +} + +/// Synthesise [`SurfaceNode::EntryPoint`] nodes for handlers the pass-1 +/// summary extractor tagged with [`FuncSummary::entry_kind`](crate::summary::FuncSummary::entry_kind) +/// but no AST probe emitted. 
De-duped against existing probe output on +/// `(handler file, handler name)` so a probe-detected route always wins +/// (it carries the real route string and span). Summaries carry no +/// definition span, so synthesised nodes sit at line 0 of the handler +/// file; reachability matches on `(file, name)` and is unaffected. +fn synth_entry_points_from_summaries( + existing: &[SurfaceNode], + summaries: &GlobalSummaries, +) -> Vec { + let mut seen: std::collections::HashSet<(String, String)> = existing + .iter() + .filter_map(|n| match n { + SurfaceNode::EntryPoint(ep) => { + Some((ep.handler_location.file.clone(), ep.handler_name.clone())) + } + _ => None, + }) + .collect(); + let mut out: Vec = Vec::new(); + for (key, summary) in summaries.iter() { + let Some(kind) = &summary.entry_kind else { + continue; + }; + if key.name.is_empty() { + continue; + } + let file = crate::surface::namespace_file(&key.namespace).to_string(); + if !seen.insert((file.clone(), key.name.clone())) { + continue; + } + let (framework, method) = entry_kind_to_framework(kind); + let loc = SourceLocation { + file, + line: 0, + col: 0, + }; + out.push(SurfaceNode::EntryPoint(EntryPoint { + location: loc.clone(), + framework, + method, + route: UNROUTED.to_string(), + handler_name: key.name.clone(), + handler_location: loc, + auth_required: false, + })); + } + out +} + +/// Set `auth_required = true` on entry points whose handler *body* +/// calls a known auth guard, complementing the probes' router-level +/// (decorator / annotation / middleware-argument) detection. +/// +/// The handler summary is located by `(handler file, handler name)`; +/// its direct callees' leaf names are matched case-insensitively +/// against the per-language router-auth marker registry +/// ([`router_auth_markers_for_lang`]). Depth is deliberately 1 — a +/// guard buried two helpers deep is a router concern the call graph +/// models better than a name list. 
+fn upgrade_auth_required_from_summaries(map: &mut SurfaceMap, summaries: &GlobalSummaries) { + use std::collections::HashMap; + let needs_upgrade: Vec = map + .nodes + .iter() + .enumerate() + .filter_map(|(i, n)| match n { + SurfaceNode::EntryPoint(ep) if !ep.auth_required && !ep.handler_name.is_empty() => { + Some(i) + } + _ => None, + }) + .collect(); + if needs_upgrade.is_empty() { + return; + } + // (file, name) → summaries defining that function. Built once; the + // map is small relative to the summary count. + let mut by_fn: HashMap< + (&str, &str), + Vec<(&crate::symbol::FuncKey, &crate::summary::FuncSummary)>, + > = HashMap::new(); + for (key, summary) in summaries.iter() { + by_fn + .entry((crate::surface::namespace_file(&key.namespace), &key.name)) + .or_default() + .push((key, summary)); + } + let mut marker_cache: HashMap> = HashMap::new(); + let mut to_set: Vec = Vec::new(); + for idx in needs_upgrade { + let SurfaceNode::EntryPoint(ep) = &map.nodes[idx] else { + continue; + }; + let Some(cands) = by_fn.get(&(ep.handler_location.file.as_str(), ep.handler_name.as_str())) + else { + continue; + }; + let guarded = cands.iter().any(|(key, summary)| { + let markers = marker_cache + .entry(key.lang) + .or_insert_with(|| router_auth_markers_for_lang(key.lang)); + if markers.is_empty() { + return false; + } + summary.callees.iter().any(|c| { + let leaf = crate::callgraph::normalize_callee_name(&c.name); + markers.iter().any(|m| m.eq_ignore_ascii_case(leaf)) + }) + }); + if guarded { + to_set.push(idx); + } + } + for idx in to_set { + if let SurfaceNode::EntryPoint(ep) = &mut map.nodes[idx] { + ep.auth_required = true; + } + } } #[derive(Copy, Clone, PartialEq, Eq)] @@ -325,6 +555,139 @@ mod tests { } } + #[test] + fn synthesises_entry_point_from_summary_entry_kind() { + use crate::summary::FuncSummary; + use crate::symbol::{FuncKey, Lang}; + // No source file on disk (probes see nothing), but pass-1 tagged + // a Gin handler — the fallback must surface it. 
+ let dir = tempdir().unwrap(); + let cfg = Config::default(); + let mut gs = GlobalSummaries::new(); + let key = FuncKey::new_function(Lang::Go, "routes.go", "ListUsers", None); + let summary = FuncSummary { + name: "ListUsers".into(), + file_path: "routes.go".into(), + lang: "go".into(), + entry_kind: Some(EntryKind::GinRoute), + ..Default::default() + }; + gs.insert(key, summary); + let cg = empty_call_graph(); + let files: Vec = vec![]; + let inputs = empty_inputs(&files, Some(dir.path()), &gs, &cg, &cfg); + let map = build_surface_map(&inputs); + let eps: Vec<_> = map.entry_points().collect(); + assert_eq!(eps.len(), 1, "fallback entry-point expected"); + assert_eq!(eps[0].handler_name, "ListUsers"); + assert_eq!(eps[0].framework, Framework::Gin); + assert_eq!(eps[0].route, UNROUTED); + assert_eq!(eps[0].handler_location.file, "routes.go"); + } + + #[test] + fn probe_entry_point_suppresses_summary_fallback() { + use crate::summary::FuncSummary; + use crate::symbol::{FuncKey, Lang}; + let dir = tempdir().unwrap(); + let py = dir.path().join("app.py"); + fs::write( + &py, + "from flask import Flask\napp = Flask(__name__)\n@app.get('/u')\ndef u(): pass\n", + ) + .unwrap(); + let cfg = Config::default(); + let mut gs = GlobalSummaries::new(); + // Summary tags the same handler the probe sees. 
+ let key = FuncKey::new_function(Lang::Python, "app.py", "u", None); + let summary = FuncSummary { + name: "u".into(), + file_path: "app.py".into(), + lang: "python".into(), + entry_kind: Some(EntryKind::FlaskRoute { + method: HttpMethod::GET, + }), + ..Default::default() + }; + gs.insert(key, summary); + let cg = empty_call_graph(); + let files = vec![py]; + let inputs = empty_inputs(&files, Some(dir.path()), &gs, &cg, &cfg); + let map = build_surface_map(&inputs); + let eps: Vec<_> = map.entry_points().collect(); + assert_eq!(eps.len(), 1, "no duplicate from the fallback"); + assert_eq!(eps[0].route, "/u", "probe route (with real path) wins"); + } + + #[test] + fn body_level_auth_guard_upgrades_auth_required() { + use crate::summary::{CalleeSite, FuncSummary}; + use crate::symbol::{FuncKey, Lang}; + let dir = tempdir().unwrap(); + let js = dir.path().join("routes.js"); + // Express route with NO middleware arg — probe alone says unauth. + fs::write( + &js, + "const express = require('express');\nconst app = express();\napp.get('/admin', function admin(req, res) { requireAuth(req); res.send('x'); });\n", + ) + .unwrap(); + let cfg = Config::default(); + let mut gs = GlobalSummaries::new(); + // Handler summary whose body calls requireAuth. 
+ let key = FuncKey::new_function(Lang::JavaScript, "routes.js", "admin", None); + let summary = FuncSummary { + name: "admin".into(), + file_path: "routes.js".into(), + lang: "javascript".into(), + callees: vec![CalleeSite::bare("requireAuth")], + ..Default::default() + }; + gs.insert(key, summary); + let cg = empty_call_graph(); + let files = vec![js]; + let inputs = empty_inputs(&files, Some(dir.path()), &gs, &cg, &cfg); + let map = build_surface_map(&inputs); + let ep = map + .entry_points() + .find(|ep| ep.handler_name == "admin") + .expect("express probe finds the named handler"); + assert!( + ep.auth_required, + "body-level requireAuth call should upgrade auth_required" + ); + } + + #[test] + fn unrelated_callee_does_not_upgrade_auth() { + use crate::summary::{CalleeSite, FuncSummary}; + use crate::symbol::{FuncKey, Lang}; + let dir = tempdir().unwrap(); + let py = dir.path().join("app.py"); + fs::write( + &py, + "from flask import Flask\napp = Flask(__name__)\n@app.get('/x')\ndef x(): pass\n", + ) + .unwrap(); + let cfg = Config::default(); + let mut gs = GlobalSummaries::new(); + let key = FuncKey::new_function(Lang::Python, "app.py", "x", None); + let summary = FuncSummary { + name: "x".into(), + file_path: "app.py".into(), + lang: "python".into(), + // `settings` must not prefix-match any auth marker. + callees: vec![CalleeSite::bare("settings"), CalleeSite::bare("render")], + ..Default::default() + }; + gs.insert(key, summary); + let cg = empty_call_graph(); + let files = vec![py]; + let inputs = empty_inputs(&files, Some(dir.path()), &gs, &cg, &cfg); + let map = build_surface_map(&inputs); + let ep = map.entry_points().next().expect("entry point"); + assert!(!ep.auth_required); + } + #[test] fn empty_inputs_produce_empty_map() { let dir = tempdir().unwrap(); diff --git a/src/surface/dangerous.rs b/src/surface/dangerous.rs index b465e502..d89418de 100644 --- a/src/surface/dangerous.rs +++ b/src/surface/dangerous.rs @@ -12,17 +12,33 @@ //! 
detection pass is added here; the surface layer just lifts the //! cap-bit information out of the summary. -use super::{DangerousLocal, SourceLocation, SurfaceNode}; +use super::{DangerousLocal, SourceLocation, SurfaceNode, cap_label_string, namespace_file}; use crate::labels::Cap; -use crate::summary::GlobalSummaries; +use crate::summary::{FuncSummary, GlobalSummaries}; -/// Cap bits that indicate the function is a *local* sink — code exec, -/// unsafe deserialisation, server-side template injection, format -/// string injection. Other sink caps (SQL_QUERY → DataStore; -/// SSRF → ExternalService) live elsewhere in the surface layer so the -/// node taxonomy matches the chain composer's expectations. +/// Cap bits that indicate the function is a *local* sink — a sink with no +/// externally observable side effect that attacker data flows *into*. +/// Other sink caps live elsewhere in the surface layer so the node +/// taxonomy matches the chain composer's expectations: `SQL_QUERY` / +/// `FILE_IO` → DataStore (see [`super::datastore`]); `SSRF` / `DATA_EXFIL` +/// → ExternalService (see [`super::external`]). +/// +/// The set was widened from the original four (code-exec, deserialize, +/// SSTI, format-string) to cover every injection-style local sink the +/// label registry can classify, so a function that only builds an LDAP +/// filter, parses XXE-vulnerable XML, or merges into a prototype is no +/// longer absent from the surface map. 
fn dangerous_caps() -> Cap { - Cap::CODE_EXEC | Cap::DESERIALIZE | Cap::SSTI | Cap::FMT_STRING + Cap::CODE_EXEC + | Cap::DESERIALIZE + | Cap::SSTI + | Cap::FMT_STRING + | Cap::LDAP_INJECTION + | Cap::XPATH_INJECTION + | Cap::HEADER_INJECTION + | Cap::OPEN_REDIRECT + | Cap::XXE + | Cap::PROTOTYPE_POLLUTION } pub fn detect_dangerous_locals(summaries: &GlobalSummaries) -> Vec { @@ -33,19 +49,46 @@ pub fn detect_dangerous_locals(summaries: &GlobalSummaries) -> Vec if caps.is_empty() { continue; } + // Project-relative POSIX file, keyed off the FuncKey namespace so + // a dangerous-local node and the entry-point that reaches it agree + // on file identity (FuncSummary.file_path is an absolute path and + // would never match an entry-point's relative handler file). + let file = namespace_file(&key.namespace).to_string(); + let (line, col) = sink_line_col(summary, &file, caps); out.push(SurfaceNode::DangerousLocal(DangerousLocal { - location: SourceLocation { - file: summary.file_path.clone(), - line: 0, - col: 0, - }, + location: SourceLocation { file, line, col }, function_name: key.qualified_name(), cap_bits: caps.bits(), + label: cap_label_string(caps.bits()), })); } out } +/// Resolve the `(line, col)` of the dangerous sink inside `summary` by +/// scanning its `param_to_sink` [`crate::summary::SinkSite`] records for a +/// site whose cap intersects the dangerous mask. Prefers a same-file, +/// non-chain-promoted site (the function's own sink) over a deeper +/// chain-hop site so the coordinates point at code in `file`. Falls back +/// to `(0, 0)` when the summary carries no located sink (pass-2 transient +/// summaries, or summaries extracted without tree access). 
+fn sink_line_col(summary: &FuncSummary, file: &str, mask: Cap) -> (u32, u32) { + let mut fallback: Option<(u32, u32)> = None; + for (_param, sites) in &summary.param_to_sink { + for site in sites { + if site.line == 0 || (site.cap & mask).is_empty() { + continue; + } + let same_file = site.file_rel.is_empty() || site.file_rel == file; + if same_file && !site.from_chain { + return (site.line, site.col); + } + fallback.get_or_insert((site.line, site.col)); + } + } + fallback.unwrap_or((0, 0)) +} + #[cfg(test)] mod tests { use super::*; @@ -64,6 +107,63 @@ mod tests { (key, summary) } + #[test] + fn carries_real_span_and_label_from_param_to_sink() { + use crate::summary::SinkSite; + let mut gs = GlobalSummaries::new(); + let key = FuncKey::new_function(Lang::Python, "app.py", "render", None); + let site = SinkSite { + file_rel: "app.py".into(), + line: 17, + col: 9, + snippet: "Template(x).render()".into(), + cap: Cap::SSTI, + from_chain: false, + }; + let summary = FuncSummary { + name: "render".into(), + file_path: "/abs/app.py".into(), // absolute on purpose + lang: "python".into(), + sink_caps: Cap::SSTI.bits(), + param_to_sink: vec![(0, vec![site].into())], + ..Default::default() + }; + gs.insert(key, summary); + let nodes = detect_dangerous_locals(&gs); + assert_eq!(nodes.len(), 1); + let SurfaceNode::DangerousLocal(d) = &nodes[0] else { + panic!() + }; + // Project-relative file (from the namespace), not the absolute path. + assert_eq!(d.location.file, "app.py"); + assert_eq!(d.location.line, 17); + assert_eq!(d.location.col, 9); + assert_eq!(d.label, "ssti"); + } + + #[test] + fn detects_widened_injection_caps() { + // The widened mask now covers XXE / LDAP / open-redirect etc., which + // the original four-cap mask missed entirely. 
+ for cap in [ + Cap::XXE, + Cap::LDAP_INJECTION, + Cap::XPATH_INJECTION, + Cap::OPEN_REDIRECT, + Cap::HEADER_INJECTION, + Cap::PROTOTYPE_POLLUTION, + ] { + let mut gs = GlobalSummaries::new(); + let (k, s) = summary_with_caps("h", "danger.py", cap); + gs.insert(k, s); + assert_eq!( + detect_dangerous_locals(&gs).len(), + 1, + "cap {cap:?} should surface a dangerous-local node" + ); + } + } + #[test] fn detects_eval_sink() { let mut gs = GlobalSummaries::new(); diff --git a/src/surface/datastore.rs b/src/surface/datastore.rs index f47ac6e0..78278944 100644 --- a/src/surface/datastore.rs +++ b/src/surface/datastore.rs @@ -12,8 +12,9 @@ //! are forgiving — the surface map is informational, not a finding //! that fires on its own. -use super::{DataStore, DataStoreKind, SourceLocation, SurfaceNode}; -use crate::summary::{CalleeSite, FuncSummary, GlobalSummaries}; +use super::{AccessMode, DataStore, DataStoreKind, SourceLocation, SurfaceNode, namespace_file}; +use crate::labels::Cap; +use crate::summary::GlobalSummaries; /// One detection rule: leaf-name pattern → store kind + label. Stored /// as a flat list so adding a new ORM / driver is a one-line edit. @@ -355,9 +356,15 @@ pub fn detect_data_stores(summaries: &GlobalSummaries) -> Vec { let mut seen: std::collections::HashSet<(String, u32, String)> = std::collections::HashSet::new(); for (key, summary) in summaries.iter() { + // Project-relative POSIX file, keyed off the FuncKey namespace so a + // data-store node and the entry-point that reaches it agree on file + // identity (FuncSummary.file_path is an absolute path). 
+ let file = namespace_file(&key.namespace).to_string(); + let owner = key.qualified_name(); let typed = summaries .get_ssa(key) .map(|s| s.typed_call_receivers.as_slice()); + let mut matched_for_fn = false; for callee in &summary.callees { let rule = match_rule(&callee.name).or_else(|| { typed @@ -365,7 +372,8 @@ pub fn detect_data_stores(summaries: &GlobalSummaries) -> Vec { .and_then(|c| match_rule(&qualify(c, &callee.name))) }); let Some(rule) = rule else { continue }; - let location = call_site_location(summary, callee); + matched_for_fn = true; + let location = call_site_location(&file, callee.span); let dedup = (location.file.clone(), location.line, rule.label.to_string()); if !seen.insert(dedup) { continue; @@ -374,12 +382,117 @@ pub fn detect_data_stores(summaries: &GlobalSummaries) -> Vec { location, kind: rule.kind, label: rule.label.to_string(), + owner: owner.clone(), + access: classify_access(leaf_segment(&callee.name)), })); } + + // Cap-driven fallback: a function whose own `sink_caps` include + // SQL_QUERY / FILE_IO is a data-store access site even when no + // direct callee matched the driver table (custom DAO wrapper, + // cross-file-resolved execute). Mirrors external.rs's SSRF + // fallback. Skipped when a named driver already fired so the + // precise label wins. + if !matched_for_fn { + let caps = summary.sink_caps(); + let fallback = if caps.contains(Cap::SQL_QUERY) { + Some((DataStoreKind::Sql, "SQL query")) + } else if caps.contains(Cap::FILE_IO) { + Some((DataStoreKind::Filesystem, "File access")) + } else { + None + }; + if let Some((kind, label)) = fallback { + let dedup = (file.clone(), 0, label.to_string()); + if seen.insert(dedup) { + out.push(SurfaceNode::DataStore(DataStore { + location: call_site_location(&file, None), + kind, + label: label.to_string(), + owner: owner.clone(), + // Cap bits carry no operation direction; a raw + // SQL_QUERY / FILE_IO sink can be either. 
+ access: AccessMode::ReadWrite, + })); + } + } + } } out } +/// Classify the operation direction of a data-store access from the +/// callee's leaf name. Whole-prefix match on a lowercase verb table — +/// `findOne` / `find_by_id` / `findAll` all classify as reads via the +/// `find` prefix. Connect-/client-construction sites and unrecognised +/// verbs stay [`AccessMode::Unknown`] so reachability keeps emitting +/// the conservative `ReadsFrom` edge for them. +fn classify_access(leaf: &str) -> AccessMode { + const READ: &[&str] = &[ + "find", + "get", + "query", + "select", + "read", + "fetch", + "scan", + "count", + "exists", + "aggregate", + "lrange", + "smembers", + "hget", + "mget", + "keys", + "first", + "pluck", + "all", + ]; + const WRITE: &[&str] = &[ + "insert", "update", "delete", "save", "create", "set", "put", "write", "remove", "drop", + "truncate", "upsert", "persist", "destroy", "del", "hset", "lpush", "rpush", "sadd", + "zadd", "append", "rename", "unlink", "mkdir", "rmdir", "incr", "decr", "expire", + ]; + const READ_WRITE: &[&str] = &[ + "execute", + "executemany", + "executescript", + "exec", + "run", + "batch", + "transaction", + "pipeline", + ]; + let l = leaf.trim(); + // Verb-prefix match with a word boundary: the verb must be the whole + // leaf, or be followed by `_` (snake_case), an uppercase letter + // (camelCase), or a digit. `findOne` / `find_by_id` → read; + // `settings` does NOT match `set`. + let has_prefix = |verbs: &[&str]| { + verbs.iter().any(|v| { + l.get(..v.len()) + .is_some_and(|head| head.eq_ignore_ascii_case(v)) + && l.get(v.len()..) + .is_some_and(|rest| match rest.chars().next() { + None => true, + Some(c) => c == '_' || c.is_ascii_uppercase() || c.is_ascii_digit(), + }) + }) + }; + // Order matters: WRITE before READ so `setex`-style verbs with a + // read-looking suffix do not misclassify; READ_WRITE checked first + // because `execute` would otherwise never match. 
+ if has_prefix(READ_WRITE) { + AccessMode::ReadWrite + } else if has_prefix(WRITE) { + AccessMode::Write + } else if has_prefix(READ) { + AccessMode::Read + } else { + AccessMode::Unknown + } +} + /// Last segment of a callee text after the final `.` or `::`. fn leaf_segment(name: &str) -> &str { let after_colon = name.rsplit("::").next().unwrap_or(name); @@ -422,15 +535,14 @@ fn match_rule(callee: &str) -> Option<&'static DriverRule> { }) } -/// Source location of a call site. Reads the 1-based `(line, col)` -/// recorded on the [`CalleeSite`] at CFG-build time (populated for every -/// summary produced after the span field landed); for legacy summaries -/// loaded from SQLite with no span, falls back to the function's host -/// file with line 0. -fn call_site_location(summary: &FuncSummary, callee: &CalleeSite) -> SourceLocation { - let (line, col) = callee.span.unwrap_or((0, 0)); +/// Source location of a call site in the project-relative `file`. Reads +/// the 1-based `(line, col)` recorded on the [`CalleeSite`] at CFG-build +/// time when `span` is `Some`; for legacy summaries loaded from SQLite +/// with no span (and the cap-driven fallback path) falls back to line 0. 
+fn call_site_location(file: &str, span: Option<(u32, u32)>) -> SourceLocation { + let (line, col) = span.unwrap_or((0, 0)); SourceLocation { - file: summary.file_path.clone(), + file: file.to_string(), line, col, } @@ -439,6 +551,7 @@ fn call_site_location(summary: &FuncSummary, callee: &CalleeSite) -> SourceLocat #[cfg(test)] mod tests { use super::*; + use crate::summary::{CalleeSite, FuncSummary}; use crate::symbol::{FuncKey, Lang}; fn summary_with_callees(name: &str, file: &str, callees: &[&str]) -> (FuncKey, FuncSummary) { @@ -457,6 +570,49 @@ mod tests { (key, summary) } + #[test] + fn classify_access_verb_boundaries() { + assert_eq!(classify_access("findOne"), AccessMode::Read); + assert_eq!(classify_access("find_by_id"), AccessMode::Read); + assert_eq!(classify_access("get"), AccessMode::Read); + assert_eq!(classify_access("insertMany"), AccessMode::Write); + assert_eq!(classify_access("save"), AccessMode::Write); + assert_eq!(classify_access("deleteOne"), AccessMode::Write); + assert_eq!(classify_access("execute"), AccessMode::ReadWrite); + assert_eq!(classify_access("executemany"), AccessMode::ReadWrite); + assert_eq!(classify_access("Exec"), AccessMode::ReadWrite); + // Boundary safety: a lowercase continuation is NOT a verb match. + assert_eq!(classify_access("settings"), AccessMode::Unknown); + assert_eq!(classify_access("allocate"), AccessMode::Unknown); + assert_eq!(classify_access("connect"), AccessMode::Unknown); + } + + #[test] + fn detected_store_carries_access_mode() { + // `connect`-style driver match → Unknown access; the node still + // surfaces and reachability treats it as a conservative read. 
+ let mut gs = GlobalSummaries::new(); + let (key, summary) = summary_with_callees("init", "db.py", &["psycopg2.connect"]); + gs.insert(key, summary); + let nodes = detect_data_stores(&gs); + assert_eq!(nodes.len(), 1); + let SurfaceNode::DataStore(ds) = &nodes[0] else { + panic!() + }; + assert_eq!(ds.access, AccessMode::Unknown); + + // `pool.query` driver match → leaf `query` classifies as Read. + let mut gs = GlobalSummaries::new(); + let (key, summary) = summary_with_callees("run", "db.js", &["pool.query"]); + gs.insert(key, summary); + let nodes = detect_data_stores(&gs); + assert_eq!(nodes.len(), 1); + let SurfaceNode::DataStore(ds) = &nodes[0] else { + panic!() + }; + assert_eq!(ds.access, AccessMode::Read); + } + #[test] fn datastore_carries_callee_span_when_present() { // When the CFG populates `CalleeSite.span`, the detected datastore @@ -484,6 +640,56 @@ mod tests { assert_eq!(ds.location.col, 13); } + #[test] + fn cap_fallback_emits_sql_store_with_owner() { + // A custom DAO wrapper: no callee matches DRIVER_RULES, but the + // function's own sink_caps carry SQL_QUERY. The cap-driven fallback + // surfaces a generic Sql node carrying the owning function name. 
+ let mut gs = GlobalSummaries::new(); + let key = FuncKey::new_function(Lang::Python, "dao.py", "run_query", None); + let summary = FuncSummary { + name: "run_query".into(), + file_path: "dao.py".into(), + lang: "python".into(), + sink_caps: Cap::SQL_QUERY.bits(), + callees: vec![CalleeSite::bare("self._exec")], + ..Default::default() + }; + gs.insert(key, summary); + let nodes = detect_data_stores(&gs); + assert_eq!(nodes.len(), 1, "got {nodes:?}"); + let SurfaceNode::DataStore(ds) = &nodes[0] else { + panic!() + }; + assert_eq!(ds.kind, DataStoreKind::Sql); + assert_eq!(ds.label, "SQL query"); + assert_eq!(ds.owner, "run_query"); + assert_eq!(ds.location.file, "dao.py"); + } + + #[test] + fn named_driver_suppresses_cap_fallback() { + // When a named driver call already fired, the precise label wins and + // the generic cap fallback does not double-emit. + let mut gs = GlobalSummaries::new(); + let key = FuncKey::new_function(Lang::Python, "dao.py", "init", None); + let summary = FuncSummary { + name: "init".into(), + file_path: "dao.py".into(), + lang: "python".into(), + sink_caps: Cap::SQL_QUERY.bits(), + callees: vec![CalleeSite::bare("psycopg2.connect")], + ..Default::default() + }; + gs.insert(key, summary); + let nodes = detect_data_stores(&gs); + assert_eq!(nodes.len(), 1); + let SurfaceNode::DataStore(ds) = &nodes[0] else { + panic!() + }; + assert_eq!(ds.label, "PostgreSQL (psycopg2)"); + } + #[test] fn detects_psycopg2_connect() { let mut gs = GlobalSummaries::new(); diff --git a/src/surface/external.rs b/src/surface/external.rs index bd42db4f..c89d73af 100644 --- a/src/surface/external.rs +++ b/src/surface/external.rs @@ -7,9 +7,9 @@ //! consulted so a probe with no SSRF cap (DNS resolver, SMTP sender) //! still surfaces as an external service. 
-use super::{ExternalService, ExternalServiceKind, SourceLocation, SurfaceNode}; +use super::{ExternalService, ExternalServiceKind, SourceLocation, SurfaceNode, namespace_file}; use crate::labels::Cap; -use crate::summary::{CalleeSite, FuncSummary, GlobalSummaries}; +use crate::summary::GlobalSummaries; struct ClientRule { leaf: &'static str, @@ -337,9 +337,15 @@ pub fn detect_external_services(summaries: &GlobalSummaries) -> Vec let mut out: Vec = Vec::new(); let mut seen: std::collections::HashSet<(String, String)> = std::collections::HashSet::new(); for (key, summary) in summaries.iter() { + // Project-relative POSIX file, keyed off the FuncKey namespace so an + // external-service node and the entry-point that reaches it agree on + // file identity (FuncSummary.file_path is an absolute path). + let file = namespace_file(&key.namespace).to_string(); + let owner = key.qualified_name(); let typed = summaries .get_ssa(key) .map(|s| s.typed_call_receivers.as_slice()); + let mut matched_for_fn = false; for callee in &summary.callees { let rule = match_rule(&callee.name).or_else(|| { typed @@ -347,7 +353,8 @@ pub fn detect_external_services(summaries: &GlobalSummaries) -> Vec .and_then(|c| match_rule(&qualify(c, &callee.name))) }); let Some(rule) = rule else { continue }; - let location = call_site_location(summary, Some(callee)); + matched_for_fn = true; + let location = call_site_location(&file, callee.span); if !seen.insert((location.file.clone(), rule.label.to_string())) { continue; } @@ -355,22 +362,35 @@ pub fn detect_external_services(summaries: &GlobalSummaries) -> Vec location, kind: rule.kind, label: rule.label.to_string(), + owner: owner.clone(), })); } - } - // Also surface any function whose own sink_caps include SSRF — the - // function itself is an outbound network call site even if the - // direct callee did not match the rule list. Use the function's - // file as the location and synthesise a generic label. 
- for (_key, summary) in summaries.iter() { - if summary.sink_caps().contains(Cap::SSRF) { - let loc = call_site_location(summary, None); - let dedup = (loc.file.clone(), "Outbound HTTP".to_string()); + + // Cap-driven fallback: a function whose own sink_caps include SSRF + // (outbound request) or DATA_EXFIL (data leaving the system) is an + // egress site even when the direct callee did not match the rule + // list. Skipped when a named client already fired for this function + // so the precise label wins and the generic node does not + // double-count the same egress. + if matched_for_fn { + continue; + } + let caps = summary.sink_caps(); + let fallback = if caps.contains(Cap::SSRF) { + Some(("Outbound HTTP", ExternalServiceKind::HttpApi)) + } else if caps.contains(Cap::DATA_EXFIL) { + Some(("Data egress", ExternalServiceKind::Unknown)) + } else { + None + }; + if let Some((label, kind)) = fallback { + let dedup = (file.clone(), label.to_string()); if seen.insert(dedup) { out.push(SurfaceNode::ExternalService(ExternalService { - location: loc, - kind: ExternalServiceKind::HttpApi, - label: "Outbound HTTP".to_string(), + location: call_site_location(&file, None), + kind, + label: label.to_string(), + owner: owner.clone(), })); } } @@ -410,14 +430,15 @@ fn match_rule(callee: &str) -> Option<&'static ClientRule> { }) } -/// Source location of an external-service call site. Reads the 1-based -/// `(line, col)` recorded on the [`CalleeSite`] at CFG-build time when -/// available; otherwise (sink-cap–only fallback path, or legacy summaries -/// loaded from SQLite) returns the function's host file with line 0. -fn call_site_location(summary: &FuncSummary, callee: Option<&CalleeSite>) -> SourceLocation { - let (line, col) = callee.and_then(|c| c.span).unwrap_or((0, 0)); +/// Source location of an external-service call site in the +/// project-relative `file`. 
Reads the 1-based `(line, col)` recorded on +/// the [`crate::summary::CalleeSite`] at CFG-build time when `span` is +/// `Some`; otherwise (sink-cap–only fallback path, or legacy summaries +/// loaded from SQLite) returns the file with line 0. +fn call_site_location(file: &str, span: Option<(u32, u32)>) -> SourceLocation { + let (line, col) = span.unwrap_or((0, 0)); SourceLocation { - file: summary.file_path.clone(), + file: file.to_string(), line, col, } @@ -426,7 +447,7 @@ fn call_site_location(summary: &FuncSummary, callee: Option<&CalleeSite>) -> Sou #[cfg(test)] mod tests { use super::*; - use crate::summary::CalleeSite; + use crate::summary::{CalleeSite, FuncSummary}; use crate::symbol::{FuncKey, Lang}; #[test] @@ -450,6 +471,48 @@ mod tests { assert_eq!(es.label, "requests (Python)"); } + #[test] + fn ssrf_cap_fallback_carries_owner() { + let mut gs = GlobalSummaries::new(); + let key = FuncKey::new_function(Lang::Python, "proxy.py", "forward", None); + let summary = FuncSummary { + name: "forward".into(), + file_path: "/abs/proxy.py".into(), + lang: "python".into(), + sink_caps: Cap::SSRF.bits(), + ..Default::default() + }; + gs.insert(key, summary); + let nodes = detect_external_services(&gs); + assert_eq!(nodes.len(), 1); + let SurfaceNode::ExternalService(es) = &nodes[0] else { + panic!() + }; + assert_eq!(es.label, "Outbound HTTP"); + assert_eq!(es.owner, "forward"); + assert_eq!(es.location.file, "proxy.py"); + } + + #[test] + fn data_exfil_cap_emits_egress_node() { + let mut gs = GlobalSummaries::new(); + let key = FuncKey::new_function(Lang::Python, "leak.py", "dump", None); + let summary = FuncSummary { + name: "dump".into(), + file_path: "leak.py".into(), + lang: "python".into(), + sink_caps: Cap::DATA_EXFIL.bits(), + ..Default::default() + }; + gs.insert(key, summary); + let nodes = detect_external_services(&gs); + assert_eq!(nodes.len(), 1); + let SurfaceNode::ExternalService(es) = &nodes[0] else { + panic!() + }; + assert_eq!(es.label, "Data 
egress"); + } + #[test] fn bare_fetch_rule_does_not_match_prefetch_or_cachekey() { let mut gs = GlobalSummaries::new(); diff --git a/src/surface/mod.rs b/src/surface/mod.rs index db5097ea..4617bb0f 100644 --- a/src/surface/mod.rs +++ b/src/surface/mod.rs @@ -26,10 +26,12 @@ use std::path::Path; pub mod build; pub mod dangerous; pub mod datastore; +pub mod exposure; pub mod external; pub mod graph; pub mod lang; pub mod reachability; +pub mod risk; /// Stable source location used as the primary key for every /// [`SurfaceNode`]. `file` is a project-relative POSIX path so the @@ -109,6 +111,53 @@ pub struct DataStore { pub location: SourceLocation, pub kind: DataStoreKind, pub label: String, + /// Qualified name of the function that owns this access site + /// (`Class::method` or a free function name). Used by reachability + /// to connect an entry-point to this store only when the owning + /// function is actually on the call-graph frontier, rather than the + /// coarse "any node in the same file" match. Empty for legacy maps + /// loaded from SQLite before the field landed. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub owner: String, + /// Whether the access site reads, writes, or does both, classified + /// from the callee name at detection time (`find`/`get`/`select` → + /// read, `insert`/`save`/`delete` → write, `execute`/`exec` → + /// read-write). Drives the [`EdgeKind::ReadsFrom`] / + /// [`EdgeKind::WritesTo`] split in reachability. `Unknown` for + /// connect-style sites and legacy maps loaded from SQLite before + /// the field landed. + #[serde(default, skip_serializing_if = "AccessMode::is_unknown")] + pub access: AccessMode, +} + +/// Direction of a data-store access site. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum AccessMode { + Read, + Write, + ReadWrite, + #[default] + Unknown, +} + +impl AccessMode { + /// Serde helper: `Unknown` is the default and is omitted from the + /// canonical JSON so legacy payloads stay byte-identical. + pub fn is_unknown(&self) -> bool { + matches!(self, AccessMode::Unknown) + } + + /// True when the site can write (Write or ReadWrite). + pub fn writes(self) -> bool { + matches!(self, AccessMode::Write | AccessMode::ReadWrite) + } + + /// True when the site can read (Read, ReadWrite, or Unknown — an + /// unclassified site is conservatively treated as a read). + pub fn reads(self) -> bool { + !matches!(self, AccessMode::Write) + } } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] @@ -130,6 +179,10 @@ pub struct ExternalService { pub location: SourceLocation, pub kind: ExternalServiceKind, pub label: String, + /// Qualified name of the function that owns this egress site. See + /// [`DataStore::owner`] for why reachability needs it. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub owner: String, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] @@ -151,6 +204,13 @@ pub struct DangerousLocal { pub location: SourceLocation, pub function_name: String, pub cap_bits: u32, + /// Human-readable sink-class label decoded from `cap_bits` + /// (e.g. `"code-exec"`, `"deserialize, ssti"`). Lets the CLI and + /// the chain composer name the danger without re-deriving it from + /// the raw bitfield. Empty for legacy maps loaded from SQLite + /// before the field landed. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub label: String, } /// A node in the [`SurfaceMap`]. Every variant carries a @@ -201,36 +261,109 @@ impl SurfaceNode { } } -/// Semantic kind of an edge in the [`SurfaceMap`]. 
Encodes the -/// seven edge classes the chain composer walks; persistence is via -/// JSON so adding a variant is a non-breaking schema change as long -/// as the SQLite-level migration drops the old surface_map rows. +/// Semantic kind of an edge in the [`SurfaceMap`]. +/// +/// Persistence is via JSON so adding a variant is a non-breaking schema +/// change as long as the SQLite-level migration drops the old +/// surface_map rows. +/// +/// Emission status (kept honest so the next maintainer does not inherit +/// a false mental model): +/// +/// * **Emitted today** by [`reachability::populate_reaches_edges`]: +/// [`EdgeKind::ReadsFrom`] (entry → data store the entry reads), +/// [`EdgeKind::WritesTo`] (entry → data store the entry writes, +/// from [`DataStore::access`]), [`EdgeKind::TalksTo`] (entry → +/// external service), and [`EdgeKind::Reaches`] (entry → +/// dangerous-local sink). These four are [`EdgeKind::is_reach_like`]. +/// * **Reserved** (no production construction site yet): +/// [`EdgeKind::Calls`] (would lift call-graph edges, currently +/// redundant with the [`crate::callgraph::CallGraph`] itself), +/// [`EdgeKind::Triggers`] (needs job/webhook entry modelling), and +/// [`EdgeKind::AuthRequiredOn`] (needs a dedicated auth-check node +/// to originate from — today the auth signal rides on +/// [`EntryPoint::auth_required`] instead). #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub enum EdgeKind { /// Caller → callee. Wraps the call-graph edge so consumers do /// not have to consult [`crate::callgraph::CallGraph`] directly. + /// Reserved — not emitted. Calls, - /// Function or entry-point reads from a data store / external - /// service. + /// Entry-point reads from a data store. Emitted by reachability. ReadsFrom, - /// Function or entry-point writes to a data store. + /// Entry-point writes to a data store. 
Emitted by reachability + /// when [`DataStore::access`] classifies the site as writing. WritesTo, - /// Function or entry-point sends a request to an external - /// service. + /// Entry-point sends a request to an external service. Emitted by + /// reachability. TalksTo, /// Entry-point reaches a dangerous-local sink through some - /// transitive call chain. + /// transitive call chain. Emitted by reachability. Reaches, /// Entry-point triggers a side-effecting action (job, email, - /// webhook) other than a direct call. + /// webhook) other than a direct call. Reserved. Triggers, /// Entry-point gates downstream access on a successful auth /// check. The `from` is the auth-check node, the `to` is the - /// entry-point. + /// entry-point. Reserved — needs an auth-check node. AuthRequiredOn, } +impl EdgeKind { + /// True for the edge classes that connect an entry-point to a + /// reachable sink / store / external service. The CLI tree and any + /// "what does this entry reach" query treat all four uniformly. + pub fn is_reach_like(self) -> bool { + matches!( + self, + EdgeKind::Reaches | EdgeKind::ReadsFrom | EdgeKind::TalksTo | EdgeKind::WritesTo + ) + } +} + +/// Decode a [`crate::labels::Cap`] bitfield into a stable, human-readable +/// list of sink-class slugs (e.g. `0x400` → `["code-exec"]`). Order is +/// fixed (low bit first) so two equal bitfields render identically. +/// Used for [`DangerousLocal::label`] and the `nyx surface` CLI so the +/// raw `0x{:x}` debug dump never reaches a user.
+pub fn cap_labels(bits: u32) -> Vec<&'static str> { + use crate::labels::Cap; + const TABLE: &[(Cap, &str)] = &[ + (Cap::CODE_EXEC, "code-exec"), + (Cap::DESERIALIZE, "deserialize"), + (Cap::SSTI, "ssti"), + (Cap::FMT_STRING, "format-string"), + (Cap::SQL_QUERY, "sql"), + (Cap::SSRF, "ssrf"), + (Cap::FILE_IO, "file-io"), + (Cap::LDAP_INJECTION, "ldap-injection"), + (Cap::XPATH_INJECTION, "xpath-injection"), + (Cap::HEADER_INJECTION, "header-injection"), + (Cap::OPEN_REDIRECT, "open-redirect"), + (Cap::XXE, "xxe"), + (Cap::PROTOTYPE_POLLUTION, "prototype-pollution"), + (Cap::CRYPTO, "weak-crypto"), + (Cap::DATA_EXFIL, "data-exfil"), + (Cap::UNAUTHORIZED_ID, "unauthorized-id"), + ]; + let caps = Cap::from_bits_truncate(bits); + let mut out: Vec<&'static str> = TABLE + .iter() + .filter(|(c, _)| caps.contains(*c)) + .map(|(_, s)| *s) + .collect(); + if out.is_empty() { + out.push("sink"); + } + out +} + +/// Comma-joined form of [`cap_labels`]. +pub fn cap_label_string(bits: u32) -> String { + cap_labels(bits).join(", ") +} + /// A single edge in the [`SurfaceMap`]. `from` and `to` are indices /// into [`SurfaceMap::nodes`]; the surface ordering keeps these /// stable across rescans. @@ -337,6 +470,21 @@ impl SurfaceMap { } } +/// Strip the optional `@pkg/name::` package prefix from a [`crate::symbol::FuncKey`] +/// namespace, returning the project-relative POSIX file path part. +/// +/// `namespace_with_package` produces `"@scope/name::src/file.ts"` for +/// JS/TS files inside resolved packages; the file part is the +/// project-relative path that matches an [`EntryPoint`]'s +/// `handler_location.file`. This is the single source of truth the +/// detectors and the reachability pass both key on, so a data-store / +/// external / dangerous-local node and the entry-point that reaches it +/// agree on file identity even though `FuncSummary.file_path` is stored +/// as an absolute path. 
+pub fn namespace_file(ns: &str) -> &str { + ns.rsplit_once("::").map(|(_, rest)| rest).unwrap_or(ns) +} + /// Convert an absolute path to a project-relative POSIX path string. /// Returns the absolute path verbatim when the file is outside the /// scan root or when path stripping fails. diff --git a/src/surface/reachability.rs b/src/surface/reachability.rs index 603a006c..1cfe3a34 100644 --- a/src/surface/reachability.rs +++ b/src/surface/reachability.rs @@ -19,21 +19,49 @@ //! calls `eval()` will surface the eval as a `Reaches` of the entry //! point as long as the eval's host file is on the BFS frontier. -use super::{EdgeKind, SurfaceEdge, SurfaceMap, SurfaceNode}; +use super::{EdgeKind, SurfaceEdge, SurfaceMap, SurfaceNode, namespace_file}; use crate::callgraph::CallGraph; use crate::summary::GlobalSummaries; use petgraph::Direction; use std::collections::{HashMap, HashSet, VecDeque}; /// Maximum BFS depth from an entry-point node. Surface chains beyond -/// six call-graph hops are rare in practice and the cost of a deeper +/// eight call-graph hops are rare in practice and the cost of a deeper /// walk is paid per entry-point per scan. A depth-bounded traversal /// also prevents recursive cycles from blowing up. const MAX_BFS_DEPTH: usize = 8; -/// Populate [`EdgeKind::Reaches`] edges on `map`. Mutates the edge -/// list in place; the caller is expected to follow up with -/// [`SurfaceMap::canonicalize`] before serialisation. +/// One reachable destination node, keyed for **function-level** matching. +struct Dest { + idx: usize, + /// Project-relative POSIX file the destination lives in. + file: String, + /// Qualified name (`Class::method` / free function) of the function + /// that owns this destination. Empty only for legacy maps loaded + /// from SQLite before the `owner` field landed — those fall back to + /// file-level matching. 
+ owner: String, + /// Edge classes to emit when an entry-point reaches this destination: + /// [`EdgeKind::ReadsFrom`] / [`EdgeKind::WritesTo`] for a data store + /// (driven by [`crate::surface::DataStore::access`]; a read-write + /// site emits both), [`EdgeKind::TalksTo`] for an external service, + /// [`EdgeKind::Reaches`] for a dangerous local sink. + edges: smallvec::SmallVec<[EdgeKind; 2]>, +} + +/// Populate entry-point → sink reachability edges on `map` ([`EdgeKind::ReadsFrom`] / +/// [`EdgeKind::WritesTo`] / [`EdgeKind::TalksTo`] / [`EdgeKind::Reaches`]). +/// Mutates the edge list in place; the caller is expected to follow up +/// with [`SurfaceMap::canonicalize`] before serialisation. +/// +/// Matching is **function-level** when the entry-point's handler resolves +/// to a call-graph node: a destination is connected only when the +/// function that owns it is actually on the forward BFS frontier from the +/// handler, so two unrelated handlers in the same file no longer both +/// "reach" a co-located `eval()`. When the handler cannot be resolved in +/// the call graph (anonymous closure handler, unresolved seed) the pass +/// falls back to the conservative same-file heuristic so connectivity is +/// not silently lost. pub fn populate_reaches_edges( map: &mut SurfaceMap, summaries: &GlobalSummaries, @@ -53,40 +81,42 @@ pub fn populate_reaches_edges( let SurfaceNode::EntryPoint(ep) = node else { continue; }; - let mut reachable_files: HashSet = HashSet::new(); - // Seed with the handler's host file — the entry-point itself - // counts as reachable, so any DataStore / ExternalService / - // DangerousLocal in the same file is connected even when the - // call graph cannot resolve the seed FuncKey. - reachable_files.insert(ep.handler_location.file.clone()); - // Locate seed FuncKeys whose `namespace` (project-relative - // POSIX path, optionally prefixed with `@pkg/name::`) matches - // the entry's file and whose `name` matches the handler.
More - // than one seed is possible (overloaded methods, duplicate - // definitions). - // - // Phase 23 follow-up: this used to be an `ends_with` substring - // check on both sides, which silently aliased same-basename - // files in sibling directories — `subdir/app.py` and - // `other/app.py` would both seed when the entry-point pointed - // at `app.py`. We now compare the file part exactly so a - // handler in `subdir/app.py` only seeds the FuncKey whose - // namespace strips to `subdir/app.py`. - let seeds = call_graph - .index - .iter() - .filter(|(k, _)| k.name == ep.handler_name) - .filter(|(k, _)| file_part_of_namespace(&k.namespace) == ep.handler_location.file) - .map(|(_, idx)| *idx) - .collect::>(); + // Locate seed FuncKeys whose namespace file-part matches the + // entry's handler file and whose `name` matches the handler. + // More than one seed is possible (overloads, duplicate defs). + // Anonymous handlers (empty name) match nothing — handled by the + // unresolved fallback below. + let seeds = if ep.handler_name.is_empty() { + Vec::new() + } else { + call_graph + .index + .iter() + .filter(|(k, _)| k.name == ep.handler_name) + .filter(|(k, _)| namespace_file(&k.namespace) == ep.handler_location.file) + .map(|(_, idx)| *idx) + .collect::>() + }; + let seed_found = !seeds.is_empty(); + + // Forward BFS over the call graph, collecting the set of reachable + // owner functions as `(file, qualified_name)` keys. Inserting the + // *file part* of the namespace (not the raw `@pkg::path` namespace) + // fixes the prior bug where packaged JS/TS namespaces never matched + // a destination's bare file, silently killing all transitive reach. 
+ let mut reachable_fns: HashSet<(String, String)> = HashSet::new(); + let mut reachable_files: HashSet = HashSet::new(); + reachable_files.insert(ep.handler_location.file.clone()); let mut visited: HashSet<_> = seeds.iter().copied().collect(); let mut queue: VecDeque<(petgraph::graph::NodeIndex, usize)> = seeds.iter().map(|n| (*n, 0)).collect(); while let Some((node_idx, depth)) = queue.pop_front() { if let Some(key) = call_graph.graph.node_weight(node_idx) { - reachable_files.insert(key.namespace.clone()); + let file = namespace_file(&key.namespace).to_string(); + reachable_fns.insert((file.clone(), key.qualified_name())); + reachable_files.insert(file); } if depth >= MAX_BFS_DEPTH { continue; @@ -101,13 +131,24 @@ pub fn populate_reaches_edges( } } - for (dst_idx, dst_file) in &dst_index { - if reachable_files.contains(dst_file) { - new_edges.insert(SurfaceEdge { - from: entry_idx as u32, - to: *dst_idx as u32, - kind: EdgeKind::Reaches, - }); + for d in &dst_index { + let reached = if seed_found && !d.owner.is_empty() { + // Precise: the owning function must be on the BFS frontier. + reachable_fns.contains(&(d.file.clone(), d.owner.clone())) + } else { + // Unresolved seed, or a legacy destination with no owner: + // conservative same-file fallback (preserves connectivity + // when the call graph cannot resolve the handler). + reachable_files.contains(&d.file) + }; + if reached { + for kind in &d.edges { + new_edges.insert(SurfaceEdge { + from: entry_idx as u32, + to: d.idx as u32, + kind: *kind, + }); + } } } } @@ -115,27 +156,40 @@ pub fn populate_reaches_edges( map.edges.extend(new_edges); } -/// Strip the optional `@pkg/name::` package prefix from a `FuncKey` -/// namespace, returning the project-relative POSIX file path part. -/// `namespace_with_package` produces `"@scope/name::src/file.ts"` for -/// JS/TS files inside resolved packages; the file part is what -/// matches an entry-point's `handler_location.file`. 
-fn file_part_of_namespace(ns: &str) -> &str { - ns.rsplit_once("::").map(|(_, rest)| rest).unwrap_or(ns) -} - -/// Build a lookup from destination node index → destination file. -/// Restricted to the three reachable-from-entry-point variants. -fn build_destination_index(map: &SurfaceMap) -> Vec<(usize, String)> { - let mut out: Vec<(usize, String)> = Vec::new(); +/// Build the destination index: every non-entry-point node tagged with +/// its file, owning function, and the edge class to emit. +fn build_destination_index(map: &SurfaceMap) -> Vec { + let mut out: Vec = Vec::new(); for (idx, node) in map.nodes.iter().enumerate() { - let file = match node { - SurfaceNode::DataStore(n) => n.location.file.clone(), - SurfaceNode::ExternalService(n) => n.location.file.clone(), - SurfaceNode::DangerousLocal(n) => n.location.file.clone(), + let (file, owner, edges) = match node { + SurfaceNode::DataStore(n) => { + let mut edges: smallvec::SmallVec<[EdgeKind; 2]> = smallvec::SmallVec::new(); + if n.access.reads() { + edges.push(EdgeKind::ReadsFrom); + } + if n.access.writes() { + edges.push(EdgeKind::WritesTo); + } + (n.location.file.clone(), n.owner.clone(), edges) + } + SurfaceNode::ExternalService(n) => ( + n.location.file.clone(), + n.owner.clone(), + smallvec::smallvec![EdgeKind::TalksTo], + ), + SurfaceNode::DangerousLocal(n) => ( + n.location.file.clone(), + n.function_name.clone(), + smallvec::smallvec![EdgeKind::Reaches], + ), SurfaceNode::EntryPoint(_) => continue, }; - out.push((idx, file)); + out.push(Dest { + idx, + file, + owner, + edges, + }); } out } @@ -164,7 +218,8 @@ mod tests { use super::*; use crate::entry_points::HttpMethod; use crate::surface::{ - DangerousLocal, EntryPoint, Framework, SourceLocation, SurfaceMap, SurfaceNode, + DangerousLocal, DataStore, DataStoreKind, EntryPoint, ExternalService, ExternalServiceKind, + Framework, SourceLocation, SurfaceMap, SurfaceNode, }; fn ep(file: &str, handler: &str) -> SurfaceNode { @@ -184,6 +239,7 @@ mod 
tests { location: SourceLocation::new(file, 0, 0), function_name: name.into(), cap_bits: 0x1, + label: String::new(), }) } @@ -207,14 +263,179 @@ mod tests { } #[test] - fn file_part_of_namespace_strips_package_prefix() { - assert_eq!(file_part_of_namespace("app.py"), "app.py"); - assert_eq!(file_part_of_namespace("src/main.rs"), "src/main.rs"); - assert_eq!( - file_part_of_namespace("@scope/name::src/file.ts"), - "src/file.ts" + fn emits_typed_edges_for_store_and_external() { + // A data store yields ReadsFrom, an external service yields TalksTo + // (Reaches is reserved for dangerous-local sinks). Uses the + // unresolved-seed same-file fallback (empty call graph). + let mut map = SurfaceMap::new(); + map.nodes.push(ep("app.py", "handler")); // 0 + map.nodes.push(SurfaceNode::DataStore(DataStore { + location: SourceLocation::new("app.py", 4, 1), + kind: DataStoreKind::Sql, + label: "PostgreSQL".into(), + owner: "handler".into(), + access: Default::default(), + })); // 1 + map.nodes + .push(SurfaceNode::ExternalService(ExternalService { + location: SourceLocation::new("app.py", 6, 1), + kind: ExternalServiceKind::HttpApi, + label: "requests".into(), + owner: "handler".into(), + })); // 2 + let gs = GlobalSummaries::new(); + let cg = CallGraph { + graph: petgraph::graph::DiGraph::new(), + index: Default::default(), + unresolved_not_found: vec![], + unresolved_ambiguous: vec![], + }; + populate_reaches_edges(&mut map, &gs, &cg); + assert!( + map.edges + .iter() + .any(|e| e.kind == EdgeKind::ReadsFrom && e.to == 1) ); + assert!( + map.edges + .iter() + .any(|e| e.kind == EdgeKind::TalksTo && e.to == 2) + ); + assert!(map.edges.iter().all(|e| e.kind != EdgeKind::Reaches)); + } + + #[test] + fn write_access_emits_writes_to_edge() { + use crate::surface::AccessMode; + let mut map = SurfaceMap::new(); + map.nodes.push(ep("app.py", "handler")); // 0 + map.nodes.push(SurfaceNode::DataStore(DataStore { + location: SourceLocation::new("app.py", 4, 1), + kind: 
DataStoreKind::Sql, + label: "PostgreSQL".into(), + owner: "handler".into(), + access: AccessMode::Write, + })); // 1 + map.nodes.push(SurfaceNode::DataStore(DataStore { + location: SourceLocation::new("app.py", 6, 1), + kind: DataStoreKind::Sql, + label: "PostgreSQL exec".into(), + owner: "handler".into(), + access: AccessMode::ReadWrite, + })); // 2 + let gs = GlobalSummaries::new(); + let cg = CallGraph { + graph: petgraph::graph::DiGraph::new(), + index: Default::default(), + unresolved_not_found: vec![], + unresolved_ambiguous: vec![], + }; + populate_reaches_edges(&mut map, &gs, &cg); + // Write-only store: WritesTo, no ReadsFrom. + assert!( + map.edges + .iter() + .any(|e| e.kind == EdgeKind::WritesTo && e.to == 1) + ); + assert!( + !map.edges + .iter() + .any(|e| e.kind == EdgeKind::ReadsFrom && e.to == 1) + ); + // Read-write store: both edges. + assert!( + map.edges + .iter() + .any(|e| e.kind == EdgeKind::WritesTo && e.to == 2) + ); + assert!( + map.edges + .iter() + .any(|e| e.kind == EdgeKind::ReadsFrom && e.to == 2) + ); + } + + #[test] + fn namespace_file_strips_package_prefix() { + use crate::surface::namespace_file; + assert_eq!(namespace_file("app.py"), "app.py"); + assert_eq!(namespace_file("src/main.rs"), "src/main.rs"); + assert_eq!(namespace_file("@scope/name::src/file.ts"), "src/file.ts"); // Last `::` wins, matching `namespace_with_package`'s shape. - assert_eq!(file_part_of_namespace("@a/b::@c/d::lib/x.ts"), "lib/x.ts"); + assert_eq!(namespace_file("@a/b::@c/d::lib/x.ts"), "lib/x.ts"); + } + + #[test] + fn function_level_match_skips_unrelated_same_file_sink() { + // Two handlers and one dangerous sink live in the same file, but + // only `caller` calls `do_eval`. With a resolvable call graph the + // unrelated `other` handler must NOT get a Reaches edge — the + // file-level heuristic used to connect both. 
+ use crate::symbol::{FuncKey, Lang}; + let mut map = SurfaceMap::new(); + map.nodes.push(ep("app.py", "caller")); // idx 0 + map.nodes.push(ep("app.py", "other")); // idx 1 + // Dangerous sink owned by `do_eval`. + map.nodes.push(SurfaceNode::DangerousLocal(DangerousLocal { + location: SourceLocation::new("app.py", 12, 1), + function_name: "do_eval".into(), + cap_bits: 0x1, + label: "code-exec".into(), + })); // idx 2 + + // Call graph: caller -> do_eval ; other is isolated. + let mut cg = CallGraph { + graph: petgraph::graph::DiGraph::new(), + index: Default::default(), + unresolved_not_found: vec![], + unresolved_ambiguous: vec![], + }; + let caller = cg.graph.add_node(FuncKey::new_function( + Lang::Python, + "app.py", + "caller", + None, + )); + let other = cg + .graph + .add_node(FuncKey::new_function(Lang::Python, "app.py", "other", None)); + let do_eval = cg.graph.add_node(FuncKey::new_function( + Lang::Python, + "app.py", + "do_eval", + None, + )); + cg.graph.add_edge( + caller, + do_eval, + crate::callgraph::CallEdge { + call_site: "do_eval".into(), + }, + ); + cg.index.insert( + FuncKey::new_function(Lang::Python, "app.py", "caller", None), + caller, + ); + cg.index.insert( + FuncKey::new_function(Lang::Python, "app.py", "other", None), + other, + ); + cg.index.insert( + FuncKey::new_function(Lang::Python, "app.py", "do_eval", None), + do_eval, + ); + + let gs = GlobalSummaries::new(); + populate_reaches_edges(&mut map, &gs, &cg); + // Exactly one Reaches edge: caller(0) -> sink(2). `other`(1) is + // excluded by function-level matching. 
+ let reaches: Vec<_> = map + .edges + .iter() + .filter(|e| e.kind == EdgeKind::Reaches) + .collect(); + assert_eq!(reaches.len(), 1, "got {reaches:?}"); + assert_eq!(reaches[0].from, 0); + assert_eq!(reaches[0].to, 2); } } diff --git a/tests/calibration_data_exfil.rs b/tests/calibration_data_exfil.rs index 2cfc67b9..267a7c31 100644 --- a/tests/calibration_data_exfil.rs +++ b/tests/calibration_data_exfil.rs @@ -97,6 +97,7 @@ fn make_diag( evidence: Some(make_evidence(source_kind, verdict)), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/chain_edges.rs b/tests/chain_edges.rs index 4b2ece0f..36b7c17b 100644 --- a/tests/chain_edges.rs +++ b/tests/chain_edges.rs @@ -50,6 +50,7 @@ fn diag_with_caps(path: &str, line: usize, caps: Cap) -> Diag { }), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/chain_emission.rs b/tests/chain_emission.rs index c3ed8469..36cc9947 100644 --- a/tests/chain_emission.rs +++ b/tests/chain_emission.rs @@ -45,6 +45,7 @@ fn fixture_surface_map() -> SurfaceMap { location: loc("app.py", 30), function_name: "shell.exec".into(), cap_bits: Cap::CODE_EXEC.bits(), + label: String::new(), })); m } @@ -77,6 +78,7 @@ fn fixture_findings() -> Vec { evidence: Some(ev), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/common/fixture_harness.rs b/tests/common/fixture_harness.rs index 8b934ca6..b4318c85 100644 --- a/tests/common/fixture_harness.rs +++ b/tests/common/fixture_harness.rs @@ -968,6 +968,7 @@ fn make_diag(path: &Path, func: &str, cap: Cap, sink_line: u32) -> Diag { evidence: Some(evidence), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/console_snapshot.rs 
b/tests/console_snapshot.rs index 160fca8d..50746034 100644 --- a/tests/console_snapshot.rs +++ b/tests/console_snapshot.rs @@ -45,6 +45,7 @@ fn base_diag() -> Diag { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/determinism_audit.rs b/tests/determinism_audit.rs index ea5c714c..e55d7335 100644 --- a/tests/determinism_audit.rs +++ b/tests/determinism_audit.rs @@ -59,6 +59,7 @@ fn deny_diag(stable_hash: u64) -> Diag { evidence: Some(ev), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -312,6 +313,7 @@ fn confirmed_run_is_byte_identical_across_runs() { evidence: Some(evidence), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/dynamic_parity.rs b/tests/dynamic_parity.rs index 141cb238..837600fb 100644 --- a/tests/dynamic_parity.rs +++ b/tests/dynamic_parity.rs @@ -86,6 +86,7 @@ mod parity_tests { }), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/dynamic_verify_e2e.rs b/tests/dynamic_verify_e2e.rs index 6a99b7fd..ac160204 100644 --- a/tests/dynamic_verify_e2e.rs +++ b/tests/dynamic_verify_e2e.rs @@ -78,6 +78,7 @@ mod verify_e2e { }), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -111,6 +112,7 @@ mod verify_e2e { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/engine_notes_rank_tests.rs b/tests/engine_notes_rank_tests.rs index d84ab6a2..9a376e9f 100644 --- a/tests/engine_notes_rank_tests.rs +++ b/tests/engine_notes_rank_tests.rs @@ -64,6 +64,7 @@ fn high_confidence_taint_diag(path: &str, 
line: u32) -> Diag { }), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/go_fixtures.rs b/tests/go_fixtures.rs index 0a18143c..ab3aa021 100644 --- a/tests/go_fixtures.rs +++ b/tests/go_fixtures.rs @@ -452,6 +452,7 @@ mod go_fixture_tests { evidence: Some(evidence), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/health_score_calibration.rs b/tests/health_score_calibration.rs index f22dcc2b..120084b6 100644 --- a/tests/health_score_calibration.rs +++ b/tests/health_score_calibration.rs @@ -47,6 +47,7 @@ fn diag(severity: Severity, id: &str, conf: Option) -> Diag { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/java_fixtures.rs b/tests/java_fixtures.rs index 6788a29d..3a57099d 100644 --- a/tests/java_fixtures.rs +++ b/tests/java_fixtures.rs @@ -450,6 +450,7 @@ mod java_fixture_tests { evidence: Some(evidence), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/js_fixtures.rs b/tests/js_fixtures.rs index caa2e418..7341eed9 100644 --- a/tests/js_fixtures.rs +++ b/tests/js_fixtures.rs @@ -445,6 +445,7 @@ mod js_fixture_tests { evidence: Some(evidence), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/json_snapshot.rs b/tests/json_snapshot.rs index 9450e47a..78928f64 100644 --- a/tests/json_snapshot.rs +++ b/tests/json_snapshot.rs @@ -25,6 +25,7 @@ fn base_diag() -> Diag { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/lang_detect_probes.rs 
b/tests/lang_detect_probes.rs index 36314723..932769d2 100644 --- a/tests/lang_detect_probes.rs +++ b/tests/lang_detect_probes.rs @@ -55,6 +55,7 @@ mod lang_detect { }), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/php_fixtures.rs b/tests/php_fixtures.rs index ad2bc1a3..72505f67 100644 --- a/tests/php_fixtures.rs +++ b/tests/php_fixtures.rs @@ -440,6 +440,7 @@ mod php_fixture_tests { evidence: Some(evidence), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/policy_deny.rs b/tests/policy_deny.rs index 71dcf45b..d35c1bbc 100644 --- a/tests/policy_deny.rs +++ b/tests/policy_deny.rs @@ -34,6 +34,7 @@ fn empty_diag() -> Diag { evidence: Some(Evidence::default()), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/python_fixtures.rs b/tests/python_fixtures.rs index 66c72797..ac9071d4 100644 --- a/tests/python_fixtures.rs +++ b/tests/python_fixtures.rs @@ -928,6 +928,7 @@ mod python_fixture_tests { evidence: Some(evidence), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/rust_fixtures.rs b/tests/rust_fixtures.rs index 14cfa3b0..58d2448b 100644 --- a/tests/rust_fixtures.rs +++ b/tests/rust_fixtures.rs @@ -279,6 +279,7 @@ mod rust_fixture_tests { evidence: Some(evidence), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/sandbox_hardening_linux.rs b/tests/sandbox_hardening_linux.rs index 99c878f5..45d4701c 100644 --- a/tests/sandbox_hardening_linux.rs +++ b/tests/sandbox_hardening_linux.rs @@ -752,6 +752,7 @@ mod hardening_tests { evidence: Some(evidence), rank_score: None, rank_reason: None, + 
exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -947,6 +948,7 @@ mod hardening_tests { evidence: Some(evidence), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/sandbox_hardening_macos.rs b/tests/sandbox_hardening_macos.rs index 187b8e03..1249bda4 100644 --- a/tests/sandbox_hardening_macos.rs +++ b/tests/sandbox_hardening_macos.rs @@ -647,6 +647,7 @@ finally: evidence: Some(evidence), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), @@ -787,6 +788,7 @@ finally: evidence: Some(evidence), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/sarif_dynamic_verdict_tests.rs b/tests/sarif_dynamic_verdict_tests.rs index 764cc776..09ffd450 100644 --- a/tests/sarif_dynamic_verdict_tests.rs +++ b/tests/sarif_dynamic_verdict_tests.rs @@ -29,6 +29,7 @@ fn base_diag() -> Diag { evidence: None, rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/spec_callgraph_resolution.rs b/tests/spec_callgraph_resolution.rs index 148547cf..22c541e0 100644 --- a/tests/spec_callgraph_resolution.rs +++ b/tests/spec_callgraph_resolution.rs @@ -78,6 +78,7 @@ fn make_diag(id: &str, path: &str, line: usize) -> Diag { evidence: Some(Evidence::default()), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/spec_derivation_strategies.rs b/tests/spec_derivation_strategies.rs index f4167b9f..19e7eee1 100644 --- a/tests/spec_derivation_strategies.rs +++ b/tests/spec_derivation_strategies.rs @@ -48,6 +48,7 @@ mod spec_strategies { evidence: Some(Evidence::default()), rank_score: None, rank_reason: None, + exposure: 
None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/spec_framework_sample.rs b/tests/spec_framework_sample.rs index e602179b..479ec55f 100644 --- a/tests/spec_framework_sample.rs +++ b/tests/spec_framework_sample.rs @@ -73,6 +73,7 @@ fn make_diag(path: &str, handler: &str, line: usize, cap: Cap, rule_id: &str) -> evidence: Some(ev), rank_score: None, rank_reason: None, + exposure: None, suppressed: false, suppression: None, triage_state: "open".to_string(), diff --git a/tests/surface_cli.rs b/tests/surface_cli.rs index c15eb921..77322ab1 100644 --- a/tests/surface_cli.rs +++ b/tests/surface_cli.rs @@ -116,7 +116,7 @@ fn load_or_build_falls_back_to_filesystem_when_no_db() { .unwrap(); let db_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let map = load_or_build(tmp.path(), db_dir.path(), &cfg).expect("load_or_build"); + let (map, _cov) = load_or_build(tmp.path(), db_dir.path(), &cfg).expect("load_or_build"); assert!( map.entry_points().next().is_some(), "expected at least one entry-point in fallback path"