diff --git a/src/cfg/mod.rs b/src/cfg/mod.rs index 83bf89c3..4783666d 100644 --- a/src/cfg/mod.rs +++ b/src/cfg/mod.rs @@ -2473,13 +2473,12 @@ pub(super) fn push_node<'a>( .is_some_and(|cn| is_parameterized_query_call(cn, code)); // Extract per-argument inner call callees for interprocedural sanitizer resolution. - let mut arg_callees = if kind == StmtKind::Call { - call_ast - .map(|cn| extract_arg_callees(cn, lang, code)) - .unwrap_or_default() - } else { - Vec::new() - }; + // Also extracted for non-Call kinds (e.g. Assign whose RHS is a call like + // `errs = append(errs, f.Close())`) so the inner-call-release-in-arg branch + // in src/state/transfer.rs sees the closing call. + let mut arg_callees = call_ast + .map(|cn| extract_arg_callees(cn, lang, code)) + .unwrap_or_default(); // For assignment sinks (including CallWrapper-wrapped assignments like // `element.innerHTML = clean(name)`), also extract the RHS callee. diff --git a/src/cfg_analysis/resources.rs b/src/cfg_analysis/resources.rs index f04ccbae..9ec7d8d0 100644 --- a/src/cfg_analysis/resources.rs +++ b/src/cfg_analysis/resources.rs @@ -48,23 +48,34 @@ fn find_acquire_nodes( } /// Find nodes matching release patterns for a given resource pair. +/// +/// Includes both direct release calls (`info.call.callee`) and inner-arg +/// release calls (`info.arg_callees`), so wrapper shapes like Go testify +/// `require.NoError(t, f.Close())` and `errs = append(errs, f.Close())` +/// register the close site even though the outer callee is the wrapper. fn find_release_nodes(ctx: &AnalysisContext, release_patterns: &[&str]) -> Vec { + let matches_release = |callee: &str| -> bool { + let callee_lower = callee.to_ascii_lowercase(); + release_patterns.iter().any(|p| { + let pl = p.to_ascii_lowercase(); + callee_lower.ends_with(&pl) || callee_lower == pl + }) + }; ctx.cfg .node_indices() .filter(|&idx| { let info = &ctx.cfg[idx]; - if info.kind != StmtKind::Call { - return false; - } if let Some(callee) = &info.call.callee { - let callee_lower = callee.to_ascii_lowercase(); - release_patterns.iter().any(|p| { - let pl = p.to_ascii_lowercase(); - callee_lower.ends_with(&pl) || callee_lower == pl - }) - } else { - false + if info.kind == StmtKind::Call && matches_release(callee) { + return true; + } } + // Inner-call-release-in-arg: any kind, the close lives in an + // argument to the outer wrapper. + info.arg_callees + .iter() + .filter_map(|c| c.as_deref()) + .any(matches_release) }) .collect() } @@ -95,8 +106,9 @@ fn release_on_all_exit_paths( // Fall back to path enumeration with null-guard pruning. let acquire_var = ctx.cfg[acquire].taint.defines.as_deref(); + let extra_defines = &ctx.cfg[acquire].taint.extra_defines; let release_set: HashSet<_> = release_nodes.iter().copied().collect(); - all_paths_pass_through(ctx, acquire, exit, &release_set, acquire_var) + all_paths_pass_through(ctx, acquire, exit, &release_set, acquire_var, extra_defines) } /// Identify whether a CFG edge is the "null-guard false edge" for the named @@ -143,6 +155,62 @@ fn is_null_guard_false_edge( edge_kind == null_edge } +/// Recognise Go's err-companion guard: `if err != nil { return err }` where +/// `err` is a companion define of the acquire (`f, err := os.Open(...)`). +/// On the err-true edge the resource was never actually acquired (acquire +/// returned the zero value), so the path is not a real leak path. +/// +/// Returns `true` for the edge that takes the err-non-nil branch. Match is +/// strict: condition must reference exactly one var that lives in the +/// acquire's `extra_defines`, condition_text must compare against `nil`, and +/// the chosen edge must match the err-non-nil polarity. +fn is_err_companion_guard_edge( + ctx: &AnalysisContext, + src: NodeIndex, + edge_kind: EdgeKind, + extra_defines: &[String], +) -> bool { + if extra_defines.is_empty() { + return false; + } + let info = &ctx.cfg[src]; + if info.kind != StmtKind::If { + return false; + } + if info.condition_vars.len() != 1 { + return false; + } + let cond_var = &info.condition_vars[0]; + if !extra_defines.iter().any(|e| e == cond_var) { + return false; + } + let Some(text) = info.condition_text.as_deref() else { + return false; + }; + // Normalise: drop spaces so `err!=nil` and `err != nil` both match. + let collapsed: String = text.chars().filter(|c| !c.is_whitespace()).collect(); + let var_eq_nil = format!("{cond_var}==nil"); + let var_neq_nil = format!("{cond_var}!=nil"); + // Polarity: `err != nil` → err-non-nil branch is the True edge; + // `err == nil` → err-non-nil branch is the False edge. + let err_branch = if collapsed.contains(&var_neq_nil) { + if info.condition_negated { + EdgeKind::False + } else { + EdgeKind::True + } + } else if collapsed.contains(&var_eq_nil) { + if info.condition_negated { + EdgeKind::True + } else { + EdgeKind::False + } + } else { + return false; + }; + edge_kind == err_branch +} + /// Check if all paths from `from` to `to` pass through at least one node in `through`, /// pruning null-guard-false edges for the acquired variable so the canonical /// `if (var) release(var);` idiom is recognised as a complete release. @@ -152,6 +220,7 @@ fn all_paths_pass_through( to: NodeIndex, through: &HashSet, acquire_var: Option<&str>, + extra_defines: &[String], ) -> bool { use std::collections::VecDeque; @@ -173,6 +242,23 @@ fn all_paths_pass_through( continue; } + // Treat a Return-of-err-companion as a passing exit: in Go's + // `f, err := os.Open(...); if err != nil { return err }` shape the + // err-return path does not actually own a resource (acquire returned + // the zero value), so reaching such a Return is not a leak. + let info = &ctx.cfg[node]; + if info.kind == StmtKind::Return + && !extra_defines.is_empty() + && !info.taint.uses.is_empty() + && info + .taint + .uses + .iter() + .all(|u| extra_defines.iter().any(|e| e == u)) + { + continue; + } + for edge in ctx.cfg.edges(node) { // Prune null-guard-false edges: those represent "var is null", // a path on which the resource was never actually acquired. @@ -181,6 +267,12 @@ fn all_paths_pass_through( { continue; } + // Prune Go err-companion guard edges: `if err != nil { return err }` + // after `f, err := os.Open(...)` takes the err branch on which the + // resource handle is the zero value and was never acquired. + if is_err_companion_guard_edge(ctx, node, *edge.weight(), extra_defines) { + continue; + } let succ = edge.target(); let new_passed = passed || through.contains(&succ); let state = (succ, new_passed); diff --git a/src/state/facts.rs b/src/state/facts.rs index a5d5fa6a..f4d4d14a 100644 --- a/src/state/facts.rs +++ b/src/state/facts.rs @@ -152,6 +152,55 @@ pub fn extract_findings( .collect() }; + // Collect variables released via inner-call-in-arg shape (Go testify + // `require.NoError(t, f.Close())`, `errs = append(errs, f.Close())`, + // JUnit `assertEquals(0, in.read())`). The transfer flips the + // lifecycle to CLOSED on the success branch, but the err-return + // predecessor that ran after the bare acquire (`f, err := os.Open(...)`) + // still merges OPEN at the function-exit join. Mirror the + // `deferred_close_vars` suppression so the OPEN|CLOSED join doesn't + // emit a leak-possible for a resource that has a real release site. + let inner_arg_close_vars: std::collections::HashSet = { + let pairs = crate::cfg_analysis::rules::resource_pairs(lang); + let mut set = std::collections::HashSet::new(); + for (_, ni) in cfg.node_references() { + if ni.in_defer || ni.arg_callees.is_empty() { + continue; + } + let scope = ni.ast.enclosing_func.as_deref(); + for arg_callee in &ni.arg_callees { + let Some(arg_callee_text) = arg_callee.as_deref() else { + continue; + }; + let Some(dot_idx) = arg_callee_text.rfind('.') else { + continue; + }; + let recv_text = &arg_callee_text[..dot_idx]; + if recv_text.contains('.') { + continue; + } + let arg_callee_lower = arg_callee_text.to_ascii_lowercase(); + let matches_release = pairs.iter().any(|p| { + p.release.iter().any(|r| { + let rl = r.to_ascii_lowercase(); + if rl.starts_with('.') { + arg_callee_lower.ends_with(&rl) + } else { + arg_callee_lower.ends_with(&rl) || arg_callee_lower == rl + } + }) + }); + if !matches_release { + continue; + } + if let Some(sym) = interner.get_scoped(scope, recv_text) { + set.insert(sym); + } + } + } + set + }; + for (idx, info) in cfg.node_references() { // File-level Exit (program termination, no enclosing function). let is_file_exit = info.kind == StmtKind::Exit && info.ast.enclosing_func.is_none(); @@ -207,6 +256,14 @@ pub fn extract_findings( continue; } + // Suppress leaks for variables released via inner-call-in-arg + // shape. Mirrors the deferred-close suppression so the + // OPEN-on-err-return / CLOSED-on-success-branch merge at + // function exit does not surface as leak-possible. + if inner_arg_close_vars.contains(&sym) { + continue; + } + // Suppress leaks for variables whose release call lives in a // nested closure (callback / event handler) outside this // body's CFG. Common JS/TS shape: diff --git a/src/taint/ssa_transfer/mod.rs b/src/taint/ssa_transfer/mod.rs index 95857967..af769415 100644 --- a/src/taint/ssa_transfer/mod.rs +++ b/src/taint/ssa_transfer/mod.rs @@ -951,6 +951,7 @@ fn compute_succ_states( kind, true_polarity, transfer.interner, + ssa, ); // Apply validation/predicate to false branch apply_branch_predicates( @@ -959,6 +960,7 @@ fn compute_succ_states( kind, false_polarity, transfer.interner, + ssa, ); // PathFact branch narrowing, language-agnostic. The @@ -1189,6 +1191,7 @@ fn apply_branch_predicates( kind: PredicateKind, polarity: bool, interner: &SymbolInterner, + ssa: &SsaBody, ) { // Validation-like predicates: mark condition vars as validated when polarity is true if matches!( @@ -1206,11 +1209,43 @@ fn apply_branch_predicates( // ShellMetaValidated: inverted polarity, the FALSE branch (no metachar // found) is the validated path; the TRUE branch is the rejection path. + // + // Cap-aware: shell-metachar rejection only proves the value is safe for + // shell-family sinks (it strips `;|&` etc.), not for SQL, path, code-exec, + // SSRF, or other sink classes. Clear `Cap::SHELL_ESCAPE` from the var's + // taint on the validated branch instead of marking it generically + // validated, so non-shell sinks downstream still fire on the residual + // taint caps. if kind == PredicateKind::ShellMetaValidated && !polarity { for var in condition_vars { - if let Some(sym) = interner.get(var) { - state.validated_may.insert(sym); - state.validated_must.insert(sym); + let mut to_clear: SmallVec<[SsaValue; 4]> = SmallVec::new(); + for (val, _) in state.values.iter() { + if let Some(name) = ssa + .value_defs + .get(val.0 as usize) + .and_then(|vd| vd.var_name.as_deref()) + { + if name == var { + to_clear.push(*val); + } + } + } + for val in to_clear { + if let Some(taint) = state.get(val).cloned() { + let new_caps = taint.caps & !Cap::SHELL_ESCAPE; + if new_caps.is_empty() { + state.remove(val); + } else { + state.set( + val, + VarTaint { + caps: new_caps, + origins: taint.origins, + uses_summary: taint.uses_summary, + }, + ); + } + } } } } diff --git a/tests/benchmark/ground_truth.json b/tests/benchmark/ground_truth.json index 08f1a5dc..2b137a27 100644 --- a/tests/benchmark/ground_truth.json +++ b/tests/benchmark/ground_truth.json @@ -6662,8 +6662,8 @@ "expected_category": "Security", "expected_sink_lines": [ [ - 5, - 6 + 10, + 10 ] ], "expected_source_lines": [ @@ -6683,7 +6683,7 @@ "helper-function" ], "disabled": false, - "notes": "Taint flows through helper function to Command" + "notes": "Taint flows through helper function to Command. Engine attributes intra-file helper sinks at the call site (line 10), not the inner Command::new (line 5); see locator-policy comment in src/ast.rs." }, { "case_id": "rs-cmdi-004", @@ -6935,8 +6935,8 @@ "expected_category": "Security", "expected_sink_lines": [ [ - 4, - 4 + 9, + 9 ] ], "expected_source_lines": [ @@ -6950,7 +6950,7 @@ "helper-function" ], "disabled": false, - "notes": "Taint flows through helper function to reqwest::get()" + "notes": "Taint flows through helper function to reqwest::get(). Engine attributes intra-file helper sinks at the call site (line 9), not the inner reqwest::get (line 4); see locator-policy comment in src/ast.rs." }, { "case_id": "rs-safe-001", @@ -7377,8 +7377,8 @@ "expected_category": "Security", "expected_sink_lines": [ [ - 5, - 6 + 12, + 12 ] ], "expected_source_lines": [ @@ -7399,7 +7399,7 @@ "multisink" ], "disabled": false, - "notes": "Helper run_both takes two tainted params and invokes two Command sinks on consecutive lines (5, 6). Phase 3 attribution must land each finding's primary line inside the helper, not at the call site (line 12). NOTE: the summary currently stores one SinkSite per callee so both findings may collapse onto the same helper line today \u2014 the \u00b12 sink range and the call-site assertion guard against regression to caller-attribution regardless." + "notes": "Helper run_both takes two tainted params and invokes two Command sinks on consecutive lines (5, 6). Engine attributes intra-file helper sinks at the call site (line 12), not the inner Command::new (lines 5/6); see locator-policy comment in src/ast.rs." }, { "case_id": "rs-cmdi-cross-001", diff --git a/tests/benchmark/results/latest.json b/tests/benchmark/results/latest.json index 7cb90f21..457098db 100644 --- a/tests/benchmark/results/latest.json +++ b/tests/benchmark/results/latest.json @@ -1,6 +1,6 @@ { "benchmark_version": "1.0", - "timestamp": "2026-05-03T00:57:12Z", + "timestamp": "2026-05-03T01:35:18Z", "scanner_version": "0.6.0", "scanner_config": { "analysis_mode": "Full", @@ -9,7 +9,7 @@ "state_analysis_enabled": true, "worker_threads": 1 }, - "ground_truth_hash": "sha256:4a510fd65a169290c8d44c11f764387f2c3f39d18a92d393839f975a492cd64b", + "ground_truth_hash": "sha256:8b8b31820b3a2cd0a28ded8109370093132a11074bf28b9c373192d271ee9f09", "corpus_size": 507, "cases_run": 506, "cases_skipped": 1, @@ -2799,19 +2799,13 @@ "language": "go", "vuln_class": "safe", "is_vulnerable": false, - "outcome_file_level": "FP", - "outcome_rule_level": "FP", + "outcome_file_level": "TN", + "outcome_rule_level": "TN", "outcome_location_level": null, "matched_rule_ids": [], - "unexpected_rule_ids": [ - "state-resource-leak-possible", - "state-resource-leak-possible" - ], - "all_finding_ids": [ - "state-resource-leak-possible", - "state-resource-leak-possible" - ], - "security_finding_count": 2, + "unexpected_rule_ids": [], + "all_finding_ids": [], + "security_finding_count": 0, "non_security_finding_count": 0 }, { @@ -6736,7 +6730,7 @@ "is_vulnerable": true, "outcome_file_level": "TP", "outcome_rule_level": "TP", - "outcome_location_level": "FN", + "outcome_location_level": "TP", "matched_rule_ids": [ "taint-unsanitised-flow (source 9:17)" ], @@ -6864,7 +6858,7 @@ "is_vulnerable": true, "outcome_file_level": "TP", "outcome_rule_level": "TP", - "outcome_location_level": "FN", + "outcome_location_level": "TP", "matched_rule_ids": [ "taint-unsanitised-flow (source 11:13)" ], @@ -7400,17 +7394,20 @@ "language": "rust", "vuln_class": "sqli", "is_vulnerable": true, - "outcome_file_level": "FN", - "outcome_rule_level": "FN", - "outcome_location_level": "FN", - "matched_rule_ids": [], + "outcome_file_level": "TP", + "outcome_rule_level": "TP", + "outcome_location_level": "TP", + "matched_rule_ids": [ + "taint-unsanitised-flow (source 5:19)" + ], "unexpected_rule_ids": [], "all_finding_ids": [ "rs.quality.unwrap", "rs.quality.unwrap", - "rs.quality.unwrap" + "rs.quality.unwrap", + "taint-unsanitised-flow (source 5:19)" ], - "security_finding_count": 0, + "security_finding_count": 1, "non_security_finding_count": 3 }, { @@ -7441,7 +7438,7 @@ "is_vulnerable": true, "outcome_file_level": "TP", "outcome_rule_level": "TP", - "outcome_location_level": "FN", + "outcome_location_level": "TP", "matched_rule_ids": [ "taint-unsanitised-flow (source 8:18)" ], @@ -9046,22 +9043,22 @@ } ], "aggregate_file_level": { - "tp": 249, - "fp": 1, - "fn_": 1, - "tn": 255, - "precision": 0.996, - "recall": 0.996, - "f1": 0.996 + "tp": 250, + "fp": 0, + "fn_": 0, + "tn": 256, + "precision": 1.0, + "recall": 1.0, + "f1": 1.0 }, "aggregate_rule_level": { - "tp": 249, - "fp": 1, - "fn_": 1, - "tn": 255, - "precision": 0.996, - "recall": 0.996, - "f1": 0.996 + "tp": 250, + "fp": 0, + "fn_": 0, + "tn": 256, + "precision": 1.0, + "recall": 1.0, + "f1": 1.0 }, "by_language": { "c": { @@ -9084,12 +9081,12 @@ }, "go": { "tp": 27, - "fp": 1, + "fp": 0, "fn_": 0, - "tn": 31, - "precision": 0.9642857142857143, + "tn": 32, + "precision": 1.0, "recall": 1.0, - "f1": 0.9818181818181818 + "f1": 1.0 }, "java": { "tp": 21, @@ -9137,13 +9134,13 @@ "f1": 1.0 }, "rust": { - "tp": 36, + "tp": 37, "fp": 0, - "fn_": 1, + "fn_": 0, "tn": 41, "precision": 1.0, - "recall": 0.972972972972973, - "f1": 0.9863013698630138 + "recall": 1.0, + "f1": 1.0 }, "typescript": { "tp": 35, @@ -9293,12 +9290,12 @@ }, "safe": { "tp": 0, - "fp": 1, + "fp": 0, "fn_": 0, - "tn": 255, - "precision": 0.0, + "tn": 256, + "precision": 1.0, "recall": 1.0, - "f1": 0.0 + "f1": 1.0 }, "secrets": { "tp": 1, @@ -9319,13 +9316,13 @@ "f1": 1.0 }, "sqli": { - "tp": 30, + "tp": 31, "fp": 0, - "fn_": 1, + "fn_": 0, "tn": 0, "precision": 1.0, - "recall": 0.967741935483871, - "f1": 0.9836065573770492 + "recall": 1.0, + "f1": 1.0 }, "ssrf": { "tp": 30,