From 946cb6a9bcb3e26d9a26aa54812b861188044c14 Mon Sep 17 00:00:00 2001 From: pitboss Date: Wed, 13 May 2026 14:58:49 -0400 Subject: [PATCH] [pitboss] sweep after phase 01: 3 deferred items resolved --- src/commands/scan.rs | 46 ++++ src/dynamic/spec.rs | 409 ++++++++++++++++++++++++---- src/dynamic/verify.rs | 21 +- tests/dynamic_parity.rs | 2 + tests/spec_derivation_strategies.rs | 34 +++ 5 files changed, 455 insertions(+), 57 deletions(-) diff --git a/src/commands/scan.rs b/src/commands/scan.rs index 0f989d17..74a14c17 100644 --- a/src/commands/scan.rs +++ b/src/commands/scan.rs @@ -333,6 +333,46 @@ pub(crate) fn is_preview_tier_path(path: &Path) -> bool { ) } +/// Load every persisted `FuncSummary` for `project` from `db_path` and fold +/// them into a [`GlobalSummaries`]. Best-effort: any failure (pool init, +/// summary load) logs and returns `None`, leaving dynamic verification on +/// the no-summaries code path. +/// +/// Called once at the top of the verify loop so per-finding spec derivation +/// hits an in-memory index, not SQLite. The index is wrapped in `Arc` so +/// `VerifyOptions` can be cloned cheaply if a caller threads it onto +/// multiple findings concurrently in the future. +#[cfg(feature = "dynamic")] +fn load_verify_summaries( + project: &str, + db_path: &Path, + scan_root: &Path, +) -> Option> { + let pool = match Indexer::init(db_path) { + Ok(p) => p, + Err(e) => { + tracing::debug!("verify: indexer init failed; summary-driven spec derivation off: {e}"); + return None; + } + }; + let idx = match Indexer::from_pool(project, &pool) { + Ok(i) => i, + Err(e) => { + tracing::debug!("verify: indexer open failed; summary-driven spec derivation off: {e}"); + return None; + } + }; + let all = match idx.load_all_summaries() { + Ok(v) => v, + Err(e) => { + tracing::debug!("verify: load_all_summaries failed; spec derivation off: {e}"); + return None; + } + }; + let root_str = scan_root.to_string_lossy().into_owned(); + Some(Arc::new(crate::summary::merge_summaries(all, Some(&root_str)))) +} + /// Entry point called by the CLI. #[allow(clippy::too_many_arguments)] pub fn handle( @@ -483,6 +523,12 @@ pub fn handle( // When index_mode is Off, the DB is never created, so no cache. if index_mode != IndexMode::Off && db_path.exists() { opts.db_path = Some(db_path.clone()); + // Preload cross-file summaries once so the spec-derivation + // pipeline can resolve the enclosing function's `FuncSummary` + // (strategy 3) and its static `entry_kind` (strategy 4) + // without re-hitting SQLite per finding. Best-effort: a load + // failure logs and falls through to the substring heuristics. + opts.summaries = load_verify_summaries(&project_name, &db_path, &scan_path); } for diag in &mut diags { let result = crate::dynamic::verify::verify_finding(diag, &opts); diff --git a/src/dynamic/spec.rs b/src/dynamic/spec.rs index 9d5bc45c..a71329e7 100644 --- a/src/dynamic/spec.rs +++ b/src/dynamic/spec.rs @@ -19,9 +19,9 @@ use crate::commands::scan::Diag; use crate::dynamic::corpus::CORPUS_VERSION; -use crate::evidence::{Confidence, FlowStep, FlowStepKind, UnsupportedReason}; +use crate::evidence::{Confidence, FlowStepKind, UnsupportedReason}; use crate::labels::Cap; -use crate::summary::FuncSummary; +use crate::summary::{FuncSummary, GlobalSummaries}; use crate::symbol::Lang; use serde::{Deserialize, Serialize}; use std::path::Path; @@ -157,6 +157,32 @@ impl HarnessSpec { pub fn from_finding_opts( diag: &Diag, verify_all_confidence: bool, + ) -> Result { + Self::from_finding_with_summaries(diag, verify_all_confidence, None) + } + + /// Strategy-aware constructor that consults `summaries` when present. + /// + /// When `summaries` is `Some`, strategy 3 ([`SpecDerivationStrategy::FromFuncSummaryWalk`]) + /// looks up the enclosing function's [`FuncSummary`] by `(lang, name, file)` + /// — derived from `evidence.flow_steps[*].function` — and pulls a real + /// `tainted_sink_params` slot rather than no-op'ing as it does in the + /// `None` path. Strategy 4 additionally upgrades the + /// `.http.` / `.cli.` substring heuristic by consulting + /// [`FuncSummary::entry_kind`] on the resolved summary; an HTTP-shaped + /// entry-kind variant becomes `EntryKind::HttpRoute` regardless of the + /// rule id, and the legacy substring fallback runs only when no summary + /// is found. + /// + /// The `entry_name` populated by strategies 2 and 4 is also resolved + /// from `evidence.flow_steps[*].function` (the authoritative enclosing + /// function annotation set by the SSA taint engine) rather than from + /// `evidence.sink.snippet` / `evidence.source.snippet`, which carry + /// shortened callee text — never the enclosing-function name. + pub fn from_finding_with_summaries( + diag: &Diag, + verify_all_confidence: bool, + summaries: Option<&GlobalSummaries>, ) -> Result { if !verify_all_confidence { match diag.confidence { @@ -171,13 +197,13 @@ impl HarnessSpec { if let Some(spec) = derive_from_flow_steps(diag, evidence) { return Ok(spec); } - if let Some(spec) = derive_from_rule_namespace(diag, evidence) { + if let Some(spec) = derive_from_rule_namespace_with(diag, evidence, summaries) { return Ok(spec); } - if let Some(spec) = derive_from_func_summary(diag, evidence, None) { + if let Some(spec) = derive_from_func_summary_auto(diag, evidence, summaries) { return Ok(spec); } - if let Some(spec) = derive_from_callgraph_entry(diag, evidence) { + if let Some(spec) = derive_from_callgraph_entry_with(diag, evidence, summaries) { return Ok(spec); } @@ -248,6 +274,20 @@ fn derive_from_flow_steps(diag: &Diag, evidence: &crate::evidence::Evidence) -> pub fn derive_from_rule_namespace( diag: &Diag, evidence: &crate::evidence::Evidence, +) -> Option { + derive_from_rule_namespace_with(diag, evidence, None) +} + +/// Like [`derive_from_rule_namespace`], but consults `summaries` to recover the +/// enclosing function name when `evidence.flow_steps` does not carry one. +/// +/// When neither flow_steps nor the summary index resolve a name, the entry +/// name falls back to `""` (kept stable across runs so spec hashes +/// remain reproducible). +pub fn derive_from_rule_namespace_with( + diag: &Diag, + evidence: &crate::evidence::Evidence, + summaries: Option<&GlobalSummaries>, ) -> Option { let mut iter = diag.id.split('.'); let lang_prefix = iter.next()?; @@ -284,11 +324,7 @@ pub fn derive_from_rule_namespace( } } - let entry_function = evidence - .sink - .as_ref() - .and_then(|s| s.snippet.clone()) - .filter(|s| !s.is_empty()) + let entry_function = resolve_enclosing_function(diag, evidence, summaries, lang) .unwrap_or_else(|| "".to_owned()); Some(finalize_spec( @@ -353,6 +389,26 @@ pub fn derive_from_func_summary( Some(spec) } +// ── Strategy 3 (auto): locate the enclosing FuncSummary in `summaries` ─────── + +/// Resolve the enclosing function's [`FuncSummary`] from `summaries` and +/// delegate to [`derive_from_func_summary`]. +/// +/// Returns `None` when `summaries` is `None`, when the enclosing function +/// name cannot be recovered from `evidence.flow_steps`, or when no summary +/// matches `(lang, name, file)`. +fn derive_from_func_summary_auto( + diag: &Diag, + evidence: &crate::evidence::Evidence, + summaries: Option<&GlobalSummaries>, +) -> Option { + let summaries = summaries?; + let lang = lang_from_path(&diag.path)?; + let name = enclosing_function_from_flow_steps(evidence)?; + let summary = find_summary_by_path(summaries, lang, &name, &diag.path)?; + derive_from_func_summary(diag, evidence, Some(summary)) +} + // ── Strategy 4: callgraph entry-kind ───────────────────────────────────────── /// Build a spec by treating the sink's enclosing function as an entry point @@ -367,26 +423,46 @@ pub fn derive_from_callgraph_entry( diag: &Diag, evidence: &crate::evidence::Evidence, ) -> Option { - let id = &diag.id; - let entry_kind = if id.contains(".http.") { - EntryKind::HttpRoute - } else if id.contains(".cli.") { - EntryKind::CliSubcommand - } else { - return None; - }; + derive_from_callgraph_entry_with(diag, evidence, None) +} +/// Like [`derive_from_callgraph_entry`], but prefers +/// [`FuncSummary::entry_kind`] over the `.http.` / `.cli.` rule-id substring +/// heuristic when a matching summary is available in `summaries`. +/// +/// An HTTP-shaped [`crate::entry_points::EntryKind`] variant on the enclosing +/// function's summary becomes [`EntryKind::HttpRoute`] regardless of the rule +/// id. The substring fallback runs only when no summary entry-kind is found +/// — e.g. for AST-only findings with no taint-engine flow_steps. +pub fn derive_from_callgraph_entry_with( + diag: &Diag, + evidence: &crate::evidence::Evidence, + summaries: Option<&GlobalSummaries>, +) -> Option { let lang = lang_from_path(&diag.path)?; let expected_cap = Cap::from_bits_truncate(evidence.sink_caps); if expected_cap.is_empty() { return None; } - let entry_function = evidence - .source - .as_ref() - .and_then(|s| s.snippet.clone()) - .filter(|s| !s.is_empty()) + // Step 1: try summary-based classification. + let summary_kind = enclosing_function_from_flow_steps(evidence) + .and_then(|name| find_summary_by_path(summaries?, lang, &name, &diag.path)) + .and_then(|s| s.entry_kind.as_ref().map(entry_kind_from_summary)); + + // Step 2: fall back to rule-id substring heuristic (legacy). + let id = &diag.id; + let id_kind = if id.contains(".http.") { + Some(EntryKind::HttpRoute) + } else if id.contains(".cli.") { + Some(EntryKind::CliSubcommand) + } else { + None + }; + + let entry_kind = summary_kind.or(id_kind)?; + + let entry_function = resolve_enclosing_function(diag, evidence, summaries, lang) .unwrap_or_else(|| "".to_owned()); let mut spec = finalize_spec( @@ -404,6 +480,16 @@ pub fn derive_from_callgraph_entry( Some(spec) } +/// Map a static-analysis [`crate::entry_points::EntryKind`] (route shape) onto +/// the dynamic-side [`EntryKind`] taxonomy. Every current variant of the +/// static enum describes an HTTP route handler — no CLI / library-API +/// variants exist statically — so they all collapse to +/// [`EntryKind::HttpRoute`]. When the static taxonomy grows non-HTTP variants +/// (e.g. clap subcommand detection), extend this match to preserve them. +fn entry_kind_from_summary(_kind: &crate::entry_points::EntryKind) -> EntryKind { + EntryKind::HttpRoute +} + // ── Helpers ────────────────────────────────────────────────────────────────── fn lang_from_path(path: &str) -> Option { @@ -411,6 +497,76 @@ fn lang_from_path(path: &str) -> Option { Lang::from_extension(ext) } +/// Return the first non-empty `function` annotation found on any flow step. +/// +/// Strategy 1 ([`derive_from_flow_steps`]) consumes the `Source`-step +/// annotation directly; strategies 2 and 4 fall back to *any* step with a +/// `function` set because the SSA engine annotates sink and assignment steps +/// as well. The annotation is authoritative — it carries the enclosing +/// function as resolved against the CFG — so it is preferred over the call +/// snippet, which carries shortened callee text. +fn enclosing_function_from_flow_steps(evidence: &crate::evidence::Evidence) -> Option { + evidence + .flow_steps + .iter() + .find_map(|s| s.function.clone().filter(|f| !f.is_empty())) +} + +/// Resolve the enclosing function name for the diag using, in order: +/// 1. any `flow_steps[*].function` annotation (always authoritative), +/// 2. a [`GlobalSummaries`] lookup when `summaries` is `Some` and exactly one +/// function in the diag's file shares the rule-language tag (last-resort +/// disambiguation when flow_steps is empty), +/// 3. `None` (callers default to `""`). +fn resolve_enclosing_function( + diag: &Diag, + evidence: &crate::evidence::Evidence, + summaries: Option<&GlobalSummaries>, + lang: Lang, +) -> Option { + if let Some(name) = enclosing_function_from_flow_steps(evidence) { + return Some(name); + } + let summaries = summaries?; + let mut hits = summaries + .iter() + .filter(|(k, _)| k.lang == lang) + .filter(|(_, s)| paths_match(&s.file_path, &diag.path)); + let first = hits.next()?; + if hits.next().is_some() { + // Ambiguous: multiple functions in this file; refuse to guess. + return None; + } + Some(first.1.name.clone()) +} + +/// Lookup a `FuncSummary` by `(lang, name)` and filter to one whose +/// `file_path` matches `diag_path`. Returns `None` on no match. +fn find_summary_by_path<'a>( + summaries: &'a GlobalSummaries, + lang: Lang, + name: &str, + diag_path: &str, +) -> Option<&'a FuncSummary> { + summaries + .lookup_same_lang(lang, name) + .into_iter() + .find(|(_, s)| paths_match(&s.file_path, diag_path)) + .map(|(_, s)| s) +} + +/// Loose path comparison that tolerates absolute / project-relative drift. +/// +/// `FuncSummary::file_path` may be stored relative to the project root while +/// `Diag::path` may be canonicalised. A suffix match is permissive enough to +/// link them without dragging the canonicaliser into the verify hot path. +fn paths_match(summary_path: &str, diag_path: &str) -> bool { + if summary_path == diag_path { + return true; + } + summary_path.ends_with(diag_path) || diag_path.ends_with(summary_path) +} + /// Map the first segment of a Nyx rule id (`py`, `js`, `ts`, `java`, …) to a /// [`Lang`]. Returns `None` for non-language prefixes (`taint-`, `cfg-`, /// `state-`). @@ -485,39 +641,6 @@ fn finalize_spec( spec } -/// Walk a synthetic single-step flow to satisfy callers that expect a `FlowStep` -/// vector. Used by strategies 2–4 when they need to materialise a flow for -/// downstream consumers. -#[allow(dead_code)] -pub(crate) fn synthetic_flow(diag: &Diag, function: &str) -> Vec { - vec![ - FlowStep { - step: 1, - kind: FlowStepKind::Source, - file: diag.path.clone(), - line: diag.line as u32, - col: diag.col as u32, - snippet: None, - variable: None, - callee: None, - function: Some(function.to_owned()), - is_cross_file: false, - }, - FlowStep { - step: 2, - kind: FlowStepKind::Sink, - file: diag.path.clone(), - line: diag.line as u32, - col: diag.col as u32, - snippet: None, - variable: None, - callee: None, - function: Some(function.to_owned()), - is_cross_file: false, - }, - ] -} - /// Walk `flow_steps` and return the entry point: the enclosing function of /// the first `Source` step that has a function annotation. This is the /// outermost callable that receives the tainted input. @@ -919,6 +1042,22 @@ mod tests { assert_eq!(spec.expected_cap, Cap::DESERIALIZE); } + #[test] + fn rule_namespace_strategy_pins_rs_auth_mapping() { + // Regression: `rs.auth.*` must map to `Lang::Rust` + `Cap::UNAUTHORIZED_ID`. + // The plan calls out this exemplar but had no test coverage. + let diag = diag_with_rule_id( + "rs.auth.missing_ownership_check.taint", + "src/handler.rs", + 0, + ); + let spec = HarnessSpec::from_finding(&diag).unwrap(); + assert_eq!(spec.derivation, SpecDerivationStrategy::FromRuleNamespace); + assert_eq!(spec.lang, Lang::Rust); + assert_eq!(spec.expected_cap, Cap::UNAUTHORIZED_ID); + assert_eq!(spec.toolchain_id, "rust-stable"); + } + #[test] fn rule_namespace_strategy_rejects_path_lang_mismatch() { use crate::labels::Cap; @@ -1039,4 +1178,162 @@ mod tests { let spec = HarnessSpec::from_finding(&diag).unwrap(); assert_eq!(spec.derivation, SpecDerivationStrategy::FromFlowSteps); } + + // ── Phase 01 follow-ups: GlobalSummaries threading ─────────────────────── + + fn sink_only_step_with_function(file: &str, function: &str) -> crate::evidence::FlowStep { + crate::evidence::FlowStep { + step: 1, + kind: FlowStepKind::Sink, + file: file.into(), + line: 6, + col: 0, + snippet: Some("os.system".into()), + variable: None, + callee: Some("os.system".into()), + function: Some(function.into()), + is_cross_file: false, + } + } + + fn build_summary(name: &str, file: &str, lang: &str, sink_caps: u32, tainted_params: Vec, entry_kind: Option) -> FuncSummary { + FuncSummary { + name: name.into(), + file_path: file.into(), + lang: lang.into(), + param_count: 1, + param_names: vec!["req".into()], + source_caps: 0, + sanitizer_caps: 0, + sink_caps, + propagating_params: vec![], + propagates_taint: false, + tainted_sink_params: tainted_params, + param_to_sink: vec![], + callees: vec![], + container: String::new(), + disambig: None, + kind: Default::default(), + module_path: None, + rust_use_map: None, + rust_wildcards: None, + hierarchy_edges: vec![], + entry_kind, + } + } + + #[test] + fn entry_name_uses_flow_steps_function_not_snippet() { + // Strategy 2 was previously populating `entry_name` from the sink's + // *snippet* (callee text like `"os.system"`). The fix prefers the + // `function` annotation on any flow step, which carries the + // enclosing function name. + use crate::labels::Cap; + let ev = Evidence { + flow_steps: vec![sink_only_step_with_function( + "app/handler.py", + "do_request", + )], + sink_caps: Cap::SHELL_ESCAPE.bits(), + ..Default::default() + }; + let diag = crate::commands::scan::Diag { + id: "py.cmdi.os_system".into(), + path: "app/handler.py".into(), + line: 6, + confidence: Some(Confidence::High), + evidence: Some(ev.clone()), + ..Default::default() + }; + let spec = derive_from_rule_namespace(&diag, &ev).expect("must derive"); + assert_eq!(spec.entry_name, "do_request"); + // The callee text never leaks into the entry name. + assert!(!spec.entry_name.contains("os.system")); + } + + #[test] + fn func_summary_auto_resolves_via_global_summaries() { + // Strategy 3 with `summaries = Some(_)`: the enclosing function + // name comes from the flow_steps annotation, the summary is found + // by `(lang, name)` lookup filtered by file_path, and the spec + // picks `tainted_sink_params[0]` as the payload slot. + use crate::labels::Cap; + use crate::symbol::FuncKey; + let mut gs = GlobalSummaries::new(); + let summary = build_summary( + "do_request", + "app/handler.py", + "python", + Cap::SHELL_ESCAPE.bits(), + vec![0], + None, + ); + let key = FuncKey::new_function(Lang::Python, "app/handler.py", "do_request", Some(1)); + gs.insert(key, summary); + + let ev = Evidence { + flow_steps: vec![sink_only_step_with_function( + "app/handler.py", + "do_request", + )], + sink_caps: Cap::SHELL_ESCAPE.bits(), + ..Default::default() + }; + let diag = crate::commands::scan::Diag { + id: "taint-unsanitised-flow".into(), + path: "app/handler.py".into(), + line: 6, + confidence: Some(Confidence::High), + evidence: Some(ev), + ..Default::default() + }; + let spec = HarnessSpec::from_finding_with_summaries(&diag, false, Some(&gs)) + .expect("summary-driven derivation must succeed"); + assert_eq!(spec.derivation, SpecDerivationStrategy::FromFuncSummaryWalk); + assert!(matches!(spec.payload_slot, PayloadSlot::Param(0))); + assert_eq!(spec.entry_name, "do_request"); + } + + #[test] + fn callgraph_entry_uses_summary_entry_kind_over_rule_id() { + // Strategy 4 with summaries: a non-http/non-cli rule id still wins + // HttpRoute classification when the enclosing function's + // `entry_kind` is set on its summary. + use crate::entry_points::{EntryKind as StaticEntryKind, HttpMethod}; + use crate::labels::Cap; + use crate::symbol::FuncKey; + let mut gs = GlobalSummaries::new(); + let summary = build_summary( + "index", + "app/views.py", + "python", + Cap::SSRF.bits(), + vec![], + Some(StaticEntryKind::FlaskRoute { method: HttpMethod::GET }), + ); + let key = FuncKey::new_function(Lang::Python, "app/views.py", "index", Some(1)); + gs.insert(key, summary); + + let ev = Evidence { + flow_steps: vec![sink_only_step_with_function("app/views.py", "index")], + sink_caps: Cap::SSRF.bits(), + ..Default::default() + }; + let diag = crate::commands::scan::Diag { + // Note: the rule id has no `.http.` or `.cli.` segment — the + // legacy substring heuristic would bail. Only the summary + // entry_kind unlocks HttpRoute classification. + id: "taint-unsanitised-flow".into(), + path: "app/views.py".into(), + line: 6, + confidence: Some(Confidence::High), + evidence: Some(ev.clone()), + ..Default::default() + }; + let spec = derive_from_callgraph_entry_with(&diag, &ev, Some(&gs)) + .expect("entry-kind-driven derivation must succeed"); + assert_eq!(spec.derivation, SpecDerivationStrategy::FromCallgraphEntry); + assert!(matches!(spec.entry_kind, EntryKind::HttpRoute)); + assert_eq!(spec.entry_name, "index"); + } } diff --git a/src/dynamic/verify.rs b/src/dynamic/verify.rs index afd1bc01..a4dfad1b 100644 --- a/src/dynamic/verify.rs +++ b/src/dynamic/verify.rs @@ -12,8 +12,10 @@ use crate::dynamic::spec::{HarnessSpec, SPEC_FORMAT_VERSION}; use crate::dynamic::telemetry::{self, TelemetryEvent}; use crate::dynamic::toolchain; use crate::evidence::{InconclusiveReason, SpecDerivationStrategy, UnsupportedReason}; +use crate::summary::GlobalSummaries; use crate::utils::config::Config; use std::path::Path; +use std::sync::Arc; use std::time::Instant; #[derive(Debug, Clone, Default)] @@ -27,6 +29,18 @@ pub struct VerifyOptions { /// When `true`, skip the `Confidence >= Medium` gate and attempt /// verification on all findings. Corresponds to `--verify-all-confidence`. pub verify_all_confidence: bool, + /// Cross-file function summaries shared by every finding in a scan. + /// + /// Threaded into [`HarnessSpec::from_finding_with_summaries`] so the + /// summary-walk strategy and the entry-kind-aware callgraph strategy + /// can resolve the diag's enclosing function against the same + /// [`GlobalSummaries`] index the taint engine used. Held by `Arc` so the + /// caller (e.g. the scan command) can build the index once and reuse it + /// across the per-finding loop without cloning. + /// + /// `None` disables the summary-driven derivation paths; strategy 3 is a + /// no-op and strategy 4 falls back to the rule-id substring heuristic. + pub summaries: Option>, } impl VerifyOptions { @@ -46,6 +60,7 @@ impl VerifyOptions { project_root: None, db_path: None, verify_all_confidence: config.scanner.verify_all_confidence, + summaries: None, } } } @@ -243,7 +258,11 @@ fn derivation_failure_hint(diag: &Diag) -> String { pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult { let finding_id = format!("{:016x}", diag.stable_hash); - let spec = match HarnessSpec::from_finding_opts(diag, opts.verify_all_confidence) { + let spec = match HarnessSpec::from_finding_with_summaries( + diag, + opts.verify_all_confidence, + opts.summaries.as_deref(), + ) { Ok(s) => s, Err(reason) => { return spec_derivation_failed_verdict(finding_id, diag, reason); diff --git a/tests/dynamic_parity.rs b/tests/dynamic_parity.rs index fe861a01..a1a13453 100644 --- a/tests/dynamic_parity.rs +++ b/tests/dynamic_parity.rs @@ -105,6 +105,7 @@ mod parity_tests { project_root: None, db_path: None, verify_all_confidence: false, + summaries: None, } } @@ -118,6 +119,7 @@ mod parity_tests { project_root: None, db_path: None, verify_all_confidence: false, + summaries: None, } } diff --git a/tests/spec_derivation_strategies.rs b/tests/spec_derivation_strategies.rs index e399315c..9c7eeec2 100644 --- a/tests/spec_derivation_strategies.rs +++ b/tests/spec_derivation_strategies.rs @@ -144,6 +144,40 @@ mod spec_strategies { assert_eq!(spec.expected_cap, Cap::DESERIALIZE); } + #[test] + fn from_rule_namespace_pins_rs_auth_to_unauthorized_id() { + // Regression: `rs.auth.missing_ownership_check.taint` must derive a + // Rust + UNAUTHORIZED_ID spec via the rule-namespace strategy. The + // phase 01 deliverables called out `rs.auth.*` as an exemplar but + // shipped without a regression test pinning the `auth → UNAUTHORIZED_ID` + // mapping. + let mut diag = make_diag( + "rs.auth.missing_ownership_check.taint", + "src/handler.rs", + 14, + ); + let mut ev = Evidence::default(); + ev.sink_caps = Cap::UNAUTHORIZED_ID.bits(); + diag.evidence = Some(ev.clone()); + + let spec = derive_from_rule_namespace(&diag, &ev) + .expect("rs.auth rule namespace must derive a spec"); + assert_eq!(spec.derivation, SpecDerivationStrategy::FromRuleNamespace); + assert_eq!(spec.lang, nyx_scanner::symbol::Lang::Rust); + assert_eq!(spec.expected_cap, Cap::UNAUTHORIZED_ID); + assert_eq!(spec.sink_line, 14); + assert_eq!(spec.toolchain_id, "rust-stable"); + + // End-to-end through `HarnessSpec::from_finding` (no flow_steps). + let spec_end_to_end = + HarnessSpec::from_finding(&diag).expect("end-to-end derivation must succeed"); + assert_eq!( + spec_end_to_end.derivation, + SpecDerivationStrategy::FromRuleNamespace + ); + assert_eq!(spec_end_to_end.expected_cap, Cap::UNAUTHORIZED_ID); + } + // ── Strategy 3: FromFuncSummaryWalk ────────────────────────────────────── #[test]