[pitboss/grind] deferred session-0002 (20260516T052512Z-20f8)

This commit is contained in:
pitboss 2026-05-16 01:46:35 -05:00
parent 7a2f82c2ab
commit 282acddbbf
11 changed files with 214 additions and 45 deletions

View file

@ -3958,3 +3958,42 @@ fn rhs_array_literal_elements_recognise_per_language_shapes() {
// Non-array-shape node returns empty (defensive guard). // Non-array-shape node returns empty (defensive guard).
assert!(run("javascript", b"const x = tainted;\n", &["identifier"]).is_empty()); assert!(run("javascript", b"const x = tainted;\n", &["identifier"]).is_empty());
} }
/// `CalleeSite.span` should carry the 1-based (line, col) of each call's
/// node span so downstream consumers (surface map, datastore/external
/// detectors) can render real coordinates instead of `line: 0`.
///
/// Parses a small JavaScript fixture, looks up the summary recorded for
/// `outer`, and checks the span stored on two of its callee sites.
#[test]
fn callee_site_span_carries_line_and_column() {
// Two calls on two different lines. The leading newline puts
// line 1 at the blank line, so `function outer(...)` is line 2,
// `helper(x, y);` is line 3 and `obj.method(x);` is line 4.
let src = b"
function outer(obj, x, y) {
helper(x, y);
obj.method(x);
}
";
let ts_lang = Language::from(tree_sitter_javascript::LANGUAGE);
let file_cfg = parse_to_file_cfg(src, "javascript", ts_lang);
// Pick out the summary whose key is named `outer`; the key itself is
// not needed for the assertions below.
let (_key, outer) = file_cfg
.summaries
.iter()
.find(|(k, _)| k.name == "outer")
.expect("outer summary should exist");
// Plain function call: matched by its exact recorded name.
let helper_site = outer
.callees
.iter()
.find(|c| c.name == "helper")
.expect("helper call should be recorded");
let (line, col) = helper_site.span.expect("span populated at CFG-build time");
assert_eq!(line, 3, "helper(...) sits on the 3rd source line");
// Only a lower bound is asserted for the column, not an exact value.
assert!(col >= 5, "indented 4 spaces — column is 1-based and > 4");
// Member call: matched by suffix rather than by exact name.
// NOTE(review): presumably the recorded name may include the receiver
// (e.g. `obj.method`) — confirm against the CFG builder.
let method_site = outer
.callees
.iter()
.find(|c| c.name.ends_with("method"))
.expect("method call should be recorded");
// Only the line is checked for the member call; the column is ignored.
let (mline, _) = method_site.span.expect("method span populated");
assert_eq!(mline, 4, "obj.method(x) on line 4");
}

View file

@ -5664,7 +5664,7 @@ pub(super) fn build_sub<'a>(
for idx in fn_graph.node_indices() { for idx in fn_graph.node_indices() {
let info = &fn_graph[idx]; let info = &fn_graph[idx];
if let Some(callee) = &info.call.callee { if let Some(callee) = &info.call.callee {
let site = build_callee_site(callee, info, lang); let site = build_callee_site(callee, info, lang, code);
// Dedup by (name, arity, receiver, qualifier, ordinal). A // Dedup by (name, arity, receiver, qualifier, ordinal). A
// single function may legitimately contain multiple distinct // single function may legitimately contain multiple distinct
// calls to the same callee (e.g. different ordinals or // calls to the same callee (e.g. different ordinals or
@ -6632,7 +6632,12 @@ fn apply_gated_label_rules(
/// remains the single segment immediately before the leaf (back-compat /// remains the single segment immediately before the leaf (back-compat
/// with the legacy heuristic). For method calls the qualifier is /// with the legacy heuristic). For method calls the qualifier is
/// redundant with `receiver` and is left `None`. /// redundant with `receiver` and is left `None`.
fn build_callee_site(callee: &str, info: &NodeInfo, lang: &str) -> crate::summary::CalleeSite { fn build_callee_site(
callee: &str,
info: &NodeInfo,
lang: &str,
code: &[u8],
) -> crate::summary::CalleeSite {
use crate::summary::CalleeSite; use crate::summary::CalleeSite;
let receiver = info.call.receiver.clone(); let receiver = info.call.receiver.clone();
@ -6661,15 +6666,39 @@ fn build_callee_site(callee: &str, info: &NodeInfo, lang: &str) -> crate::summar
None None
}; };
let span = callee_span_line_col(code, info.ast.span.0);
CalleeSite { CalleeSite {
name: callee.to_string(), name: callee.to_string(),
arity, arity,
receiver, receiver,
qualifier, qualifier,
ordinal: info.call.call_ordinal, ordinal: info.call.call_ordinal,
span,
} }
} }
/// Resolve a byte `offset` within `code` to a 1-based `(line, col)` pair.
///
/// * `line` is one more than the number of `\n` bytes that precede the
///   offset.
/// * `col` is one more than the number of bytes between the start of that
///   line and the offset, so it is a byte column, not a character column.
///
/// An `offset` beyond the end of `code` is clamped to `code.len()`, which
/// yields the coordinate just past the final byte instead of failing.
/// `None` is returned only when `code` is empty, because there is then no
/// source text to resolve against.
fn callee_span_line_col(code: &[u8], offset: usize) -> Option<(u32, u32)> {
    if code.is_empty() {
        return None;
    }

    let end = offset.min(code.len());

    // One forward scan over the bytes before `end`, tracking how many
    // newlines were seen and where the current line begins.
    let mut newline_count = 0usize;
    let mut line_start = 0usize;
    for (idx, &byte) in code[..end].iter().enumerate() {
        if byte == b'\n' {
            newline_count += 1;
            line_start = idx + 1;
        }
    }

    let line = newline_count as u32 + 1;
    let col = (end - line_start) as u32 + 1;
    Some((line, col))
}
/// Convert the graph-local `FuncSummaries` into serialisable [`FuncSummary`] /// Convert the graph-local `FuncSummaries` into serialisable [`FuncSummary`]
/// values suitable for cross-file persistence. /// values suitable for cross-file persistence.
pub(crate) fn export_summaries( pub(crate) fn export_summaries(

View file

@ -277,6 +277,12 @@ pub enum PolicyDecision {
/// Stable rule identifier — one of [`DenyRule::CREDENTIALS`], /// Stable rule identifier — one of [`DenyRule::CREDENTIALS`],
/// [`DenyRule::PRIVATE_KEY`], [`DenyRule::PRODUCTION_ENDPOINT`]. /// [`DenyRule::PRIVATE_KEY`], [`DenyRule::PRODUCTION_ENDPOINT`].
rule: &'static str, rule: &'static str,
/// Logical name of the diag field that produced the matched text
/// (e.g. `path`, `message`, `evidence.notes[2]`,
/// `flow_steps[1].snippet`). Lets operators triage *where* the
/// rule fired without having to re-derive the match from the
/// scrubbed excerpt alone.
field: String,
/// Short text excerpt (max 120 chars, scrubbed via /// Short text excerpt (max 120 chars, scrubbed via
/// [`Scrubber::scrub_string`]) of the offending field so an /// [`Scrubber::scrub_string`]) of the offending field so an
/// operator can identify *why* the deny fired without having to /// operator can identify *why* the deny fired without having to
@ -377,10 +383,11 @@ const PROD_ENDPOINT_REGEXES: &[&str] = &[
/// the leak shape. /// the leak shape.
pub fn evaluate(diag: &crate::commands::scan::Diag) -> PolicyDecision { pub fn evaluate(diag: &crate::commands::scan::Diag) -> PolicyDecision {
let texts = collect_diag_texts(diag); let texts = collect_diag_texts(diag);
for text in &texts { for (field, text) in &texts {
if let Some(hit) = match_text(text) { if let Some(hit) = match_text(text) {
return PolicyDecision::Deny { return PolicyDecision::Deny {
rule: hit.0, rule: hit.0,
field: field.clone(),
excerpt: excerpt_with_scrubber(hit.1), excerpt: excerpt_with_scrubber(hit.1),
}; };
} }
@ -388,46 +395,56 @@ pub fn evaluate(diag: &crate::commands::scan::Diag) -> PolicyDecision {
PolicyDecision::Allow PolicyDecision::Allow
} }
fn collect_diag_texts(diag: &crate::commands::scan::Diag) -> Vec<String> { /// Collect every text fragment from `diag` paired with a stable name for
let mut out: Vec<String> = Vec::new(); /// the source field. The returned field names are intentionally
/// human-readable (e.g. `evidence.notes[2]`, `flow_steps[1].snippet`)
/// rather than enum variants so they read identically in audit logs and
/// in `Display` output.
fn collect_diag_texts(diag: &crate::commands::scan::Diag) -> Vec<(String, String)> {
let mut out: Vec<(String, String)> = Vec::new();
if !diag.id.is_empty() { if !diag.id.is_empty() {
out.push(diag.id.clone()); out.push(("id".into(), diag.id.clone()));
} }
if !diag.path.is_empty() { if !diag.path.is_empty() {
out.push(diag.path.clone()); out.push(("path".into(), diag.path.clone()));
} }
if let Some(msg) = diag.message.as_ref() { if let Some(msg) = diag.message.as_ref() {
out.push(msg.clone()); out.push(("message".into(), msg.clone()));
} }
if let Some(ev) = diag.evidence.as_ref() { if let Some(ev) = diag.evidence.as_ref() {
for note in &ev.notes { for (i, note) in ev.notes.iter().enumerate() {
out.push(note.clone()); out.push((format!("evidence.notes[{i}]"), note.clone()));
} }
if let Some(exp) = ev.explanation.as_ref() { if let Some(exp) = ev.explanation.as_ref() {
out.push(exp.clone()); out.push(("evidence.explanation".into(), exp.clone()));
} }
for s in [&ev.source, &ev.sink] { for (label, s) in [("source", &ev.source), ("sink", &ev.sink)] {
if let Some(span) = s.as_ref() { if let Some(span) = s.as_ref() {
out.push(span.path.clone()); out.push((format!("evidence.{label}.path"), span.path.clone()));
if let Some(sn) = span.snippet.as_ref() { if let Some(sn) = span.snippet.as_ref() {
out.push(sn.clone()); out.push((format!("evidence.{label}.snippet"), sn.clone()));
} }
} }
} }
for span in ev.guards.iter().chain(ev.sanitizers.iter()) { for (i, span) in ev.guards.iter().enumerate() {
if let Some(sn) = span.snippet.as_ref() { if let Some(sn) = span.snippet.as_ref() {
out.push(sn.clone()); out.push((format!("evidence.guards[{i}].snippet"), sn.clone()));
} }
} }
for step in &ev.flow_steps { for (i, span) in ev.sanitizers.iter().enumerate() {
if let Some(sn) = span.snippet.as_ref() {
out.push((format!("evidence.sanitizers[{i}].snippet"), sn.clone()));
}
}
for (i, step) in ev.flow_steps.iter().enumerate() {
if !step.file.is_empty() { if !step.file.is_empty() {
out.push(step.file.clone()); out.push((format!("flow_steps[{i}].file"), step.file.clone()));
} }
if let Some(sn) = step.snippet.as_ref() { if let Some(sn) = step.snippet.as_ref() {
out.push(sn.clone()); out.push((format!("flow_steps[{i}].snippet"), sn.clone()));
} }
if let Some(callee) = step.callee.as_ref() { if let Some(callee) = step.callee.as_ref() {
out.push(callee.clone()); out.push((format!("flow_steps[{i}].callee"), callee.clone()));
} }
} }
} }

View file

@ -410,18 +410,22 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
// The verifier returns `Inconclusive(PolicyDeniedDynamic)` so the // The verifier returns `Inconclusive(PolicyDeniedDynamic)` so the
// operator sees *why* dynamic execution was skipped without losing // operator sees *why* dynamic execution was skipped without losing
// the static finding from the report. // the static finding from the report.
if let crate::dynamic::policy::PolicyDecision::Deny { rule, excerpt } = if let crate::dynamic::policy::PolicyDecision::Deny {
crate::dynamic::policy::evaluate(diag) rule,
field,
excerpt,
} = crate::dynamic::policy::evaluate(diag)
{ {
trace.record( trace.record(
crate::dynamic::trace::TraceStage::Verdict, crate::dynamic::trace::TraceStage::Verdict,
Some(format!("policy_denied rule={rule}")), Some(format!("policy_denied rule={rule} field={field}")),
); );
if opts.trace_verbose { if opts.trace_verbose {
trace.print_to_stderr(); trace.print_to_stderr();
} }
let inconclusive_reason = InconclusiveReason::PolicyDeniedDynamic { let inconclusive_reason = InconclusiveReason::PolicyDeniedDynamic {
rule: rule.to_owned(), rule: rule.to_owned(),
field: field.clone(),
excerpt: excerpt.clone(), excerpt: excerpt.clone(),
}; };
// Emit telemetry so the Phase 27 events log records the deny — // Emit telemetry so the Phase 27 events log records the deny —

View file

@ -337,6 +337,12 @@ pub enum InconclusiveReason {
/// `production-endpoint`) and an evidence excerpt for triage. /// `production-endpoint`) and an evidence excerpt for triage.
PolicyDeniedDynamic { PolicyDeniedDynamic {
rule: String, rule: String,
/// Logical name of the diag field that matched the deny rule
/// (e.g. `path`, `evidence.notes[2]`, `flow_steps[1].snippet`).
/// Empty string for verdicts loaded from older telemetry that
/// did not capture this field.
#[serde(default)]
field: String,
excerpt: String, excerpt: String,
}, },
} }
@ -399,10 +405,23 @@ impl fmt::Display for InconclusiveReason {
f, f,
"{backend} backend cannot enforce isolation for {oracle_kind} oracle" "{backend} backend cannot enforce isolation for {oracle_kind} oracle"
), ),
Self::PolicyDeniedDynamic { rule, excerpt } => write!( Self::PolicyDeniedDynamic {
f, rule,
"dynamic execution refused by policy rule {rule} (matched: {excerpt})" field,
), excerpt,
} => {
if field.is_empty() {
write!(
f,
"dynamic execution refused by policy rule {rule} (matched: {excerpt})"
)
} else {
write!(
f,
"dynamic execution refused by policy rule {rule} (matched {field}: {excerpt})"
)
}
}
} }
} }
} }

View file

@ -809,6 +809,8 @@ pub struct CalleeSiteView {
pub qualifier: Option<String>, pub qualifier: Option<String>,
#[serde(skip_serializing_if = "is_zero_u32")] #[serde(skip_serializing_if = "is_zero_u32")]
pub ordinal: u32, pub ordinal: u32,
#[serde(skip_serializing_if = "Option::is_none")]
pub span: Option<(u32, u32)>,
} }
fn is_zero_u32(n: &u32) -> bool { fn is_zero_u32(n: &u32) -> bool {
@ -884,6 +886,7 @@ impl FuncSummaryView {
receiver: c.receiver.clone(), receiver: c.receiver.clone(),
qualifier: c.qualifier.clone(), qualifier: c.qualifier.clone(),
ordinal: c.ordinal, ordinal: c.ordinal,
span: c.span,
}) })
.collect(), .collect(),
ssa_summary: ssa_view, ssa_summary: ssa_view,

View file

@ -191,6 +191,11 @@ const SYNTHETIC_DISAMBIG_BIT: u32 = 0x8000_0000;
/// * `ordinal`, the per-function call ordinal matching /// * `ordinal`, the per-function call ordinal matching
/// `CallMeta.call_ordinal`, allowing cross-file consumers to address a /// `CallMeta.call_ordinal`, allowing cross-file consumers to address a
/// specific call site rather than just a callee name. /// specific call site rather than just a callee name.
/// * `span`, optional 1-based `(line, col)` source coordinate of the call
/// expression, populated at CFG-build time when source bytes are
/// available. `None` for legacy summaries loaded from SQLite that
/// pre-date the span field, and for synthetic test fixtures that build
/// `CalleeSite` values directly.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Hash)] #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct CalleeSite { pub struct CalleeSite {
pub name: String, pub name: String,
@ -202,6 +207,8 @@ pub struct CalleeSite {
pub qualifier: Option<String>, pub qualifier: Option<String>,
#[serde(default, skip_serializing_if = "is_zero_u32")] #[serde(default, skip_serializing_if = "is_zero_u32")]
pub ordinal: u32, pub ordinal: u32,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub span: Option<(u32, u32)>,
} }
fn is_zero_u32(n: &u32) -> bool { fn is_zero_u32(n: &u32) -> bool {

View file

@ -1791,6 +1791,7 @@ fn callee_site_structured_roundtrip() {
receiver: Some("obj".into()), receiver: Some("obj".into()),
qualifier: None, qualifier: None,
ordinal: 1, ordinal: 1,
..Default::default()
}, },
CalleeSite { CalleeSite {
name: "env::var".into(), name: "env::var".into(),
@ -1798,6 +1799,7 @@ fn callee_site_structured_roundtrip() {
receiver: None, receiver: None,
qualifier: Some("env".into()), qualifier: Some("env".into()),
ordinal: 2, ordinal: 2,
..Default::default()
}, },
], ],
..Default::default() ..Default::default()

View file

@ -13,7 +13,7 @@
//! that fires on its own. //! that fires on its own.
use super::{DataStore, DataStoreKind, SourceLocation, SurfaceNode}; use super::{DataStore, DataStoreKind, SourceLocation, SurfaceNode};
use crate::summary::{FuncSummary, GlobalSummaries}; use crate::summary::{CalleeSite, FuncSummary, GlobalSummaries};
/// One detection rule: leaf-name pattern → store kind + label. Stored /// One detection rule: leaf-name pattern → store kind + label. Stored
/// as a flat list so adding a new ORM / driver is a one-line edit. /// as a flat list so adding a new ORM / driver is a one-line edit.
@ -108,7 +108,7 @@ pub fn detect_data_stores(summaries: &GlobalSummaries) -> Vec<SurfaceNode> {
let Some(rule) = match_rule(&callee.name) else { let Some(rule) = match_rule(&callee.name) else {
continue; continue;
}; };
let location = call_site_location(summary, callee.ordinal); let location = call_site_location(summary, callee);
let dedup = ( let dedup = (
location.file.clone(), location.file.clone(),
location.line, location.line,
@ -148,22 +148,23 @@ fn match_rule(callee: &str) -> Option<&'static DriverRule> {
}) })
} }
/// Best-effort source location for a call site. We only have file + /// Source location of a call site. Reads the 1-based `(line, col)`
/// (sometimes) sink-attribution metadata on `FuncSummary`, so the /// recorded on the [`CalleeSite`] at CFG-build time (populated for every
/// location falls back to the function's file with line 0 when no /// summary produced after the span field landed); for legacy summaries
/// finer-grained data is available. /// loaded from SQLite with no span, falls back to the function's host
fn call_site_location(summary: &FuncSummary, _ordinal: u32) -> SourceLocation { /// file with line 0.
fn call_site_location(summary: &FuncSummary, callee: &CalleeSite) -> SourceLocation {
let (line, col) = callee.span.unwrap_or((0, 0));
SourceLocation { SourceLocation {
file: summary.file_path.clone(), file: summary.file_path.clone(),
line: 0, line,
col: 0, col,
} }
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::summary::CalleeSite;
use crate::symbol::{FuncKey, Lang}; use crate::symbol::{FuncKey, Lang};
fn summary_with_callees(name: &str, file: &str, callees: &[&str]) -> (FuncKey, FuncSummary) { fn summary_with_callees(name: &str, file: &str, callees: &[&str]) -> (FuncKey, FuncSummary) {
@ -182,6 +183,33 @@ mod tests {
(key, summary) (key, summary)
} }
#[test]
fn datastore_carries_callee_span_when_present() {
    // A span recorded on the `CalleeSite` has to reach the detected
    // datastore node's `SourceLocation` as-is (1-based `(line, col)`),
    // instead of collapsing to the `(0, 0)` fallback used when no span
    // is available.
    let spanned_callee = CalleeSite {
        span: Some((42, 13)),
        ..CalleeSite::bare("psycopg2.connect")
    };

    let mut summaries = GlobalSummaries::new();
    summaries.insert(
        FuncKey::new_function(Lang::Python, "app.py", "init", None),
        FuncSummary {
            name: "init".into(),
            file_path: "app.py".into(),
            lang: "python".into(),
            param_count: 0,
            callees: vec![spanned_callee],
            ..Default::default()
        },
    );

    let detected = detect_data_stores(&summaries);
    assert_eq!(detected.len(), 1);

    // The single detected node must be a datastore carrying the span.
    match &detected[0] {
        SurfaceNode::DataStore(store) => {
            assert_eq!(store.location.line, 42);
            assert_eq!(store.location.col, 13);
        }
        _ => panic!(),
    }
}
#[test] #[test]
fn detects_psycopg2_connect() { fn detects_psycopg2_connect() {
let mut gs = GlobalSummaries::new(); let mut gs = GlobalSummaries::new();

View file

@ -9,7 +9,7 @@
use super::{ExternalService, ExternalServiceKind, SourceLocation, SurfaceNode}; use super::{ExternalService, ExternalServiceKind, SourceLocation, SurfaceNode};
use crate::labels::Cap; use crate::labels::Cap;
use crate::summary::{FuncSummary, GlobalSummaries}; use crate::summary::{CalleeSite, FuncSummary, GlobalSummaries};
struct ClientRule { struct ClientRule {
leaf: &'static str, leaf: &'static str,
@ -87,7 +87,7 @@ pub fn detect_external_services(summaries: &GlobalSummaries) -> Vec<SurfaceNode>
let Some(rule) = match_rule(&callee.name) else { let Some(rule) = match_rule(&callee.name) else {
continue; continue;
}; };
let location = call_site_location(summary); let location = call_site_location(summary, Some(callee));
if !seen.insert((location.file.clone(), rule.label.to_string())) { if !seen.insert((location.file.clone(), rule.label.to_string())) {
continue; continue;
} }
@ -104,7 +104,7 @@ pub fn detect_external_services(summaries: &GlobalSummaries) -> Vec<SurfaceNode>
// file as the location and synthesise a generic label. // file as the location and synthesise a generic label.
for (_key, summary) in summaries.iter() { for (_key, summary) in summaries.iter() {
if summary.sink_caps().contains(Cap::SSRF) { if summary.sink_caps().contains(Cap::SSRF) {
let loc = call_site_location(summary); let loc = call_site_location(summary, None);
let dedup = (loc.file.clone(), "Outbound HTTP".to_string()); let dedup = (loc.file.clone(), "Outbound HTTP".to_string());
if seen.insert(dedup) { if seen.insert(dedup) {
out.push(SurfaceNode::ExternalService(ExternalService { out.push(SurfaceNode::ExternalService(ExternalService {
@ -134,11 +134,16 @@ fn match_rule(callee: &str) -> Option<&'static ClientRule> {
}) })
} }
fn call_site_location(summary: &FuncSummary) -> SourceLocation { /// Source location of an external-service call site. Reads the 1-based
/// `(line, col)` recorded on the [`CalleeSite`] at CFG-build time when
/// available; otherwise (sink-cap-only fallback path, or legacy summaries
/// loaded from SQLite) returns the function's host file with line 0.
fn call_site_location(summary: &FuncSummary, callee: Option<&CalleeSite>) -> SourceLocation {
let (line, col) = callee.and_then(|c| c.span).unwrap_or((0, 0));
SourceLocation { SourceLocation {
file: summary.file_path.clone(), file: summary.file_path.clone(),
line: 0, line,
col: 0, col,
} }
} }

View file

@ -82,8 +82,16 @@ fn credentials_rule_fires_on_aws_key_in_flow_step_snippet() {
)]; )];
diag.evidence = Some(ev); diag.evidence = Some(ev);
match policy::evaluate(&diag) { match policy::evaluate(&diag) {
PolicyDecision::Deny { rule, excerpt } => { PolicyDecision::Deny {
rule,
field,
excerpt,
} => {
assert_eq!(rule, DenyRule::CREDENTIALS); assert_eq!(rule, DenyRule::CREDENTIALS);
assert!(
field.starts_with("flow_steps[") && field.ends_with(".snippet"),
"deny must record the source field, got {field:?}"
);
assert!( assert!(
!excerpt.contains("AKIAFAKETEST00000000"), !excerpt.contains("AKIAFAKETEST00000000"),
"excerpt must scrub the raw token, got {excerpt:?}" "excerpt must scrub the raw token, got {excerpt:?}"
@ -209,8 +217,16 @@ fn verify_finding_short_circuits_without_sandbox() {
.inconclusive_reason .inconclusive_reason
.expect("PolicyDeniedDynamic must populate inconclusive_reason"); .expect("PolicyDeniedDynamic must populate inconclusive_reason");
match reason { match reason {
InconclusiveReason::PolicyDeniedDynamic { rule, excerpt } => { InconclusiveReason::PolicyDeniedDynamic {
rule,
field,
excerpt,
} => {
assert_eq!(rule, DenyRule::CREDENTIALS); assert_eq!(rule, DenyRule::CREDENTIALS);
assert!(
field.starts_with("evidence.notes["),
"deny must record the source field, got {field:?}"
);
assert!( assert!(
!excerpt.contains("hunter2-supersecret-test"), !excerpt.contains("hunter2-supersecret-test"),
"excerpt must scrub the raw secret, got {excerpt:?}" "excerpt must scrub the raw secret, got {excerpt:?}"