[pitboss/grind] deferred session-0002 (20260516T052512Z-20f8)

This commit is contained in:
pitboss 2026-05-16 01:46:35 -05:00
parent 7a2f82c2ab
commit 282acddbbf
11 changed files with 214 additions and 45 deletions

View file

@ -3958,3 +3958,42 @@ fn rhs_array_literal_elements_recognise_per_language_shapes() {
// Non-array-shape node returns empty (defensive guard).
assert!(run("javascript", b"const x = tainted;\n", &["identifier"]).is_empty());
}
/// `CalleeSite.span` should carry the 1-based (line, col) of each call's
/// node span so downstream consumers (surface map, datastore/external
/// detectors) can render real coordinates instead of `line: 0`.
#[test]
fn callee_site_span_carries_line_and_column() {
// Three calls on three different lines. The leading newline puts
// line 1 at the blank line; `helper(x, y);` is on line 3, etc.
let src = b"
function outer(obj, x, y) {
helper(x, y);
obj.method(x);
}
";
let ts_lang = Language::from(tree_sitter_javascript::LANGUAGE);
let file_cfg = parse_to_file_cfg(src, "javascript", ts_lang);
let (_key, outer) = file_cfg
.summaries
.iter()
.find(|(k, _)| k.name == "outer")
.expect("outer summary should exist");
let helper_site = outer
.callees
.iter()
.find(|c| c.name == "helper")
.expect("helper call should be recorded");
let (line, col) = helper_site.span.expect("span populated at CFG-build time");
assert_eq!(line, 3, "helper(...) sits on the 3rd source line");
assert!(col >= 5, "indented 4 spaces — column is 1-based and > 4");
let method_site = outer
.callees
.iter()
.find(|c| c.name.ends_with("method"))
.expect("method call should be recorded");
let (mline, _) = method_site.span.expect("method span populated");
assert_eq!(mline, 4, "obj.method(x) on line 4");
}

View file

@ -5664,7 +5664,7 @@ pub(super) fn build_sub<'a>(
for idx in fn_graph.node_indices() {
let info = &fn_graph[idx];
if let Some(callee) = &info.call.callee {
let site = build_callee_site(callee, info, lang);
let site = build_callee_site(callee, info, lang, code);
// Dedup by (name, arity, receiver, qualifier, ordinal). A
// single function may legitimately contain multiple distinct
// calls to the same callee (e.g. different ordinals or
@ -6632,7 +6632,12 @@ fn apply_gated_label_rules(
/// remains the single segment immediately before the leaf (back-compat
/// with the legacy heuristic). For method calls the qualifier is
/// redundant with `receiver` and is left `None`.
fn build_callee_site(callee: &str, info: &NodeInfo, lang: &str) -> crate::summary::CalleeSite {
fn build_callee_site(
callee: &str,
info: &NodeInfo,
lang: &str,
code: &[u8],
) -> crate::summary::CalleeSite {
use crate::summary::CalleeSite;
let receiver = info.call.receiver.clone();
@ -6661,15 +6666,39 @@ fn build_callee_site(callee: &str, info: &NodeInfo, lang: &str) -> crate::summar
None
};
let span = callee_span_line_col(code, info.ast.span.0);
CalleeSite {
name: callee.to_string(),
arity,
receiver,
qualifier,
ordinal: info.call.call_ordinal,
span,
}
}
/// Convert a byte offset into a 1-based `(line, col)` pair against `code`.
///
/// Returns `None` only when `code` is empty (no source to resolve against);
/// out-of-range offsets are clamped to `code.len()` so a synthetic node
/// whose span overshoots the file still produces the last-line coordinate
/// rather than `None`.
fn callee_span_line_col(code: &[u8], offset: usize) -> Option<(u32, u32)> {
if code.is_empty() {
return None;
}
let clamped = offset.min(code.len());
let prefix = &code[..clamped];
let line = prefix.iter().filter(|&&b| b == b'\n').count() as u32 + 1;
let col_bytes = match prefix.iter().rposition(|&b| b == b'\n') {
Some(idx) => clamped - idx - 1,
None => clamped,
} as u32
+ 1;
Some((line, col_bytes))
}
/// Convert the graphlocal `FuncSummaries` into serialisable [`FuncSummary`]
/// values suitable for crossfile persistence.
pub(crate) fn export_summaries(

View file

@ -277,6 +277,12 @@ pub enum PolicyDecision {
/// Stable rule identifier — one of [`DenyRule::CREDENTIALS`],
/// [`DenyRule::PRIVATE_KEY`], [`DenyRule::PRODUCTION_ENDPOINT`].
rule: &'static str,
/// Logical name of the diag field that produced the matched text
/// (e.g. `path`, `message`, `evidence.notes[2]`,
/// `flow_steps[1].snippet`). Lets operators triage *where* the
/// rule fired without having to re-derive the match from the
/// scrubbed excerpt alone.
field: String,
/// Short text excerpt (max 120 chars, scrubbed via
/// [`Scrubber::scrub_string`]) of the offending field so an
/// operator can identify *why* the deny fired without having to
@ -377,10 +383,11 @@ const PROD_ENDPOINT_REGEXES: &[&str] = &[
/// the leak shape.
pub fn evaluate(diag: &crate::commands::scan::Diag) -> PolicyDecision {
let texts = collect_diag_texts(diag);
for text in &texts {
for (field, text) in &texts {
if let Some(hit) = match_text(text) {
return PolicyDecision::Deny {
rule: hit.0,
field: field.clone(),
excerpt: excerpt_with_scrubber(hit.1),
};
}
@ -388,46 +395,56 @@ pub fn evaluate(diag: &crate::commands::scan::Diag) -> PolicyDecision {
PolicyDecision::Allow
}
fn collect_diag_texts(diag: &crate::commands::scan::Diag) -> Vec<String> {
let mut out: Vec<String> = Vec::new();
/// Collect every text fragment from `diag` paired with a stable name for
/// the source field. The returned field names are intentionally
/// human-readable (e.g. `evidence.notes[2]`, `flow_steps[1].snippet`)
/// rather than enum variants so they read identically in audit logs and
/// in `Display` output.
fn collect_diag_texts(diag: &crate::commands::scan::Diag) -> Vec<(String, String)> {
let mut out: Vec<(String, String)> = Vec::new();
if !diag.id.is_empty() {
out.push(diag.id.clone());
out.push(("id".into(), diag.id.clone()));
}
if !diag.path.is_empty() {
out.push(diag.path.clone());
out.push(("path".into(), diag.path.clone()));
}
if let Some(msg) = diag.message.as_ref() {
out.push(msg.clone());
out.push(("message".into(), msg.clone()));
}
if let Some(ev) = diag.evidence.as_ref() {
for note in &ev.notes {
out.push(note.clone());
for (i, note) in ev.notes.iter().enumerate() {
out.push((format!("evidence.notes[{i}]"), note.clone()));
}
if let Some(exp) = ev.explanation.as_ref() {
out.push(exp.clone());
out.push(("evidence.explanation".into(), exp.clone()));
}
for s in [&ev.source, &ev.sink] {
for (label, s) in [("source", &ev.source), ("sink", &ev.sink)] {
if let Some(span) = s.as_ref() {
out.push(span.path.clone());
out.push((format!("evidence.{label}.path"), span.path.clone()));
if let Some(sn) = span.snippet.as_ref() {
out.push(sn.clone());
out.push((format!("evidence.{label}.snippet"), sn.clone()));
}
}
}
for span in ev.guards.iter().chain(ev.sanitizers.iter()) {
for (i, span) in ev.guards.iter().enumerate() {
if let Some(sn) = span.snippet.as_ref() {
out.push(sn.clone());
out.push((format!("evidence.guards[{i}].snippet"), sn.clone()));
}
}
for step in &ev.flow_steps {
for (i, span) in ev.sanitizers.iter().enumerate() {
if let Some(sn) = span.snippet.as_ref() {
out.push((format!("evidence.sanitizers[{i}].snippet"), sn.clone()));
}
}
for (i, step) in ev.flow_steps.iter().enumerate() {
if !step.file.is_empty() {
out.push(step.file.clone());
out.push((format!("flow_steps[{i}].file"), step.file.clone()));
}
if let Some(sn) = step.snippet.as_ref() {
out.push(sn.clone());
out.push((format!("flow_steps[{i}].snippet"), sn.clone()));
}
if let Some(callee) = step.callee.as_ref() {
out.push(callee.clone());
out.push((format!("flow_steps[{i}].callee"), callee.clone()));
}
}
}

View file

@ -410,18 +410,22 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
// The verifier returns `Inconclusive(PolicyDeniedDynamic)` so the
// operator sees *why* dynamic execution was skipped without losing
// the static finding from the report.
if let crate::dynamic::policy::PolicyDecision::Deny { rule, excerpt } =
crate::dynamic::policy::evaluate(diag)
if let crate::dynamic::policy::PolicyDecision::Deny {
rule,
field,
excerpt,
} = crate::dynamic::policy::evaluate(diag)
{
trace.record(
crate::dynamic::trace::TraceStage::Verdict,
Some(format!("policy_denied rule={rule}")),
Some(format!("policy_denied rule={rule} field={field}")),
);
if opts.trace_verbose {
trace.print_to_stderr();
}
let inconclusive_reason = InconclusiveReason::PolicyDeniedDynamic {
rule: rule.to_owned(),
field: field.clone(),
excerpt: excerpt.clone(),
};
// Emit telemetry so the Phase 27 events log records the deny —

View file

@ -337,6 +337,12 @@ pub enum InconclusiveReason {
/// `production-endpoint`) and an evidence excerpt for triage.
PolicyDeniedDynamic {
rule: String,
/// Logical name of the diag field that matched the deny rule
/// (e.g. `path`, `evidence.notes[2]`, `flow_steps[1].snippet`).
/// Empty string for verdicts loaded from older telemetry that
/// did not capture this field.
#[serde(default)]
field: String,
excerpt: String,
},
}
@ -399,10 +405,23 @@ impl fmt::Display for InconclusiveReason {
f,
"{backend} backend cannot enforce isolation for {oracle_kind} oracle"
),
Self::PolicyDeniedDynamic { rule, excerpt } => write!(
f,
"dynamic execution refused by policy rule {rule} (matched: {excerpt})"
),
Self::PolicyDeniedDynamic {
rule,
field,
excerpt,
} => {
if field.is_empty() {
write!(
f,
"dynamic execution refused by policy rule {rule} (matched: {excerpt})"
)
} else {
write!(
f,
"dynamic execution refused by policy rule {rule} (matched {field}: {excerpt})"
)
}
}
}
}
}

View file

@ -809,6 +809,8 @@ pub struct CalleeSiteView {
pub qualifier: Option<String>,
#[serde(skip_serializing_if = "is_zero_u32")]
pub ordinal: u32,
#[serde(skip_serializing_if = "Option::is_none")]
pub span: Option<(u32, u32)>,
}
fn is_zero_u32(n: &u32) -> bool {
@ -884,6 +886,7 @@ impl FuncSummaryView {
receiver: c.receiver.clone(),
qualifier: c.qualifier.clone(),
ordinal: c.ordinal,
span: c.span,
})
.collect(),
ssa_summary: ssa_view,

View file

@ -191,6 +191,11 @@ const SYNTHETIC_DISAMBIG_BIT: u32 = 0x8000_0000;
/// * `ordinal`, the per-function call ordinal matching
/// `CallMeta.call_ordinal`, allowing cross-file consumers to address a
/// specific call site rather than just a callee name.
/// * `span`, optional 1-based `(line, col)` source coordinate of the call
/// expression, populated at CFG-build time when source bytes are
/// available. `None` for legacy summaries loaded from SQLite that
/// pre-date the span field, and for synthetic test fixtures that build
/// `CalleeSite` values directly.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct CalleeSite {
pub name: String,
@ -202,6 +207,8 @@ pub struct CalleeSite {
pub qualifier: Option<String>,
#[serde(default, skip_serializing_if = "is_zero_u32")]
pub ordinal: u32,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub span: Option<(u32, u32)>,
}
fn is_zero_u32(n: &u32) -> bool {

View file

@ -1791,6 +1791,7 @@ fn callee_site_structured_roundtrip() {
receiver: Some("obj".into()),
qualifier: None,
ordinal: 1,
..Default::default()
},
CalleeSite {
name: "env::var".into(),
@ -1798,6 +1799,7 @@ fn callee_site_structured_roundtrip() {
receiver: None,
qualifier: Some("env".into()),
ordinal: 2,
..Default::default()
},
],
..Default::default()

View file

@ -13,7 +13,7 @@
//! that fires on its own.
use super::{DataStore, DataStoreKind, SourceLocation, SurfaceNode};
use crate::summary::{FuncSummary, GlobalSummaries};
use crate::summary::{CalleeSite, FuncSummary, GlobalSummaries};
/// One detection rule: leaf-name pattern → store kind + label. Stored
/// as a flat list so adding a new ORM / driver is a one-line edit.
@ -108,7 +108,7 @@ pub fn detect_data_stores(summaries: &GlobalSummaries) -> Vec<SurfaceNode> {
let Some(rule) = match_rule(&callee.name) else {
continue;
};
let location = call_site_location(summary, callee.ordinal);
let location = call_site_location(summary, callee);
let dedup = (
location.file.clone(),
location.line,
@ -148,22 +148,23 @@ fn match_rule(callee: &str) -> Option<&'static DriverRule> {
})
}
/// Best-effort source location for a call site. We only have file +
/// (sometimes) sink-attribution metadata on `FuncSummary`, so the
/// location falls back to the function's file with line 0 when no
/// finer-grained data is available.
fn call_site_location(summary: &FuncSummary, _ordinal: u32) -> SourceLocation {
/// Source location of a call site. Reads the 1-based `(line, col)`
/// recorded on the [`CalleeSite`] at CFG-build time (populated for every
/// summary produced after the span field landed); for legacy summaries
/// loaded from SQLite with no span, falls back to the function's host
/// file with line 0.
fn call_site_location(summary: &FuncSummary, callee: &CalleeSite) -> SourceLocation {
let (line, col) = callee.span.unwrap_or((0, 0));
SourceLocation {
file: summary.file_path.clone(),
line: 0,
col: 0,
line,
col,
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::summary::CalleeSite;
use crate::symbol::{FuncKey, Lang};
fn summary_with_callees(name: &str, file: &str, callees: &[&str]) -> (FuncKey, FuncSummary) {
@ -182,6 +183,33 @@ mod tests {
(key, summary)
}
#[test]
fn datastore_carries_callee_span_when_present() {
// When the CFG populates `CalleeSite.span`, the detected datastore
// node's `SourceLocation` must reflect that 1-based `(line, col)`
// — not the legacy `(0, 0)` fallback.
let mut gs = GlobalSummaries::new();
let key = FuncKey::new_function(Lang::Python, "app.py", "init", None);
let mut callee = CalleeSite::bare("psycopg2.connect");
callee.span = Some((42, 13));
let summary = FuncSummary {
name: "init".into(),
file_path: "app.py".into(),
lang: "python".into(),
param_count: 0,
callees: vec![callee],
..Default::default()
};
gs.insert(key, summary);
let nodes = detect_data_stores(&gs);
assert_eq!(nodes.len(), 1);
let SurfaceNode::DataStore(ds) = &nodes[0] else {
panic!()
};
assert_eq!(ds.location.line, 42);
assert_eq!(ds.location.col, 13);
}
#[test]
fn detects_psycopg2_connect() {
let mut gs = GlobalSummaries::new();

View file

@ -9,7 +9,7 @@
use super::{ExternalService, ExternalServiceKind, SourceLocation, SurfaceNode};
use crate::labels::Cap;
use crate::summary::{FuncSummary, GlobalSummaries};
use crate::summary::{CalleeSite, FuncSummary, GlobalSummaries};
struct ClientRule {
leaf: &'static str,
@ -87,7 +87,7 @@ pub fn detect_external_services(summaries: &GlobalSummaries) -> Vec<SurfaceNode>
let Some(rule) = match_rule(&callee.name) else {
continue;
};
let location = call_site_location(summary);
let location = call_site_location(summary, Some(callee));
if !seen.insert((location.file.clone(), rule.label.to_string())) {
continue;
}
@ -104,7 +104,7 @@ pub fn detect_external_services(summaries: &GlobalSummaries) -> Vec<SurfaceNode>
// file as the location and synthesise a generic label.
for (_key, summary) in summaries.iter() {
if summary.sink_caps().contains(Cap::SSRF) {
let loc = call_site_location(summary);
let loc = call_site_location(summary, None);
let dedup = (loc.file.clone(), "Outbound HTTP".to_string());
if seen.insert(dedup) {
out.push(SurfaceNode::ExternalService(ExternalService {
@ -134,11 +134,16 @@ fn match_rule(callee: &str) -> Option<&'static ClientRule> {
})
}
fn call_site_location(summary: &FuncSummary) -> SourceLocation {
/// Source location of an external-service call site. Reads the 1-based
/// `(line, col)` recorded on the [`CalleeSite`] at CFG-build time when
/// available; otherwise (sink-caponly fallback path, or legacy summaries
/// loaded from SQLite) returns the function's host file with line 0.
fn call_site_location(summary: &FuncSummary, callee: Option<&CalleeSite>) -> SourceLocation {
let (line, col) = callee.and_then(|c| c.span).unwrap_or((0, 0));
SourceLocation {
file: summary.file_path.clone(),
line: 0,
col: 0,
line,
col,
}
}

View file

@ -82,8 +82,16 @@ fn credentials_rule_fires_on_aws_key_in_flow_step_snippet() {
)];
diag.evidence = Some(ev);
match policy::evaluate(&diag) {
PolicyDecision::Deny { rule, excerpt } => {
PolicyDecision::Deny {
rule,
field,
excerpt,
} => {
assert_eq!(rule, DenyRule::CREDENTIALS);
assert!(
field.starts_with("flow_steps[") && field.ends_with(".snippet"),
"deny must record the source field, got {field:?}"
);
assert!(
!excerpt.contains("AKIAFAKETEST00000000"),
"excerpt must scrub the raw token, got {excerpt:?}"
@ -209,8 +217,16 @@ fn verify_finding_short_circuits_without_sandbox() {
.inconclusive_reason
.expect("PolicyDeniedDynamic must populate inconclusive_reason");
match reason {
InconclusiveReason::PolicyDeniedDynamic { rule, excerpt } => {
InconclusiveReason::PolicyDeniedDynamic {
rule,
field,
excerpt,
} => {
assert_eq!(rule, DenyRule::CREDENTIALS);
assert!(
field.starts_with("evidence.notes["),
"deny must record the source field, got {field:?}"
);
assert!(
!excerpt.contains("hunter2-supersecret-test"),
"excerpt must scrub the raw secret, got {excerpt:?}"