nyx/src/engine_notes.rs
Eli Peter a438886217
Python fp and docs updates (#58)
* refactor: Update comments for clarity and add expectations.json files for performance metrics

* feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks

* feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks

* refactor: Simplify code formatting for better readability in multiple files

* refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration

* feat: Update Java and Python patterns to include new security rules

* refactor: Improve comment clarity and consistency across multiple Rust files

* refactor: Simplify code formatting for improved readability in integration tests and module files

* refactor: Improve comment formatting and enhance clarity in assertions across multiple files
2026-04-29 19:53:34 -04:00

474 lines
17 KiB
Rust

//! Provenance notes attached to findings when the engine has hit an
//! internal budget, widening, or lowering cap.
//!
//! Each note carries a [`LossDirection`] classification.
//! [`crate::evidence::compute_confidence`] caps confidence at `Medium`
//! for `OverReport`/`Bail` notes, and [`crate::rank`] applies a
//! direction-aware completeness penalty.
use serde::{Deserialize, Serialize};
use smallvec::SmallVec;
/// Why a fix-point loop hit its safety cap. Distinguishes "raise the
/// cap" cases from non-monotonicity bugs in cap-hit telemetry.
///
/// Serialized as an internally tagged enum for SARIF/JSON consumers:
/// the variant name is written in snake_case under the `kind` key.
/// The `Default` value is [`CapHitReason::Unknown`].
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum CapHitReason {
/// Change-set still decreasing when the cap fired. Safe to raise
/// the cap; the SCC is just larger than budget.
///
/// `trajectory` holds the most recent change-set sizes, oldest
/// first; [`CapHitReason::classify`] records at most the last four.
MonotoneShrinking { trajectory: SmallVec<[u32; 4]> },
/// Change-set held steady at a non-zero value for ≥2 iterations.
/// Same keys updating back and forth, investigate.
///
/// `delta` is the change-set size that repeated.
Plateau { delta: u32 },
/// Period-2 oscillation detected. Non-monotone; raising the cap
/// will not help. File a bug.
SuspectedOscillation {
/// Length of the detected cycle; [`CapHitReason::classify`] only
/// ever reports `2`.
period: u8,
/// The samples that exhibited the cycle, oldest first.
trajectory: SmallVec<[u32; 4]>,
},
/// No trajectory recorded (e.g. cap fired after a single iteration),
/// or the recorded trajectory matched none of the patterns above.
#[default]
Unknown,
}
impl CapHitReason {
    /// Number of trailing samples kept in a reported `trajectory`.
    const TAIL_LEN: usize = 4;

    /// Classify a trajectory of per-iteration change-set sizes
    /// (oldest first, most recent last).
    ///
    /// Rules, applied in this order:
    /// 1. fewer than two samples → `Unknown`;
    /// 2. the last four samples read `a, b, a, b` with `a != b` →
    ///    `SuspectedOscillation` with `period: 2`;
    /// 3. the last two samples are equal and non-zero → `Plateau`;
    /// 4. no sample anywhere in `deltas` exceeds its predecessor
    ///    (non-increasing; repeated values are allowed) →
    ///    `MonotoneShrinking`;
    /// 5. otherwise → `Unknown`.
    ///
    /// Reported trajectories carry at most the last four samples.
    pub fn classify(deltas: &[u32]) -> CapHitReason {
        // Everything below needs at least the two most recent samples.
        let (prev, last) = match deltas {
            [.., prev, last] => (*prev, *last),
            _ => return CapHitReason::Unknown,
        };

        // Period-2 oscillation: the last four samples form (a, b, a, b)
        // with a != b. Checked before the plateau rule so an
        // alternating tail is never mistaken for anything else.
        if let [.., a0, b0, a1, b1] = deltas {
            if a0 == a1 && b0 == b1 && a0 != b0 {
                return CapHitReason::SuspectedOscillation {
                    period: 2,
                    trajectory: Self::recent_tail(deltas),
                };
            }
        }

        // Plateau: change-set size stuck at the same non-zero value.
        if last == prev && last > 0 {
            return CapHitReason::Plateau { delta: last };
        }

        // Monotone shrinking: never increasing across the full recorded
        // trajectory. A non-zero repeat at the very end was already
        // claimed by the plateau rule; a repeated zero at the end
        // still lands here.
        if deltas.windows(2).all(|pair| pair[1] <= pair[0]) {
            CapHitReason::MonotoneShrinking {
                trajectory: Self::recent_tail(deltas),
            }
        } else {
            CapHitReason::Unknown
        }
    }

    /// Copy the last `TAIL_LEN` samples (or all of them when fewer are
    /// available), preserving oldest-first order.
    fn recent_tail(deltas: &[u32]) -> SmallVec<[u32; 4]> {
        let start = deltas.len().saturating_sub(Self::TAIL_LEN);
        deltas[start..].iter().copied().collect()
    }

    /// Stable snake_case tag for log/diag consumption. Payload fields
    /// are ignored; only the variant determines the tag.
    pub fn tag(&self) -> &'static str {
        match self {
            CapHitReason::MonotoneShrinking { .. } => "monotone_shrinking",
            CapHitReason::Plateau { .. } => "plateau",
            CapHitReason::SuspectedOscillation { .. } => "suspected_oscillation",
            CapHitReason::Unknown => "unknown",
        }
    }
}
/// Direction of precision loss encoded by an [`EngineNote`].
/// Variants are ordered by worsening credibility impact;
/// [`combine`](Self::combine) takes the max.
///
/// The ordering comes from the derived `Ord`, which follows declaration
/// order, so reordering the variants changes what `combine` returns.
/// Serde writes the variant names in snake_case (e.g. `under_report`),
/// which differs from the hyphenated strings returned by
/// [`tag`](Self::tag).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum LossDirection {
/// Analysis converged; the note records a harmless event.
Informational,
/// Analysis may have missed findings (worklist was capped). Reported
/// findings remain sound, the result set is a lower bound.
UnderReport,
/// Analysis may have reported a spurious finding (e.g. predicate
/// state widened to top, dropping a guard). Likely FP.
OverReport,
/// Analysis aborted before producing a trustworthy result.
/// Treat the finding as a starting point, not a confirmed flow.
Bail,
}
impl LossDirection {
/// Merge by taking the worse (later in `Ord`).
pub fn combine(self, other: LossDirection) -> LossDirection {
self.max(other)
}
/// Snake-case tag used in console output and JSON properties.
pub fn tag(self) -> &'static str {
match self {
LossDirection::Informational => "informational",
LossDirection::UnderReport => "under-report",
LossDirection::OverReport => "over-report",
LossDirection::Bail => "bail",
}
}
}
/// A single provenance event recorded during analysis.
///
/// Serialized as an internally tagged enum: the variant name is written
/// in snake_case under the `kind` key, alongside the variant's fields.
/// The direction named at the end of each variant's description is the
/// value [`EngineNote::direction`] returns for it.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum EngineNote {
/// Taint worklist hit its iteration budget. UnderReport.
WorklistCapped { iterations: u32 },
/// Per-value origin set truncated to `analysis.engine.max_origins`
/// (default 32). UnderReport, dropped origins correspond to real
/// source flows whose findings won't emit.
OriginsTruncated { dropped: u32 },
/// JS/TS pass-2 in-file global propagation hit its cap. UnderReport.
InFileFixpointCapped {
iterations: u32,
// `default` lets payloads written without a `reason` field still
// deserialize; the field then takes `CapHitReason::default()`.
#[serde(default)]
reason: CapHitReason,
},
/// Cross-file SCC fixpoint hit `SCC_FIXPOINT_SAFETY_CAP`. UnderReport.
CrossFileFixpointCapped {
iterations: u32,
// Same back-compat default as `InFileFixpointCapped::reason`.
#[serde(default)]
reason: CapHitReason,
},
/// SSA lowering produced an empty body. Bail.
SsaLoweringBailed { reason: String },
/// Tree-sitter parse exceeded the timeout. Bail.
ParseTimeout { timeout_ms: u32 },
/// Predicate state widened to top to keep the lattice monotone.
/// OverReport, guards may have been lost.
PredicateStateWidened,
/// Path-environment constraints widened to top. OverReport.
PathEnvCapped,
/// Inline cache reused a cached body. Informational.
InlineCacheReused,
/// Points-to set truncated to `analysis.engine.max_pointsto`
/// (default 32). UnderReport.
PointsToTruncated { dropped: u32 },
}
impl EngineNote {
    /// Map this note to the direction of precision loss it represents.
    ///
    /// The match is deliberately exhaustive with no wildcard arm, so a
    /// newly added variant fails to compile until it is assigned a
    /// direction here.
    pub fn direction(&self) -> LossDirection {
        match self {
            // Budget caps and truncations: findings may be missing.
            EngineNote::WorklistCapped { .. }
            | EngineNote::OriginsTruncated { .. }
            | EngineNote::InFileFixpointCapped { .. }
            | EngineNote::CrossFileFixpointCapped { .. }
            | EngineNote::PointsToTruncated { .. } => LossDirection::UnderReport,

            // Widening: findings may be spurious.
            EngineNote::PredicateStateWidened | EngineNote::PathEnvCapped => {
                LossDirection::OverReport
            }

            // Hard aborts.
            EngineNote::SsaLoweringBailed { .. } | EngineNote::ParseTimeout { .. } => {
                LossDirection::Bail
            }

            // No credibility impact.
            EngineNote::InlineCacheReused => LossDirection::Informational,
        }
    }

    /// Whether this note's direction is anything other than
    /// `Informational`. Drives the `confidence_capped` SARIF property.
    pub fn lowers_confidence(&self) -> bool {
        !matches!(self.direction(), LossDirection::Informational)
    }
}
/// Fold a slice of notes down to the single worst direction.
///
/// Informational notes are skipped. Returns `None` when `notes` is
/// empty or contains nothing but informational notes; otherwise the
/// remaining directions are merged pairwise with
/// [`LossDirection::combine`].
pub fn worst_direction(notes: &[EngineNote]) -> Option<LossDirection> {
    notes
        .iter()
        .map(|note| note.direction())
        .filter(|dir| *dir != LossDirection::Informational)
        .reduce(|worst_so_far, dir| worst_so_far.combine(dir))
}
/// Append `note` to `notes` unless an equal note is already stored.
///
/// Equality is `EngineNote`'s `PartialEq`, so two notes of the same
/// variant with different payloads both get kept.
pub fn push_unique(notes: &mut smallvec::SmallVec<[EngineNote; 2]>, note: EngineNote) {
    if notes.contains(&note) {
        return;
    }
    notes.push(note);
}
// Unit tests for note classification, direction merging, cap-hit
// trajectory classification, and the serialized (serde) shape.
#[cfg(test)]
mod tests {
use super::*;
// A budget-cap note is non-informational, so it lowers confidence.
#[test]
fn worklist_capped_lowers_confidence() {
assert!(EngineNote::WorklistCapped { iterations: 10 }.lowers_confidence());
}
// The only informational note must leave confidence untouched.
#[test]
fn inline_cache_reused_does_not_lower_confidence() {
assert!(!EngineNote::InlineCacheReused.lowers_confidence());
}
// Pins the wire format: internally tagged under `kind`, snake_case.
#[test]
fn serialization_uses_snake_case_tag() {
let note = EngineNote::WorklistCapped { iterations: 7 };
let s = serde_json::to_string(&note).unwrap();
assert!(s.contains("\"kind\":\"worklist_capped\""));
assert!(s.contains("\"iterations\":7"));
}
// An identical note is pushed once; a different note is still added.
#[test]
fn push_unique_deduplicates() {
let mut v = smallvec::SmallVec::<[EngineNote; 2]>::new();
push_unique(&mut v, EngineNote::WorklistCapped { iterations: 1 });
push_unique(&mut v, EngineNote::WorklistCapped { iterations: 1 });
push_unique(&mut v, EngineNote::OriginsTruncated { dropped: 2 });
assert_eq!(v.len(), 2);
}
// Checks the direction of every `EngineNote` variant, one by one.
#[test]
fn direction_classification_is_exhaustive() {
// Budget caps ⇒ under-report: fixpoint aborted, results still sound.
assert_eq!(
EngineNote::WorklistCapped { iterations: 1 }.direction(),
LossDirection::UnderReport
);
assert_eq!(
EngineNote::OriginsTruncated { dropped: 1 }.direction(),
LossDirection::UnderReport
);
assert_eq!(
EngineNote::InFileFixpointCapped {
iterations: 1,
reason: CapHitReason::Unknown,
}
.direction(),
LossDirection::UnderReport
);
assert_eq!(
EngineNote::CrossFileFixpointCapped {
iterations: 1,
reason: CapHitReason::Unknown,
}
.direction(),
LossDirection::UnderReport
);
assert_eq!(
EngineNote::PointsToTruncated { dropped: 1 }.direction(),
LossDirection::UnderReport
);
// Widening ⇒ over-report: validation guards may have been lost.
assert_eq!(
EngineNote::PredicateStateWidened.direction(),
LossDirection::OverReport
);
assert_eq!(
EngineNote::PathEnvCapped.direction(),
LossDirection::OverReport
);
// Hard aborts ⇒ bail: IR or parse failed.
assert_eq!(
EngineNote::SsaLoweringBailed { reason: "x".into() }.direction(),
LossDirection::Bail
);
assert_eq!(
EngineNote::ParseTimeout { timeout_ms: 1 }.direction(),
LossDirection::Bail
);
// Informational ⇒ no credibility impact.
assert_eq!(
EngineNote::InlineCacheReused.direction(),
LossDirection::Informational
);
}
// Guards the variant declaration order that the derived `Ord` relies on.
#[test]
fn loss_direction_order_is_worst_last() {
// combine() takes the max, so Bail must dominate OverReport must
// dominate UnderReport must dominate Informational.
assert!(LossDirection::Bail > LossDirection::OverReport);
assert!(LossDirection::OverReport > LossDirection::UnderReport);
assert!(LossDirection::UnderReport > LossDirection::Informational);
}
// combine() is symmetric and returns the worse of its two inputs.
#[test]
fn combine_takes_the_worse_direction() {
assert_eq!(
LossDirection::UnderReport.combine(LossDirection::OverReport),
LossDirection::OverReport
);
assert_eq!(
LossDirection::OverReport.combine(LossDirection::UnderReport),
LossDirection::OverReport
);
assert_eq!(
LossDirection::Bail.combine(LossDirection::OverReport),
LossDirection::Bail
);
assert_eq!(
LossDirection::Informational.combine(LossDirection::Informational),
LossDirection::Informational
);
}
// No notes at all ⇒ nothing to report.
#[test]
fn worst_direction_empty_is_none() {
let notes: Vec<EngineNote> = vec![];
assert_eq!(worst_direction(&notes), None);
}
// Informational notes are skipped, so a slice of only those is `None`.
#[test]
fn worst_direction_informational_only_is_none() {
let notes = vec![EngineNote::InlineCacheReused, EngineNote::InlineCacheReused];
assert_eq!(worst_direction(&notes), None);
}
// Informational is ignored; OverReport outranks UnderReport.
#[test]
fn worst_direction_mixed_picks_worst() {
let notes = vec![
EngineNote::InlineCacheReused,
EngineNote::WorklistCapped { iterations: 1 },
EngineNote::PredicateStateWidened,
];
assert_eq!(worst_direction(&notes), Some(LossDirection::OverReport));
}
// Bail outranks every other direction.
#[test]
fn worst_direction_bail_dominates() {
let notes = vec![
EngineNote::PredicateStateWidened,
EngineNote::ParseTimeout { timeout_ms: 100 },
];
assert_eq!(worst_direction(&notes), Some(LossDirection::Bail));
}
// Fewer than two samples cannot be classified.
#[test]
fn cap_hit_reason_too_few_samples_unknown() {
assert_eq!(CapHitReason::classify(&[]), CapHitReason::Unknown);
assert_eq!(CapHitReason::classify(&[5]), CapHitReason::Unknown);
}
// An (a, b, a, b) tail with a != b is reported as period-2 oscillation.
#[test]
fn cap_hit_reason_detects_period_2_oscillation() {
let result = CapHitReason::classify(&[3, 7, 3, 7]);
match result {
CapHitReason::SuspectedOscillation { period, .. } => assert_eq!(period, 2),
other => panic!("expected SuspectedOscillation; got {other:?}"),
}
}
// Two equal non-zero samples at the end are a plateau.
#[test]
fn cap_hit_reason_detects_plateau() {
let result = CapHitReason::classify(&[10, 5, 5]);
assert_eq!(result, CapHitReason::Plateau { delta: 5 });
}
#[test]
fn cap_hit_reason_plateau_at_zero_is_not_a_plateau() {
// Zero-delta means we converged; classifier should not flag.
let result = CapHitReason::classify(&[3, 0, 0]);
// The trajectory never increases (repeated zeros are allowed), so
// it is classified as monotone-shrinking; not plateau.
match result {
CapHitReason::MonotoneShrinking { .. } => {}
other => panic!("expected MonotoneShrinking; got {other:?}"),
}
}
// A decreasing run is monotone-shrinking and keeps its samples in order.
#[test]
fn cap_hit_reason_detects_monotone_shrinking() {
let result = CapHitReason::classify(&[10, 7, 4, 2]);
match result {
CapHitReason::MonotoneShrinking { trajectory } => {
assert_eq!(trajectory.as_slice(), &[10, 7, 4, 2]);
}
other => panic!("expected MonotoneShrinking; got {other:?}"),
}
}
#[test]
fn cap_hit_reason_non_monotone_non_oscillating_is_unknown() {
// Goes up then down without a clean period-2 pattern.
let result = CapHitReason::classify(&[3, 8, 2]);
assert_eq!(result, CapHitReason::Unknown);
}
// Pins the wire format of `CapHitReason`: `kind` tag plus payload fields.
#[test]
fn cap_hit_reason_serializes_snake_case_tag() {
let r = CapHitReason::Plateau { delta: 4 };
let s = serde_json::to_string(&r).unwrap();
assert!(s.contains("\"kind\":\"plateau\""), "got {s}");
assert!(s.contains("\"delta\":4"), "got {s}");
}
#[test]
fn in_file_fixpoint_capped_serde_backcompat() {
// Older serialized notes without the `reason` field must still
// deserialize (serde(default) → CapHitReason::Unknown).
let legacy = r#"{"kind":"in_file_fixpoint_capped","iterations":7}"#;
let parsed: EngineNote = serde_json::from_str(legacy).unwrap();
match parsed {
EngineNote::InFileFixpointCapped { iterations, reason } => {
assert_eq!(iterations, 7);
assert_eq!(reason, CapHitReason::Unknown);
}
other => panic!("expected InFileFixpointCapped; got {other:?}"),
}
}
// Same back-compat guarantee for the cross-file variant.
#[test]
fn cross_file_fixpoint_capped_serde_backcompat() {
let legacy = r#"{"kind":"cross_file_fixpoint_capped","iterations":64}"#;
let parsed: EngineNote = serde_json::from_str(legacy).unwrap();
match parsed {
EngineNote::CrossFileFixpointCapped { iterations, reason } => {
assert_eq!(iterations, 64);
assert_eq!(reason, CapHitReason::Unknown);
}
other => panic!("expected CrossFileFixpointCapped; got {other:?}"),
}
}
// Pins the hyphenated tag strings returned by `LossDirection::tag`.
#[test]
fn loss_direction_tag_stable() {
assert_eq!(LossDirection::UnderReport.tag(), "under-report");
assert_eq!(LossDirection::OverReport.tag(), "over-report");
assert_eq!(LossDirection::Bail.tag(), "bail");
assert_eq!(LossDirection::Informational.tag(), "informational");
}
}