mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
* refactor: Update comments for clarity and add expectations.json files for performance metrics * feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks * feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks * refactor: Simplify code formatting for better readability in multiple files * refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration * feat: Update Java and Python patterns to include new security rules * refactor: Improve comment clarity and consistency across multiple Rust files * refactor: Simplify code formatting for improved readability in integration tests and module files * refactor: Improve comment formatting and enhance clarity in assertions across multiple files
474 lines
17 KiB
Rust
474 lines
17 KiB
Rust
//! Provenance notes attached to findings when the engine has hit an
|
|
//! internal budget, widening, or lowering cap.
|
|
//!
|
|
//! Each note carries a [`LossDirection`] classification.
|
|
//! [`crate::evidence::compute_confidence`] caps confidence at `Medium`
|
|
//! for `OverReport`/`Bail` notes, and [`crate::rank`] applies a
|
|
//! direction-aware completeness penalty.
|
|
|
|
use serde::{Deserialize, Serialize};
|
|
use smallvec::SmallVec;
|
|
|
|
/// Why a fix-point loop hit its safety cap. Distinguishes "raise the
|
|
/// cap" cases from non-monotonicity bugs in cap-hit telemetry.
|
|
/// Serialized as a tagged snake_case enum for SARIF/JSON consumers.
|
|
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
|
#[serde(tag = "kind", rename_all = "snake_case")]
|
|
pub enum CapHitReason {
|
|
/// Change-set still decreasing when the cap fired. Safe to raise
|
|
/// the cap; the SCC is just larger than budget.
|
|
MonotoneShrinking { trajectory: SmallVec<[u32; 4]> },
|
|
/// Change-set held steady at a non-zero value for ≥2 iterations.
|
|
/// Same keys updating back and forth, investigate.
|
|
Plateau { delta: u32 },
|
|
/// Period-2 oscillation detected. Non-monotone; raising the cap
|
|
/// will not help. File a bug.
|
|
SuspectedOscillation {
|
|
period: u8,
|
|
trajectory: SmallVec<[u32; 4]>,
|
|
},
|
|
/// No trajectory recorded (e.g. cap fired after a single iteration).
|
|
#[default]
|
|
Unknown,
|
|
}
|
|
|
|
impl CapHitReason {
|
|
/// Classify a trajectory of per-iteration change-set sizes
|
|
/// (most recent last). Rules: <2 samples → `Unknown`; a,b,a,b with
|
|
/// a≠b → `SuspectedOscillation`; last two equal non-zero →
|
|
/// `Plateau`; strictly decreasing tail → `MonotoneShrinking`;
|
|
/// otherwise `Unknown`.
|
|
pub fn classify(deltas: &[u32]) -> CapHitReason {
|
|
if deltas.len() < 2 {
|
|
return CapHitReason::Unknown;
|
|
}
|
|
|
|
// Detect period-2 oscillation: last 4 samples as (a,b,a,b) with a ≠ b.
|
|
if deltas.len() >= 4 {
|
|
let n = deltas.len();
|
|
let (a0, b0, a1, b1) = (deltas[n - 4], deltas[n - 3], deltas[n - 2], deltas[n - 1]);
|
|
if a0 == a1 && b0 == b1 && a0 != b0 {
|
|
let tail = deltas
|
|
.iter()
|
|
.rev()
|
|
.take(4)
|
|
.rev()
|
|
.copied()
|
|
.collect::<SmallVec<[u32; 4]>>();
|
|
return CapHitReason::SuspectedOscillation {
|
|
period: 2,
|
|
trajectory: tail,
|
|
};
|
|
}
|
|
}
|
|
|
|
let last = deltas[deltas.len() - 1];
|
|
let prev = deltas[deltas.len() - 2];
|
|
|
|
// Plateau: change-set size stuck at the same non-zero value.
|
|
if last == prev && last > 0 {
|
|
return CapHitReason::Plateau { delta: last };
|
|
}
|
|
|
|
// Monotone shrinking: strictly decreasing over the full
|
|
// recorded tail. (Equal-zero at the end would have meant
|
|
// convergence, so the cap wouldn't have fired.)
|
|
let mut monotone = true;
|
|
for w in deltas.windows(2) {
|
|
if w[1] > w[0] {
|
|
monotone = false;
|
|
break;
|
|
}
|
|
}
|
|
if monotone {
|
|
let tail = deltas
|
|
.iter()
|
|
.rev()
|
|
.take(4)
|
|
.rev()
|
|
.copied()
|
|
.collect::<SmallVec<[u32; 4]>>();
|
|
return CapHitReason::MonotoneShrinking { trajectory: tail };
|
|
}
|
|
|
|
CapHitReason::Unknown
|
|
}
|
|
|
|
/// Stable snake-case tag for log/diag consumption.
|
|
pub fn tag(&self) -> &'static str {
|
|
match self {
|
|
CapHitReason::MonotoneShrinking { .. } => "monotone_shrinking",
|
|
CapHitReason::Plateau { .. } => "plateau",
|
|
CapHitReason::SuspectedOscillation { .. } => "suspected_oscillation",
|
|
CapHitReason::Unknown => "unknown",
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Direction of precision loss encoded by an [`EngineNote`].
|
|
/// Variants are ordered by worsening credibility impact;
|
|
/// [`combine`](Self::combine) takes the max.
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
|
|
#[serde(rename_all = "snake_case")]
|
|
pub enum LossDirection {
|
|
/// Analysis converged; the note records a harmless event.
|
|
Informational,
|
|
/// Analysis may have missed findings (worklist was capped). Reported
|
|
/// findings remain sound, the result set is a lower bound.
|
|
UnderReport,
|
|
/// Analysis may have reported a spurious finding (e.g. predicate
|
|
/// state widened to top, dropping a guard). Likely FP.
|
|
OverReport,
|
|
/// Analysis aborted before producing a trustworthy result.
|
|
/// Treat the finding as a starting point, not a confirmed flow.
|
|
Bail,
|
|
}
|
|
|
|
impl LossDirection {
|
|
/// Merge by taking the worse (later in `Ord`).
|
|
pub fn combine(self, other: LossDirection) -> LossDirection {
|
|
self.max(other)
|
|
}
|
|
|
|
/// Snake-case tag used in console output and JSON properties.
|
|
pub fn tag(self) -> &'static str {
|
|
match self {
|
|
LossDirection::Informational => "informational",
|
|
LossDirection::UnderReport => "under-report",
|
|
LossDirection::OverReport => "over-report",
|
|
LossDirection::Bail => "bail",
|
|
}
|
|
}
|
|
}
|
|
|
|
/// A single provenance event recorded during analysis.
|
|
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
|
#[serde(tag = "kind", rename_all = "snake_case")]
|
|
pub enum EngineNote {
|
|
/// Taint worklist hit its iteration budget. UnderReport.
|
|
WorklistCapped { iterations: u32 },
|
|
/// Per-value origin set truncated to `analysis.engine.max_origins`
|
|
/// (default 32). UnderReport, dropped origins correspond to real
|
|
/// source flows whose findings won't emit.
|
|
OriginsTruncated { dropped: u32 },
|
|
/// JS/TS pass-2 in-file global propagation hit its cap. UnderReport.
|
|
InFileFixpointCapped {
|
|
iterations: u32,
|
|
#[serde(default)]
|
|
reason: CapHitReason,
|
|
},
|
|
/// Cross-file SCC fixpoint hit `SCC_FIXPOINT_SAFETY_CAP`. UnderReport.
|
|
CrossFileFixpointCapped {
|
|
iterations: u32,
|
|
#[serde(default)]
|
|
reason: CapHitReason,
|
|
},
|
|
/// SSA lowering produced an empty body. Bail.
|
|
SsaLoweringBailed { reason: String },
|
|
/// Tree-sitter parse exceeded the timeout. Bail.
|
|
ParseTimeout { timeout_ms: u32 },
|
|
/// Predicate state widened to top to keep the lattice monotone.
|
|
/// OverReport, guards may have been lost.
|
|
PredicateStateWidened,
|
|
/// Path-environment constraints widened to top. OverReport.
|
|
PathEnvCapped,
|
|
/// Inline cache reused a cached body. Informational.
|
|
InlineCacheReused,
|
|
/// Points-to set truncated to `analysis.engine.max_pointsto`
|
|
/// (default 32). UnderReport.
|
|
PointsToTruncated { dropped: u32 },
|
|
}
|
|
|
|
impl EngineNote {
|
|
/// Direction of precision loss for this note. New variants must
|
|
/// declare one explicitly.
|
|
pub fn direction(&self) -> LossDirection {
|
|
match self {
|
|
EngineNote::WorklistCapped { .. } => LossDirection::UnderReport,
|
|
EngineNote::OriginsTruncated { .. } => LossDirection::UnderReport,
|
|
EngineNote::InFileFixpointCapped { .. } => LossDirection::UnderReport,
|
|
EngineNote::CrossFileFixpointCapped { .. } => LossDirection::UnderReport,
|
|
EngineNote::SsaLoweringBailed { .. } => LossDirection::Bail,
|
|
EngineNote::ParseTimeout { .. } => LossDirection::Bail,
|
|
EngineNote::PredicateStateWidened => LossDirection::OverReport,
|
|
EngineNote::PathEnvCapped => LossDirection::OverReport,
|
|
EngineNote::InlineCacheReused => LossDirection::Informational,
|
|
EngineNote::PointsToTruncated { .. } => LossDirection::UnderReport,
|
|
}
|
|
}
|
|
|
|
/// True for any non-informational direction. Drives the
|
|
/// `confidence_capped` SARIF property.
|
|
pub fn lowers_confidence(&self) -> bool {
|
|
self.direction() != LossDirection::Informational
|
|
}
|
|
}
|
|
|
|
/// Worst non-informational direction across a slice of notes, or
|
|
/// `None` if the slice is empty or only carries informational notes.
|
|
pub fn worst_direction(notes: &[EngineNote]) -> Option<LossDirection> {
|
|
let mut worst: Option<LossDirection> = None;
|
|
for note in notes {
|
|
let dir = note.direction();
|
|
if dir == LossDirection::Informational {
|
|
continue;
|
|
}
|
|
worst = Some(match worst {
|
|
Some(w) => w.combine(dir),
|
|
None => dir,
|
|
});
|
|
}
|
|
worst
|
|
}
|
|
|
|
/// Push-if-not-present.
|
|
pub fn push_unique(notes: &mut smallvec::SmallVec<[EngineNote; 2]>, note: EngineNote) {
|
|
if !notes.iter().any(|n| n == ¬e) {
|
|
notes.push(note);
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    //! Unit tests for note classification, direction merging,
    //! cap-hit trajectory classification, and serde compatibility.

    use super::*;

    #[test]
    fn worklist_capped_lowers_confidence() {
        assert!(EngineNote::WorklistCapped { iterations: 10 }.lowers_confidence());
    }

    #[test]
    fn inline_cache_reused_does_not_lower_confidence() {
        assert!(!EngineNote::InlineCacheReused.lowers_confidence());
    }

    #[test]
    fn serialization_uses_snake_case_tag() {
        let note = EngineNote::WorklistCapped { iterations: 7 };
        let s = serde_json::to_string(&note).unwrap();
        assert!(s.contains("\"kind\":\"worklist_capped\""));
        assert!(s.contains("\"iterations\":7"));
    }

    #[test]
    fn push_unique_deduplicates() {
        let mut v = smallvec::SmallVec::<[EngineNote; 2]>::new();
        push_unique(&mut v, EngineNote::WorklistCapped { iterations: 1 });
        push_unique(&mut v, EngineNote::WorklistCapped { iterations: 1 });
        push_unique(&mut v, EngineNote::OriginsTruncated { dropped: 2 });
        assert_eq!(v.len(), 2);
    }

    #[test]
    fn direction_classification_is_exhaustive() {
        // Budget caps ⇒ under-report: fixpoint aborted, results still sound.
        assert_eq!(
            EngineNote::WorklistCapped { iterations: 1 }.direction(),
            LossDirection::UnderReport
        );
        assert_eq!(
            EngineNote::OriginsTruncated { dropped: 1 }.direction(),
            LossDirection::UnderReport
        );
        assert_eq!(
            EngineNote::InFileFixpointCapped {
                iterations: 1,
                reason: CapHitReason::Unknown,
            }
            .direction(),
            LossDirection::UnderReport
        );
        assert_eq!(
            EngineNote::CrossFileFixpointCapped {
                iterations: 1,
                reason: CapHitReason::Unknown,
            }
            .direction(),
            LossDirection::UnderReport
        );
        assert_eq!(
            EngineNote::PointsToTruncated { dropped: 1 }.direction(),
            LossDirection::UnderReport
        );

        // Widening ⇒ over-report: validation guards may have been lost.
        assert_eq!(
            EngineNote::PredicateStateWidened.direction(),
            LossDirection::OverReport
        );
        assert_eq!(
            EngineNote::PathEnvCapped.direction(),
            LossDirection::OverReport
        );

        // Hard aborts ⇒ bail: IR or parse failed.
        assert_eq!(
            EngineNote::SsaLoweringBailed { reason: "x".into() }.direction(),
            LossDirection::Bail
        );
        assert_eq!(
            EngineNote::ParseTimeout { timeout_ms: 1 }.direction(),
            LossDirection::Bail
        );

        // Informational ⇒ no credibility impact.
        assert_eq!(
            EngineNote::InlineCacheReused.direction(),
            LossDirection::Informational
        );
    }

    #[test]
    fn loss_direction_order_is_worst_last() {
        // combine() takes the max, so Bail must dominate OverReport must
        // dominate UnderReport must dominate Informational.
        assert!(LossDirection::Bail > LossDirection::OverReport);
        assert!(LossDirection::OverReport > LossDirection::UnderReport);
        assert!(LossDirection::UnderReport > LossDirection::Informational);
    }

    #[test]
    fn combine_takes_the_worse_direction() {
        assert_eq!(
            LossDirection::UnderReport.combine(LossDirection::OverReport),
            LossDirection::OverReport
        );
        assert_eq!(
            LossDirection::OverReport.combine(LossDirection::UnderReport),
            LossDirection::OverReport
        );
        assert_eq!(
            LossDirection::Bail.combine(LossDirection::OverReport),
            LossDirection::Bail
        );
        assert_eq!(
            LossDirection::Informational.combine(LossDirection::Informational),
            LossDirection::Informational
        );
    }

    #[test]
    fn worst_direction_empty_is_none() {
        let notes: Vec<EngineNote> = vec![];
        assert_eq!(worst_direction(&notes), None);
    }

    #[test]
    fn worst_direction_informational_only_is_none() {
        let notes = vec![EngineNote::InlineCacheReused, EngineNote::InlineCacheReused];
        assert_eq!(worst_direction(&notes), None);
    }

    #[test]
    fn worst_direction_mixed_picks_worst() {
        let notes = vec![
            EngineNote::InlineCacheReused,
            EngineNote::WorklistCapped { iterations: 1 },
            EngineNote::PredicateStateWidened,
        ];
        assert_eq!(worst_direction(&notes), Some(LossDirection::OverReport));
    }

    #[test]
    fn worst_direction_bail_dominates() {
        let notes = vec![
            EngineNote::PredicateStateWidened,
            EngineNote::ParseTimeout { timeout_ms: 100 },
        ];
        assert_eq!(worst_direction(&notes), Some(LossDirection::Bail));
    }

    #[test]
    fn cap_hit_reason_too_few_samples_unknown() {
        assert_eq!(CapHitReason::classify(&[]), CapHitReason::Unknown);
        assert_eq!(CapHitReason::classify(&[5]), CapHitReason::Unknown);
    }

    #[test]
    fn cap_hit_reason_detects_period_2_oscillation() {
        let result = CapHitReason::classify(&[3, 7, 3, 7]);
        match result {
            CapHitReason::SuspectedOscillation { period, .. } => assert_eq!(period, 2),
            other => panic!("expected SuspectedOscillation; got {other:?}"),
        }
    }

    #[test]
    fn cap_hit_reason_detects_plateau() {
        let result = CapHitReason::classify(&[10, 5, 5]);
        assert_eq!(result, CapHitReason::Plateau { delta: 5 });
    }

    #[test]
    fn cap_hit_reason_plateau_at_zero_is_not_a_plateau() {
        // Zero-delta means we converged; classifier should not flag.
        let result = CapHitReason::classify(&[3, 0, 0]);
        // The sequence never increases, so it is reported as
        // monotone-shrinking rather than as a plateau.
        match result {
            CapHitReason::MonotoneShrinking { .. } => {}
            other => panic!("expected MonotoneShrinking; got {other:?}"),
        }
    }

    #[test]
    fn cap_hit_reason_detects_monotone_shrinking() {
        let result = CapHitReason::classify(&[10, 7, 4, 2]);
        match result {
            CapHitReason::MonotoneShrinking { trajectory } => {
                assert_eq!(trajectory.as_slice(), &[10, 7, 4, 2]);
            }
            other => panic!("expected MonotoneShrinking; got {other:?}"),
        }
    }

    #[test]
    fn cap_hit_reason_non_monotone_non_oscillating_is_unknown() {
        // Goes up then down without a clean period-2 pattern.
        let result = CapHitReason::classify(&[3, 8, 2]);
        assert_eq!(result, CapHitReason::Unknown);
    }

    #[test]
    fn cap_hit_reason_serializes_snake_case_tag() {
        let r = CapHitReason::Plateau { delta: 4 };
        let s = serde_json::to_string(&r).unwrap();
        assert!(s.contains("\"kind\":\"plateau\""), "got {s}");
        assert!(s.contains("\"delta\":4"), "got {s}");
    }

    #[test]
    fn in_file_fixpoint_capped_serde_backcompat() {
        // Older serialized notes without the `reason` field must still
        // deserialize (serde(default) → CapHitReason::Unknown).
        let legacy = r#"{"kind":"in_file_fixpoint_capped","iterations":7}"#;
        let parsed: EngineNote = serde_json::from_str(legacy).unwrap();
        match parsed {
            EngineNote::InFileFixpointCapped { iterations, reason } => {
                assert_eq!(iterations, 7);
                assert_eq!(reason, CapHitReason::Unknown);
            }
            other => panic!("expected InFileFixpointCapped; got {other:?}"),
        }
    }

    #[test]
    fn cross_file_fixpoint_capped_serde_backcompat() {
        // Same back-compat guarantee for the cross-file variant.
        let legacy = r#"{"kind":"cross_file_fixpoint_capped","iterations":64}"#;
        let parsed: EngineNote = serde_json::from_str(legacy).unwrap();
        match parsed {
            EngineNote::CrossFileFixpointCapped { iterations, reason } => {
                assert_eq!(iterations, 64);
                assert_eq!(reason, CapHitReason::Unknown);
            }
            other => panic!("expected CrossFileFixpointCapped; got {other:?}"),
        }
    }

    #[test]
    fn loss_direction_tag_stable() {
        assert_eq!(LossDirection::UnderReport.tag(), "under-report");
        assert_eq!(LossDirection::OverReport.tag(), "over-report");
        assert_eq!(LossDirection::Bail.tag(), "bail");
        assert_eq!(LossDirection::Informational.tag(), "informational");
    }
}
|