mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-21 20:18:06 +02:00
397 lines
14 KiB
Rust
397 lines
14 KiB
Rust
|
|
//! Structured evidence and confidence types for scan diagnostics.
//!
//! These types capture the provenance of findings (source locations,
//! sanitizer/guard info, state-machine transitions) in a structured form
//! that can be serialized to JSON and consumed by ranking, filtering,
//! and downstream tooling.
|
||
|
|
|
||
|
|
use crate::commands::scan::Diag;
|
||
|
|
use crate::patterns::Severity;
|
||
|
|
use serde::{Deserialize, Serialize};
|
||
|
|
use std::fmt;
|
||
|
|
use std::str::FromStr;
|
||
|
|
|
||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
|
// Confidence
|
||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
/// Confidence level for a diagnostic finding.
|
||
|
|
///
|
||
|
|
/// Ordered Low < Medium < High so that `>=` comparisons work naturally
|
||
|
|
/// for filtering (e.g. `--min-confidence medium` keeps Medium and High).
|
||
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
|
||
|
|
pub enum Confidence {
|
||
|
|
Low,
|
||
|
|
Medium,
|
||
|
|
High,
|
||
|
|
}
|
||
|
|
|
||
|
|
impl fmt::Display for Confidence {
|
||
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||
|
|
match self {
|
||
|
|
Self::Low => write!(f, "Low"),
|
||
|
|
Self::Medium => write!(f, "Medium"),
|
||
|
|
Self::High => write!(f, "High"),
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
impl FromStr for Confidence {
|
||
|
|
type Err = String;
|
||
|
|
|
||
|
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||
|
|
match s.to_ascii_lowercase().as_str() {
|
||
|
|
"low" => Ok(Self::Low),
|
||
|
|
"medium" | "med" => Ok(Self::Medium),
|
||
|
|
"high" => Ok(Self::High),
|
||
|
|
_ => Err(format!(
|
||
|
|
"unknown confidence level: {s:?} (expected low, medium, high)"
|
||
|
|
)),
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
|
// Evidence
|
||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
/// Structured evidence for a diagnostic finding.
|
||
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||
|
|
pub struct Evidence {
|
||
|
|
/// Where tainted data originated.
|
||
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||
|
|
pub source: Option<SpanEvidence>,
|
||
|
|
|
||
|
|
/// Where the dangerous operation happens.
|
||
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||
|
|
pub sink: Option<SpanEvidence>,
|
||
|
|
|
||
|
|
/// Validation guards protecting this path.
|
||
|
|
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||
|
|
pub guards: Vec<SpanEvidence>,
|
||
|
|
|
||
|
|
/// Sanitizers applied to this path.
|
||
|
|
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||
|
|
pub sanitizers: Vec<SpanEvidence>,
|
||
|
|
|
||
|
|
/// State-machine evidence (resource lifecycle / auth).
|
||
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||
|
|
pub state: Option<StateEvidence>,
|
||
|
|
|
||
|
|
/// Free-form notes for ranking and display.
|
||
|
|
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||
|
|
pub notes: Vec<String>,
|
||
|
|
}
|
||
|
|
|
||
|
|
impl Evidence {
|
||
|
|
/// Returns `true` if the evidence contains no useful data.
|
||
|
|
pub fn is_empty(&self) -> bool {
|
||
|
|
self.source.is_none()
|
||
|
|
&& self.sink.is_none()
|
||
|
|
&& self.guards.is_empty()
|
||
|
|
&& self.sanitizers.is_empty()
|
||
|
|
&& self.state.is_none()
|
||
|
|
&& self.notes.is_empty()
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/// A source-location evidence span.
|
||
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||
|
|
pub struct SpanEvidence {
|
||
|
|
pub path: String,
|
||
|
|
pub line: u32,
|
||
|
|
pub col: u32,
|
||
|
|
/// One of: `"source"`, `"sink"`, `"guard"`, `"sanitizer"`.
|
||
|
|
pub kind: String,
|
||
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||
|
|
pub snippet: Option<String>,
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Evidence from a state-machine analysis (resource lifecycle / auth).
|
||
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||
|
|
pub struct StateEvidence {
|
||
|
|
/// The state machine: `"resource"` or `"auth"`.
|
||
|
|
pub machine: String,
|
||
|
|
/// Variable name if available.
|
||
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||
|
|
pub subject: Option<String>,
|
||
|
|
/// State before the event.
|
||
|
|
pub from_state: String,
|
||
|
|
/// State after the event.
|
||
|
|
pub to_state: String,
|
||
|
|
}
|
||
|
|
|
||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
|
// compute_confidence
|
||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
/// Derive a confidence level for `diag` based on its rule ID, severity,
|
||
|
|
/// evidence, and analysis kind.
|
||
|
|
///
|
||
|
|
/// This is called as a post-pass after all findings are collected; findings
|
||
|
|
/// that already have a confidence set (e.g. from CFG analysis) are preserved.
|
||
|
|
pub fn compute_confidence(diag: &Diag) -> Confidence {
|
||
|
|
// Degraded analysis caps confidence
|
||
|
|
if let Some(ev) = &diag.evidence
|
||
|
|
&& ev.notes.iter().any(|n| n.starts_with("degraded:"))
|
||
|
|
{
|
||
|
|
return Confidence::Low;
|
||
|
|
}
|
||
|
|
|
||
|
|
let id = &diag.id;
|
||
|
|
|
||
|
|
if id.starts_with("taint-") {
|
||
|
|
if let Some(ev) = &diag.evidence
|
||
|
|
&& ev.notes.iter().any(|n| n == "path_validated")
|
||
|
|
{
|
||
|
|
return Confidence::Medium;
|
||
|
|
}
|
||
|
|
// source+sink present = High
|
||
|
|
if let Some(ev) = &diag.evidence
|
||
|
|
&& ev.source.is_some()
|
||
|
|
&& ev.sink.is_some()
|
||
|
|
{
|
||
|
|
return Confidence::High;
|
||
|
|
}
|
||
|
|
return Confidence::High; // default for taint
|
||
|
|
}
|
||
|
|
|
||
|
|
if id.starts_with("state-") {
|
||
|
|
return match id.as_str() {
|
||
|
|
"state-use-after-close" => Confidence::High,
|
||
|
|
"state-double-close" => Confidence::High,
|
||
|
|
"state-unauthed-access" => Confidence::High,
|
||
|
|
"state-resource-leak" => Confidence::Medium,
|
||
|
|
"state-resource-leak-possible" => Confidence::Low,
|
||
|
|
_ => Confidence::Medium,
|
||
|
|
};
|
||
|
|
}
|
||
|
|
|
||
|
|
if id.starts_with("cfg-") {
|
||
|
|
// If CFG conversion already set confidence, preserve it
|
||
|
|
return diag.confidence.unwrap_or(Confidence::Medium);
|
||
|
|
}
|
||
|
|
|
||
|
|
// AST patterns: High severity → Medium confidence, else Low
|
||
|
|
if diag.severity == Severity::High {
|
||
|
|
Confidence::Medium
|
||
|
|
} else {
|
||
|
|
Confidence::Low
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
|
// Tests
|
||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Rule id shared by the taint-flow tests.
    const TAINT_ID: &str = "taint-unsanitised-flow (source 1:1)";

    /// Builds a `Diag` with the given rule id and severity; every other
    /// field is a neutral placeholder (no confidence, no evidence).
    fn make_diag(id: &str, severity: Severity) -> Diag {
        Diag {
            path: "test.rs".into(),
            line: 1,
            col: 1,
            severity,
            id: id.into(),
            category: crate::patterns::FindingCategory::Security,
            path_validated: false,
            guard_kind: None,
            message: None,
            labels: vec![],
            confidence: None,
            evidence: None,
            rank_score: None,
            rank_reason: None,
            suppressed: false,
            suppression: None,
            rollup: None,
        }
    }

    /// Builds a `SpanEvidence` from its parts.
    fn span(path: &str, line: u32, col: u32, kind: &str, snippet: Option<&str>) -> SpanEvidence {
        SpanEvidence {
            path: path.into(),
            line,
            col,
            kind: kind.into(),
            snippet: snippet.map(String::from),
        }
    }

    /// Evidence with every field unset / empty.
    fn blank_evidence() -> Evidence {
        Evidence {
            source: None,
            sink: None,
            guards: vec![],
            sanitizers: vec![],
            state: None,
            notes: vec![],
        }
    }

    #[test]
    fn compute_confidence_taint_high() {
        let mut d = make_diag(TAINT_ID, Severity::High);
        d.evidence = Some(Evidence {
            source: Some(span("test.rs", 1, 1, "source", Some("env::var(\"X\")"))),
            sink: Some(span("test.rs", 10, 5, "sink", Some("exec()"))),
            ..blank_evidence()
        });
        assert_eq!(compute_confidence(&d), Confidence::High);
    }

    #[test]
    fn compute_confidence_taint_validated() {
        let mut d = make_diag(TAINT_ID, Severity::High);
        d.evidence = Some(Evidence {
            source: Some(span("test.rs", 1, 1, "source", None)),
            sink: Some(span("test.rs", 10, 5, "sink", None)),
            notes: vec!["path_validated".into()],
            ..blank_evidence()
        });
        assert_eq!(compute_confidence(&d), Confidence::Medium);
    }

    #[test]
    fn compute_confidence_degraded_caps_to_low() {
        let mut d = make_diag(TAINT_ID, Severity::High);
        d.evidence = Some(Evidence {
            notes: vec!["degraded:budget_exceeded".into()],
            ..blank_evidence()
        });
        assert_eq!(compute_confidence(&d), Confidence::Low);
    }

    #[test]
    fn compute_confidence_state_rules() {
        let cases = [
            ("state-use-after-close", Severity::High, Confidence::High),
            ("state-double-close", Severity::Medium, Confidence::High),
            ("state-unauthed-access", Severity::High, Confidence::High),
            ("state-resource-leak", Severity::Medium, Confidence::Medium),
            ("state-resource-leak-possible", Severity::Low, Confidence::Low),
        ];
        for (id, severity, expected) in cases {
            assert_eq!(
                compute_confidence(&make_diag(id, severity)),
                expected,
                "unexpected confidence for rule {id}",
            );
        }
    }

    #[test]
    fn compute_confidence_cfg_preserves_existing() {
        let mut d = make_diag("cfg-unguarded-sink", Severity::High);
        d.confidence = Some(Confidence::Low);
        assert_eq!(compute_confidence(&d), Confidence::Low);
    }

    #[test]
    fn compute_confidence_ast_low() {
        let d = make_diag("rs.code_exec.eval", Severity::Medium);
        assert_eq!(compute_confidence(&d), Confidence::Low);
    }

    #[test]
    fn compute_confidence_ast_high_severity_medium() {
        let d = make_diag("rs.code_exec.eval", Severity::High);
        assert_eq!(compute_confidence(&d), Confidence::Medium);
    }

    #[test]
    fn evidence_is_empty() {
        let ev = blank_evidence();
        assert!(ev.is_empty());

        let ev2 = Evidence {
            source: Some(span("x.rs", 1, 1, "source", None)),
            ..blank_evidence()
        };
        assert!(!ev2.is_empty());
    }

    #[test]
    fn confidence_ord() {
        assert!(Confidence::Low < Confidence::Medium);
        assert!(Confidence::Medium < Confidence::High);
        assert!(Confidence::Low < Confidence::High);
    }

    #[test]
    fn confidence_display_and_parse() {
        assert_eq!(Confidence::Low.to_string(), "Low");
        assert_eq!(Confidence::Medium.to_string(), "Medium");
        assert_eq!(Confidence::High.to_string(), "High");

        assert_eq!("low".parse::<Confidence>().unwrap(), Confidence::Low);
        assert_eq!("MEDIUM".parse::<Confidence>().unwrap(), Confidence::Medium);
        assert_eq!("High".parse::<Confidence>().unwrap(), Confidence::High);
        assert!("invalid".parse::<Confidence>().is_err());
    }

    #[test]
    fn compute_confidence_does_not_override_preset() {
        // AST patterns set confidence directly; compute_confidence must not overwrite.
        let mut d = make_diag("rs.quality.expect", Severity::Low);
        d.confidence = Some(Confidence::High);
        // The post-pass only runs when confidence is None, but verify compute_confidence
        // itself would return something different (Low for AST + Low severity), proving
        // the guard in scan.rs is necessary.
        assert_eq!(compute_confidence(&d), Confidence::Low);
        // The actual guard: confidence is already Some, so scan.rs skips compute_confidence.
        assert_eq!(d.confidence, Some(Confidence::High));
    }

    #[test]
    fn json_omits_none_fields() {
        let ev = blank_evidence();
        let json = serde_json::to_string(&ev).unwrap();
        assert_eq!(json, "{}");
    }
}
|