2026-05-05 03:43:45 -04:00
|
|
|
//! Harness specification: the bridge between a static finding and a runnable harness.
|
|
|
|
|
//!
|
|
|
|
|
//! A [`HarnessSpec`] is built from a [`crate::commands::scan::Diag`] without
|
|
|
|
|
//! any further analysis. It records what the dynamic side needs to know:
|
|
|
|
|
//! which entry point to drive, which parameter carries the payload, what
|
|
|
|
|
//! sink (cap) we expect to hit, and which language toolchain to use.
|
|
|
|
|
//!
|
2026-05-11 21:19:03 -04:00
|
|
|
//! Construction is total but may return `Err` when the finding lacks the
|
|
|
|
|
//! evidence required to drive it dynamically (confidence too low, no source
|
|
|
|
|
//! span, no callable entry, sink in dead code, etc.). Those findings stay
|
|
|
|
|
//! static-only.
|
|
|
|
|
//!
|
|
|
|
|
//! # Versioning
|
|
|
|
|
//!
|
|
|
|
|
//! [`SPEC_FORMAT_VERSION`] is baked into every [`HarnessSpec::spec_hash`].
|
|
|
|
|
//! Bump it — and update `compute_spec_hash` — whenever any field changes
|
|
|
|
|
//! meaning, the hash inputs change, or the corpus changes in a way that
|
|
|
|
|
//! would invalidate previously-computed hashes.
|
2026-05-05 03:43:45 -04:00
|
|
|
|
|
|
|
|
use crate::commands::scan::Diag;
|
2026-05-11 21:19:03 -04:00
|
|
|
use crate::dynamic::corpus::CORPUS_VERSION;
|
|
|
|
|
use crate::evidence::{Confidence, FlowStepKind, UnsupportedReason};
|
2026-05-05 03:43:45 -04:00
|
|
|
use crate::labels::Cap;
|
|
|
|
|
use crate::symbol::Lang;
|
|
|
|
|
use serde::{Deserialize, Serialize};
|
2026-05-11 21:19:03 -04:00
|
|
|
use std::path::Path;
|
|
|
|
|
|
|
|
|
|
/// Format version of [`HarnessSpec`]; it is the first input fed to the spec
/// hash.
///
/// Increment it when a [`HarnessSpec`] field changes meaning or when the set
/// of inputs to the spec hash changes. Downstream tools should refuse specs
/// carrying a version they do not recognise.
pub const SPEC_FORMAT_VERSION: u32 = 1;
|
|
|
|
|
|
|
|
|
|
/// Identifies the entry point extracted from a taint flow.
|
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
|
|
|
|
pub struct EntryRef {
|
|
|
|
|
/// Project-relative path of the file containing the entry function.
|
|
|
|
|
pub file: String,
|
|
|
|
|
/// Name of the entry function (unqualified).
|
|
|
|
|
pub function: String,
|
|
|
|
|
}
|
2026-05-05 03:43:45 -04:00
|
|
|
|
|
|
|
|
/// What kind of entry point the harness should call.
|
|
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
|
|
|
|
pub enum EntryKind {
|
|
|
|
|
/// Free function. Build a `main` that calls it directly.
|
|
|
|
|
Function,
|
|
|
|
|
/// HTTP route. Stand up the framework, send a request.
|
|
|
|
|
HttpRoute,
|
|
|
|
|
/// CLI subcommand. Spawn the binary with crafted argv.
|
|
|
|
|
CliSubcommand,
|
|
|
|
|
/// Library API surface. Build an in-process consumer.
|
|
|
|
|
LibraryApi,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Where the payload goes when the harness fires.
|
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
|
|
|
pub enum PayloadSlot {
|
|
|
|
|
/// Nth positional parameter of the entry function.
|
|
|
|
|
Param(usize),
|
|
|
|
|
/// Named HTTP query parameter.
|
|
|
|
|
QueryParam(String),
|
|
|
|
|
/// HTTP request body (raw bytes).
|
|
|
|
|
HttpBody,
|
|
|
|
|
/// Environment variable.
|
|
|
|
|
EnvVar(String),
|
|
|
|
|
/// CLI argv slot (0-based, excluding argv[0]).
|
|
|
|
|
Argv(usize),
|
|
|
|
|
/// stdin.
|
|
|
|
|
Stdin,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Self-contained recipe for building and running a single harness.
|
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
|
|
|
pub struct HarnessSpec {
|
2026-05-11 21:19:03 -04:00
|
|
|
/// Stable id of the source finding (`Diag::stable_hash` as hex).
|
2026-05-05 03:43:45 -04:00
|
|
|
pub finding_id: String,
|
|
|
|
|
/// Project-relative path to the file holding the entry point.
|
|
|
|
|
pub entry_file: String,
|
|
|
|
|
/// Function/route/subcommand name to drive.
|
|
|
|
|
pub entry_name: String,
|
|
|
|
|
/// How to invoke it.
|
|
|
|
|
pub entry_kind: EntryKind,
|
|
|
|
|
/// Source language (drives toolchain selection).
|
|
|
|
|
pub lang: Lang,
|
2026-05-11 21:19:03 -04:00
|
|
|
/// Toolchain identifier string (e.g. `"rust-stable"`, `"node-20"`).
|
|
|
|
|
/// Informational; harness builder may override for local installs.
|
|
|
|
|
pub toolchain_id: String,
|
2026-05-05 03:43:45 -04:00
|
|
|
/// Where the payload is injected.
|
|
|
|
|
pub payload_slot: PayloadSlot,
|
|
|
|
|
/// Sink capability we expect to fire (drives oracle + corpus pick).
|
|
|
|
|
pub expected_cap: Cap,
|
|
|
|
|
/// Optional symex-derived constraint hints (prefix/suffix locks, etc.).
|
|
|
|
|
/// Populated later from `Evidence::engine_notes` when available.
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub constraint_hints: Vec<String>,
|
2026-05-11 21:19:03 -04:00
|
|
|
/// Blake3 hash (16 hex chars) of the spec's key fields, version-pinned.
|
|
|
|
|
/// Stable across identical specs; used for deduplication and caching.
|
|
|
|
|
pub spec_hash: String,
|
2026-05-05 03:43:45 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl HarnessSpec {
|
2026-05-11 21:19:03 -04:00
|
|
|
/// Build a spec from a finding. Returns `Err` with a typed reason when
|
|
|
|
|
/// the finding cannot be driven dynamically.
|
2026-05-05 03:43:45 -04:00
|
|
|
///
|
2026-05-11 21:19:03 -04:00
|
|
|
/// Conditions for `None` return:
|
|
|
|
|
/// - Confidence below `Medium`
|
|
|
|
|
/// - No `flow_steps` in evidence
|
|
|
|
|
/// - No callable entry (source step missing a `function` annotation)
|
|
|
|
|
/// - Unknown language (file extension unrecognised)
|
|
|
|
|
/// - Zero sink capability bits
|
|
|
|
|
pub fn from_finding(diag: &Diag) -> Result<Self, UnsupportedReason> {
|
|
|
|
|
// Require at least Medium confidence to attempt dynamic verification.
|
|
|
|
|
match diag.confidence {
|
|
|
|
|
Some(c) if c >= Confidence::Medium => {}
|
|
|
|
|
_ => return Err(UnsupportedReason::ConfidenceTooLow),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let evidence = diag.evidence.as_ref().ok_or(UnsupportedReason::NoFlowSteps)?;
|
|
|
|
|
|
|
|
|
|
if evidence.flow_steps.is_empty() {
|
|
|
|
|
return Err(UnsupportedReason::NoFlowSteps);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let entry = outermost_entry(&evidence.flow_steps)
|
|
|
|
|
.ok_or(UnsupportedReason::SpecDerivationFailed)?;
|
|
|
|
|
|
|
|
|
|
let ext = Path::new(&entry.file)
|
|
|
|
|
.extension()
|
|
|
|
|
.and_then(|e| e.to_str())
|
|
|
|
|
.unwrap_or("");
|
|
|
|
|
let lang = Lang::from_extension(ext).ok_or(UnsupportedReason::SpecDerivationFailed)?;
|
|
|
|
|
|
|
|
|
|
let expected_cap = Cap::from_bits_truncate(evidence.sink_caps);
|
|
|
|
|
if expected_cap.is_empty() {
|
|
|
|
|
return Err(UnsupportedReason::SpecDerivationFailed);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let toolchain_id = toolchain_id_for_lang(lang).to_owned();
|
|
|
|
|
|
|
|
|
|
let mut spec = HarnessSpec {
|
|
|
|
|
finding_id: format!("{:016x}", diag.stable_hash),
|
|
|
|
|
entry_file: entry.file,
|
|
|
|
|
entry_name: entry.function,
|
|
|
|
|
entry_kind: EntryKind::Function,
|
|
|
|
|
lang,
|
|
|
|
|
toolchain_id,
|
|
|
|
|
payload_slot: PayloadSlot::Param(0),
|
|
|
|
|
expected_cap,
|
|
|
|
|
constraint_hints: vec![],
|
|
|
|
|
spec_hash: String::new(),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
spec.spec_hash = compute_spec_hash(&spec);
|
|
|
|
|
Ok(spec)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Walk `flow_steps` and return the entry point: the enclosing function of
|
|
|
|
|
/// the first `Source` step that has a function annotation. This is the
|
|
|
|
|
/// outermost callable that receives the tainted input.
|
|
|
|
|
pub fn outermost_entry(steps: &[crate::evidence::FlowStep]) -> Option<EntryRef> {
|
|
|
|
|
for step in steps {
|
|
|
|
|
if matches!(step.kind, FlowStepKind::Source) {
|
|
|
|
|
if let Some(ref func) = step.function {
|
|
|
|
|
if !func.is_empty() {
|
|
|
|
|
return Some(EntryRef {
|
|
|
|
|
file: step.file.clone(),
|
|
|
|
|
function: func.clone(),
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
None
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Default toolchain label for a language (informational; harness builder
|
|
|
|
|
/// may override for locally-installed compilers/runtimes).
|
|
|
|
|
fn toolchain_id_for_lang(lang: Lang) -> &'static str {
|
|
|
|
|
match lang {
|
|
|
|
|
Lang::Rust => "rust-stable",
|
|
|
|
|
Lang::C => "gcc-stable",
|
|
|
|
|
Lang::Cpp => "g++-stable",
|
|
|
|
|
Lang::Java => "java-21",
|
|
|
|
|
Lang::Go => "go-stable",
|
|
|
|
|
Lang::Php => "php-8",
|
|
|
|
|
Lang::Python => "python-3",
|
|
|
|
|
Lang::Ruby => "ruby-3",
|
|
|
|
|
Lang::TypeScript | Lang::JavaScript => "node-20",
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Blake3 hash of the spec's key fields, truncated to 8 bytes and hex-encoded.
|
|
|
|
|
///
|
|
|
|
|
/// Inputs (in order):
|
|
|
|
|
/// `SPEC_FORMAT_VERSION` (u32 LE), entry_file, entry_name, payload_slot tag
|
|
|
|
|
/// + value, expected_cap bits (u32 LE), sorted constraint_hints,
|
|
|
|
|
/// toolchain_id, `CORPUS_VERSION` (u32 LE).
|
|
|
|
|
///
|
|
|
|
|
/// Bump [`SPEC_FORMAT_VERSION`] when the inputs or semantics change.
|
|
|
|
|
fn compute_spec_hash(spec: &HarnessSpec) -> String {
|
|
|
|
|
let mut h = blake3::Hasher::new();
|
|
|
|
|
|
|
|
|
|
h.update(&SPEC_FORMAT_VERSION.to_le_bytes());
|
|
|
|
|
h.update(spec.entry_file.as_bytes());
|
|
|
|
|
h.update(b"\0");
|
|
|
|
|
h.update(spec.entry_name.as_bytes());
|
|
|
|
|
h.update(b"\0");
|
|
|
|
|
|
|
|
|
|
// Payload slot: tag byte + optional value
|
|
|
|
|
match &spec.payload_slot {
|
|
|
|
|
PayloadSlot::Param(n) => {
|
|
|
|
|
h.update(&[0u8]);
|
|
|
|
|
h.update(&(*n as u64).to_le_bytes());
|
|
|
|
|
}
|
|
|
|
|
PayloadSlot::QueryParam(s) => {
|
|
|
|
|
h.update(&[1u8]);
|
|
|
|
|
h.update(s.as_bytes());
|
|
|
|
|
}
|
|
|
|
|
PayloadSlot::HttpBody => {
|
|
|
|
|
h.update(&[2u8]);
|
|
|
|
|
}
|
|
|
|
|
PayloadSlot::EnvVar(s) => {
|
|
|
|
|
h.update(&[3u8]);
|
|
|
|
|
h.update(s.as_bytes());
|
|
|
|
|
}
|
|
|
|
|
PayloadSlot::Argv(n) => {
|
|
|
|
|
h.update(&[4u8]);
|
|
|
|
|
h.update(&(*n as u64).to_le_bytes());
|
|
|
|
|
}
|
|
|
|
|
PayloadSlot::Stdin => {
|
|
|
|
|
h.update(&[5u8]);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
h.update(&spec.expected_cap.bits().to_le_bytes());
|
|
|
|
|
|
|
|
|
|
let mut hints = spec.constraint_hints.clone();
|
|
|
|
|
hints.sort_unstable();
|
|
|
|
|
for hint in &hints {
|
|
|
|
|
h.update(hint.as_bytes());
|
|
|
|
|
h.update(b"\0");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
h.update(spec.toolchain_id.as_bytes());
|
|
|
|
|
h.update(b"\0");
|
|
|
|
|
h.update(&CORPUS_VERSION.to_le_bytes());
|
|
|
|
|
|
|
|
|
|
let out = h.finalize();
|
|
|
|
|
let bytes = out.as_bytes();
|
|
|
|
|
format!("{:016x}", u64::from_le_bytes(bytes[..8].try_into().unwrap()))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::evidence::{Evidence, FlowStep, FlowStepKind};

    /// A `Source` step in `file` whose enclosing function is `function`.
    fn source_step(file: &str, function: &str) -> FlowStep {
        FlowStep {
            step: 1,
            kind: FlowStepKind::Source,
            file: file.into(),
            line: 1,
            col: 0,
            snippet: None,
            variable: Some("x".into()),
            callee: None,
            function: Some(function.into()),
            is_cross_file: false,
        }
    }

    /// A `Sink` step in `file` with no function annotation.
    fn sink_step(file: &str) -> FlowStep {
        FlowStep {
            step: 2,
            kind: FlowStepKind::Sink,
            file: file.into(),
            line: 10,
            col: 0,
            snippet: None,
            variable: None,
            callee: None,
            function: None,
            is_cross_file: false,
        }
    }

    /// Evidence for a source -> sink flow inside `src/handler.rs`, entered
    /// through `function` and ending in a sink with capability `cap`.
    fn handler_evidence(function: &str, cap: Cap) -> Evidence {
        Evidence {
            flow_steps: vec![
                source_step("src/handler.rs", function),
                sink_step("src/handler.rs"),
            ],
            sink_caps: cap.bits(),
            ..Default::default()
        }
    }

    /// A finding with the given confidence wrapping `evidence`.
    fn diag_with(confidence: Confidence, evidence: Evidence) -> Diag {
        Diag {
            confidence: Some(confidence),
            evidence: Some(evidence),
            ..Default::default()
        }
    }

    #[test]
    fn outermost_entry_picks_source_step() {
        let steps = vec![source_step("src/main.rs", "handle_request"), sink_step("src/main.rs")];
        let entry = outermost_entry(&steps).unwrap();
        assert_eq!(entry.file, "src/main.rs");
        assert_eq!(entry.function, "handle_request");
    }

    #[test]
    fn outermost_entry_none_when_no_source() {
        let steps = vec![sink_step("src/main.rs")];
        assert!(outermost_entry(&steps).is_none());
    }

    #[test]
    fn outermost_entry_none_when_source_has_no_function() {
        let mut step = source_step("src/main.rs", "");
        step.function = None;
        let steps = vec![step, sink_step("src/main.rs")];
        assert!(outermost_entry(&steps).is_none());
    }

    #[test]
    fn outermost_entry_skips_empty_function_name() {
        // An empty annotation is unusable; the next annotated source wins.
        let steps = vec![
            source_step("src/a.rs", ""),
            source_step("src/b.rs", "real_entry"),
            sink_step("src/b.rs"),
        ];
        let entry = outermost_entry(&steps).unwrap();
        assert_eq!(entry.file, "src/b.rs");
        assert_eq!(entry.function, "real_entry");
    }

    #[test]
    fn from_finding_err_low_confidence() {
        let diag = Diag {
            confidence: Some(Confidence::Low),
            ..Default::default()
        };
        assert_eq!(
            HarnessSpec::from_finding(&diag).unwrap_err(),
            UnsupportedReason::ConfidenceTooLow
        );
    }

    #[test]
    fn from_finding_err_missing_confidence() {
        let diag = Diag {
            confidence: None,
            evidence: Some(handler_evidence("process", Cap::SQL_QUERY)),
            ..Default::default()
        };
        assert_eq!(
            HarnessSpec::from_finding(&diag).unwrap_err(),
            UnsupportedReason::ConfidenceTooLow
        );
    }

    #[test]
    fn from_finding_err_no_flow_steps() {
        let diag = diag_with(Confidence::Medium, Evidence::default());
        assert_eq!(
            HarnessSpec::from_finding(&diag).unwrap_err(),
            UnsupportedReason::NoFlowSteps
        );
    }

    #[test]
    fn from_finding_err_missing_evidence() {
        let diag = Diag {
            confidence: Some(Confidence::Medium),
            evidence: None,
            ..Default::default()
        };
        assert_eq!(
            HarnessSpec::from_finding(&diag).unwrap_err(),
            UnsupportedReason::NoFlowSteps
        );
    }

    #[test]
    fn from_finding_err_no_callable_entry() {
        let evidence = Evidence {
            flow_steps: vec![sink_step("src/handler.rs")],
            sink_caps: Cap::SQL_QUERY.bits(),
            ..Default::default()
        };
        let diag = diag_with(Confidence::Medium, evidence);
        assert_eq!(
            HarnessSpec::from_finding(&diag).unwrap_err(),
            UnsupportedReason::SpecDerivationFailed
        );
    }

    #[test]
    fn from_finding_err_zero_sink_caps() {
        let diag = diag_with(Confidence::Medium, handler_evidence("process", Cap::empty()));
        assert_eq!(
            HarnessSpec::from_finding(&diag).unwrap_err(),
            UnsupportedReason::SpecDerivationFailed
        );
    }

    #[test]
    fn from_finding_ok_rust_medium_confidence() {
        let diag = diag_with(Confidence::Medium, handler_evidence("process", Cap::SQL_QUERY));
        let spec = HarnessSpec::from_finding(&diag).unwrap();
        assert_eq!(spec.lang, Lang::Rust);
        assert_eq!(spec.entry_file, "src/handler.rs");
        assert_eq!(spec.entry_name, "process");
        assert_eq!(spec.entry_kind, EntryKind::Function);
        assert!(matches!(spec.payload_slot, PayloadSlot::Param(0)));
        assert_eq!(spec.expected_cap.bits(), Cap::SQL_QUERY.bits());
        assert!(spec.constraint_hints.is_empty());
        assert_eq!(spec.toolchain_id, "rust-stable");
        assert!(!spec.spec_hash.is_empty());
    }

    #[test]
    fn spec_hash_is_deterministic() {
        let diag = diag_with(Confidence::High, handler_evidence("process", Cap::SQL_QUERY));
        let s1 = HarnessSpec::from_finding(&diag).unwrap();
        let s2 = HarnessSpec::from_finding(&diag).unwrap();
        assert_eq!(s1.spec_hash, s2.spec_hash);
    }

    #[test]
    fn spec_hash_is_16_lowercase_hex_chars() {
        let diag = diag_with(Confidence::High, handler_evidence("process", Cap::SQL_QUERY));
        let spec = HarnessSpec::from_finding(&diag).unwrap();
        assert_eq!(spec.spec_hash.len(), 16);
        assert!(spec
            .spec_hash
            .chars()
            .all(|c| c.is_ascii_hexdigit() && !c.is_ascii_uppercase()));
    }

    #[test]
    fn spec_hash_depends_on_entry_name() {
        let first = diag_with(Confidence::High, handler_evidence("process", Cap::SQL_QUERY));
        let second = diag_with(Confidence::High, handler_evidence("other", Cap::SQL_QUERY));
        let h1 = HarnessSpec::from_finding(&first).unwrap().spec_hash;
        let h2 = HarnessSpec::from_finding(&second).unwrap().spec_hash;
        assert_ne!(h1, h2);
    }
}
|