nyx/src/dynamic/corpus/registry.rs

493 lines
17 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! `(Cap, Lang)` payload registry.
//!
//! [`CORPUS`] is the canonical, const-built lookup table. Track J phases
//! 03–11 land each cap independently by adding new per-`(cap, lang)` slice
//! files under `src/dynamic/corpus/<cap>/<lang>.rs` and wiring them in
//! here.
//!
//! Public surface:
//!
//! * [`payloads_for_lang`] — per-language lookup (new API).
//! * [`payloads_for`] — back-compatible union shim that flattens every
//! language registered for a cap. Returns `&'static [CuratedPayload]`
//! so existing call sites in [`crate::dynamic::runner`],
//! [`crate::dynamic::verify`], and the fuzzer compile unchanged.
//! * [`benign_payload_for`], [`resolve_benign_control`],
//! [`materialise_bytes`], [`audit_marker_collisions`] — unchanged
//! semantics; all route through the registry.
// Legacy [`Oracle::OutputContains`] is intentionally retained for
// pre-Phase-06 corpus entries; the deprecation warning is informational.
#![allow(deprecated)]
use std::collections::HashMap;
use std::sync::OnceLock;
use super::{cmdi, fmt_string, path_trav, sqli, ssrf, xss};
use super::{CapCorpus, CuratedPayload, Oracle};
use crate::dynamic::oracle::ProbePredicate;
use crate::labels::Cap;
use crate::symbol::Lang;
/// Caps with no payloads of their own — source-only sources, sanitizers,
/// and sinks we cannot yet model with a reliable oracle. The
/// [`super::audit`] module asserts that the union of caps covered by
/// [`CORPUS::entries`] and this constant equals [`Cap::all`].
pub const CORPUS_UNSUPPORTED_LANG_NEUTRAL: u32 = Cap::ENV_VAR.bits()
| Cap::SHELL_ESCAPE.bits()
| Cap::URL_ENCODE.bits()
| Cap::JSON_PARSE.bits()
| Cap::DESERIALIZE.bits()
| Cap::CRYPTO.bits()
| Cap::UNAUTHORIZED_ID.bits()
| Cap::DATA_EXFIL.bits()
| Cap::LDAP_INJECTION.bits()
| Cap::XPATH_INJECTION.bits()
| Cap::HEADER_INJECTION.bits()
| Cap::OPEN_REDIRECT.bits()
| Cap::SSTI.bits()
| Cap::XXE.bits()
| Cap::PROTOTYPE_POLLUTION.bits();
/// Flat `(Cap, Lang, slice)` table. A single cap can carry per-language
/// variants — that's the whole reason this layer exists.
///
/// Row order is observable: [`payloads_for_lang`] returns the first row
/// matching a `(cap, lang)` pair, and [`payloads_for`] concatenates a cap's
/// rows in the order they appear here.
const ENTRIES: &[(Cap, Lang, &[CuratedPayload])] = &[
(Cap::SQL_QUERY, Lang::Rust, sqli::rust::PAYLOADS),
(Cap::CODE_EXEC, Lang::Rust, cmdi::rust::PAYLOADS),
(Cap::FILE_IO, Lang::Rust, path_trav::rust::PAYLOADS),
(Cap::SSRF, Lang::Rust, ssrf::rust::PAYLOADS),
(Cap::HTML_ESCAPE, Lang::Rust, xss::rust::PAYLOADS),
(Cap::FMT_STRING, Lang::C, fmt_string::c::PAYLOADS),
];
/// Reserved for per-cap oracle defaults. Empty in Phase 02; populated by
/// later Track J phases that hoist a cap-wide
/// [`ProbePredicate`](crate::dynamic::oracle::ProbePredicate) set off the
/// individual [`CuratedPayload::probe_predicates`] fields.
///
/// Published through `CORPUS.oracles`; the payload lookup helpers in this
/// module do not consult it.
const ORACLES: &[(Cap, &[ProbePredicate])] = &[];
/// The canonical registry instance.
///
/// Bundles the per-`(cap, lang)` payload table with the per-cap oracle
/// defaults. The lookup helpers in this module read the payload table
/// through `CORPUS.entries`.
pub const CORPUS: CapCorpus = CapCorpus {
// Per-`(cap, lang)` payload slices.
entries: ENTRIES,
// Per-cap oracle defaults (an empty table at present).
oracles: ORACLES,
};
/// Look up the payload slice registered for exactly one `(cap, lang)` pair.
///
/// The first matching row of the registry table wins. When no row matches,
/// an empty slice is returned. Callers that do not need per-language
/// precision can stay on [`payloads_for`].
pub fn payloads_for_lang(cap: Cap, lang: Lang) -> &'static [CuratedPayload] {
    CORPUS
        .entries
        .iter()
        .find(|&&(entry_cap, entry_lang, _)| entry_cap == cap && entry_lang == lang)
        .map(|&(_, _, payloads)| payloads)
        .unwrap_or(&[])
}
/// Back-compatible union shim: every payload registered against `cap`,
/// concatenated across languages in registry-table order.
///
/// The per-cap unions for the whole registry are built and leaked together
/// the first time this function is called; every later call is a single
/// map lookup. A cap with no registered payloads yields an empty slice.
pub fn payloads_for(cap: Cap) -> &'static [CuratedPayload] {
    static UNIONS: OnceLock<HashMap<u32, &'static [CuratedPayload]>> = OnceLock::new();

    let unions = UNIONS.get_or_init(|| {
        // Pass 1: concatenate each cap's slices, keeping table order.
        let mut by_cap: HashMap<u32, Vec<CuratedPayload>> = HashMap::new();
        for (entry_cap, _, payloads) in CORPUS.entries {
            by_cap
                .entry(entry_cap.bits())
                .or_default()
                .extend_from_slice(payloads);
        }

        // Pass 2: freeze every union into a leaked `'static` slice. The
        // leak happens once, inside this initializer.
        let mut frozen: HashMap<u32, &'static [CuratedPayload]> = HashMap::new();
        for (bits, merged) in by_cap {
            let leaked: &'static [CuratedPayload] = Box::leak(merged.into_boxed_slice());
            frozen.insert(bits, leaked);
        }
        frozen
    });

    match unions.get(&cap.bits()) {
        Some(&merged) => merged,
        None => &[],
    }
}
/// First benign control payload registered for `cap`, searching the
/// cross-language union returned by [`payloads_for`].
///
/// Returns `None` when the cap has no benign entry.
pub fn benign_payload_for(cap: Cap) -> Option<&'static CuratedPayload> {
    for candidate in payloads_for(cap) {
        if candidate.is_benign {
            return Some(candidate);
        }
    }
    None
}
/// Follow a payload's [`CuratedPayload::benign_control`] reference to the
/// benign entry with the same label in `cap`'s payloads (all languages).
///
/// Yields `None` in two situations: the vulnerable payload carries no
/// control reference, or no *benign* payload with the referenced label is
/// registered for `cap`. The runner treats `None` as `NoControl` and
/// downgrades the verdict to
/// [`crate::evidence::InconclusiveReason::NoBenignControl`].
pub fn resolve_benign_control(
    vuln_payload: &CuratedPayload,
    cap: Cap,
) -> Option<&'static CuratedPayload> {
    vuln_payload.benign_control.and_then(|control_ref| {
        payloads_for(cap)
            .iter()
            .find(|candidate| candidate.is_benign && candidate.label == control_ref.label)
    })
}
/// Produce the bytes that should actually be sent for `payload`.
///
/// * Static payloads (`oob_nonce_slot == false`) borrow `payload.bytes`
///   as-is; `oob_url` is ignored.
/// * OOB-nonce payloads use the bytes of `oob_url` verbatim as the payload
///   (an owned copy). Without a URL there is nothing to send, so the
///   result is `None`.
pub fn materialise_bytes<'a>(
    payload: &'a CuratedPayload,
    oob_url: Option<&str>,
) -> Option<std::borrow::Cow<'a, [u8]>> {
    use std::borrow::Cow;

    if !payload.oob_nonce_slot {
        return Some(Cow::Borrowed(payload.bytes));
    }
    let callback_url = oob_url?;
    Some(Cow::Owned(callback_url.as_bytes().to_vec()))
}
/// Marker-collision audit (§16.3).
///
/// Returns `(cap_name, label, conflicting_cap_name)` triples where a
/// non-benign payload's `OutputContains` marker also appears in the bytes
/// of a non-benign payload belonging to another cap. Empty result = passing.
///
/// Each `(payload, conflicting cap)` pair is reported once, no matter how
/// many payloads of the conflicting cap contain the marker. An empty marker
/// is contained in every byte string, so it is reported against every other
/// audited cap that has a non-benign payload.
///
/// Only caps known to the internal `cap_label` table take part; a cap
/// registered in the corpus without a label there is skipped.
pub fn audit_marker_collisions() -> Vec<(&'static str, &'static str, &'static str)> {
    /// Display name for the caps that take part in the audit.
    fn cap_label(cap: Cap) -> Option<&'static str> {
        match cap {
            Cap::SQL_QUERY => Some("SQL_QUERY"),
            Cap::CODE_EXEC => Some("CODE_EXEC"),
            Cap::FILE_IO => Some("FILE_IO"),
            Cap::SSRF => Some("SSRF"),
            Cap::HTML_ESCAPE => Some("HTML_ESCAPE"),
            Cap::FMT_STRING => Some("FMT_STRING"),
            _ => None,
        }
    }

    /// Byte-substring test. The empty needle is handled up front because
    /// `slice::windows(0)` panics.
    fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
        needle.is_empty() || haystack.windows(needle.len()).any(|w| w == needle)
    }

    // One row per distinct cap, carrying its cross-language payload union.
    let mut cap_payloads: Vec<(Cap, &'static str, &'static [CuratedPayload])> = Vec::new();
    let mut seen_bits: u32 = 0;
    for &(c, _lang, _slice) in CORPUS.entries {
        if seen_bits & c.bits() != 0 {
            continue;
        }
        seen_bits |= c.bits();
        if let Some(name) = cap_label(c) {
            cap_payloads.push((c, name, payloads_for(c)));
        }
    }

    let mut collisions = Vec::new();
    for &(src_cap, src_name, src_slice) in &cap_payloads {
        for p in src_slice.iter().filter(|p| !p.is_benign) {
            // Only legacy `OutputContains` oracles carry a marker to audit.
            let Oracle::OutputContains(marker) = &p.oracle else {
                continue;
            };
            let marker_bytes = marker.as_bytes();
            for &(other_cap, other_name, other_slice) in &cap_payloads {
                if other_cap == src_cap {
                    continue;
                }
                // `any` stops at the first hit, so each conflicting cap
                // contributes at most one triple per source payload.
                let collides = other_slice
                    .iter()
                    .any(|op| !op.is_benign && contains_bytes(op.bytes, marker_bytes));
                if collides {
                    collisions.push((src_name, p.label, other_name));
                }
            }
        }
    }
    collisions
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dynamic::corpus::{benign_payload_for, CORPUS_VERSION};

    /// Every cap that has at least one payload slice registered in the
    /// corpus. Shared by the tests that sweep the whole corpus.
    const SUPPORTED_CAPS: [Cap; 6] = [
        Cap::SQL_QUERY,
        Cap::CODE_EXEC,
        Cap::FILE_IO,
        Cap::SSRF,
        Cap::HTML_ESCAPE,
        Cap::FMT_STRING,
    ];

    /// The SSRF payload that carries an OOB nonce slot.
    fn ssrf_oob_payload() -> &'static CuratedPayload {
        payloads_for(Cap::SSRF)
            .iter()
            .find(|p| p.oob_nonce_slot)
            .expect("SSRF corpus must have an OOB payload")
    }

    #[test]
    fn supported_caps_have_payloads() {
        for cap in SUPPORTED_CAPS {
            assert!(
                !payloads_for(cap).is_empty(),
                "expected {cap:?} to have payloads",
            );
        }
    }

    #[test]
    fn unsupported_caps_return_empty() {
        let unsupported = [
            Cap::ENV_VAR,
            Cap::SHELL_ESCAPE,
            Cap::URL_ENCODE,
            Cap::JSON_PARSE,
            Cap::DESERIALIZE,
            Cap::CRYPTO,
            Cap::UNAUTHORIZED_ID,
            Cap::DATA_EXFIL,
            Cap::LDAP_INJECTION,
            Cap::XPATH_INJECTION,
            Cap::HEADER_INJECTION,
            Cap::OPEN_REDIRECT,
            Cap::SSTI,
            Cap::XXE,
            Cap::PROTOTYPE_POLLUTION,
        ];
        for cap in unsupported {
            assert!(
                payloads_for(cap).is_empty(),
                "expected {cap:?} to return empty payloads",
            );
        }
    }

    #[test]
    fn fileio_has_benign_payload() {
        assert!(benign_payload_for(Cap::FILE_IO).is_some());
    }

    #[test]
    fn html_escape_has_benign_payload() {
        assert!(benign_payload_for(Cap::HTML_ESCAPE).is_some());
    }

    #[test]
    fn vuln_payloads_not_benign() {
        // SSRF is not part of this sweep, so it keeps its own cap list.
        for cap in [
            Cap::SQL_QUERY,
            Cap::CODE_EXEC,
            Cap::FILE_IO,
            Cap::HTML_ESCAPE,
            Cap::FMT_STRING,
        ] {
            let has_vuln = payloads_for(cap).iter().any(|p| !p.is_benign);
            assert!(has_vuln, "{cap:?} must have at least one vuln payload");
        }
    }

    #[test]
    fn fmt_string_has_sink_crash_oracle_and_benign_control() {
        let payloads = payloads_for(Cap::FMT_STRING);
        let vuln = payloads
            .iter()
            .find(|p| !p.is_benign)
            .expect("FMT_STRING must have a vuln payload");
        assert!(
            matches!(vuln.oracle, Oracle::SinkCrash { .. }),
            "FMT_STRING vuln payload oracle must be SinkCrash (Phase 08)"
        );
        let bref = vuln
            .benign_control
            .expect("FMT_STRING vuln must reference a benign control");
        assert!(
            resolve_benign_control(vuln, Cap::FMT_STRING).is_some(),
            "FMT_STRING benign-control label '{}' must resolve",
            bref.label,
        );
    }

    #[test]
    fn marker_uniqueness_sqli() {
        for p in payloads_for(Cap::SQL_QUERY) {
            assert!(
                !p.bytes.windows(7).any(|w| w == b"NYX_PWN"),
                "NYX_PWN (CODE_EXEC marker) must not appear in SQLI payloads",
            );
        }
    }

    #[test]
    fn all_payloads_have_fixture_paths() {
        for cap in SUPPORTED_CAPS {
            for p in payloads_for(cap) {
                assert!(
                    !p.fixture_paths.is_empty(),
                    "payload '{}' for {cap:?} must have ≥1 fixture_path (§16.1)",
                    p.label,
                );
            }
        }
    }

    #[test]
    fn all_payloads_have_valid_since_corpus_version() {
        for cap in SUPPORTED_CAPS {
            for p in payloads_for(cap) {
                assert!(
                    p.since_corpus_version >= 1 && p.since_corpus_version <= CORPUS_VERSION,
                    "payload '{}': since_corpus_version {} out of [1, {}]",
                    p.label,
                    p.since_corpus_version,
                    CORPUS_VERSION,
                );
            }
        }
    }

    #[test]
    fn no_marker_collisions() {
        let collisions = audit_marker_collisions();
        assert!(
            collisions.is_empty(),
            "marker collisions detected (§16.3): {collisions:?}",
        );
    }

    #[test]
    fn ssrf_has_oob_nonce_slot() {
        let has_oob = payloads_for(Cap::SSRF).iter().any(|p| p.oob_nonce_slot);
        assert!(has_oob, "SSRF corpus must include an OOB-nonce-slot payload");
    }

    #[test]
    fn materialise_static_payload() {
        let p = payloads_for(Cap::SQL_QUERY)
            .iter()
            .find(|p| !p.is_benign && !p.oob_nonce_slot)
            .expect("must have static SQLi payload");
        let bytes =
            materialise_bytes(p, None).expect("static payload must materialise without OOB");
        assert_eq!(&*bytes, p.bytes);
    }

    #[test]
    fn materialise_oob_payload_with_url() {
        let p = ssrf_oob_payload();
        let url = "http://127.0.0.1:54321/mynonce";
        let bytes = materialise_bytes(p, Some(url)).expect("OOB payload materialises with URL");
        assert_eq!(&*bytes, url.as_bytes());
    }

    #[test]
    fn materialise_oob_payload_without_listener_returns_none() {
        let p = ssrf_oob_payload();
        assert!(materialise_bytes(p, None).is_none(), "no OOB URL → None");
    }

    #[test]
    fn benign_control_refs_resolve_for_paired_caps() {
        // (cap, vulnerable payload label, expected benign control label)
        let cases: &[(Cap, &str, &str)] = &[
            (Cap::SQL_QUERY, "sqli-tautology", "sqli-benign"),
            (Cap::SQL_QUERY, "sqli-union-nyx", "sqli-benign"),
            (Cap::CODE_EXEC, "cmdi-echo-marker", "cmdi-benign"),
            (Cap::FILE_IO, "path-traversal-passwd", "path-traversal-benign"),
            (Cap::SSRF, "ssrf-file-scheme", "ssrf-benign"),
            (Cap::HTML_ESCAPE, "xss-script-marker", "xss-benign-text"),
        ];
        for (cap, vuln_label, benign_label) in cases {
            let payloads = payloads_for(*cap);
            let vuln = payloads
                .iter()
                .find(|p| p.label == *vuln_label)
                .unwrap_or_else(|| panic!("missing vuln payload {vuln_label} for {cap:?}"));
            let resolved = resolve_benign_control(vuln, *cap)
                .unwrap_or_else(|| panic!("missing benign control for {vuln_label}"));
            assert_eq!(resolved.label, *benign_label);
            assert!(resolved.is_benign, "resolved control must be marked benign");
        }
    }

    #[test]
    fn oob_payload_has_no_benign_control() {
        let p = ssrf_oob_payload();
        assert!(p.benign_control.is_none(), "OOB-nonce → NoControl");
        assert!(resolve_benign_control(p, Cap::SSRF).is_none());
        assert!(
            p.no_benign_control_rationale.is_some(),
            "OOB-nonce must carry written no_benign_control_rationale",
        );
    }

    #[test]
    fn benign_entries_are_terminal() {
        for cap in SUPPORTED_CAPS {
            for p in payloads_for(cap).iter().filter(|p| p.is_benign) {
                assert!(
                    p.benign_control.is_none(),
                    "benign payload {} must not chain to another control",
                    p.label,
                );
            }
        }
    }

    #[test]
    fn payloads_for_lang_filters() {
        // SQL_QUERY currently only registered for Rust.
        assert!(!payloads_for_lang(Cap::SQL_QUERY, Lang::Rust).is_empty());
        assert!(payloads_for_lang(Cap::SQL_QUERY, Lang::Python).is_empty());
        // FMT_STRING is C-only.
        assert!(!payloads_for_lang(Cap::FMT_STRING, Lang::C).is_empty());
        assert!(payloads_for_lang(Cap::FMT_STRING, Lang::Rust).is_empty());
    }

    #[test]
    fn back_compat_union_matches_registered_entry() {
        // With one (cap, lang) entry per cap, the union must contain the
        // same labels as the underlying slice (byte-identical verdict
        // requirement, Phase 02 acceptance).
        for &(cap, lang, slice) in CORPUS.entries {
            let union = payloads_for(cap);
            assert_eq!(
                union.len(),
                slice.len(),
                "union for {cap:?} differs from {lang:?} slice",
            );
            for (u, s) in union.iter().zip(slice.iter()) {
                assert_eq!(u.label, s.label);
                assert_eq!(u.bytes, s.bytes);
            }
        }
    }
}