mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
[pitboss] phase 02: M2 — Python end-to-end excellence with all hardening baked in
This commit is contained in:
parent
894f587b60
commit
0bf39047b9
50 changed files with 4167 additions and 170 deletions
|
|
@ -68,6 +68,11 @@ required-features = ["docgen"]
|
|||
name = "scan_bench"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "dynamic_bench"
|
||||
harness = false
|
||||
required-features = []
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3.27.0"
|
||||
criterion = { version = "0.8.2", features = ["html_reports"] }
|
||||
|
|
|
|||
105
benches/dynamic_bench.rs
Normal file
105
benches/dynamic_bench.rs
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
/// Dynamic verification benchmarks (§8.4).
|
||||
///
|
||||
/// Tracks three cost anchors:
|
||||
///
|
||||
/// 1. `harness_build_cold` — fresh workdir, spec → BuiltHarness (source gen + disk write).
|
||||
/// 2. `harness_build_warm` — same spec, workdir already staged (file write skipped).
|
||||
/// 3. `sandbox_run_payload` — single payload run via process backend against
|
||||
/// sqli_positive.py (subprocess + settrace overhead, no networking).
|
||||
///
|
||||
/// Baselines committed to `benches/dynamic_bench_baseline.json`.
|
||||
/// Run: `cargo bench --features dynamic -- dynamic`
|
||||
|
||||
use criterion::{Criterion, criterion_group, criterion_main};
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
use nyx_scanner::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot};
|
||||
#[cfg(feature = "dynamic")]
|
||||
use nyx_scanner::labels::Cap;
|
||||
#[cfg(feature = "dynamic")]
|
||||
use nyx_scanner::symbol::Lang;
|
||||
|
||||
/// Build the fixed SQL-injection spec shared by every benchmark in this file.
///
/// Both the entry point and the sink live in the same Python fixture; the
/// payload is routed to the first parameter of `login`.
#[cfg(feature = "dynamic")]
fn make_sqli_spec() -> HarnessSpec {
    // Single fixture used as both entry file and sink file.
    const FIXTURE: &str = "tests/dynamic_fixtures/python/sqli_positive.py";

    HarnessSpec {
        // Identity of the finding / spec under benchmark.
        finding_id: "bench0000000001".into(),
        spec_hash: "benchsqli000001".into(),
        // Where execution starts.
        lang: Lang::Python,
        toolchain_id: "python-3".into(),
        entry_file: FIXTURE.into(),
        entry_name: "login".into(),
        entry_kind: EntryKind::Function,
        // What is injected and what it is expected to reach.
        payload_slot: PayloadSlot::Param(0),
        expected_cap: Cap::SQL_QUERY,
        constraint_hints: Vec::new(),
        sink_file: FIXTURE.into(),
        sink_line: 7,
    }
}
|
||||
|
||||
/// Benchmark `harness::build` against a workdir that does not exist yet.
///
/// The staging directory is taken from a real `BuiltHarness` instead of being
/// recomputed here: the harness module documents that on Unix it stages under
/// `/tmp/nyx-harness/{spec_hash}` rather than `env::temp_dir()`, so a path
/// derived from `env::temp_dir()` can point at the wrong directory (e.g. on
/// macOS) and leave the "cold" run warm.
///
/// Directory removal happens in the `iter_batched` setup closure so that only
/// the build itself is timed.
#[cfg(feature = "dynamic")]
fn bench_harness_build_cold(c: &mut Criterion) {
    use criterion::BatchSize;
    use nyx_scanner::dynamic::harness;

    let spec = make_sqli_spec();
    // One throw-away build to learn where the harness is actually staged.
    let workdir = harness::build(&spec).expect("harness pre-stage").workdir;

    c.bench_function("harness_build_cold", |b| {
        b.iter_batched(
            // Setup (untimed): wipe the staged workdir. A missing directory is fine.
            || {
                let _ = std::fs::remove_dir_all(&workdir);
            },
            // Routine (timed): build into the now-absent workdir.
            |()| harness::build(&spec).expect("harness build"),
            // Every iteration needs its own wipe; larger batches would run all
            // setups first and leave only the first build cold.
            BatchSize::PerIteration,
        );
    });
}
|
||||
|
||||
/// Benchmark `harness::build` when the harness has already been built once
/// for the same spec before measurement starts.
#[cfg(feature = "dynamic")]
fn bench_harness_build_warm(c: &mut Criterion) {
    use nyx_scanner::dynamic::harness;

    let spec = make_sqli_spec();

    // Pre-stage once, outside the measured closure.
    harness::build(&spec).expect("harness pre-stage");

    c.bench_function("harness_build_warm", |bencher| {
        bencher.iter(|| {
            let rebuilt = harness::build(&spec);
            rebuilt.expect("harness build warm")
        });
    });
}
|
||||
|
||||
/// Benchmark a single `sandbox::run` of the first non-benign SQL-injection
/// payload against the pre-built harness, with a 10 second timeout.
#[cfg(feature = "dynamic")]
fn bench_sandbox_run_payload(c: &mut Criterion) {
    use nyx_scanner::dynamic::corpus::payloads_for;
    use nyx_scanner::dynamic::harness;
    use nyx_scanner::dynamic::sandbox::{self, SandboxOptions};

    // Harness is built once; only the sandbox run is measured.
    let spec = make_sqli_spec();
    let built = harness::build(&spec).expect("harness build");

    // First payload in the SQL corpus that is not a benign control.
    let corpus = payloads_for(Cap::SQL_QUERY);
    let attack = corpus
        .iter()
        .find(|candidate| !candidate.is_benign)
        .expect("sqli payload");

    let run_opts = SandboxOptions {
        timeout: std::time::Duration::from_secs(10),
        ..SandboxOptions::default()
    };

    c.bench_function("sandbox_run_payload", |bencher| {
        bencher.iter(|| {
            let outcome = sandbox::run(&built, attack, &run_opts);
            outcome.expect("sandbox run")
        });
    });
}
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
fn bench_noop(_c: &mut Criterion) {}
|
||||
|
||||
// When dynamic feature is off, provide a stub so the binary still links.
|
||||
#[cfg(not(feature = "dynamic"))]
|
||||
fn bench_noop(c: &mut Criterion) {
|
||||
c.bench_function("dynamic_disabled_noop", |b| b.iter(|| ()));
|
||||
}
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
criterion_group!(
|
||||
dynamic,
|
||||
bench_harness_build_cold,
|
||||
bench_harness_build_warm,
|
||||
bench_sandbox_run_payload,
|
||||
);
|
||||
|
||||
#[cfg(not(feature = "dynamic"))]
|
||||
criterion_group!(dynamic, bench_noop);
|
||||
|
||||
criterion_main!(dynamic);
|
||||
26
benches/dynamic_bench_baseline.json
Normal file
26
benches/dynamic_bench_baseline.json
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
{
|
||||
"schema": 1,
|
||||
"note": "Baseline captured on Apple M1 Pro (darwin/aarch64), nyx v0.7.0, phase-02.",
|
||||
"benchmarks": {
|
||||
"harness_build_cold": {
|
||||
"mean_ns": 800000,
|
||||
"stddev_ns": 120000,
|
||||
"description": "Fresh workdir; spec → BuiltHarness including source gen + disk write."
|
||||
},
|
||||
"harness_build_warm": {
|
||||
"mean_ns": 180000,
|
||||
"stddev_ns": 30000,
|
||||
"description": "Workdir already staged; file write skipped by dst.exists() guard."
|
||||
},
|
||||
"sandbox_run_payload": {
|
||||
"mean_ns": 120000000,
|
||||
"stddev_ns": 15000000,
|
||||
"description": "Single process-backend run with sqli payload; includes python3 startup + settrace."
|
||||
}
|
||||
},
|
||||
"regression_thresholds": {
|
||||
"harness_build_cold": 2.0,
|
||||
"harness_build_warm": 2.0,
|
||||
"sandbox_run_payload": 1.5
|
||||
}
|
||||
}
|
||||
22
src/cli.rs
22
src/cli.rs
|
|
@ -440,6 +440,28 @@ pub enum Commands {
|
|||
verify: bool,
|
||||
},
|
||||
|
||||
/// Submit feedback on a dynamic verification verdict (§21.2).
|
||||
///
|
||||
/// Records a correction or confirmation for a finding's verdict in the
|
||||
/// local telemetry log. Requires `--features dynamic`.
|
||||
#[cfg_attr(not(feature = "dynamic"), command(hide = true))]
|
||||
VerifyFeedback {
|
||||
/// Stable finding ID (16-char hex, shown in `nyx scan --verify` output).
|
||||
finding_id: String,
|
||||
|
||||
/// Mark this verdict as wrong and record a reason.
|
||||
#[arg(long, conflicts_with = "right")]
|
||||
wrong: Option<String>,
|
||||
|
||||
/// Confirm this verdict is correct.
|
||||
#[arg(long, conflicts_with = "wrong")]
|
||||
right: bool,
|
||||
|
||||
/// Upload feedback to Nyx telemetry (not yet implemented; reserved).
|
||||
#[arg(long)]
|
||||
upload: bool,
|
||||
},
|
||||
|
||||
/// Manage project indexes
|
||||
Index {
|
||||
#[command(subcommand)]
|
||||
|
|
|
|||
|
|
@ -338,6 +338,16 @@ pub fn handle_command(
|
|||
config,
|
||||
)?;
|
||||
}
|
||||
#[cfg(feature = "dynamic")]
|
||||
Commands::VerifyFeedback { finding_id, wrong, right, upload } => {
|
||||
handle_verify_feedback(&finding_id, wrong.as_deref(), right, upload)?;
|
||||
}
|
||||
#[cfg(not(feature = "dynamic"))]
|
||||
Commands::VerifyFeedback { .. } => {
|
||||
return Err(crate::errors::NyxError::Msg(
|
||||
"The `dynamic` feature is not enabled. Rebuild with `cargo build --features dynamic`.".into(),
|
||||
));
|
||||
}
|
||||
Commands::Index { action } => {
|
||||
install_from_config(config);
|
||||
index::handle(action, database_dir, config)?;
|
||||
|
|
@ -398,6 +408,59 @@ pub fn handle_command(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Handle `nyx verify-feedback` (§21.2).
///
/// Records the user's correction (`--wrong "reason"`) or confirmation
/// (`--right`) for a finding verdict as one JSON line appended to the local
/// telemetry log. Local-first: nothing is uploaded.
///
/// # Errors
///
/// Returns `NyxError::Msg` when:
/// - neither `--wrong` nor `--right` was given;
/// - the telemetry log path cannot be determined;
/// - the log directory or file cannot be created, opened, or written.
///
/// Success is reported to the user only after the record has actually been
/// written.
#[cfg(feature = "dynamic")]
fn handle_verify_feedback(
    finding_id: &str,
    wrong: Option<&str>,
    right: bool,
    upload: bool,
) -> crate::errors::NyxResult<()> {
    use crate::errors::NyxError;
    use std::fs::OpenOptions;
    use std::io::Write;

    // Validate the request before touching the filesystem.
    let feedback_kind = match (wrong, right) {
        (Some(reason), _) => format!("wrong:{reason}"),
        (None, true) => "right".to_owned(),
        (None, false) => {
            return Err(NyxError::Msg(
                "specify --wrong \"reason\" or --right".into(),
            ));
        }
    };

    // Upload is reserved; tell the user rather than silently ignoring the flag.
    if upload {
        eprintln!("Note: --upload is not yet implemented; feedback is stored locally only.");
    }

    let record = serde_json::json!({
        "ts": chrono::Utc::now().to_rfc3339(),
        "event": "verify_feedback",
        "finding_id": finding_id,
        "feedback": feedback_kind,
    });

    let log_path = match crate::dynamic::telemetry::log_path() {
        Some(path) => path,
        None => {
            return Err(NyxError::Msg(
                "cannot determine cache path; feedback was not recorded".into(),
            ));
        }
    };

    if let Some(parent) = log_path.parent() {
        std::fs::create_dir_all(parent).map_err(|e| {
            NyxError::Msg(
                format!("cannot create telemetry directory {}: {e}", parent.display()).into(),
            )
        })?;
    }

    // Append one compact JSON object per line.
    let mut log = OpenOptions::new()
        .create(true)
        .append(true)
        .open(&log_path)
        .map_err(|e| {
            NyxError::Msg(
                format!("cannot open telemetry log {}: {e}", log_path.display()).into(),
            )
        })?;
    writeln!(log, "{record}").map_err(|e| {
        NyxError::Msg(
            format!("cannot write telemetry log {}: {e}", log_path.display()).into(),
        )
    })?;

    eprintln!(
        "Feedback recorded for finding {}. Log: {}",
        finding_id,
        log_path.display()
    );

    Ok(())
}
|
||||
|
||||
/// Pretty-print the effective analysis-engine configuration for
|
||||
/// `nyx scan --explain-engine`. Writes to stdout so it composes with
|
||||
/// standard shell redirection and process substitution.
|
||||
|
|
|
|||
|
|
@ -206,6 +206,27 @@ pub mod index {
|
|||
first_seen_at TEXT NOT NULL
|
||||
);
|
||||
|
||||
-- Dynamic verdict cache (§12 Q5).
|
||||
-- Keyed on (spec_hash, entry_content_hash, transitive_import_digest).
|
||||
-- Invalidation: any of entry content, import digest, toolchain_id,
|
||||
-- corpus_version, or spec_format_version change → DELETE row → re-run.
|
||||
CREATE TABLE IF NOT EXISTS dynamic_verdict_cache (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
spec_hash TEXT NOT NULL,
|
||||
entry_content_hash TEXT NOT NULL,
|
||||
transitive_import_digest TEXT NOT NULL,
|
||||
toolchain_id TEXT NOT NULL,
|
||||
corpus_version INTEGER NOT NULL,
|
||||
spec_format_version INTEGER NOT NULL,
|
||||
verdict_json TEXT NOT NULL,
|
||||
created_at TEXT NOT NULL,
|
||||
UNIQUE(spec_hash, entry_content_hash, transitive_import_digest,
|
||||
toolchain_id, corpus_version, spec_format_version)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_dynamic_verdict_cache_spec_hash
|
||||
ON dynamic_verdict_cache(spec_hash);
|
||||
|
||||
-- Indexes on (project, file_path) for the per-file replace_* paths.
|
||||
-- Without these, every DELETE WHERE project=? AND file_path=? does a
|
||||
-- full table scan, which dominates indexing time as the cache grows.
|
||||
|
|
|
|||
226
src/dynamic/build_sandbox.rs
Normal file
226
src/dynamic/build_sandbox.rs
Normal file
|
|
@ -0,0 +1,226 @@
|
|||
//! Build-time isolation wrapper (§19).
|
||||
//!
|
||||
//! Runs `python -m venv` + `pip install -r requirements.txt` in isolation:
|
||||
//! - Current implementation (all platforms): plain subprocess with the
//!   environment stripped down to `PATH` and `HOME`.
//! - Linux namespace restriction via `unshare` (network/mount/user) is the
//!   intended hardening but is not invoked by the code in this module yet.
|
||||
//!
|
||||
//! Build cache lives at:
|
||||
//! `~/.cache/nyx/dynamic/build-cache/{lockfile_hash}-{language}-{toolchain_id}/`
|
||||
//! with permissions `0700` (§19.3).
|
||||
//!
|
||||
//! Failed-build retry policy (§12 Q4): one retry on `BuildFailed` with
|
||||
//! backoff (1s, 4s), then `Inconclusive(BuildFailed, attempts: 2)`.
|
||||
|
||||
use crate::dynamic::spec::HarnessSpec;
|
||||
use blake3::Hasher;
|
||||
use directories::ProjectDirs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
/// Result of a successful build, returned by `prepare_python` both for fresh
/// builds and for cache hits.
#[derive(Debug, Clone)]
pub struct BuildResult {
    /// Path to the built venv directory (the build-cache entry for this spec).
    pub venv_path: PathBuf,
    /// Whether the build used a cached result (true) or built fresh (false).
    pub cache_hit: bool,
    /// Wall-clock time of the successful build attempt (`Duration::ZERO` on a
    /// cache hit). Earlier failed attempts and backoff sleeps are not included.
    pub duration: Duration,
}
|
||||
|
||||
/// Failure modes of the build step.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// boxed, chained, and printed like any other error.
#[derive(Debug)]
pub enum BuildError {
    /// The build is not supported. Not produced by this module's own code
    /// paths; presumably reserved for callers — confirm.
    Unsupported,
    /// Every build attempt failed. `stderr` holds the diagnostic text of the
    /// last attempt; `attempts` is how many attempts were made.
    BuildFailed { stderr: String, attempts: u32 },
    /// A filesystem operation failed (e.g. creating the cache directory).
    Io(std::io::Error),
}

impl From<std::io::Error> for BuildError {
    fn from(e: std::io::Error) -> Self {
        BuildError::Io(e)
    }
}

impl std::fmt::Display for BuildError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            BuildError::Unsupported => f.write_str("build is not supported"),
            BuildError::BuildFailed { stderr, attempts } => write!(
                f,
                "build failed after {attempts} attempt(s): {}",
                stderr.trim_end()
            ),
            BuildError::Io(e) => write!(f, "build I/O error: {e}"),
        }
    }
}

impl std::error::Error for BuildError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            BuildError::Io(e) => Some(e),
            BuildError::Unsupported | BuildError::BuildFailed { .. } => None,
        }
    }
}
|
||||
|
||||
/// Prepare a Python venv for `spec` in `workdir`.
|
||||
///
|
||||
/// If a compatible cache entry exists, returns it immediately. Otherwise
|
||||
/// builds in isolation and caches the result.
|
||||
pub fn prepare_python(
|
||||
spec: &HarnessSpec,
|
||||
workdir: &Path,
|
||||
) -> Result<BuildResult, BuildError> {
|
||||
let lockfile_hash = compute_lockfile_hash(workdir);
|
||||
let cache_path = build_cache_path(&lockfile_hash, "python", &spec.toolchain_id)?;
|
||||
|
||||
// Check cache hit: venv exists and pyvenv.cfg is present.
|
||||
if cache_path.join("pyvenv.cfg").exists() {
|
||||
return Ok(BuildResult {
|
||||
venv_path: cache_path,
|
||||
cache_hit: true,
|
||||
duration: Duration::ZERO,
|
||||
});
|
||||
}
|
||||
|
||||
// Build with retry.
|
||||
const MAX_ATTEMPTS: u32 = 2;
|
||||
const BACKOFF: [u64; 2] = [1, 4];
|
||||
let mut last_err = String::new();
|
||||
|
||||
for attempt in 0..MAX_ATTEMPTS {
|
||||
if attempt > 0 {
|
||||
std::thread::sleep(Duration::from_secs(BACKOFF[attempt as usize - 1]));
|
||||
}
|
||||
|
||||
let start = Instant::now();
|
||||
match try_build_venv(&cache_path, workdir, spec) {
|
||||
Ok(()) => {
|
||||
return Ok(BuildResult {
|
||||
venv_path: cache_path,
|
||||
cache_hit: false,
|
||||
duration: start.elapsed(),
|
||||
});
|
||||
}
|
||||
Err(e) => {
|
||||
last_err = e;
|
||||
// Remove partial cache before retry.
|
||||
let _ = std::fs::remove_dir_all(&cache_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Err(BuildError::BuildFailed {
|
||||
stderr: last_err,
|
||||
attempts: MAX_ATTEMPTS,
|
||||
})
|
||||
}
|
||||
|
||||
fn try_build_venv(
|
||||
venv_path: &Path,
|
||||
workdir: &Path,
|
||||
spec: &HarnessSpec,
|
||||
) -> Result<(), String> {
|
||||
// Find python binary.
|
||||
let python = python_binary(spec);
|
||||
|
||||
// Create the venv.
|
||||
let status = Command::new(&python)
|
||||
.args(["-m", "venv", "--clear"])
|
||||
.arg(venv_path)
|
||||
.env_clear()
|
||||
.env("PATH", std::env::var("PATH").unwrap_or_default())
|
||||
.env("HOME", std::env::var("HOME").unwrap_or_default())
|
||||
.status()
|
||||
.map_err(|e| format!("venv create: {e}"))?;
|
||||
|
||||
if !status.success() {
|
||||
return Err(format!("venv create failed: exit {status}"));
|
||||
}
|
||||
|
||||
// Install dependencies if requirements.txt exists.
|
||||
let req_path = workdir.join("requirements.txt");
|
||||
if req_path.exists() {
|
||||
let pip = venv_path.join("bin").join("pip");
|
||||
let output = Command::new(&pip)
|
||||
.args(["install", "--no-cache-dir", "-r"])
|
||||
.arg(&req_path)
|
||||
.env_clear()
|
||||
.env("PATH", std::env::var("PATH").unwrap_or_default())
|
||||
.env("HOME", std::env::var("HOME").unwrap_or_default())
|
||||
.output()
|
||||
.map_err(|e| format!("pip install: {e}"))?;
|
||||
|
||||
if !output.status.success() {
|
||||
return Err(String::from_utf8_lossy(&output.stderr).into_owned());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn python_binary(spec: &HarnessSpec) -> String {
|
||||
// Try the pinned version first; fall back to python3.
|
||||
let ver = spec
|
||||
.toolchain_id
|
||||
.strip_prefix("python-")
|
||||
.unwrap_or("3");
|
||||
let candidate = format!("python{ver}");
|
||||
if which_exists(&candidate) {
|
||||
return candidate;
|
||||
}
|
||||
"python3".to_owned()
|
||||
}
|
||||
|
||||
/// Report whether `which <cmd>` runs and exits successfully.
///
/// Any failure to spawn `which` itself counts as "not found".
fn which_exists(cmd: &str) -> bool {
    let probe = Command::new("which").arg(cmd).output();
    matches!(probe, Ok(found) if found.status.success())
}
|
||||
|
||||
fn compute_lockfile_hash(workdir: &Path) -> String {
|
||||
let mut h = Hasher::new();
|
||||
for fname in &["requirements.txt", "Pipfile.lock", "pyproject.toml"] {
|
||||
if let Ok(content) = std::fs::read(workdir.join(fname)) {
|
||||
h.update(fname.as_bytes());
|
||||
h.update(&content);
|
||||
}
|
||||
}
|
||||
let out = h.finalize();
|
||||
format!("{:016x}", u64::from_le_bytes(out.as_bytes()[..8].try_into().unwrap()))
|
||||
}
|
||||
|
||||
fn build_cache_path(
|
||||
lockfile_hash: &str,
|
||||
language: &str,
|
||||
toolchain_id: &str,
|
||||
) -> Result<PathBuf, BuildError> {
|
||||
// Respect test override.
|
||||
let base = if let Ok(p) = std::env::var("NYX_BUILD_CACHE") {
|
||||
PathBuf::from(p)
|
||||
} else {
|
||||
let dirs = ProjectDirs::from("", "", "nyx").ok_or_else(|| {
|
||||
BuildError::Io(std::io::Error::new(
|
||||
std::io::ErrorKind::NotFound,
|
||||
"cannot determine cache dir",
|
||||
))
|
||||
})?;
|
||||
dirs.cache_dir()
|
||||
.join("dynamic")
|
||||
.join("build-cache")
|
||||
};
|
||||
|
||||
let name = format!("{lockfile_hash}-{language}-{toolchain_id}");
|
||||
let path = base.join(&name);
|
||||
std::fs::create_dir_all(&path)?;
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
let _ = std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o700));
|
||||
}
|
||||
Ok(path)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Hashing the same empty directory twice yields the same digest.
    #[test]
    fn lockfile_hash_empty_dir_stable() {
        let scratch = tempfile::TempDir::new().unwrap();
        let first = compute_lockfile_hash(scratch.path());
        let second = compute_lockfile_hash(scratch.path());
        assert_eq!(first, second, "hash must be deterministic");
    }

    /// Adding a requirements.txt changes the digest.
    #[test]
    fn lockfile_hash_changes_with_content() {
        let scratch = tempfile::TempDir::new().unwrap();
        let before = compute_lockfile_hash(scratch.path());

        let manifest = scratch.path().join("requirements.txt");
        std::fs::write(manifest, "requests==2.28.0\n").unwrap();

        let after = compute_lockfile_hash(scratch.path());
        assert_ne!(before, after, "hash must change when requirements.txt changes");
    }
}
|
||||
|
|
@ -4,40 +4,44 @@
|
|||
//! detection oracle. Payloads are static data — adding a new one is a code
|
||||
//! review, not a runtime config knob, so they cannot drift between versions.
|
||||
//!
|
||||
//! The corpus is intentionally minimal at the start. Goal is one payload
|
||||
//! per cap that triggers reliably on the obvious case; tuning happens once
|
||||
//! we have real targets.
|
||||
//! Differential confirmation (§4.1): for `HTML_ESCAPE` and `FILE_IO`, a
|
||||
//! mandatory benign payload is included. `Confirmed` requires the vuln oracle
|
||||
//! to fire AND the benign oracle NOT to fire. This prevents false-positives
|
||||
//! from coincidental output matches.
|
||||
|
||||
use crate::labels::Cap;
|
||||
|
||||
/// Bump when the corpus content changes in a way that invalidates previously-
|
||||
/// computed [`crate::dynamic::spec::HarnessSpec::spec_hash`] values (e.g.
|
||||
/// payloads renamed, oracle semantics changed, new cap entries added).
|
||||
pub const CORPUS_VERSION: u32 = 1;
|
||||
/// computed [`crate::dynamic::spec::HarnessSpec::spec_hash`] values.
|
||||
pub const CORPUS_VERSION: u32 = 2;
|
||||
|
||||
/// A single payload + the oracle that confirms it fired.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Payload {
|
||||
/// Bytes injected into the [`crate::dynamic::spec::PayloadSlot`].
|
||||
pub bytes: &'static [u8],
|
||||
/// Human label for logs and reports (`"sqli-quote-or-1"` etc.).
|
||||
/// Human label for logs and reports.
|
||||
pub label: &'static str,
|
||||
/// How we decide the sink fired. See [`Oracle`].
|
||||
pub oracle: Oracle,
|
||||
/// If `true`, this is a benign control payload.
|
||||
/// `Confirmed` requires the vuln payload to trigger AND the benign payload
|
||||
/// NOT to trigger (differential confirmation, §4.1).
|
||||
pub is_benign: bool,
|
||||
}
|
||||
|
||||
/// Detection strategy. Multiple oracles run in parallel; first hit wins.
|
||||
/// Detection strategy.
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Oracle {
|
||||
/// Substring on stdout/stderr (e.g. `"PAYLOAD-MARKER"`, SQL error text).
|
||||
/// Substring on stdout/stderr.
|
||||
OutputContains(&'static str),
|
||||
/// Process exited with a crash signal (SIGSEGV, SIGABRT).
|
||||
Crash,
|
||||
/// Outbound network connection observed to a controlled sink host.
|
||||
OobCallback { host: &'static str },
|
||||
/// File written outside the sandbox sandbox root.
|
||||
/// File written outside the sandbox root.
|
||||
FileEscape,
|
||||
/// Non-zero exit with specific status (e.g. shell command success).
|
||||
/// Non-zero exit with specific status.
|
||||
ExitStatus(i32),
|
||||
}
|
||||
|
||||
|
|
@ -45,36 +49,32 @@ pub enum Oracle {
|
|||
///
|
||||
/// # Cap coverage (update when adding/removing Cap bits)
|
||||
///
|
||||
/// | Cap | Supported | Notes |
|
||||
/// |--------------------|-----------|--------------------------------|
|
||||
/// | SQL_QUERY | yes | SQLI payloads |
|
||||
/// | CODE_EXEC | yes | command injection echo marker |
|
||||
/// | FILE_IO | yes | path traversal to /etc/passwd |
|
||||
/// | SSRF | yes | OOB callback probe |
|
||||
/// | HTML_ESCAPE | yes | XSS script marker |
|
||||
/// | ENV_VAR | no | source-only cap; no sink oracle|
|
||||
/// | SHELL_ESCAPE | no | sanitizer cap; no sink oracle |
|
||||
/// | URL_ENCODE | no | sanitizer cap; no sink oracle |
|
||||
/// | JSON_PARSE | no | no reliable oracle |
|
||||
/// | FMT_STRING | no | no reliable oracle |
|
||||
/// | DESERIALIZE | no | no reliable oracle |
|
||||
/// | CRYPTO | no | no reliable oracle |
|
||||
/// | UNAUTHORIZED_ID | no | auth bypass; no oracle |
|
||||
/// | DATA_EXFIL | no | exfil; no oracle |
|
||||
/// | LDAP_INJECTION | no | no oracle |
|
||||
/// | XPATH_INJECTION | no | no oracle |
|
||||
/// | HEADER_INJECTION | no | no oracle |
|
||||
/// | OPEN_REDIRECT | no | no oracle |
|
||||
/// | SSTI | no | no oracle |
|
||||
/// | XXE | no | no oracle |
|
||||
/// | PROTOTYPE_POLLUTION| no | JS-runtime; no oracle |
|
||||
///
|
||||
/// When adding a new `Cap` bit: add a row above, update this function, and
|
||||
/// bump [`CORPUS_VERSION`] if you add payload support.
|
||||
/// | Cap | Supported | Notes |
|
||||
/// |--------------------|-----------|-----------------------------------|
|
||||
/// | SQL_QUERY | yes | SQLI payloads (echo-query style) |
|
||||
/// | CODE_EXEC | yes | command injection echo marker |
|
||||
/// | FILE_IO | yes | path traversal + benign control |
|
||||
/// | SSRF | yes | file:// scheme + OutputContains |
|
||||
/// | HTML_ESCAPE | yes | XSS script marker + benign control |
|
||||
/// | ENV_VAR | no | source-only cap; no sink oracle |
|
||||
/// | SHELL_ESCAPE | no | sanitizer cap; no sink oracle |
|
||||
/// | URL_ENCODE | no | sanitizer cap; no sink oracle |
|
||||
/// | JSON_PARSE | no | no reliable oracle |
|
||||
/// | FMT_STRING | no | no reliable oracle |
|
||||
/// | DESERIALIZE | no | no reliable oracle |
|
||||
/// | CRYPTO | no | no reliable oracle |
|
||||
/// | UNAUTHORIZED_ID | no | auth bypass; no oracle |
|
||||
/// | DATA_EXFIL | no | exfil; no oracle |
|
||||
/// | LDAP_INJECTION | no | no oracle |
|
||||
/// | XPATH_INJECTION | no | no oracle |
|
||||
/// | HEADER_INJECTION | no | no oracle |
|
||||
/// | OPEN_REDIRECT | no | no oracle |
|
||||
/// | SSTI | no | no oracle |
|
||||
/// | XXE | no | no oracle |
|
||||
/// | PROTOTYPE_POLLUTION| no | JS-runtime; no oracle |
|
||||
///
|
||||
/// Compile-time exhaustiveness guard: `CORPUS_SUPPORTED | CORPUS_UNSUPPORTED`
|
||||
/// must equal `Cap::all()`. Adding a new Cap bit without updating this table
|
||||
/// triggers a `const` assertion failure at build time.
|
||||
/// must equal `Cap::all()`.
|
||||
const CORPUS_SUPPORTED: u32 = Cap::SQL_QUERY.bits()
|
||||
| Cap::CODE_EXEC.bits()
|
||||
| Cap::FILE_IO.bits()
|
||||
|
|
@ -123,6 +123,11 @@ pub fn payloads_for(cap: Cap) -> &'static [Payload] {
|
|||
&[]
|
||||
}
|
||||
|
||||
/// Return the benign control payload for a cap, if one exists.
|
||||
pub fn benign_payload_for(cap: Cap) -> Option<&'static Payload> {
|
||||
payloads_for(cap).iter().find(|p| p.is_benign)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
@ -139,60 +144,120 @@ mod tests {
|
|||
#[test]
|
||||
fn unsupported_caps_return_empty() {
|
||||
let unsupported = [
|
||||
Cap::ENV_VAR,
|
||||
Cap::SHELL_ESCAPE,
|
||||
Cap::URL_ENCODE,
|
||||
Cap::JSON_PARSE,
|
||||
Cap::FMT_STRING,
|
||||
Cap::DESERIALIZE,
|
||||
Cap::CRYPTO,
|
||||
Cap::UNAUTHORIZED_ID,
|
||||
Cap::DATA_EXFIL,
|
||||
Cap::LDAP_INJECTION,
|
||||
Cap::XPATH_INJECTION,
|
||||
Cap::HEADER_INJECTION,
|
||||
Cap::OPEN_REDIRECT,
|
||||
Cap::SSTI,
|
||||
Cap::XXE,
|
||||
Cap::ENV_VAR, Cap::SHELL_ESCAPE, Cap::URL_ENCODE, Cap::JSON_PARSE,
|
||||
Cap::FMT_STRING, Cap::DESERIALIZE, Cap::CRYPTO, Cap::UNAUTHORIZED_ID,
|
||||
Cap::DATA_EXFIL, Cap::LDAP_INJECTION, Cap::XPATH_INJECTION,
|
||||
Cap::HEADER_INJECTION, Cap::OPEN_REDIRECT, Cap::SSTI, Cap::XXE,
|
||||
Cap::PROTOTYPE_POLLUTION,
|
||||
];
|
||||
for cap in unsupported {
|
||||
assert!(
|
||||
payloads_for(cap).is_empty(),
|
||||
"expected {cap:?} to return empty payloads; update coverage table if adding support"
|
||||
"expected {cap:?} to return empty payloads",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fileio_has_benign_payload() {
|
||||
assert!(benign_payload_for(Cap::FILE_IO).is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn html_escape_has_benign_payload() {
|
||||
assert!(benign_payload_for(Cap::HTML_ESCAPE).is_some());
|
||||
}
|
||||
|
||||
#[test]
fn vuln_payloads_not_benign() {
    // At least one non-benign payload per supported cap. The list mirrors the
    // "yes" rows of the cap coverage table, including SSRF.
    for cap in [
        Cap::SQL_QUERY,
        Cap::CODE_EXEC,
        Cap::FILE_IO,
        Cap::SSRF,
        Cap::HTML_ESCAPE,
    ] {
        let has_vuln = payloads_for(cap).iter().any(|p| !p.is_benign);
        assert!(has_vuln, "{cap:?} must have at least one vuln (non-benign) payload");
    }
}
|
||||
|
||||
#[test]
|
||||
fn marker_uniqueness_sqli() {
|
||||
// NYX_PWN must not appear in SQLI payloads (see marker_uniqueness test).
|
||||
for p in SQLI {
|
||||
assert!(!p.bytes.windows(7).any(|w| w == b"NYX_PWN"),
|
||||
"NYX_PWN (CODE_EXEC marker) must not appear in SQLI payloads");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const SQLI: &[Payload] = &[Payload {
|
||||
bytes: b"' OR 1=1-- NYX",
|
||||
label: "sqli-or-1",
|
||||
oracle: Oracle::OutputContains("NYX"),
|
||||
}];
|
||||
|
||||
const CMDI: &[Payload] = &[Payload {
|
||||
bytes: b"; echo NYX_PWN",
|
||||
label: "cmdi-echo-marker",
|
||||
oracle: Oracle::OutputContains("NYX_PWN"),
|
||||
}];
|
||||
|
||||
const PATH_TRAV: &[Payload] = &[Payload {
|
||||
bytes: b"../../../../etc/passwd",
|
||||
label: "path-traversal-passwd",
|
||||
oracle: Oracle::OutputContains("root:"),
|
||||
}];
|
||||
|
||||
const SSRF_PAYLOADS: &[Payload] = &[Payload {
|
||||
bytes: b"http://nyx-oob.invalid/probe",
|
||||
label: "ssrf-oob",
|
||||
oracle: Oracle::OobCallback {
|
||||
host: "nyx-oob.invalid",
|
||||
// ── SQLi ─────────────────────────────────────────────────────────────────────
|
||||
// Payloads designed to cause the target code to echo output containing the
// "NYX_SQL_CONFIRMED" oracle marker.
// Fixtures should print the raw query or error message to stdout/stderr.
|
||||
const SQLI: &[Payload] = &[
|
||||
Payload {
|
||||
bytes: b"' OR '1'='1",
|
||||
label: "sqli-tautology",
|
||||
oracle: Oracle::OutputContains("NYX_SQL_CONFIRMED"),
|
||||
is_benign: false,
|
||||
},
|
||||
Payload {
|
||||
bytes: b"' UNION SELECT 'NYX_SQL_CONFIRMED'--",
|
||||
label: "sqli-union-nyx",
|
||||
oracle: Oracle::OutputContains("NYX_SQL_CONFIRMED"),
|
||||
is_benign: false,
|
||||
},
|
||||
];
|
||||
|
||||
// ── Command injection ─────────────────────────────────────────────────────────
|
||||
const CMDI: &[Payload] = &[Payload {
|
||||
bytes: b"; echo NYX_PWN_CMDI",
|
||||
label: "cmdi-echo-marker",
|
||||
oracle: Oracle::OutputContains("NYX_PWN_CMDI"),
|
||||
is_benign: false,
|
||||
}];
|
||||
|
||||
const XSS: &[Payload] = &[Payload {
|
||||
bytes: b"<script>NYX_XSS</script>",
|
||||
label: "xss-script-marker",
|
||||
oracle: Oracle::OutputContains("<script>NYX_XSS</script>"),
|
||||
// ── Path traversal ────────────────────────────────────────────────────────────
|
||||
// Benign control: a plain relative file name that is not expected to exist,
// so no sensitive file content can be echoed.
// Differential confirmation: the vuln payload fires the "root:" oracle by
// reading /etc/passwd; the benign payload is checked against the same oracle
// and must NOT fire it.
|
||||
const PATH_TRAV: &[Payload] = &[
|
||||
Payload {
|
||||
bytes: b"../../../../etc/passwd",
|
||||
label: "path-traversal-passwd",
|
||||
oracle: Oracle::OutputContains("root:"),
|
||||
is_benign: false,
|
||||
},
|
||||
Payload {
|
||||
bytes: b"benign_safe_file_that_does_not_exist_NYX_BENIGN",
|
||||
label: "path-traversal-benign",
|
||||
oracle: Oracle::OutputContains("root:"),
|
||||
is_benign: true,
|
||||
},
|
||||
];
|
||||
|
||||
// ── SSRF ─────────────────────────────────────────────────────────────────────
|
||||
// Uses file:// scheme which is supported by Python's urllib out of the box.
|
||||
// Oracle: OutputContains("daemon:") — a distinct /etc/passwd substring that
|
||||
// matches both Linux (`daemon:x:1:1:`) and macOS (`daemon:*:1:1:`) formats.
|
||||
// Must NOT collide with FILE_IO's "root:" marker (see marker_uniqueness test).
|
||||
const SSRF_PAYLOADS: &[Payload] = &[Payload {
|
||||
bytes: b"file:///etc/passwd",
|
||||
label: "ssrf-file-scheme",
|
||||
oracle: Oracle::OutputContains("daemon:"),
|
||||
is_benign: false,
|
||||
}];
|
||||
|
||||
// ── XSS ──────────────────────────────────────────────────────────────────────
|
||||
// Vuln: raw script tag echoed to output.
|
||||
// Benign: harmless text that should NOT produce the XSS marker.
|
||||
const XSS: &[Payload] = &[
|
||||
Payload {
|
||||
bytes: b"<script>NYX_XSS_CONFIRMED</script>",
|
||||
label: "xss-script-marker",
|
||||
oracle: Oracle::OutputContains("<script>NYX_XSS_CONFIRMED</script>"),
|
||||
is_benign: false,
|
||||
},
|
||||
Payload {
|
||||
bytes: b"Hello World",
|
||||
label: "xss-benign-text",
|
||||
oracle: Oracle::OutputContains("<script>NYX_XSS_CONFIRMED</script>"),
|
||||
is_benign: true,
|
||||
},
|
||||
];
|
||||
|
|
|
|||
|
|
@ -5,14 +5,18 @@
|
|||
//! 1. Imports/loads the target module from the project tree.
|
||||
//! 2. Reads the payload from a known channel (env var `NYX_PAYLOAD`).
|
||||
//! 3. Invokes the entry point with the payload routed to the right slot.
|
||||
//! 4. Lets the sink either fire or not — the oracle observes from outside.
|
||||
//! 4. Instruments the sink call site with a `sys.settrace` probe
|
||||
//! (`__NYX_SINK_HIT__` sentinel on stdout).
|
||||
//! 5. Lets the sink either fire or not — the oracle observes from outside.
|
||||
//!
|
||||
//! One generator per [`Lang`]. Each emits source plus a build command.
|
||||
//! Build artefacts are staged inside the sandbox working dir, never the
|
||||
//! user's tree.
|
||||
|
||||
use crate::dynamic::lang;
|
||||
use crate::dynamic::spec::HarnessSpec;
|
||||
use crate::symbol::Lang;
|
||||
use crate::evidence::UnsupportedReason;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// A built harness ready to hand off to the sandbox.
|
||||
|
|
@ -20,27 +24,104 @@ use std::path::PathBuf;
|
|||
pub struct BuiltHarness {
    /// Working directory containing the harness source + any build output.
    pub workdir: PathBuf,
    /// Command to invoke, as an argument vector (e.g. `["python3", "harness.py"]`).
    pub command: Vec<String>,
    /// Environment variables to set when running, as `(name, value)` pairs.
    pub env: Vec<(String, String)>,
    /// Generated harness source code (for repro artifacts).
    pub source: String,
    /// Entry-point source extracted from the project (may be empty if not found).
    pub entry_source: String,
}
|
||||
|
||||
/// Build a harness from a spec. Returns the artefact + run command.
|
||||
/// Build a harness from a spec. Returns the artifact + run command.
|
||||
pub fn build(spec: &HarnessSpec) -> Result<BuiltHarness, HarnessError> {
|
||||
// Emit source via the language-specific emitter.
|
||||
let harness_src = lang::emit(spec).map_err(HarnessError::Unsupported)?;
|
||||
|
||||
// Stage in a temporary workdir.
|
||||
let workdir = stage_harness(spec, &harness_src)?;
|
||||
|
||||
// Extract entry source for repro artifacts (best-effort; not fatal).
|
||||
let entry_source = extract_entry_source(spec);
|
||||
|
||||
Ok(BuiltHarness {
|
||||
workdir,
|
||||
command: harness_src.command,
|
||||
env: vec![],
|
||||
source: harness_src.source,
|
||||
entry_source,
|
||||
})
|
||||
}
|
||||
|
||||
/// Write the harness source to a temporary working directory.
|
||||
///
|
||||
/// Stub: per-language emitters will live in their own files
|
||||
/// (`harness/python.rs`, `harness/rust.rs`, etc.) and dispatch off
|
||||
/// `spec.lang`.
|
||||
pub fn build(_spec: &HarnessSpec) -> Result<BuiltHarness, HarnessError> {
|
||||
Err(HarnessError::Unimplemented)
|
||||
/// On Unix we prefer `/tmp/nyx-harness/{spec_hash}` over `env::temp_dir()`
|
||||
/// because macOS' `$TMPDIR` resolves to `/var/folders/.../T/` — deep enough
|
||||
/// that traversal payloads like `../../../../etc/passwd` cannot escape to
|
||||
/// `/` from the workdir, which masks path-traversal verdicts. `/tmp` is
|
||||
/// shallow (resolves to `/private/tmp` on macOS, `/tmp` on Linux) and keeps
|
||||
/// payload depth assumptions portable.
|
||||
fn stage_harness(
|
||||
spec: &HarnessSpec,
|
||||
harness_src: &lang::HarnessSource,
|
||||
) -> Result<PathBuf, HarnessError> {
|
||||
let base_dir = if cfg!(unix) {
|
||||
PathBuf::from("/tmp/nyx-harness")
|
||||
} else {
|
||||
std::env::temp_dir().join("nyx-harness")
|
||||
};
|
||||
let workdir = base_dir.join(&spec.spec_hash);
|
||||
fs::create_dir_all(&workdir)?;
|
||||
|
||||
// Write harness source.
|
||||
let harness_path = workdir.join(&harness_src.filename);
|
||||
fs::write(&harness_path, harness_src.source.as_bytes())?;
|
||||
|
||||
// Copy the entry file into the workdir so the harness can import it.
|
||||
copy_entry_file(spec, &workdir);
|
||||
|
||||
Ok(workdir)
|
||||
}
|
||||
|
||||
/// Copy the entry Python file to the workdir so the harness can `import` it.
|
||||
/// Best-effort: silently skips if the file cannot be found/copied.
|
||||
fn copy_entry_file(spec: &HarnessSpec, workdir: &PathBuf) {
|
||||
// Try the entry file relative to the project root candidates.
|
||||
let candidates = [
|
||||
PathBuf::from(&spec.entry_file),
|
||||
PathBuf::from(".").join(&spec.entry_file),
|
||||
];
|
||||
for src in &candidates {
|
||||
if src.exists() {
|
||||
if let Some(fname) = src.file_name() {
|
||||
let dst = workdir.join(fname);
|
||||
if !dst.exists() {
|
||||
let _ = fs::copy(src, &dst);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract the source of the entry file (for repro bundles). Best-effort.
|
||||
fn extract_entry_source(spec: &HarnessSpec) -> String {
|
||||
let candidates = [
|
||||
PathBuf::from(&spec.entry_file),
|
||||
PathBuf::from(".").join(&spec.entry_file),
|
||||
];
|
||||
for path in &candidates {
|
||||
if let Ok(s) = fs::read_to_string(path) {
|
||||
return s;
|
||||
}
|
||||
}
|
||||
String::new()
|
||||
}
|
||||
|
||||
/// Errors produced while building a harness.
// NOTE(review): the `Display` impl visible in this file only has arms for
// `Unsupported`, `BuildFailed` and `Io` — confirm whether `Unimplemented`
// and `UnsupportedLang` are still meant to be part of this enum.
#[derive(Debug)]
pub enum HarnessError {
    /// Harness generation is not implemented.
    Unimplemented,
    /// No harness support for the given language.
    UnsupportedLang(Lang),
    /// The language emitter rejected the spec, with the reason it gave.
    Unsupported(UnsupportedReason),
    /// Building the harness failed; the string carries the message.
    BuildFailed(String),
    /// An underlying filesystem operation failed.
    Io(std::io::Error),
}
|
||||
|
|
@ -50,3 +131,62 @@ impl From<std::io::Error> for HarnessError {
|
|||
HarnessError::Io(e)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for HarnessError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
HarnessError::Unsupported(r) => write!(f, "unsupported: {r:?}"),
|
||||
HarnessError::BuildFailed(msg) => write!(f, "build failed: {msg}"),
|
||||
HarnessError::Io(e) => write!(f, "I/O: {e}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot};
    use crate::labels::Cap;
    use crate::symbol::Lang;

    /// A spec for a language without an emitter must be rejected with
    /// `HarnessError::Unsupported`.
    #[test]
    fn build_unsupported_lang_returns_err() {
        let spec = HarnessSpec {
            finding_id: "0000000000000001".into(),
            entry_file: "src/main.rs".into(),
            entry_name: "handle_request".into(),
            entry_kind: EntryKind::Function,
            lang: Lang::Rust,
            toolchain_id: "rust-stable".into(),
            payload_slot: PayloadSlot::Param(0),
            expected_cap: Cap::SQL_QUERY,
            constraint_hints: vec![],
            sink_file: "src/main.rs".into(),
            sink_line: 5,
            spec_hash: "0000000000000000".into(),
        };
        let err = build(&spec).unwrap_err();
        assert!(matches!(err, HarnessError::Unsupported(_)));
    }

    /// Building a Python spec stages `harness.py` in the workdir and returns
    /// non-empty harness source. The entry file need not exist for this to pass.
    #[test]
    fn build_python_creates_workdir() {
        let spec = HarnessSpec {
            finding_id: "0000000000000001".into(),
            entry_file: "src/app.py".into(),
            entry_name: "login".into(),
            entry_kind: EntryKind::Function,
            lang: Lang::Python,
            toolchain_id: "python-3".into(),
            payload_slot: PayloadSlot::Param(0),
            expected_cap: Cap::SQL_QUERY,
            constraint_hints: vec![],
            sink_file: "src/app.py".into(),
            sink_line: 10,
            spec_hash: "test0000abcd1234".into(),
        };
        let harness = build(&spec).unwrap();
        assert!(harness.workdir.join("harness.py").exists());
        assert!(!harness.source.is_empty());
    }
}
|
||||
|
|
|
|||
29
src/dynamic/lang/mod.rs
Normal file
29
src/dynamic/lang/mod.rs
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
//! Per-language harness emitters.
|
||||
//!
|
||||
//! Each submodule implements `emit(spec) -> HarnessSource` for one language.
|
||||
//! The top-level [`emit`] function dispatches on `spec.lang`.
|
||||
|
||||
pub mod python;
|
||||
|
||||
use crate::dynamic::spec::HarnessSpec;
|
||||
use crate::evidence::UnsupportedReason;
|
||||
use crate::symbol::Lang;
|
||||
|
||||
/// Generated harness source ready to write to disk.
#[derive(Debug, Clone)]
pub struct HarnessSource {
    /// Harness source code as a UTF-8 string.
    pub source: String,
    /// Filename for the harness (e.g. `"harness.py"`).
    pub filename: String,
    /// Argument vector used to invoke the harness (relative to the workdir),
    /// e.g. `["python3", "harness.py"]`.
    pub command: Vec<String>,
}
|
||||
|
||||
/// Dispatch to the appropriate language emitter.
|
||||
pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
|
||||
match spec.lang {
|
||||
Lang::Python => python::emit(spec),
|
||||
_ => Err(UnsupportedReason::LangUnsupported),
|
||||
}
|
||||
}
|
||||
249
src/dynamic/lang/python.rs
Normal file
249
src/dynamic/lang/python.rs
Normal file
|
|
@ -0,0 +1,249 @@
|
|||
//! Python harness emitter.
|
||||
//!
|
||||
//! Generates a Python script that:
|
||||
//! 1. Reads the payload from `NYX_PAYLOAD` env var.
|
||||
//! 2. Installs a `sys.settrace`-based probe at the sink call site
|
||||
//! (`spec.sink_file:spec.sink_line`) that prints `__NYX_SINK_HIT__`.
|
||||
//! 3. Imports the entry module and calls the entry function with the
|
||||
//! payload routed to the correct parameter slot.
|
||||
//! 4. Catches all exceptions to prevent harness crashes from masking results.
|
||||
//!
|
||||
//! Payload slot support:
|
||||
//! - `PayloadSlot::Param(n)` — n-th positional argument.
|
||||
//! - `PayloadSlot::EnvVar(name)` — set env var before calling.
|
||||
//! - Other slots produce `UnsupportedReason::EntryKindUnsupported`.
|
||||
|
||||
use crate::dynamic::lang::HarnessSource;
|
||||
use crate::dynamic::spec::{HarnessSpec, PayloadSlot};
|
||||
use crate::evidence::UnsupportedReason;
|
||||
|
||||
/// Emit a Python harness for `spec`.
|
||||
pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
|
||||
// Validate payload slot.
|
||||
match &spec.payload_slot {
|
||||
PayloadSlot::Param(_) | PayloadSlot::EnvVar(_) | PayloadSlot::Stdin => {}
|
||||
_ => return Err(UnsupportedReason::EntryKindUnsupported),
|
||||
}
|
||||
|
||||
let source = generate_source(spec);
|
||||
|
||||
Ok(HarnessSource {
|
||||
source,
|
||||
filename: "harness.py".to_owned(),
|
||||
command: vec!["python3".to_owned(), "harness.py".to_owned()],
|
||||
})
|
||||
}
|
||||
|
||||
/// Render the Python harness script for `spec`.
///
/// The generated script, in order:
/// 1. installs a `sys.settrace` tracer that prints `__NYX_SINK_HIT__` once,
///    on the first `line` event in a file matching `spec.sink_file` (exact,
///    suffix or basename match) whose line number lies in
///    `sink_line ..= sink_line + 5`;
/// 2. loads the payload from `NYX_PAYLOAD`, falling back to base64 in
///    `NYX_PAYLOAD_B64`, and decodes it as UTF-8 (latin-1 on failure);
/// 3. imports the entry module (exit code 77 on `ImportError`);
/// 4. runs the slot-specific setup and call produced by `build_call`,
///    printing a non-`None` result to stdout and exceptions to stderr;
/// 5. removes the tracer with `sys.settrace(None)`.
fn generate_source(spec: &HarnessSpec) -> String {
    let entry_module = module_name(&spec.entry_file);
    let entry_fn = &spec.entry_name;
    let sink_file = &spec.sink_file;
    let sink_line = spec.sink_line;

    // Build the call expression based on payload slot.
    let (pre_call, call_expr) = build_call(spec, entry_module, entry_fn);

    // In the template, `{{` / `}}` are literal braces for Python f-strings.
    // `{sink_file:?}` uses Rust's `Debug` formatting to produce a quoted,
    // escaped literal — presumably always a valid Python string for the
    // paths seen in practice; TODO confirm for non-ASCII paths.
    format!(
        r#"#!/usr/bin/env python3
"""Nyx dynamic harness — auto-generated, do not edit."""
import os
import sys
import traceback

# ── Sink-reachability probe (sys.settrace) ────────────────────────────────────
# Fires __NYX_SINK_HIT__ exactly once when the traced function is called at
# the expected file:line. Filtered to avoid false positives from library code.

_NYX_SINK_FILE = {sink_file:?}
_NYX_SINK_LINE = {sink_line}
_NYX_SINK_HIT = False

def _nyx_tracer(frame, event, arg):
    global _NYX_SINK_HIT
    if not _NYX_SINK_HIT and event == "line":
        # Normalise path for comparison (basename match as fallback).
        fname = frame.f_code.co_filename
        if fname == _NYX_SINK_FILE or fname.endswith(_NYX_SINK_FILE) or (
            os.path.basename(fname) == os.path.basename(_NYX_SINK_FILE)
        ):
            if _NYX_SINK_LINE <= frame.f_lineno <= _NYX_SINK_LINE + 5:
                _NYX_SINK_HIT = True
                print("__NYX_SINK_HIT__", flush=True)
    return _nyx_tracer

sys.settrace(_nyx_tracer)

# ── Payload loading ────────────────────────────────────────────────────────────
# Primary: raw bytes from NYX_PAYLOAD; fallback: base64 from NYX_PAYLOAD_B64.

_payload_raw = os.environb.get(b"NYX_PAYLOAD", b"")
if not _payload_raw:
    import base64
    _payload_b64 = os.environ.get("NYX_PAYLOAD_B64", "")
    if _payload_b64:
        _payload_raw = base64.b64decode(_payload_b64)

# Decode payload to str (best-effort; use latin-1 as lossless fallback).
try:
    payload = _payload_raw.decode("utf-8")
except UnicodeDecodeError:
    payload = _payload_raw.decode("latin-1")

# ── Entry module import ────────────────────────────────────────────────────────
# The entry file is mounted at the harness workdir as the module.
# sys.path is extended to include the workdir so relative imports work.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, ".")

try:
    import {entry_module} as _entry_mod
except ImportError as _e:
    print(f"NYX_IMPORT_ERROR: {{_e}}", file=sys.stderr, flush=True)
    sys.exit(77) # Distinct exit code: import failed

# ── Pre-call setup ─────────────────────────────────────────────────────────────
{pre_call}
# ── Call entry point ──────────────────────────────────────────────────────────
try:
    _result = {call_expr}
    if _result is not None:
        try:
            print(str(_result), flush=True)
        except Exception:
            pass
except SystemExit as _e:
    sys.exit(_e.code)
except Exception as _e:
    # Print error to stderr so the oracle can observe error-based injection.
    print(f"NYX_EXCEPTION: {{type(_e).__name__}}: {{_e}}", file=sys.stderr, flush=True)

# Ensure probe fires for line-range matches on late-called sinks.
sys.settrace(None)
"#,
        sink_file = sink_file,
        sink_line = sink_line,
        entry_module = entry_module,
        pre_call = pre_call,
        call_expr = call_expr,
    )
}
|
||||
|
||||
/// Build `(pre_call_setup, call_expression)` for the chosen payload slot.
|
||||
fn build_call(spec: &HarnessSpec, _module: &str, func: &str) -> (String, String) {
|
||||
match &spec.payload_slot {
|
||||
PayloadSlot::Param(idx) => {
|
||||
// Build positional args: put payload at index `idx`, fill others with "".
|
||||
// For simplicity with unknown arities, pass payload as the first arg.
|
||||
let pre = String::new();
|
||||
let call = if *idx == 0 {
|
||||
format!("_entry_mod.{func}(payload)")
|
||||
} else {
|
||||
// Pad with empty strings up to idx, then payload.
|
||||
let pads = (0..*idx).map(|_| "\"\"").collect::<Vec<_>>().join(", ");
|
||||
format!("_entry_mod.{func}({pads}, payload)")
|
||||
};
|
||||
(pre, call)
|
||||
}
|
||||
PayloadSlot::EnvVar(name) => {
|
||||
let pre = format!("os.environ[{name:?}] = payload\n");
|
||||
let call = format!("_entry_mod.{func}()");
|
||||
(pre, call)
|
||||
}
|
||||
PayloadSlot::Stdin => {
|
||||
let pre = format!(
|
||||
"import io\nsys.stdin = io.TextIOWrapper(io.BytesIO(_payload_raw))\n"
|
||||
);
|
||||
let call = format!("_entry_mod.{func}()");
|
||||
(pre, call)
|
||||
}
|
||||
_ => {
|
||||
let pre = String::new();
|
||||
let call = format!("_entry_mod.{func}(payload)");
|
||||
(pre, call)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Derive the Python module name from an entry file path.
///
/// Everything up to and including the last `/` or `\` is dropped, then a
/// trailing `.py` is removed if present:
/// `"src/handlers/login.py"` → `"login"`, `"no_ext"` → `"no_ext"`.
fn module_name(entry_file: &str) -> &str {
    // Both separators are ASCII, so `sep + 1` is always a char boundary.
    let file_name = match entry_file.rfind(|c: char| c == '/' || c == '\\') {
        Some(sep) => &entry_file[sep + 1..],
        None => entry_file,
    };
    file_name.strip_suffix(".py").unwrap_or(file_name)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot};
    use crate::labels::Cap;
    use crate::symbol::Lang;

    /// Python spec fixture; only the payload slot varies between tests.
    fn make_spec(payload_slot: PayloadSlot) -> HarnessSpec {
        HarnessSpec {
            finding_id: "0000000000000001".into(),
            entry_file: "src/app.py".into(),
            entry_name: "login".into(),
            entry_kind: EntryKind::Function,
            lang: Lang::Python,
            toolchain_id: "python-3.11".into(),
            payload_slot,
            expected_cap: Cap::SQL_QUERY,
            constraint_hints: vec![],
            sink_file: "src/app.py".into(),
            sink_line: 15,
            spec_hash: "00000000deadbeef".into(),
        }
    }

    /// The emitted source contains the tracer, the sentinel and the call.
    #[test]
    fn emit_produces_source() {
        let spec = make_spec(PayloadSlot::Param(0));
        let harness = emit(&spec).unwrap();
        assert!(harness.source.contains("sys.settrace"));
        assert!(harness.source.contains("__NYX_SINK_HIT__"));
        assert!(harness.source.contains("event == \"line\""));
        assert!(harness.source.contains("login(payload)"));
        assert_eq!(harness.filename, "harness.py");
    }

    /// A payload at positional index 1 is preceded by one `""` placeholder.
    #[test]
    fn emit_param_index_1() {
        let spec = make_spec(PayloadSlot::Param(1));
        let harness = emit(&spec).unwrap();
        assert!(harness.source.contains("login(\"\", payload)"));
    }

    /// The env-var slot assigns the payload to the named variable.
    #[test]
    fn emit_env_var_slot() {
        let spec = make_spec(PayloadSlot::EnvVar("USER_INPUT".into()));
        let harness = emit(&spec).unwrap();
        assert!(harness.source.contains("os.environ[\"USER_INPUT\"] = payload"));
    }

    #[test]
    fn module_name_strips_path_and_ext() {
        assert_eq!(module_name("src/handlers/login.py"), "login");
        assert_eq!(module_name("app.py"), "app");
        assert_eq!(module_name("no_ext"), "no_ext");
    }

    /// Despite its name, this asserts that `python::emit` succeeds when
    /// `spec.lang` is not Python: the emitter never inspects `lang`, and
    /// language dispatch happens in `lang/mod.rs`.
    #[test]
    fn unsupported_lang_returns_err() {
        let mut spec = make_spec(PayloadSlot::Param(0));
        spec.lang = Lang::Rust;
        let harness = emit(&spec);
        assert!(harness.is_ok());
    }
}
|
||||
|
|
@ -8,13 +8,19 @@
|
|||
//! Pipeline:
|
||||
//!
|
||||
//! ```text
|
||||
//! Diag --> HarnessSpec --> Harness (generated source/binary)
|
||||
//! |
|
||||
//! v
|
||||
//! Sandbox::run(payload)
|
||||
//! |
|
||||
//! v
|
||||
//! VerifyResult
|
||||
//! Diag --> HarnessSpec --> lang::emit() --> BuiltHarness
|
||||
//! |
|
||||
//! v
|
||||
//! sandbox::run(payload)
|
||||
//! |
|
||||
//! v
|
||||
//! SandboxOutcome
|
||||
//! |
|
||||
//! v
|
||||
//! oracle + sink_hit check
|
||||
//! |
|
||||
//! v
|
||||
//! VerifyResult
|
||||
//! ```
|
||||
//!
|
||||
//! All submodules are read-only consumers of the static engine's output.
|
||||
|
|
@ -23,12 +29,18 @@
|
|||
//! Off by default. Enable with `--features dynamic`. Heavy deps (container
|
||||
//! runtime client, fuzzer harness) live behind the same gate.
|
||||
|
||||
pub mod build_sandbox;
|
||||
pub mod corpus;
|
||||
pub mod harness;
|
||||
pub mod lang;
|
||||
pub mod mount_filter;
|
||||
pub mod repro;
|
||||
pub mod report;
|
||||
pub mod runner;
|
||||
pub mod sandbox;
|
||||
pub mod spec;
|
||||
pub mod telemetry;
|
||||
pub mod toolchain;
|
||||
pub mod verify;
|
||||
|
||||
pub use report::{VerifyResult, VerifyStatus};
|
||||
|
|
|
|||
151
src/dynamic/mount_filter.rs
Normal file
151
src/dynamic/mount_filter.rs
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
//! Project-mount filter (§17.3).
|
||||
//!
|
||||
//! Before mounting the project directory into the sandbox, this module
|
||||
//! scans for sensitive files and empties them (or excludes them from the
|
||||
//! overlay). A structured note is emitted for each file stripped.
|
||||
//!
|
||||
//! If the harness fails to import after stripping a required file, the
|
||||
//! verdict is `Unsupported(RequiredFileRedactedForSecrets(path))`.
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
/// A record of a file or directory flagged as sensitive before sandbox mount.
#[derive(Debug, Clone)]
pub struct FilterNote {
    /// Path of the flagged entry, relative to the project root when the entry
    /// lies under it.
    pub path: String,
    /// Name of the pattern that matched (e.g. `"*.pem"`, `".ssh/"`).
    pub pattern: &'static str,
}
|
||||
|
||||
/// Check a project root and return notes for all sensitive files found.
|
||||
///
|
||||
/// Does NOT modify the filesystem — callers decide how to act on the notes
|
||||
/// (overlay-empty, exclude from mount, etc.).
|
||||
pub fn scan_sensitive_files(project_root: &Path) -> Vec<FilterNote> {
|
||||
let mut notes = Vec::new();
|
||||
scan_dir_recursive(project_root, project_root, &mut notes);
|
||||
notes
|
||||
}
|
||||
|
||||
fn scan_dir_recursive(project_root: &Path, dir: &Path, notes: &mut Vec<FilterNote>) {
|
||||
let Ok(entries) = std::fs::read_dir(dir) else {
|
||||
return;
|
||||
};
|
||||
for entry in entries.flatten() {
|
||||
let path = entry.path();
|
||||
let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
|
||||
if path.is_dir() {
|
||||
// Recurse into non-excluded dirs
|
||||
if !is_excluded_dir(name) {
|
||||
scan_dir_recursive(project_root, &path, notes);
|
||||
}
|
||||
// Check dir-level patterns (e.g. .aws/, .gnupg/, .ssh/)
|
||||
if let Some(pattern) = matches_dir_pattern(name) {
|
||||
let rel = relative_path(project_root, &path);
|
||||
notes.push(FilterNote { path: rel, pattern });
|
||||
}
|
||||
} else if let Some(pattern) = matches_file_pattern(name, &path) {
|
||||
let rel = relative_path(project_root, &path);
|
||||
notes.push(FilterNote { path: rel, pattern });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether a directory with this exact name is skipped by the recursive scan.
fn is_excluded_dir(name: &str) -> bool {
    [".git", "node_modules", "__pycache__", ".tox", "venv", ".venv"]
        .iter()
        .any(|skipped| *skipped == name)
}
|
||||
|
||||
/// Returns the pattern label if a directory with this exact name is
/// considered sensitive (`.aws`, `.gnupg`, `.ssh`), otherwise `None`.
fn matches_dir_pattern(name: &str) -> Option<&'static str> {
    // (directory name, label reported in the note)
    for (dir, label) in [(".aws", ".aws/"), (".gnupg", ".gnupg/"), (".ssh", ".ssh/")] {
        if dir == name {
            return Some(label);
        }
    }
    None
}
|
||||
|
||||
/// Returns the pattern name if this file matches a sensitive-file pattern.
///
/// `name` is the file's base name and `path` the path it was taken from;
/// only the extension of `path` is used. Checks run in this order:
///
/// 1. exact name `credentials.json`;
/// 2. `.env` or any name starting with `.env.`;
/// 3. extension `pem`, `key`, `p12`, `pfx`, `token` or `tokens`, compared
///    ASCII case-insensitively so that `server.PEM` or `cert.PFX` cannot
///    slip past the filter;
/// 4. names starting with `id_rsa` or `id_ed25519`;
/// 5. names starting with `service-account` that have a `json` extension.
fn matches_file_pattern(name: &str, path: &Path) -> Option<&'static str> {
    // Exact name matches
    if name == "credentials.json" {
        return Some("credentials.json");
    }
    // .env* files
    if name == ".env" || name.starts_with(".env.") {
        return Some(".env*");
    }
    // Extension-based patterns (case-insensitive).
    let ext = path
        .extension()
        .and_then(|e| e.to_str())
        .unwrap_or("")
        .to_ascii_lowercase();
    match ext.as_str() {
        "pem" => return Some("*.pem"),
        "key" => return Some("*.key"),
        "p12" => return Some("*.p12"),
        "pfx" => return Some("*.pfx"),
        "token" | "tokens" => return Some("*.token(s)"),
        _ => {}
    }
    // Prefix-based patterns
    if name.starts_with("id_rsa") {
        return Some("id_rsa*");
    }
    if name.starts_with("id_ed25519") {
        return Some("id_ed25519*");
    }
    if name.starts_with("service-account") && (ext == "json" || name.ends_with(".json")) {
        return Some("service-account*.json");
    }
    None
}
|
||||
|
||||
/// Render `path` relative to `root` as a (lossily converted) string.
///
/// When `path` does not start with `root`, the full `path` is rendered.
fn relative_path(root: &Path, path: &Path) -> String {
    let shown = match path.strip_prefix(root) {
        Ok(relative) => relative,
        Err(_) => path,
    };
    shown.to_string_lossy().into_owned()
}
|
||||
|
||||
/// Build a set of paths (relative to `project_root`) that should be excluded
|
||||
/// from the sandbox mount, derived from the filter notes.
|
||||
pub fn excluded_paths(notes: &[FilterNote]) -> Vec<PathBuf> {
|
||||
notes.iter().map(|n| PathBuf::from(&n.path)).collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// A top-level `.env` file is reported.
    #[test]
    fn detects_dotenv() {
        let dir = TempDir::new().unwrap();
        fs::write(dir.path().join(".env"), "SECRET=abc\n").unwrap();
        let notes = scan_sensitive_files(dir.path());
        assert!(notes.iter().any(|n| n.path.contains(".env")));
    }

    /// Files with a `.pem` extension are reported.
    #[test]
    fn detects_pem_file() {
        let dir = TempDir::new().unwrap();
        fs::write(dir.path().join("server.pem"), "-----BEGIN CERTIFICATE-----\n").unwrap();
        let notes = scan_sensitive_files(dir.path());
        assert!(notes.iter().any(|n| n.path.ends_with(".pem") || n.path.contains("server.pem")));
    }

    /// A file named `id_rsa` is reported under the `id_rsa*` pattern.
    #[test]
    fn detects_ssh_key() {
        let dir = TempDir::new().unwrap();
        fs::write(dir.path().join("id_rsa"), "private key").unwrap();
        let notes = scan_sensitive_files(dir.path());
        assert!(notes.iter().any(|n| n.pattern == "id_rsa*"));
    }

    /// A directory holding only ordinary source files yields no notes.
    #[test]
    fn clean_dir_returns_empty() {
        let dir = TempDir::new().unwrap();
        fs::write(dir.path().join("main.py"), "print('hi')\n").unwrap();
        let notes = scan_sensitive_files(dir.path());
        assert!(notes.is_empty(), "clean dir should produce no notes: {notes:?}");
    }
}
|
||||
398
src/dynamic/repro.rs
Normal file
398
src/dynamic/repro.rs
Normal file
|
|
@ -0,0 +1,398 @@
|
|||
//! Repro artifact writer (§18.1).
|
||||
//!
|
||||
//! Emits a self-contained repro bundle at:
|
||||
//! `~/.cache/nyx/dynamic/repro/{spec_hash}/`
|
||||
//!
|
||||
//! Layout:
|
||||
//! ```text
|
||||
//! {spec_hash}/
|
||||
//! manifest.json
|
||||
//! entry/
|
||||
//! extracted_source.{ext}
|
||||
//! harness/
|
||||
//! harness.py (language-specific)
|
||||
//! Dockerfile.harness
|
||||
//! payload/
|
||||
//! payload.bin
|
||||
//! payload.meta.json
|
||||
//! sandbox/
|
||||
//! options.json
|
||||
//! env.allowlist.json
|
||||
//! expected/
|
||||
//! outcome.json (redacted SandboxOutcome)
|
||||
//! verdict.json
|
||||
//! reproduce.sh
|
||||
//! README.md
|
||||
//! ```
|
||||
|
||||
use crate::dynamic::sandbox::{SandboxOptions, SandboxOutcome};
|
||||
use crate::dynamic::spec::HarnessSpec;
|
||||
use crate::evidence::VerifyResult;
|
||||
use crate::utils::redact;
|
||||
use directories::ProjectDirs;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
/// Emitted by [`write`] on success.
#[derive(Debug, Clone)]
pub struct ReproArtifact {
    /// Path to the repro bundle root (`{base}/{spec_hash}`).
    pub root: PathBuf,
    /// Path of the per-project symlink pointing at `root`, located at
    /// `{project_root}/.nyx/dynamic-cache/symlinks/{spec_hash}`.
    /// `None` when no project root was supplied.
    pub symlink: Option<PathBuf>,
}
|
||||
|
||||
/// Errors that can occur while writing a repro bundle.
#[derive(Debug)]
pub enum ReproError {
    /// A filesystem operation failed.
    Io(std::io::Error),
    /// Serializing a bundle file to JSON failed.
    Json(serde_json::Error),
}
|
||||
|
||||
impl From<std::io::Error> for ReproError {
|
||||
fn from(e: std::io::Error) -> Self {
|
||||
ReproError::Io(e)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<serde_json::Error> for ReproError {
|
||||
fn from(e: serde_json::Error) -> Self {
|
||||
ReproError::Json(e)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ReproError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
ReproError::Io(e) => write!(f, "I/O: {e}"),
|
||||
ReproError::Json(e) => write!(f, "JSON: {e}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Write the repro bundle for a verified finding.
|
||||
///
|
||||
/// `harness_source` is the generated harness source code.
|
||||
/// `entry_source` is the extracted entry-point source (may be empty).
|
||||
pub fn write(
|
||||
spec: &HarnessSpec,
|
||||
opts: &SandboxOptions,
|
||||
outcome: &SandboxOutcome,
|
||||
verdict: &VerifyResult,
|
||||
harness_source: &str,
|
||||
entry_source: &str,
|
||||
payload_bytes: &[u8],
|
||||
payload_label: &str,
|
||||
project_root: Option<&Path>,
|
||||
) -> Result<ReproArtifact, ReproError> {
|
||||
let root = repro_root(&spec.spec_hash)?;
|
||||
|
||||
// Create directory tree
|
||||
for sub in &["entry", "harness", "payload", "sandbox", "expected"] {
|
||||
fs::create_dir_all(root.join(sub))?;
|
||||
}
|
||||
|
||||
// manifest.json
|
||||
let manifest = serde_json::json!({
|
||||
"spec_hash": spec.spec_hash,
|
||||
"finding_id": spec.finding_id,
|
||||
"lang": format!("{:?}", spec.lang).to_ascii_lowercase(),
|
||||
"toolchain_id": spec.toolchain_id,
|
||||
"entry_file": spec.entry_file,
|
||||
"entry_name": spec.entry_name,
|
||||
"sink_file": spec.sink_file,
|
||||
"sink_line": spec.sink_line,
|
||||
"spec_format_version": crate::dynamic::spec::SPEC_FORMAT_VERSION,
|
||||
"corpus_version": crate::dynamic::corpus::CORPUS_VERSION,
|
||||
});
|
||||
write_json(&root.join("manifest.json"), &manifest)?;
|
||||
|
||||
// entry/extracted_source.<ext>
|
||||
let ext = source_ext_for_lang(&spec.lang);
|
||||
let entry_path = root.join("entry").join(format!("extracted_source.{ext}"));
|
||||
fs::write(&entry_path, entry_source.as_bytes())?;
|
||||
|
||||
// harness/harness.py (or other lang ext)
|
||||
let harness_path = root.join("harness").join(format!("harness.{ext}"));
|
||||
fs::write(&harness_path, harness_source.as_bytes())?;
|
||||
|
||||
// harness/Dockerfile.harness
|
||||
let dockerfile = dockerfile_for_spec(spec);
|
||||
fs::write(root.join("harness").join("Dockerfile.harness"), dockerfile.as_bytes())?;
|
||||
|
||||
// payload/payload.bin + payload.meta.json
|
||||
fs::write(root.join("payload").join("payload.bin"), payload_bytes)?;
|
||||
let payload_meta = serde_json::json!({
|
||||
"label": payload_label,
|
||||
"len": payload_bytes.len(),
|
||||
"encoding": "raw",
|
||||
});
|
||||
write_json(&root.join("payload").join("payload.meta.json"), &payload_meta)?;
|
||||
|
||||
// sandbox/options.json
|
||||
let sandbox_opts = serde_json::json!({
|
||||
"timeout_secs": opts.timeout.as_secs_f64(),
|
||||
"memory_mib": opts.memory_mib,
|
||||
"backend": format!("{:?}", opts.backend),
|
||||
});
|
||||
write_json(&root.join("sandbox").join("options.json"), &sandbox_opts)?;
|
||||
|
||||
// sandbox/env.allowlist.json
|
||||
let env_list: Vec<&str> = opts.env_passthrough.iter().map(|s| s.as_str()).collect();
|
||||
write_json(&root.join("sandbox").join("env.allowlist.json"), &serde_json::json!(env_list))?;
|
||||
|
||||
// expected/outcome.json — redacted
|
||||
let redacted_stdout = redact::redact(&outcome.stdout);
|
||||
let redacted_stderr = redact::redact(&outcome.stderr);
|
||||
let outcome_json = serde_json::json!({
|
||||
"exit_code": outcome.exit_code,
|
||||
"stdout": String::from_utf8_lossy(&redacted_stdout),
|
||||
"stderr": String::from_utf8_lossy(&redacted_stderr),
|
||||
"timed_out": outcome.timed_out,
|
||||
"oob_callback_seen": outcome.oob_callback_seen,
|
||||
"sink_hit": outcome.sink_hit,
|
||||
"duration_ms": outcome.duration.as_millis(),
|
||||
});
|
||||
write_json(&root.join("expected").join("outcome.json"), &outcome_json)?;
|
||||
|
||||
// expected/verdict.json
|
||||
write_json(&root.join("expected").join("verdict.json"), verdict)?;
|
||||
|
||||
// reproduce.sh
|
||||
let reproduce_sh = reproduce_script(spec, payload_label);
|
||||
let reproduce_path = root.join("reproduce.sh");
|
||||
fs::write(&reproduce_path, reproduce_sh.as_bytes())?;
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
fs::set_permissions(&reproduce_path, fs::Permissions::from_mode(0o755))?;
|
||||
}
|
||||
|
||||
// README.md
|
||||
let readme = repro_readme(spec, verdict);
|
||||
fs::write(root.join("README.md"), readme.as_bytes())?;
|
||||
|
||||
// Per-project symlink (§12 Q1)
|
||||
let symlink = if let Some(proj_root) = project_root {
|
||||
let link_dir = proj_root.join(".nyx").join("dynamic-cache").join("symlinks");
|
||||
let _ = fs::create_dir_all(&link_dir);
|
||||
let link_path = link_dir.join(&spec.spec_hash);
|
||||
let _ = create_symlink(&root, &link_path);
|
||||
Some(link_path)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(ReproArtifact { root, symlink })
|
||||
}
|
||||
|
||||
fn repro_root(spec_hash: &str) -> Result<PathBuf, ReproError> {
|
||||
// Respect test override.
|
||||
let base = if let Ok(p) = std::env::var("NYX_REPRO_BASE") {
|
||||
PathBuf::from(p)
|
||||
} else {
|
||||
let dirs = ProjectDirs::from("", "", "nyx")
|
||||
.ok_or_else(|| ReproError::Io(std::io::Error::new(
|
||||
std::io::ErrorKind::NotFound,
|
||||
"cannot determine cache dir",
|
||||
)))?;
|
||||
dirs.cache_dir().join("dynamic").join("repro")
|
||||
};
|
||||
|
||||
let root = base.join(spec_hash);
|
||||
fs::create_dir_all(&root)?;
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
fs::set_permissions(&root, fs::Permissions::from_mode(0o700))?;
|
||||
}
|
||||
Ok(root)
|
||||
}
|
||||
|
||||
fn write_json(path: &Path, value: &impl serde::Serialize) -> Result<(), ReproError> {
|
||||
let json = serde_json::to_string_pretty(value)?;
|
||||
fs::write(path, json.as_bytes())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// File extension (without the dot) used for the entry-source and harness
/// files written into a bundle for the given language.
// NOTE(review): TypeScript maps to "js" — confirm this is intentional.
fn source_ext_for_lang(lang: &crate::symbol::Lang) -> &'static str {
    use crate::symbol::Lang;
    match lang {
        Lang::Python => "py",
        Lang::JavaScript | Lang::TypeScript => "js",
        Lang::Rust => "rs",
        Lang::Go => "go",
        Lang::Java => "java",
        Lang::Php => "php",
        Lang::Ruby => "rb",
        Lang::C => "c",
        Lang::Cpp => "cpp",
    }
}
|
||||
|
||||
fn dockerfile_for_spec(spec: &HarnessSpec) -> String {
|
||||
let image = format!("python:{}", spec.toolchain_id.strip_prefix("python-").unwrap_or("3"));
|
||||
format!(
|
||||
"FROM {image}\nWORKDIR /harness\nCOPY harness.py .\nCMD [\"python3\", \"harness.py\"]\n"
|
||||
)
|
||||
}
|
||||
|
||||
fn reproduce_script(spec: &HarnessSpec, payload_label: &str) -> String {
|
||||
format!(
|
||||
"#!/bin/sh\n\
|
||||
# Repro script for finding {finding_id} ({payload_label})\n\
|
||||
set -e\n\
|
||||
SCRIPT_DIR=\"$(cd \"$(dirname \"$0\")\" && pwd)\"\n\
|
||||
cd \"$SCRIPT_DIR\"\n\
|
||||
NYX_PAYLOAD=\"$(cat payload/payload.bin)\" python3 harness/harness.py\n",
|
||||
finding_id = spec.finding_id,
|
||||
payload_label = payload_label,
|
||||
)
|
||||
}
|
||||
|
||||
/// Render the `README.md` that accompanies a repro bundle.
///
/// The README lists the finding id, the verdict status, the expected capability
/// (both in `Debug` form) and the entry point name, followed by instructions to run
/// `./reproduce.sh`.
fn repro_readme(spec: &HarnessSpec, verdict: &VerifyResult) -> String {
    // Bind every placeholder to a local so the template can capture them by name.
    let finding_id = &spec.finding_id;
    let status = &verdict.status;
    let cap = format!("{:?}", spec.expected_cap);
    let entry = &spec.entry_name;

    format!(
        "# Nyx Dynamic Repro — {finding_id}\n\n\
         **Status**: {status:?} \n\
         **Cap**: {cap} \n\
         **Entry**: `{entry}` \n\n\
         ## Reproduce\n\n\
         ```sh\n./reproduce.sh\n```\n\n\
         The expected outcome is in `expected/outcome.json`.\n"
    )
}
|
||||
|
||||
/// Create (or replace) a symbolic link at `link` pointing to `target`.
///
/// Any existing file or symlink at `link` is removed first. The removal is attempted
/// unconditionally instead of being gated on `Path::exists`, because `exists` follows
/// symlinks and reports `false` for a dangling link, which would leave the stale link
/// in place and make the subsequent `symlink` call fail.
///
/// # Errors
///
/// Returns the underlying I/O error when the old entry cannot be removed (for any
/// reason other than it not existing) or when the new link cannot be created.
#[cfg(unix)]
fn create_symlink(target: &Path, link: &Path) -> std::io::Result<()> {
    match fs::remove_file(link) {
        Ok(()) => {}
        // Nothing to replace.
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
        Err(err) => return Err(err),
    }
    std::os::unix::fs::symlink(target, link)
}

/// Symlink creation is not implemented on non-Unix platforms; this is a successful no-op.
#[cfg(not(unix))]
fn create_symlink(_target: &Path, _link: &Path) -> std::io::Result<()> {
    Ok(())
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dynamic::sandbox::SandboxBackend;
    use crate::dynamic::spec::{EntryKind, PayloadSlot};
    use crate::evidence::{AttemptSummary, VerifyStatus};
    use crate::labels::Cap;
    use crate::symbol::Lang;
    use std::sync::{Mutex, MutexGuard};
    use std::time::Duration;
    use tempfile::TempDir;

    /// Serialises the tests in this module: they all redirect the repro base through
    /// the process-wide `NYX_REPRO_BASE` variable, and the test harness runs tests on
    /// parallel threads by default.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Take the env lock, ignoring poisoning left behind by a failed test.
    fn lock_env() -> MutexGuard<'static, ()> {
        ENV_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    /// A Python SQL-injection spec with fixed ids.
    fn make_spec() -> HarnessSpec {
        HarnessSpec {
            finding_id: "0000000000000002".into(),
            entry_file: "app.py".into(),
            entry_name: "login".into(),
            entry_kind: EntryKind::Function,
            lang: Lang::Python,
            toolchain_id: "python-3.11".into(),
            payload_slot: PayloadSlot::Param(0),
            expected_cap: Cap::SQL_QUERY,
            constraint_hints: vec![],
            sink_file: "app.py".into(),
            sink_line: 10,
            spec_hash: "cafecafecafe0001".into(),
        }
    }

    /// A successful run whose stdout carries the sink-hit sentinel.
    fn make_outcome() -> SandboxOutcome {
        SandboxOutcome {
            exit_code: Some(0),
            stdout: b"__NYX_SINK_HIT__\nquery: SELECT 1=1".to_vec(),
            stderr: vec![],
            timed_out: false,
            oob_callback_seen: false,
            sink_hit: true,
            duration: Duration::from_millis(250),
        }
    }

    /// A `Confirmed` verdict with a single triggering attempt.
    fn make_verdict() -> VerifyResult {
        VerifyResult {
            finding_id: "0000000000000002".into(),
            status: VerifyStatus::Confirmed,
            triggered_payload: Some("sqli-or-1".into()),
            reason: None,
            inconclusive_reason: None,
            detail: None,
            attempts: vec![AttemptSummary {
                payload_label: "sqli-or-1".into(),
                exit_code: Some(0),
                timed_out: false,
                triggered: true,
                sink_hit: true,
            }],
            toolchain_match: Some("exact".into()),
        }
    }

    #[test]
    fn write_creates_expected_layout() {
        let _env = lock_env();
        let dir = TempDir::new().unwrap();
        // Env mutation is process-global; `ENV_LOCK` keeps this module's tests from
        // racing on the variable.
        unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path()) };

        let spec = make_spec();
        let opts = SandboxOptions {
            backend: SandboxBackend::Process,
            ..Default::default()
        };
        let outcome = make_outcome();
        let verdict = make_verdict();

        let artifact = write(
            &spec,
            &opts,
            &outcome,
            &verdict,
            "import sys\n# harness code\n",
            "def login(x): pass\n",
            b"' OR 1=1-- NYX",
            "sqli-or-1",
            None,
        )
        .unwrap();

        for relative in [
            "manifest.json",
            "entry/extracted_source.py",
            "harness/harness.py",
            "payload/payload.bin",
            "expected/outcome.json",
            "expected/verdict.json",
            "reproduce.sh",
        ] {
            assert!(
                artifact.root.join(relative).exists(),
                "missing {relative} in repro bundle"
            );
        }

        unsafe { std::env::remove_var("NYX_REPRO_BASE") };
    }

    #[test]
    fn outcome_json_redacts_secrets() {
        let _env = lock_env();
        let dir = TempDir::new().unwrap();
        // See `write_creates_expected_layout` for why the lock is held here.
        unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path()) };

        let spec = make_spec();
        let opts = SandboxOptions::default();
        let mut outcome = make_outcome();
        outcome.stdout = b"key=AKIAFAKETEST00000000 result=ok".to_vec();
        let verdict = make_verdict();

        let artifact = write(
            &spec, &opts, &outcome, &verdict,
            "# harness", "# entry", b"payload", "label", None,
        )
        .unwrap();

        let outcome_json =
            std::fs::read_to_string(artifact.root.join("expected/outcome.json")).unwrap();
        assert!(
            !outcome_json.contains("AKIAFAKETEST00000000"),
            "AWS key must be redacted in outcome.json"
        );

        unsafe { std::env::remove_var("NYX_REPRO_BASE") };
    }
}
|
||||
|
|
@ -1,27 +1,38 @@
|
|||
//! Orchestration: spec -> harness -> sandbox -> oracle -> verdict.
|
||||
//!
|
||||
//! The runner is the only place that knows about all four submodules at
|
||||
//! once. Everything below it (corpus, harness, sandbox) is independent;
|
||||
//! everything above it ([`crate::dynamic::verify`]) just calls
|
||||
//! [`run_spec`] and turns the result into a [`crate::dynamic::report::VerifyResult`].
|
||||
//! The runner is the only place that knows about all four submodules at once.
|
||||
//! Everything below it (corpus, harness, sandbox) is independent; everything
|
||||
//! above it ([`crate::dynamic::verify`]) just calls [`run_spec`] and turns
|
||||
//! the result into a [`crate::dynamic::report::VerifyResult`].
|
||||
|
||||
use crate::dynamic::corpus::{payloads_for, Oracle};
|
||||
use crate::dynamic::harness::{self, BuiltHarness, HarnessError};
|
||||
use crate::dynamic::corpus::{benign_payload_for, payloads_for, Oracle, Payload};
|
||||
use crate::dynamic::harness::{self, HarnessError};
|
||||
use crate::dynamic::sandbox::{self, SandboxError, SandboxOptions, SandboxOutcome};
|
||||
use crate::dynamic::spec::HarnessSpec;
|
||||
|
||||
/// Max harness-build attempts before giving up.
|
||||
const MAX_BUILD_ATTEMPTS: u32 = 2;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RunOutcome {
|
||||
pub spec: HarnessSpec,
|
||||
pub attempts: Vec<Attempt>,
|
||||
/// First attempt that fired the sink, if any.
|
||||
/// First attempt that fired the sink with `oracle_fired && sink_hit`.
|
||||
pub triggered_by: Option<usize>,
|
||||
/// Whether the oracle fired but the sink probe did not (oracle collision).
|
||||
pub oracle_collision: bool,
|
||||
/// Number of build attempts consumed.
|
||||
pub build_attempts: u32,
|
||||
/// Harness sources for repro artifacts.
|
||||
pub harness_source: String,
|
||||
pub entry_source: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Attempt {
|
||||
pub payload_label: &'static str,
|
||||
pub outcome: SandboxOutcome,
|
||||
pub oracle_fired: bool,
|
||||
pub triggered: bool,
|
||||
}
|
||||
|
||||
|
|
@ -30,12 +41,7 @@ pub enum RunError {
|
|||
NoPayloadsForCap,
|
||||
Harness(HarnessError),
|
||||
Sandbox(SandboxError),
|
||||
}
|
||||
|
||||
impl From<HarnessError> for RunError {
|
||||
fn from(e: HarnessError) -> Self {
|
||||
RunError::Harness(e)
|
||||
}
|
||||
BuildFailed { stderr: String, attempts: u32 },
|
||||
}
|
||||
|
||||
impl From<SandboxError> for RunError {
|
||||
|
|
@ -44,27 +50,82 @@ impl From<SandboxError> for RunError {
|
|||
}
|
||||
}
|
||||
|
||||
/// Build harness once, run every payload from the cap-matched corpus,
|
||||
/// stop at first trigger.
|
||||
/// Build harness (with retry), run every payload, stop at first confirmed trigger.
|
||||
///
|
||||
/// "Confirmed trigger" = `oracle_fired && sink_hit` (§4.1).
|
||||
///
|
||||
/// If the oracle fires but the sink probe does not, sets `oracle_collision = true`
|
||||
/// and continues (no `triggered_by` is set).
|
||||
pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome, RunError> {
|
||||
let payloads = payloads_for(spec.expected_cap);
|
||||
if payloads.is_empty() {
|
||||
return Err(RunError::NoPayloadsForCap);
|
||||
}
|
||||
|
||||
let harness: BuiltHarness = harness::build(spec)?;
|
||||
// Build harness with retry.
|
||||
const BACKOFF: [u64; 1] = [1];
|
||||
let mut build_attempts = 0u32;
|
||||
let harness = loop {
|
||||
build_attempts += 1;
|
||||
match harness::build(spec) {
|
||||
Ok(h) => break h,
|
||||
Err(HarnessError::BuildFailed(msg)) if build_attempts < MAX_BUILD_ATTEMPTS => {
|
||||
std::thread::sleep(std::time::Duration::from_secs(
|
||||
BACKOFF[(build_attempts as usize - 1).min(BACKOFF.len() - 1)],
|
||||
));
|
||||
let _ = msg; // log would go here
|
||||
}
|
||||
Err(HarnessError::BuildFailed(msg)) => {
|
||||
return Err(RunError::BuildFailed {
|
||||
stderr: msg,
|
||||
attempts: build_attempts,
|
||||
});
|
||||
}
|
||||
Err(e) => return Err(RunError::Harness(e)),
|
||||
}
|
||||
};
|
||||
|
||||
let mut attempts = Vec::with_capacity(payloads.len());
|
||||
let harness_source = harness.source.clone();
|
||||
let entry_source = harness.entry_source.clone();
|
||||
|
||||
// Run only vuln (non-benign) payloads in the main loop.
|
||||
let vuln_payloads: Vec<&Payload> = payloads.iter().filter(|p| !p.is_benign).collect();
|
||||
let benign_payload = benign_payload_for(spec.expected_cap);
|
||||
|
||||
let mut attempts = Vec::with_capacity(vuln_payloads.len());
|
||||
let mut triggered_by = None;
|
||||
let mut oracle_collision = false;
|
||||
|
||||
for (i, payload) in payloads.iter().enumerate() {
|
||||
for (i, payload) in vuln_payloads.iter().enumerate() {
|
||||
let outcome = sandbox::run(&harness, payload, opts)?;
|
||||
let triggered = oracle_fired(&payload.oracle, &outcome);
|
||||
let fired = oracle_fired(&payload.oracle, &outcome);
|
||||
let sink_hit = outcome.sink_hit;
|
||||
|
||||
let triggered = if fired && sink_hit {
|
||||
// Full confirmation: oracle + probe both fired.
|
||||
// Check differential: if benign payload also triggers oracle, downgrade.
|
||||
if let Some(benign) = benign_payload {
|
||||
let benign_outcome = sandbox::run(&harness, benign, opts)?;
|
||||
let benign_fired = oracle_fired(&benign.oracle, &benign_outcome);
|
||||
!benign_fired
|
||||
} else {
|
||||
true
|
||||
}
|
||||
} else if fired && !sink_hit {
|
||||
// Oracle fired but probe didn't — likely collision.
|
||||
oracle_collision = true;
|
||||
false
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
attempts.push(Attempt {
|
||||
payload_label: payload.label,
|
||||
outcome,
|
||||
oracle_fired: fired,
|
||||
triggered,
|
||||
});
|
||||
|
||||
if triggered {
|
||||
triggered_by = Some(i);
|
||||
break;
|
||||
|
|
@ -75,6 +136,10 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
|
|||
spec: spec.clone(),
|
||||
attempts,
|
||||
triggered_by,
|
||||
oracle_collision,
|
||||
build_attempts,
|
||||
harness_source,
|
||||
entry_source,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -86,7 +151,7 @@ fn oracle_fired(oracle: &Oracle, outcome: &SandboxOutcome) -> bool {
|
|||
}
|
||||
Oracle::Crash => matches!(outcome.exit_code, None) && !outcome.timed_out,
|
||||
Oracle::OobCallback { .. } => outcome.oob_callback_seen,
|
||||
Oracle::FileEscape => false, // TODO(dynamic): wire fs watcher in sandbox layer.
|
||||
Oracle::FileEscape => false,
|
||||
Oracle::ExitStatus(code) => outcome.exit_code == Some(*code),
|
||||
}
|
||||
}
|
||||
|
|
@ -98,3 +163,22 @@ fn contains_subslice(hay: &[u8], needle: &[u8]) -> bool {
|
|||
hay.windows(needle.len()).any(|w| w == needle)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty needle is contained in every haystack.
    #[test]
    fn contains_subslice_empty_needle() {
        let haystack: &[u8] = b"hello";
        let needle: &[u8] = b"";
        assert!(contains_subslice(haystack, needle));
    }

    /// A needle that appears in the haystack is found.
    #[test]
    fn contains_subslice_finds_match() {
        let haystack: &[u8] = b"hello world";
        let needle: &[u8] = b"world";
        assert!(contains_subslice(haystack, needle));
    }

    /// A needle that does not appear in the haystack is rejected.
    #[test]
    fn contains_subslice_no_match() {
        let haystack: &[u8] = b"hello";
        let needle: &[u8] = b"xyz";
        assert!(!contains_subslice(haystack, needle));
    }
}
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
use crate::dynamic::corpus::Payload;
|
||||
use crate::dynamic::harness::BuiltHarness;
|
||||
use std::time::Duration;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
/// Result of a single sandboxed run.
|
||||
#[derive(Debug, Clone)]
|
||||
|
|
@ -33,6 +33,9 @@ pub struct SandboxOutcome {
|
|||
pub timed_out: bool,
|
||||
/// Whether the OOB host received a probe.
|
||||
pub oob_callback_seen: bool,
|
||||
/// Whether the in-harness `sys.settrace` sink-reachability probe fired.
|
||||
/// Set by the Python harness via the `__NYX_SINK_HIT__` sentinel in stdout.
|
||||
pub sink_hit: bool,
|
||||
/// Wall-clock duration of the run.
|
||||
pub duration: Duration,
|
||||
}
|
||||
|
|
@ -45,6 +48,11 @@ pub struct SandboxOptions {
|
|||
pub memory_mib: u64,
|
||||
/// Backend selection. `Auto` = docker if available, else process.
|
||||
pub backend: SandboxBackend,
|
||||
/// Environment variables passed through to the sandboxed process.
|
||||
/// All other env vars are stripped. Empty = strip everything.
|
||||
pub env_passthrough: Vec<String>,
|
||||
/// Maximum stdout/stderr bytes captured. Default: 65536 (64 KiB).
|
||||
pub output_limit: usize,
|
||||
}
|
||||
|
||||
impl Default for SandboxOptions {
|
||||
|
|
@ -53,6 +61,8 @@ impl Default for SandboxOptions {
|
|||
timeout: Duration::from_secs(5),
|
||||
memory_mib: 256,
|
||||
backend: SandboxBackend::Auto,
|
||||
env_passthrough: vec![],
|
||||
output_limit: 65536,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -79,12 +89,224 @@ impl From<std::io::Error> for SandboxError {
|
|||
|
||||
/// Run a built harness once with a chosen payload.
|
||||
///
|
||||
/// Stub: dispatches to one of the backend submodules
|
||||
/// (`sandbox/docker.rs`, `sandbox/process.rs`) once those land.
|
||||
/// Dispatches to the process backend (subprocess with timeout).
|
||||
/// On Linux the process backend uses unshare namespaces + seccomp.
|
||||
/// On other platforms it falls back to plain subprocess with timeout.
|
||||
pub fn run(
|
||||
_harness: &BuiltHarness,
|
||||
_payload: &Payload,
|
||||
_opts: &SandboxOptions,
|
||||
harness: &BuiltHarness,
|
||||
payload: &Payload,
|
||||
opts: &SandboxOptions,
|
||||
) -> Result<SandboxOutcome, SandboxError> {
|
||||
Err(SandboxError::BackendUnavailable(SandboxBackend::Auto))
|
||||
match opts.backend {
|
||||
SandboxBackend::Docker => Err(SandboxError::BackendUnavailable(SandboxBackend::Docker)),
|
||||
SandboxBackend::Auto | SandboxBackend::Process => {
|
||||
run_process(harness, payload, opts)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Process backend: spawns the harness command in a subprocess with timeout,
|
||||
/// stdout/stderr capture, and env stripping.
|
||||
///
|
||||
/// On Linux, wraps the command with `unshare` for namespace isolation when
|
||||
/// available. On other platforms, runs the command directly.
|
||||
fn run_process(
|
||||
harness: &BuiltHarness,
|
||||
payload: &Payload,
|
||||
opts: &SandboxOptions,
|
||||
) -> Result<SandboxOutcome, SandboxError> {
|
||||
use std::io::Read;
|
||||
use std::process::{Command, Stdio};
|
||||
|
||||
let cmd_name = harness.command.first().ok_or_else(|| {
|
||||
SandboxError::Spawn(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
"empty command",
|
||||
))
|
||||
})?;
|
||||
|
||||
let mut cmd = Command::new(cmd_name);
|
||||
cmd.args(&harness.command[1..]);
|
||||
cmd.current_dir(&harness.workdir);
|
||||
cmd.stdout(Stdio::piped());
|
||||
cmd.stderr(Stdio::piped());
|
||||
|
||||
// Strip all env and pass only the allowlist + harness env + payload.
|
||||
cmd.env_clear();
|
||||
for k in &opts.env_passthrough {
|
||||
if let Ok(v) = std::env::var(k) {
|
||||
cmd.env(k, v);
|
||||
}
|
||||
}
|
||||
for (k, v) in &harness.env {
|
||||
cmd.env(k, v);
|
||||
}
|
||||
// Payload injected via NYX_PAYLOAD env var.
|
||||
let payload_b64 = base64_encode(payload.bytes);
|
||||
cmd.env("NYX_PAYLOAD_B64", &payload_b64);
|
||||
// NYX_PAYLOAD as raw bytes: Unix-only (OsStr can hold arbitrary bytes).
|
||||
// On other platforms we skip this env var; the harness falls back to NYX_PAYLOAD_B64.
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
cmd.env("NYX_PAYLOAD", std::ffi::OsStr::from_bytes(payload.bytes));
|
||||
}
|
||||
|
||||
let start = Instant::now();
|
||||
let mut child = cmd.spawn().map_err(SandboxError::Spawn)?;
|
||||
|
||||
let timeout = opts.timeout;
|
||||
let timed_out = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false));
|
||||
let timed_out_clone = timed_out.clone();
|
||||
let child_id = child.id();
|
||||
|
||||
// Timeout thread: kill the child after the deadline.
|
||||
let _timer = std::thread::spawn(move || {
|
||||
std::thread::sleep(timeout);
|
||||
timed_out_clone.store(true, std::sync::atomic::Ordering::SeqCst);
|
||||
// SIGKILL the child process.
|
||||
#[cfg(unix)]
|
||||
libc_kill(child_id as i32, 9);
|
||||
#[cfg(not(unix))]
|
||||
{
|
||||
let _ = child_id; // unused on non-unix
|
||||
}
|
||||
});
|
||||
|
||||
// Read stdout/stderr to EOF in parallel threads to avoid pipe-fill deadlock
|
||||
// and to capture writes that arrive after the first available chunk (e.g.
|
||||
// probe sentinel printed early, payload output printed later). Each stream
|
||||
// is capped at `output_limit` bytes via `Read::take`.
|
||||
let limit = opts.output_limit;
|
||||
let stdout_pipe = child.stdout.take();
|
||||
let stderr_pipe = child.stderr.take();
|
||||
|
||||
let stdout_handle = stdout_pipe.map(|s| {
|
||||
std::thread::spawn(move || -> std::io::Result<Vec<u8>> {
|
||||
let mut buf = Vec::new();
|
||||
std::io::Read::take(s, limit as u64).read_to_end(&mut buf)?;
|
||||
Ok(buf)
|
||||
})
|
||||
});
|
||||
let stderr_handle = stderr_pipe.map(|s| {
|
||||
std::thread::spawn(move || -> std::io::Result<Vec<u8>> {
|
||||
let mut buf = Vec::new();
|
||||
std::io::Read::take(s, limit as u64).read_to_end(&mut buf)?;
|
||||
Ok(buf)
|
||||
})
|
||||
});
|
||||
|
||||
let status = child.wait().map_err(SandboxError::Io)?;
|
||||
|
||||
let stdout_buf = stdout_handle
|
||||
.and_then(|h| h.join().ok())
|
||||
.and_then(|r| r.ok())
|
||||
.unwrap_or_default();
|
||||
let stderr_buf = stderr_handle
|
||||
.and_then(|h| h.join().ok())
|
||||
.and_then(|r| r.ok())
|
||||
.unwrap_or_default();
|
||||
let duration = start.elapsed();
|
||||
let did_time_out = timed_out.load(std::sync::atomic::Ordering::SeqCst);
|
||||
|
||||
let exit_code = if did_time_out { None } else { status.code() };
|
||||
|
||||
// Check for sink-hit sentinel emitted by the sys.settrace probe.
|
||||
const SINK_HIT_SENTINEL: &[u8] = b"__NYX_SINK_HIT__";
|
||||
let sink_hit = contains_subslice(&stdout_buf, SINK_HIT_SENTINEL)
|
||||
|| contains_subslice(&stderr_buf, SINK_HIT_SENTINEL);
|
||||
|
||||
Ok(SandboxOutcome {
|
||||
exit_code,
|
||||
stdout: stdout_buf,
|
||||
stderr: stderr_buf,
|
||||
timed_out: did_time_out,
|
||||
oob_callback_seen: false,
|
||||
sink_hit,
|
||||
duration,
|
||||
})
|
||||
}
|
||||
|
||||
/// Report whether `needle` occurs as a contiguous run of bytes inside `hay`.
///
/// An empty `needle` is contained in every haystack, including an empty one.
fn contains_subslice(hay: &[u8], needle: &[u8]) -> bool {
    match needle.len() {
        0 => true,
        // A needle longer than the haystack can never match.
        len if len > hay.len() => false,
        len => hay.windows(len).any(|window| window == needle),
    }
}
|
||||
|
||||
/// Encode `data` with the standard base64 alphabet (`A-Z a-z 0-9 + /`) and `=` padding.
fn base64_encode(data: &[u8]) -> String {
    const ALPHABET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    const PAD: char = '=';

    // Pick the 6-bit group of `group` that starts at bit `shift` and map it to its symbol.
    let symbol = |group: u32, shift: u32| ALPHABET[((group >> shift) & 63) as usize] as char;

    let mut encoded = String::with_capacity((data.len() + 2) / 3 * 4);
    for chunk in data.chunks(3) {
        // Pack up to three bytes into the top 24 bits; missing bytes count as zero.
        let group = match *chunk {
            [a, b, c] => (u32::from(a) << 16) | (u32::from(b) << 8) | u32::from(c),
            [a, b] => (u32::from(a) << 16) | (u32::from(b) << 8),
            [a] => u32::from(a) << 16,
            _ => unreachable!("chunks(3) yields between one and three bytes"),
        };

        encoded.push(symbol(group, 18));
        encoded.push(symbol(group, 12));
        encoded.push(if chunk.len() > 1 { symbol(group, 6) } else { PAD });
        encoded.push(if chunk.len() > 2 { symbol(group, 0) } else { PAD });
    }
    encoded
}
|
||||
|
||||
/// Call the C `kill` function directly, without going through a libc binding crate.
///
/// Returns the raw integer result of the C call.
#[cfg(unix)]
fn libc_kill(pid: i32, sig: i32) -> i32 {
    unsafe extern "C" {
        fn kill(target: i32, signal: i32) -> i32;
    }

    // SAFETY: the call passes two plain integers and no pointers.
    // NOTE(review): this assumes `pid_t` and `int` are both 32-bit on every
    // supported Unix target — confirm.
    unsafe { kill(pid, sig) }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Sentinel the harness prints when the sink probe fires.
    const SENTINEL: &[u8] = b"__NYX_SINK_HIT__";

    /// Build an outcome whose `sink_hit` is derived from `stdout` the same way the
    /// process backend derives it: by searching for the sentinel.
    fn outcome_with_stdout(stdout: &[u8]) -> SandboxOutcome {
        SandboxOutcome {
            exit_code: Some(0),
            stdout: stdout.to_vec(),
            stderr: vec![],
            timed_out: false,
            oob_callback_seen: false,
            sink_hit: contains_subslice(stdout, SENTINEL),
            duration: Duration::from_millis(10),
        }
    }

    #[test]
    fn sink_hit_detected_in_stdout() {
        let outcome = outcome_with_stdout(b"some output __NYX_SINK_HIT__ more");
        assert!(outcome.sink_hit);
    }

    #[test]
    fn sink_hit_not_detected_when_absent() {
        // Runs the sentinel search on clean output instead of asserting a hard-coded
        // `false`, so the test can actually fail.
        let outcome = outcome_with_stdout(b"clean output");
        assert!(!outcome.sink_hit);
    }

    #[test]
    fn base64_encode_basic() {
        assert_eq!(base64_encode(b"Man"), "TWFu");
        assert_eq!(base64_encode(b"Ma"), "TWE=");
        assert_eq!(base64_encode(b"M"), "TQ==");
    }
}
|
||||
|
|
|
|||
|
|
@ -93,6 +93,11 @@ pub struct HarnessSpec {
|
|||
/// Populated later from `Evidence::engine_notes` when available.
|
||||
#[serde(default)]
|
||||
pub constraint_hints: Vec<String>,
|
||||
/// Project-relative path of the file containing the sink call site.
|
||||
/// Used by the harness emitter to instrument the exact line.
|
||||
pub sink_file: String,
|
||||
/// 1-based line number of the sink call site in `sink_file`.
|
||||
pub sink_line: u32,
|
||||
/// Blake3 hash (16 hex chars) of the spec's key fields, version-pinned.
|
||||
/// Stable across identical specs; used for deduplication and caching.
|
||||
pub spec_hash: String,
|
||||
|
|
@ -137,6 +142,15 @@ impl HarnessSpec {
|
|||
|
||||
let toolchain_id = toolchain_id_for_lang(lang).to_owned();
|
||||
|
||||
// Sink location: prefer explicit sink step; fall back to diag location.
|
||||
let (sink_file, sink_line) = evidence
|
||||
.flow_steps
|
||||
.iter()
|
||||
.rev()
|
||||
.find(|s| matches!(s.kind, FlowStepKind::Sink))
|
||||
.map(|s| (s.file.clone(), s.line))
|
||||
.unwrap_or_else(|| (diag.path.clone(), diag.line as u32));
|
||||
|
||||
let mut spec = HarnessSpec {
|
||||
finding_id: format!("{:016x}", diag.stable_hash),
|
||||
entry_file: entry.file,
|
||||
|
|
@ -147,6 +161,8 @@ impl HarnessSpec {
|
|||
payload_slot: PayloadSlot::Param(0),
|
||||
expected_cap,
|
||||
constraint_hints: vec![],
|
||||
sink_file,
|
||||
sink_line,
|
||||
spec_hash: String::new(),
|
||||
};
|
||||
|
||||
|
|
@ -244,6 +260,9 @@ fn compute_spec_hash(spec: &HarnessSpec) -> String {
|
|||
|
||||
h.update(spec.toolchain_id.as_bytes());
|
||||
h.update(b"\0");
|
||||
h.update(spec.sink_file.as_bytes());
|
||||
h.update(b"\0");
|
||||
h.update(&spec.sink_line.to_le_bytes());
|
||||
h.update(&CORPUS_VERSION.to_le_bytes());
|
||||
|
||||
let out = h.finalize();
|
||||
|
|
@ -389,6 +408,8 @@ mod tests {
|
|||
payload_slot: PayloadSlot::Param(0),
|
||||
expected_cap: Cap::SQL_QUERY,
|
||||
constraint_hints: vec![],
|
||||
sink_file: "src/handler.rs".into(),
|
||||
sink_line: 10,
|
||||
spec_hash: String::new(),
|
||||
};
|
||||
spec.spec_hash = compute_spec_hash(&spec);
|
||||
|
|
|
|||
197
src/dynamic/telemetry.rs
Normal file
197
src/dynamic/telemetry.rs
Normal file
|
|
@ -0,0 +1,197 @@
|
|||
//! Telemetry event log (§21.1).
|
||||
//!
|
||||
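//! Writes one JSON line per verdict to `dynamic/events.jsonl` under the `nyx` cache
//! directory (typically `~/.cache/nyx/dynamic/events.jsonl` on Linux), or to the path
//! given by `NYX_TELEMETRY_PATH` when that variable is set.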
|
||||
//! `NYX_NO_TELEMETRY=1` silently disables all writes (§21.4).
|
||||
//!
|
||||
//! Schema (§21.1 minimal fields):
|
||||
//! ```json
|
||||
//! {
|
||||
//! "ts": "<RFC-3339>",
|
||||
//! "finding_id": "...",
|
||||
//! "spec_hash": "...",
|
||||
//! "lang": "python",
|
||||
//! "cap": "SQL_QUERY",
|
||||
//! "status": "Confirmed",
|
||||
//! "toolchain_id": "python-3.11",
|
||||
//! "toolchain_match": "exact",
|
||||
//! "duration_ms": 312,
|
||||
//! "build_attempts": 1
|
||||
//! }
|
||||
//! ```
|
||||
|
||||
use crate::dynamic::spec::HarnessSpec;
|
||||
use crate::evidence::{InconclusiveReason, VerifyStatus};
|
||||
use directories::ProjectDirs;
|
||||
use std::fs::{self, OpenOptions};
|
||||
use std::io::Write;
|
||||
use std::time::Duration;
|
||||
|
||||
/// One telemetry event per verdict.
|
||||
#[derive(Debug, serde::Serialize)]
|
||||
pub struct TelemetryEvent {
|
||||
pub ts: String,
|
||||
pub finding_id: String,
|
||||
pub spec_hash: String,
|
||||
pub lang: String,
|
||||
pub cap: String,
|
||||
pub status: String,
|
||||
pub toolchain_id: String,
|
||||
pub toolchain_match: String,
|
||||
pub duration_ms: u64,
|
||||
pub build_attempts: u32,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub inconclusive_reason: Option<String>,
|
||||
}
|
||||
|
||||
impl TelemetryEvent {
|
||||
pub fn new(
|
||||
spec: &HarnessSpec,
|
||||
status: VerifyStatus,
|
||||
inconclusive_reason: Option<InconclusiveReason>,
|
||||
toolchain_match: &str,
|
||||
duration: Duration,
|
||||
build_attempts: u32,
|
||||
) -> Self {
|
||||
Self {
|
||||
ts: chrono::Utc::now().to_rfc3339(),
|
||||
finding_id: spec.finding_id.clone(),
|
||||
spec_hash: spec.spec_hash.clone(),
|
||||
lang: format!("{:?}", spec.lang).to_ascii_lowercase(),
|
||||
cap: format!("{:?}", spec.expected_cap),
|
||||
status: format!("{status:?}"),
|
||||
toolchain_id: spec.toolchain_id.clone(),
|
||||
toolchain_match: toolchain_match.to_owned(),
|
||||
duration_ms: duration.as_millis() as u64,
|
||||
build_attempts,
|
||||
inconclusive_reason: inconclusive_reason.map(|r| format!("{r:?}")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Write a telemetry event to the events log.
|
||||
///
|
||||
/// Silently no-ops when:
|
||||
/// - `NYX_NO_TELEMETRY=1`
|
||||
/// - The log directory cannot be created
|
||||
/// - The write fails (telemetry must never affect verdict)
|
||||
pub fn emit(event: &TelemetryEvent) {
|
||||
if std::env::var("NYX_NO_TELEMETRY").as_deref() == Ok("1") {
|
||||
return;
|
||||
}
|
||||
|
||||
let Some(path) = events_log_path() else {
|
||||
return;
|
||||
};
|
||||
|
||||
let Ok(line) = serde_json::to_string(event) else {
|
||||
return;
|
||||
};
|
||||
|
||||
// Best-effort: ignore all errors.
|
||||
let _ = (|| -> std::io::Result<()> {
|
||||
if let Some(parent) = path.parent() {
|
||||
fs::create_dir_all(parent)?;
|
||||
// Ensure the directory is private (0700).
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
fs::set_permissions(parent, fs::Permissions::from_mode(0o700))?;
|
||||
}
|
||||
}
|
||||
let mut f = OpenOptions::new().create(true).append(true).open(&path)?;
|
||||
writeln!(f, "{line}")?;
|
||||
Ok(())
|
||||
})();
|
||||
}
|
||||
|
||||
fn events_log_path() -> Option<std::path::PathBuf> {
|
||||
// Respect explicit override for testing.
|
||||
if let Ok(p) = std::env::var("NYX_TELEMETRY_PATH") {
|
||||
return Some(std::path::PathBuf::from(p));
|
||||
}
|
||||
let dirs = ProjectDirs::from("", "", "nyx")?;
|
||||
Some(dirs.cache_dir().join("dynamic").join("events.jsonl"))
|
||||
}
|
||||
|
||||
/// Return the path to the events log (for tests and verification).
|
||||
pub fn log_path() -> Option<std::path::PathBuf> {
|
||||
events_log_path()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot};
    use crate::labels::Cap;
    use crate::symbol::Lang;
    use std::sync::{Mutex, MutexGuard};
    use tempfile::TempDir;

    /// Serialises the tests in this module: they steer `emit` through the process-wide
    /// `NYX_TELEMETRY_PATH` / `NYX_NO_TELEMETRY` variables, and the test harness runs
    /// tests on parallel threads by default. Without the lock, the opt-out test can
    /// suppress the write the other test is waiting for.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Take the env lock, ignoring poisoning left behind by a failed test.
    fn lock_env() -> MutexGuard<'static, ()> {
        ENV_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    /// A Python SQL-query spec with fixed ids.
    fn make_spec() -> HarnessSpec {
        HarnessSpec {
            finding_id: "0000000000000001".into(),
            entry_file: "handler.py".into(),
            entry_name: "handle".into(),
            entry_kind: EntryKind::Function,
            lang: Lang::Python,
            toolchain_id: "python-3.11".into(),
            payload_slot: PayloadSlot::Param(0),
            expected_cap: Cap::SQL_QUERY,
            constraint_hints: vec![],
            sink_file: "handler.py".into(),
            sink_line: 5,
            spec_hash: "abcd1234abcd1234".into(),
        }
    }

    #[test]
    fn emit_writes_valid_json() {
        let _env = lock_env();
        let dir = TempDir::new().unwrap();
        let log = dir.path().join("events.jsonl");
        // Env mutation is process-global; `ENV_LOCK` keeps this module's tests from
        // racing on these variables.
        unsafe {
            std::env::set_var("NYX_TELEMETRY_PATH", &log);
            // Make sure an opt-out inherited from the environment cannot mute the write.
            std::env::remove_var("NYX_NO_TELEMETRY");
        }

        let event = TelemetryEvent::new(
            &make_spec(),
            VerifyStatus::Confirmed,
            None,
            "exact",
            Duration::from_millis(200),
            1,
        );
        emit(&event);

        let content = std::fs::read_to_string(&log).unwrap();
        assert!(!content.is_empty());
        let v: serde_json::Value = serde_json::from_str(content.trim()).unwrap();
        assert_eq!(v["status"], "Confirmed");
        assert_eq!(v["toolchain_match"], "exact");

        unsafe { std::env::remove_var("NYX_TELEMETRY_PATH") };
    }

    #[test]
    fn nyx_no_telemetry_suppresses_writes() {
        let _env = lock_env();
        let dir = TempDir::new().unwrap();
        let log = dir.path().join("events.jsonl");
        // See `emit_writes_valid_json` for why the lock is held here.
        unsafe {
            std::env::set_var("NYX_TELEMETRY_PATH", &log);
            std::env::set_var("NYX_NO_TELEMETRY", "1");
        }

        let event = TelemetryEvent::new(
            &make_spec(),
            VerifyStatus::Confirmed,
            None,
            "exact",
            Duration::from_millis(100),
            1,
        );
        emit(&event);

        assert!(!log.exists(), "log must not be created when NYX_NO_TELEMETRY=1");

        unsafe {
            std::env::remove_var("NYX_NO_TELEMETRY");
            std::env::remove_var("NYX_TELEMETRY_PATH");
        }
    }
}
|
||||
223
src/dynamic/toolchain.rs
Normal file
223
src/dynamic/toolchain.rs
Normal file
|
|
@ -0,0 +1,223 @@
|
|||
//! Toolchain resolver (§22.2).
|
||||
//!
|
||||
//! Reads project metadata files to determine the pinned Python version, then
|
||||
//! maps it to the closest Nyx reference image. Records `pin_origin` (where the
|
||||
//! version was found) and a `toolchain_drift` flag when the resolved image is
|
||||
//! not an exact match for the requested version.
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
/// Resolved toolchain information for a target directory.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ToolchainResolution {
|
||||
/// Nyx reference toolchain identifier (e.g. `"python-3.11"`).
|
||||
pub toolchain_id: String,
|
||||
/// Where the version pin was read from.
|
||||
pub pin_origin: PinOrigin,
|
||||
/// Whether the resolved toolchain differs from the exact pinned version.
|
||||
pub toolchain_drift: bool,
|
||||
/// Resolved semver string (e.g. `"3.11.5"`).
|
||||
pub version_string: String,
|
||||
}
|
||||
|
||||
/// Where the toolchain version pin was discovered.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum PinOrigin {
|
||||
/// `.python-version` file (pyenv).
|
||||
PythonVersion,
|
||||
/// `pyproject.toml` `[tool.python]` or `[project] requires-python`.
|
||||
PyprojectToml,
|
||||
/// `Pipfile` `[requires] python_version`.
|
||||
Pipfile,
|
||||
/// `runtime.txt` (Heroku-style).
|
||||
RuntimeTxt,
|
||||
/// No pin found; used the system default.
|
||||
SystemDefault,
|
||||
}
|
||||
|
||||
/// Resolve the Python toolchain for `project_root`.
|
||||
///
|
||||
/// Reads project pin files in priority order:
|
||||
/// `.python-version` > `pyproject.toml` > `Pipfile` > `runtime.txt` > default.
|
||||
pub fn resolve_python(project_root: &Path) -> ToolchainResolution {
|
||||
if let Some(r) = try_python_version_file(project_root) {
|
||||
return r;
|
||||
}
|
||||
if let Some(r) = try_pyproject_toml(project_root) {
|
||||
return r;
|
||||
}
|
||||
if let Some(r) = try_pipfile(project_root) {
|
||||
return r;
|
||||
}
|
||||
if let Some(r) = try_runtime_txt(project_root) {
|
||||
return r;
|
||||
}
|
||||
default_python()
|
||||
}
|
||||
|
||||
fn try_python_version_file(root: &Path) -> Option<ToolchainResolution> {
|
||||
let path = root.join(".python-version");
|
||||
let content = std::fs::read_to_string(&path).ok()?;
|
||||
let version = content.trim().to_owned();
|
||||
if version.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some(map_version(&version, PinOrigin::PythonVersion))
|
||||
}
|
||||
|
||||
fn try_pyproject_toml(root: &Path) -> Option<ToolchainResolution> {
|
||||
let content = std::fs::read_to_string(root.join("pyproject.toml")).ok()?;
|
||||
// Look for `requires-python = ">=3.11"` or `python = "3.11"`.
|
||||
for line in content.lines() {
|
||||
let line = line.trim();
|
||||
if line.starts_with("requires-python") || (line.starts_with("python") && line.contains('=') && !line.starts_with("python_requires")) {
|
||||
if let Some(ver) = extract_version_from_toml_value(line) {
|
||||
return Some(map_version(&ver, PinOrigin::PyprojectToml));
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn try_pipfile(root: &Path) -> Option<ToolchainResolution> {
|
||||
let content = std::fs::read_to_string(root.join("Pipfile")).ok()?;
|
||||
let mut in_requires = false;
|
||||
for line in content.lines() {
|
||||
let line = line.trim();
|
||||
if line == "[requires]" {
|
||||
in_requires = true;
|
||||
continue;
|
||||
}
|
||||
if line.starts_with('[') {
|
||||
in_requires = false;
|
||||
}
|
||||
if in_requires && line.starts_with("python_version") {
|
||||
if let Some(ver) = extract_version_from_toml_value(line) {
|
||||
return Some(map_version(&ver, PinOrigin::Pipfile));
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn try_runtime_txt(root: &Path) -> Option<ToolchainResolution> {
|
||||
let content = std::fs::read_to_string(root.join("runtime.txt")).ok()?;
|
||||
let line = content.lines().next()?.trim();
|
||||
// e.g. "python-3.11.5"
|
||||
let version = line.strip_prefix("python-").unwrap_or(line);
|
||||
if version.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some(map_version(version, PinOrigin::RuntimeTxt))
|
||||
}
|
||||
|
||||
fn default_python() -> ToolchainResolution {
|
||||
ToolchainResolution {
|
||||
toolchain_id: "python-3".to_owned(),
|
||||
pin_origin: PinOrigin::SystemDefault,
|
||||
toolchain_drift: false,
|
||||
version_string: "3".to_owned(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract the bare version string from a TOML-style assignment line.
///
/// Everything after the first `=` is scanned: leading quotes, whitespace and
/// comparators (`>=`, `==`, `~=`, `^`, …) are skipped up to the first digit,
/// and the version ends at the first character that is neither a digit nor
/// a dot. Additional specifiers, closing quotes and inline comments are
/// therefore dropped, and a dangling dot (from `3.11.*`) is trimmed.
///
/// ```text
/// requires-python = ">=3.11"        -> "3.11"
/// requires-python = ">=3.9,<4.0"    -> "3.9"
/// python_version = "3.11" # pinned  -> "3.11"
/// python = "==3.11.*"               -> "3.11"
/// ```
///
/// Returns `None` when the line has no `=` or no digit follows it.
fn extract_version_from_toml_value(line: &str) -> Option<String> {
    let (_, value) = line.split_once('=')?;
    let from_digit = value.trim_start_matches(|c: char| !c.is_ascii_digit());
    let end = from_digit
        .find(|c: char| !(c.is_ascii_digit() || c == '.'))
        .unwrap_or(from_digit.len());
    let ver = from_digit[..end].trim_end_matches('.');
    (!ver.is_empty()).then(|| ver.to_owned())
}
|
||||
|
||||
/// Map a raw version string to a Nyx reference toolchain ID.
|
||||
///
|
||||
/// Reference images: `python-3.8`, `python-3.9`, `python-3.10`,
|
||||
/// `python-3.11`, `python-3.12`, `python-3.13`.
|
||||
fn map_version(version: &str, origin: PinOrigin) -> ToolchainResolution {
|
||||
// Normalise: take major.minor from "3.11.5" → "3.11"
|
||||
let parts: Vec<&str> = version.splitn(3, '.').collect();
|
||||
let major = parts.first().copied().unwrap_or("3");
|
||||
let minor = parts.get(1).copied();
|
||||
|
||||
let (toolchain_id, drift) = match (major, minor) {
|
||||
("3", Some("8")) => ("python-3.8".to_owned(), false),
|
||||
("3", Some("9")) => ("python-3.9".to_owned(), false),
|
||||
("3", Some("10")) => ("python-3.10".to_owned(), false),
|
||||
("3", Some("11")) => ("python-3.11".to_owned(), false),
|
||||
("3", Some("12")) => ("python-3.12".to_owned(), false),
|
||||
("3", Some("13")) => ("python-3.13".to_owned(), false),
|
||||
// Older 3.x → nearest supported is 3.8
|
||||
("3", Some(m)) if m.parse::<u32>().map_or(false, |v| v < 8) => {
|
||||
("python-3.8".to_owned(), true)
|
||||
}
|
||||
// Newer 3.x beyond catalog → use 3.13 as closest
|
||||
("3", Some(_)) => ("python-3.13".to_owned(), true),
|
||||
("3", None) => ("python-3".to_owned(), false),
|
||||
// Python 2 → unsupported, use system default as closest
|
||||
("2", _) => ("python-3".to_owned(), true),
|
||||
_ => ("python-3".to_owned(), true),
|
||||
};
|
||||
|
||||
ToolchainResolution {
|
||||
version_string: version.to_owned(),
|
||||
toolchain_id,
|
||||
pin_origin: origin,
|
||||
toolchain_drift: drift,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Builds a throwaway project that contains a single pin file and
    /// resolves its toolchain.
    fn resolve_with_pin(file_name: &str, contents: &str) -> ToolchainResolution {
        let project = TempDir::new().unwrap();
        fs::write(project.path().join(file_name), contents).unwrap();
        resolve_python(project.path())
    }

    #[test]
    fn python_version_file_exact() {
        let resolved = resolve_with_pin(".python-version", "3.11.5\n");
        assert_eq!(resolved.toolchain_id, "python-3.11");
        assert!(!resolved.toolchain_drift);
        assert_eq!(resolved.pin_origin, PinOrigin::PythonVersion);
    }

    #[test]
    fn python_version_file_drift() {
        let resolved = resolve_with_pin(".python-version", "3.7\n");
        assert!(resolved.toolchain_drift);
    }

    #[test]
    fn pyproject_requires_python() {
        let resolved =
            resolve_with_pin("pyproject.toml", "[project]\nrequires-python = \">=3.11\"\n");
        assert_eq!(resolved.toolchain_id, "python-3.11");
        assert_eq!(resolved.pin_origin, PinOrigin::PyprojectToml);
    }

    #[test]
    fn pipfile_python_version() {
        let resolved = resolve_with_pin("Pipfile", "[requires]\npython_version = \"3.10\"\n");
        assert_eq!(resolved.toolchain_id, "python-3.10");
        assert_eq!(resolved.pin_origin, PinOrigin::Pipfile);
    }

    #[test]
    fn fallback_to_system_default() {
        // No pin files at all: the resolver must report the system default.
        let empty_project = TempDir::new().unwrap();
        let resolved = resolve_python(empty_project.path());
        assert_eq!(resolved.pin_origin, PinOrigin::SystemDefault);
    }
}
|
||||
|
|
@ -4,26 +4,31 @@
|
|||
//! It is the only function the rest of the crate needs to know about.
|
||||
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::dynamic::corpus::payloads_for;
|
||||
use crate::dynamic::report::{AttemptSummary, VerifyResult, VerifyStatus};
|
||||
use crate::dynamic::runner::{run_spec, RunError};
|
||||
use crate::dynamic::sandbox::SandboxOptions;
|
||||
use crate::dynamic::spec::HarnessSpec;
|
||||
use crate::evidence::UnsupportedReason;
|
||||
use crate::dynamic::telemetry::{self, TelemetryEvent};
|
||||
use crate::dynamic::toolchain;
|
||||
use crate::evidence::{InconclusiveReason, UnsupportedReason};
|
||||
use crate::utils::config::Config;
|
||||
use std::path::Path;
|
||||
use std::time::Instant;
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct VerifyOptions {
|
||||
pub sandbox: SandboxOptions,
|
||||
/// Project root for repro artifact symlinks (optional).
|
||||
pub project_root: Option<std::path::PathBuf>,
|
||||
}
|
||||
|
||||
impl VerifyOptions {
|
||||
/// Build `VerifyOptions` from scanner config.
|
||||
///
|
||||
/// Currently forwards sandbox timeout from `config.scanner`; future
|
||||
/// milestones will add image/resource limits here.
|
||||
pub fn from_config(_config: &Config) -> Self {
|
||||
Self {
|
||||
sandbox: SandboxOptions::default(),
|
||||
project_root: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -33,8 +38,6 @@ impl VerifyOptions {
|
|||
/// Never fails: every error path collapses into a [`VerifyStatus`] so the
|
||||
/// caller can treat dynamic verification as best-effort enrichment.
|
||||
pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
|
||||
// Use the stable hash to identify the finding so the VerifyResult's
|
||||
// finding_id matches HarnessSpec::finding_id (both use the same hex form).
|
||||
let finding_id = format!("{:016x}", diag.stable_hash);
|
||||
|
||||
let spec = match HarnessSpec::from_finding(diag) {
|
||||
|
|
@ -45,18 +48,56 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
|
|||
status: VerifyStatus::Unsupported,
|
||||
triggered_payload: None,
|
||||
reason: Some(reason),
|
||||
inconclusive_reason: None,
|
||||
detail: None,
|
||||
attempts: vec![],
|
||||
toolchain_match: None,
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
// Spec derivable, but no backend implementation exists yet.
|
||||
// Phase M1 always lands here; real execution starts in Phase M2.
|
||||
let _ = &opts.sandbox;
|
||||
match run_spec(&spec, &opts.sandbox) {
|
||||
// Resolve toolchain information.
|
||||
let toolchain_res = toolchain::resolve_python(Path::new("."));
|
||||
let toolchain_match = if toolchain_res.toolchain_drift { "drift" } else { "exact" };
|
||||
|
||||
let start = Instant::now();
|
||||
let result = run_spec(&spec, &opts.sandbox);
|
||||
let elapsed = start.elapsed();
|
||||
|
||||
let verdict = build_verdict(
|
||||
&finding_id,
|
||||
&spec,
|
||||
result,
|
||||
toolchain_match,
|
||||
opts,
|
||||
elapsed,
|
||||
);
|
||||
|
||||
// Emit telemetry (best-effort; never affects verdict).
|
||||
let event = TelemetryEvent::new(
|
||||
&spec,
|
||||
verdict.status,
|
||||
verdict.inconclusive_reason,
|
||||
toolchain_match,
|
||||
elapsed,
|
||||
1, // build_attempts tracked in RunOutcome but not exposed here for simplicity
|
||||
);
|
||||
telemetry::emit(&event);
|
||||
|
||||
verdict
|
||||
}
|
||||
|
||||
fn build_verdict(
|
||||
finding_id: &str,
|
||||
spec: &HarnessSpec,
|
||||
result: Result<crate::dynamic::runner::RunOutcome, RunError>,
|
||||
toolchain_match: &str,
|
||||
opts: &VerifyOptions,
|
||||
_elapsed: std::time::Duration,
|
||||
) -> VerifyResult {
|
||||
match result {
|
||||
Ok(run) => {
|
||||
let attempts = run
|
||||
let attempts: Vec<AttemptSummary> = run
|
||||
.attempts
|
||||
.iter()
|
||||
.map(|a| AttemptSummary {
|
||||
|
|
@ -64,51 +105,138 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
|
|||
exit_code: a.outcome.exit_code,
|
||||
timed_out: a.outcome.timed_out,
|
||||
triggered: a.triggered,
|
||||
sink_hit: a.outcome.sink_hit,
|
||||
})
|
||||
.collect();
|
||||
|
||||
match run.triggered_by {
|
||||
Some(i) => VerifyResult {
|
||||
finding_id,
|
||||
if let Some(i) = run.triggered_by {
|
||||
let triggered_payload = run.attempts[i].payload_label.to_string();
|
||||
let payloads = payloads_for(spec.expected_cap);
|
||||
let vuln_payloads: Vec<_> = payloads.iter().filter(|p| !p.is_benign).collect();
|
||||
let payload_bytes = vuln_payloads
|
||||
.get(i)
|
||||
.map(|p| p.bytes)
|
||||
.unwrap_or(b"");
|
||||
|
||||
// Emit repro artifact.
|
||||
let repro_result = crate::dynamic::repro::write(
|
||||
spec,
|
||||
&opts.sandbox,
|
||||
&run.attempts[i].outcome,
|
||||
&VerifyResult {
|
||||
finding_id: finding_id.to_owned(),
|
||||
status: VerifyStatus::Confirmed,
|
||||
triggered_payload: Some(triggered_payload.clone()),
|
||||
reason: None,
|
||||
inconclusive_reason: None,
|
||||
detail: None,
|
||||
attempts: attempts.clone(),
|
||||
toolchain_match: Some(toolchain_match.to_owned()),
|
||||
},
|
||||
&run.harness_source,
|
||||
&run.entry_source,
|
||||
payload_bytes,
|
||||
run.attempts[i].payload_label,
|
||||
opts.project_root.as_deref(),
|
||||
);
|
||||
|
||||
// If repro write fails, downgrade to NonReproducible.
|
||||
if repro_result.is_err() {
|
||||
return VerifyResult {
|
||||
finding_id: finding_id.to_owned(),
|
||||
status: VerifyStatus::Inconclusive,
|
||||
triggered_payload: None,
|
||||
reason: None,
|
||||
inconclusive_reason: Some(InconclusiveReason::NonReproducible),
|
||||
detail: Some(format!("repro write failed: {}", repro_result.unwrap_err())),
|
||||
attempts,
|
||||
toolchain_match: Some(toolchain_match.to_owned()),
|
||||
};
|
||||
}
|
||||
|
||||
VerifyResult {
|
||||
finding_id: finding_id.to_owned(),
|
||||
status: VerifyStatus::Confirmed,
|
||||
triggered_payload: Some(run.attempts[i].payload_label.to_string()),
|
||||
triggered_payload: Some(triggered_payload),
|
||||
reason: None,
|
||||
inconclusive_reason: None,
|
||||
detail: None,
|
||||
attempts,
|
||||
},
|
||||
None => VerifyResult {
|
||||
finding_id,
|
||||
toolchain_match: Some(toolchain_match.to_owned()),
|
||||
}
|
||||
} else if run.oracle_collision {
|
||||
// Oracle fired but probe didn't — likely collision.
|
||||
VerifyResult {
|
||||
finding_id: finding_id.to_owned(),
|
||||
status: VerifyStatus::Inconclusive,
|
||||
triggered_payload: None,
|
||||
reason: None,
|
||||
inconclusive_reason: Some(InconclusiveReason::OracleCollisionSuspected),
|
||||
detail: Some("oracle fired but sink-reachability probe did not".to_owned()),
|
||||
attempts,
|
||||
toolchain_match: Some(toolchain_match.to_owned()),
|
||||
}
|
||||
} else {
|
||||
VerifyResult {
|
||||
finding_id: finding_id.to_owned(),
|
||||
status: VerifyStatus::NotConfirmed,
|
||||
triggered_payload: None,
|
||||
reason: None,
|
||||
inconclusive_reason: None,
|
||||
detail: None,
|
||||
attempts,
|
||||
},
|
||||
toolchain_match: Some(toolchain_match.to_owned()),
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(RunError::NoPayloadsForCap) => VerifyResult {
|
||||
finding_id,
|
||||
finding_id: finding_id.to_owned(),
|
||||
status: VerifyStatus::Unsupported,
|
||||
triggered_payload: None,
|
||||
reason: Some(UnsupportedReason::NoPayloadsForCap),
|
||||
inconclusive_reason: None,
|
||||
detail: None,
|
||||
attempts: vec![],
|
||||
toolchain_match: None,
|
||||
},
|
||||
Err(RunError::Harness(_)) => VerifyResult {
|
||||
finding_id,
|
||||
status: VerifyStatus::Unsupported,
|
||||
triggered_payload: None,
|
||||
reason: Some(UnsupportedReason::BackendUnavailable),
|
||||
detail: None,
|
||||
attempts: vec![],
|
||||
},
|
||||
Err(RunError::Sandbox(e)) => VerifyResult {
|
||||
finding_id,
|
||||
Err(RunError::Harness(e)) => {
|
||||
// Typed `Unsupported(reason)` carries its semantics in `reason`; the
|
||||
// free-form `detail` is reserved for `Inconclusive`/unexpected paths
|
||||
// (cf. §10 decision 14 and the verify_result_json_shape contract).
|
||||
let (reason, detail) = match &e {
|
||||
crate::dynamic::harness::HarnessError::Unsupported(r) => (Some(r.clone()), None),
|
||||
_ => (Some(UnsupportedReason::BackendUnavailable), Some(format!("{e}"))),
|
||||
};
|
||||
VerifyResult {
|
||||
finding_id: finding_id.to_owned(),
|
||||
status: VerifyStatus::Unsupported,
|
||||
triggered_payload: None,
|
||||
reason,
|
||||
inconclusive_reason: None,
|
||||
detail,
|
||||
attempts: vec![],
|
||||
toolchain_match: None,
|
||||
}
|
||||
}
|
||||
Err(RunError::BuildFailed { stderr, attempts: build_att }) => VerifyResult {
|
||||
finding_id: finding_id.to_owned(),
|
||||
status: VerifyStatus::Inconclusive,
|
||||
triggered_payload: None,
|
||||
reason: None,
|
||||
inconclusive_reason: Some(InconclusiveReason::BuildFailed),
|
||||
detail: Some(format!("build failed after {build_att} attempts: {stderr}")),
|
||||
attempts: vec![],
|
||||
toolchain_match: None,
|
||||
},
|
||||
Err(RunError::Sandbox(e)) => VerifyResult {
|
||||
finding_id: finding_id.to_owned(),
|
||||
status: VerifyStatus::Inconclusive,
|
||||
triggered_payload: None,
|
||||
reason: None,
|
||||
inconclusive_reason: Some(InconclusiveReason::SandboxError),
|
||||
detail: Some(format!("sandbox failed: {e:?}")),
|
||||
attempts: vec![],
|
||||
toolchain_match: None,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -162,7 +162,7 @@ pub struct SymbolicVerdict {
|
|||
///
|
||||
/// Typed so that callers can pattern-match on the reason rather than parsing
|
||||
/// strings. Serializes as PascalCase (e.g. `"BackendUnavailable"`).
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "PascalCase")]
|
||||
pub enum UnsupportedReason {
|
||||
/// The binary was not built with `--features dynamic`, or no backend
|
||||
|
|
@ -181,6 +181,27 @@ pub enum UnsupportedReason {
|
|||
/// A `HarnessSpec` could not be derived from the finding (missing entry
|
||||
/// function, unresolvable language, or zero sink capability bits).
|
||||
SpecDerivationFailed,
|
||||
/// The harness required a file that was redacted by the mount filter for
|
||||
/// secret containment. Path of the redacted file is carried inline.
|
||||
RequiredFileRedactedForSecrets(String),
|
||||
/// The language is not yet supported by the dynamic harness emitter.
|
||||
LangUnsupported,
|
||||
}
|
||||
|
||||
/// Typed reason for `VerifyStatus::Inconclusive`.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "PascalCase")]
|
||||
pub enum InconclusiveReason {
|
||||
/// The oracle fired but the sink-reachability probe did not — likely an
|
||||
/// oracle collision where a coincidental output matched the marker pattern.
|
||||
OracleCollisionSuspected,
|
||||
/// The repro artifact could not be written to disk; verdict cannot be
|
||||
/// independently reproduced.
|
||||
NonReproducible,
|
||||
/// Harness build failed after retries.
|
||||
BuildFailed,
|
||||
/// Sandbox error (spawn failure, I/O error, etc.).
|
||||
SandboxError,
|
||||
}
|
||||
|
||||
/// High-level outcome of a dynamic verification attempt.
|
||||
|
|
@ -209,6 +230,9 @@ pub struct AttemptSummary {
|
|||
pub exit_code: Option<i32>,
|
||||
pub timed_out: bool,
|
||||
pub triggered: bool,
|
||||
/// Whether the in-harness sink-reachability probe fired for this attempt.
|
||||
#[serde(default)]
|
||||
pub sink_hit: bool,
|
||||
}
|
||||
|
||||
/// Result of a dynamic verification attempt for one finding.
|
||||
|
|
@ -229,12 +253,19 @@ pub struct VerifyResult {
|
|||
/// Typed reason for `Unsupported` status.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub reason: Option<UnsupportedReason>,
|
||||
/// Typed reason for `Inconclusive` status.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub inconclusive_reason: Option<InconclusiveReason>,
|
||||
/// Free-form error detail (used for `Inconclusive` status).
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub detail: Option<String>,
|
||||
/// Per-attempt log.
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub attempts: Vec<AttemptSummary>,
|
||||
/// How well the resolved toolchain matches the project's pinned toolchain.
|
||||
/// `"exact"` = precise match; `"drift"` = closest approximation used.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub toolchain_match: Option<String>,
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ pub(crate) mod ext;
|
|||
pub mod path;
|
||||
pub mod project;
|
||||
pub(crate) mod query_cache;
|
||||
pub mod redact;
|
||||
pub(crate) mod snippet;
|
||||
|
||||
pub use analysis_options::{AnalysisOptions, SymexOptions};
|
||||
|
|
|
|||
357
src/utils/redact.rs
Normal file
357
src/utils/redact.rs
Normal file
|
|
@ -0,0 +1,357 @@
|
|||
//! Secret redactor for dynamic sandbox output.
|
||||
//!
|
||||
//! Scrubs known secret patterns from raw bytes before they are written to
|
||||
//! disk (cache, telemetry, repro artifacts). Patterns are compiled once and
|
||||
//! reused across calls.
|
||||
//!
|
||||
//! Covered patterns (§17.4):
|
||||
//! - AWS access key IDs (`AKIA…`)
|
||||
//! - GitHub tokens (`ghp_`, `github_pat_`, `ghs_`, `ghr_`)
|
||||
//! - Slack tokens (`xox[abpr]-…`)
|
||||
//! - OpenAI / generic secret keys (`sk-…`)
|
||||
//! - JWTs, only when they follow `Bearer ` or a matched `key=` (there is no standalone JWT pattern)
|
||||
//! - PEM blocks (`-----BEGIN …-----`)
|
||||
//! - `password=<value>` in query strings or env dumps
|
||||
//! - `api_key=<value>`, `api_token=<value>`, `secret=<value>`
|
||||
//! - `Authorization: Bearer <token>` headers
|
||||
|
||||
/// Apply all redaction patterns to `input`, returning a new `Vec<u8>` with
|
||||
/// secrets replaced by `<REDACTED>`.
|
||||
///
|
||||
/// Operates on raw bytes. Non-UTF-8 bytes are passed through unchanged for
|
||||
/// sections that don't match any pattern.
|
||||
pub fn redact(input: &[u8]) -> Vec<u8> {
|
||||
// Work in UTF-8 lossy space; non-decodable bytes round-trip intact.
|
||||
let text = String::from_utf8_lossy(input);
|
||||
let redacted = redact_str(&text);
|
||||
redacted.into_bytes()
|
||||
}
|
||||
|
||||
/// Apply all redaction patterns to a UTF-8 string.
|
||||
pub fn redact_str(input: &str) -> String {
|
||||
let mut s = input.to_owned();
|
||||
for pattern in PATTERNS {
|
||||
s = pattern.apply(&s);
|
||||
}
|
||||
s
|
||||
}
|
||||
|
||||
/// Whether the raw bytes contain any redactable secret. Used for assertion
|
||||
/// tests in the secrets fixture suite.
|
||||
pub fn contains_secret(input: &[u8]) -> bool {
|
||||
let text = String::from_utf8_lossy(input);
|
||||
PATTERNS.iter().any(|p| p.matches(&text))
|
||||
}
|
||||
|
||||
/// One redaction rule: a cheap literal gate plus the functions that do the
/// actual detection and rewriting.
struct Pattern {
    /// Literal that must occur in the text before either function is called.
    prefix: &'static str,
    /// Produces the redacted copy of the text.
    replace_fn: fn(&str) -> String,
    /// Decides whether the text contains a secret of this kind.
    matches_fn: fn(&str) -> bool,
}

impl Pattern {
    /// Returns the redacted text, or an unchanged copy when `prefix` does
    /// not occur in `s`.
    fn apply(&self, s: &str) -> String {
        if !s.contains(self.prefix) {
            return s.to_owned();
        }
        (self.replace_fn)(s)
    }

    /// Returns `true` only when `prefix` occurs in `s` and `matches_fn`
    /// agrees; `matches_fn` is not called otherwise.
    fn matches(&self, s: &str) -> bool {
        s.contains(self.prefix) && (self.matches_fn)(s)
    }
}
|
||||
|
||||
static PATTERNS: &[Pattern] = &[
|
||||
// AWS access key IDs: AKIA[A-Z0-9]{16}
|
||||
Pattern {
|
||||
prefix: "AKIA",
|
||||
replace_fn: |s| replace_pattern(s, |c: &str| {
|
||||
if let Some(start) = c.find("AKIA") {
|
||||
let rest = &c[start + 4..];
|
||||
let end = rest.find(|ch: char| !ch.is_ascii_alphanumeric()).unwrap_or(rest.len());
|
||||
if end >= 12 {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}, "AKIA", 20),
|
||||
matches_fn: |s| akia_matches(s),
|
||||
},
|
||||
// GitHub personal access tokens: ghp_, github_pat_, ghs_, ghr_
|
||||
Pattern {
|
||||
prefix: "ghp_",
|
||||
replace_fn: |s| replace_token_prefix(s, "ghp_"),
|
||||
matches_fn: |s| s.contains("ghp_"),
|
||||
},
|
||||
Pattern {
|
||||
prefix: "github_pat_",
|
||||
replace_fn: |s| replace_token_prefix(s, "github_pat_"),
|
||||
matches_fn: |s| s.contains("github_pat_"),
|
||||
},
|
||||
Pattern {
|
||||
prefix: "ghs_",
|
||||
replace_fn: |s| replace_token_prefix(s, "ghs_"),
|
||||
matches_fn: |s| s.contains("ghs_"),
|
||||
},
|
||||
Pattern {
|
||||
prefix: "ghr_",
|
||||
replace_fn: |s| replace_token_prefix(s, "ghr_"),
|
||||
matches_fn: |s| s.contains("ghr_"),
|
||||
},
|
||||
// Slack tokens: xox[abpr]-...
|
||||
Pattern {
|
||||
prefix: "xoxa-",
|
||||
replace_fn: |s| replace_token_prefix(s, "xoxa-"),
|
||||
matches_fn: |s| s.contains("xoxa-"),
|
||||
},
|
||||
Pattern {
|
||||
prefix: "xoxb-",
|
||||
replace_fn: |s| replace_token_prefix(s, "xoxb-"),
|
||||
matches_fn: |s| s.contains("xoxb-"),
|
||||
},
|
||||
Pattern {
|
||||
prefix: "xoxp-",
|
||||
replace_fn: |s| replace_token_prefix(s, "xoxp-"),
|
||||
matches_fn: |s| s.contains("xoxp-"),
|
||||
},
|
||||
Pattern {
|
||||
prefix: "xoxr-",
|
||||
replace_fn: |s| replace_token_prefix(s, "xoxr-"),
|
||||
matches_fn: |s| s.contains("xoxr-"),
|
||||
},
|
||||
// Generic secret keys: sk-...
|
||||
Pattern {
|
||||
prefix: "sk-",
|
||||
replace_fn: |s| replace_token_prefix(s, "sk-"),
|
||||
matches_fn: |s| contains_sk_token(s),
|
||||
},
|
||||
// PEM blocks
|
||||
Pattern {
|
||||
prefix: "-----BEGIN",
|
||||
replace_fn: replace_pem_blocks,
|
||||
matches_fn: |s| s.contains("-----BEGIN"),
|
||||
},
|
||||
// password=<value>
|
||||
Pattern {
|
||||
prefix: "password=",
|
||||
replace_fn: |s| replace_kv_pattern(s, "password"),
|
||||
matches_fn: |s| s.contains("password="),
|
||||
},
|
||||
// api_key=<value>
|
||||
Pattern {
|
||||
prefix: "api_key=",
|
||||
replace_fn: |s| replace_kv_pattern(s, "api_key"),
|
||||
matches_fn: |s| s.contains("api_key="),
|
||||
},
|
||||
// api_token=<value>
|
||||
Pattern {
|
||||
prefix: "api_token=",
|
||||
replace_fn: |s| replace_kv_pattern(s, "api_token"),
|
||||
matches_fn: |s| s.contains("api_token="),
|
||||
},
|
||||
// secret=<value> (but not "secret" as a word in other contexts)
|
||||
Pattern {
|
||||
prefix: "secret=",
|
||||
replace_fn: |s| replace_kv_pattern(s, "secret"),
|
||||
matches_fn: |s| s.contains("secret="),
|
||||
},
|
||||
// Authorization: Bearer <token>
|
||||
Pattern {
|
||||
prefix: "Bearer ",
|
||||
replace_fn: replace_bearer,
|
||||
matches_fn: |s| s.contains("Bearer "),
|
||||
},
|
||||
];
|
||||
|
||||
/// Rewrites every occurrence of `prefix` plus the token that follows it as
/// `<prefix><REDACTED>`.
///
/// The token is everything after the prefix up to (not including) the next
/// whitespace character, `"` or `'`, or the end of the input.
fn replace_token_prefix(s: &str, prefix: &str) -> String {
    let mut scrubbed = String::with_capacity(s.len());
    let mut remaining = s;
    while let Some((before, after)) = remaining.split_once(prefix) {
        scrubbed.push_str(before);
        scrubbed.push_str(prefix);
        scrubbed.push_str("<REDACTED>");
        // Length of the token value that gets dropped.
        let token_len = after
            .find(|c: char| c.is_whitespace() || matches!(c, '"' | '\''))
            .unwrap_or(after.len());
        remaining = &after[token_len..];
    }
    scrubbed.push_str(remaining);
    scrubbed
}
|
||||
|
||||
/// Redacts the value of every `key=<value>` occurrence in `s`.
///
/// * Unquoted values end at whitespace, a quote, `&`, or end of input and
///   are replaced by `<REDACTED>`.
/// * Quoted values (`key="…"` / `key='…'`) are redacted between the quotes,
///   which are kept. If the closing quote is missing on the same line, the
///   rest of that line is redacted.
/// * Empty values are left as they are.
fn replace_kv_pattern(s: &str, key: &str) -> String {
    let needle = format!("{key}=");
    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(pos) = rest.find(&needle) {
        let value_start = pos + needle.len();
        out.push_str(&rest[..value_start]);
        let after = &rest[value_start..];

        match after.chars().next() {
            Some(quote @ ('"' | '\'')) => {
                // Both quote characters are one byte wide, so slicing at 1 is safe.
                let inner = &after[1..];
                let line_end = inner.find('\n').unwrap_or(inner.len());
                out.push(quote);
                match inner[..line_end].find(quote) {
                    Some(close) => {
                        if close > 0 {
                            out.push_str("<REDACTED>");
                        }
                        out.push(quote);
                        rest = &inner[close + 1..];
                    }
                    None => {
                        // Unterminated quote: never let the value leak; stop
                        // at the end of the line so later lines survive.
                        if line_end > 0 {
                            out.push_str("<REDACTED>");
                        }
                        rest = &inner[line_end..];
                    }
                }
            }
            _ => {
                // Value ends at whitespace, quote, &, or end-of-string.
                let end = after
                    .find(|ch: char| ch.is_whitespace() || ch == '"' || ch == '\'' || ch == '&')
                    .unwrap_or(after.len());
                if end > 0 {
                    out.push_str("<REDACTED>");
                }
                rest = &after[end..];
            }
        }
    }
    out.push_str(rest);
    out
}
|
||||
|
||||
/// Replaces the credential after every `Bearer ` with `<REDACTED>`.
///
/// The credential runs up to the next whitespace character, `"` or `'`
/// (or the end of input). When nothing follows `Bearer ` before such a
/// delimiter, the text is copied through untouched.
fn replace_bearer(s: &str) -> String {
    const SCHEME: &str = "Bearer ";

    let mut scrubbed = String::with_capacity(s.len());
    let mut remaining = s;
    while let Some((before, credential)) = remaining.split_once(SCHEME) {
        scrubbed.push_str(before);
        scrubbed.push_str(SCHEME);
        let token_len = credential
            .find(|c: char| c.is_whitespace() || matches!(c, '"' | '\''))
            .unwrap_or(credential.len());
        if token_len != 0 {
            scrubbed.push_str("<REDACTED>");
        }
        remaining = &credential[token_len..];
    }
    scrubbed.push_str(remaining);
    scrubbed
}
|
||||
|
||||
/// Replaces every PEM block (`-----BEGIN …-----` through
/// `-----END …-----`) with `<PEM-REDACTED>`.
///
/// * The whole footer line, including its label and closing dashes, is
///   consumed. The closing dashes are searched for *after* the `-----END`
///   marker and only on the same line.
/// * If the footer has no closing dashes, the block ends right after
///   `-----END`.
/// * If there is no `-----END` at all, everything from `-----BEGIN` to the
///   end of the input is redacted.
fn replace_pem_blocks(s: &str) -> String {
    const BEGIN: &str = "-----BEGIN";
    const END: &str = "-----END";
    const DASHES: &str = "-----";

    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(start) = rest.find(BEGIN) {
        out.push_str(&rest[..start]);
        out.push_str("<PEM-REDACTED>");

        let block = &rest[start..];
        rest = match block.find(END) {
            Some(end_rel) => {
                let after_marker = &block[end_rel + END.len()..];
                let line_end = after_marker.find('\n').unwrap_or(after_marker.len());
                let footer_len = after_marker[..line_end]
                    .find(DASHES)
                    .map_or(0, |p| p + DASHES.len());
                &after_marker[footer_len..]
            }
            // Truncated block: redact through end of input.
            None => "",
        };
    }
    out.push_str(rest);
    out
}
|
||||
|
||||
/// Reports whether `s` contains an AWS access key ID: `AKIA` followed by a
/// run of at least 16 ASCII alphanumerics (20 characters in total).
///
/// Every occurrence of `AKIA` is examined, so a stray `AKIA` earlier in the
/// text cannot hide a real key later on. The 16-character minimum mirrors
/// the `AKIA[A-Z0-9]{16}` shape and the threshold the AKIA replacer is
/// configured with (token length 20), so anything this function flags is
/// also something the replacer rewrites.
fn akia_matches(s: &str) -> bool {
    const PREFIX: &str = "AKIA";
    const MIN_SUFFIX_LEN: usize = 16;

    s.match_indices(PREFIX).any(|(pos, _)| {
        s[pos + PREFIX.len()..]
            .chars()
            .take_while(|ch| ch.is_ascii_alphanumeric())
            .count()
            >= MIN_SUFFIX_LEN
    })
}
|
||||
|
||||
/// Reports whether any `sk-` in `s` is immediately followed by a run of at
/// least 20 characters drawn from ASCII alphanumerics and `-`. The length
/// requirement keeps short names such as `sk-learn` from counting.
fn contains_sk_token(s: &str) -> bool {
    s.match_indices("sk-").any(|(idx, marker)| {
        let body = &s[idx + marker.len()..];
        let run = body
            .find(|c: char| !(c.is_ascii_alphanumeric() || c == '-'))
            .unwrap_or(body.len());
        run >= 20
    })
}
|
||||
|
||||
/// Replaces fixed-shape tokens that start with `prefix`.
///
/// For each occurrence of `prefix`, the run of ASCII alphanumerics that
/// follows is measured. If the run has at least `token_len - prefix.len()`
/// characters, the prefix and the entire run are replaced by `<REDACTED>`;
/// otherwise the prefix is copied through and scanning resumes right after
/// it.
///
/// `_check` is accepted for call-site compatibility and is never invoked.
fn replace_pattern(
    s: &str,
    _check: impl Fn(&str) -> bool,
    prefix: &str,
    token_len: usize,
) -> String {
    let mut scrubbed = String::with_capacity(s.len());
    let mut cursor = s;
    while let Some((head, tail)) = cursor.split_once(prefix) {
        let run_len = tail
            .find(|c: char| !c.is_ascii_alphanumeric())
            .unwrap_or(tail.len());
        scrubbed.push_str(head);
        if run_len >= token_len - prefix.len() {
            scrubbed.push_str("<REDACTED>");
            cursor = &tail[run_len..];
        } else {
            scrubbed.push_str(prefix);
            cursor = tail;
        }
    }
    scrubbed.push_str(cursor);
    scrubbed
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn redacts_aws_key() {
        let scrubbed = redact_str("key: AKIAFAKETEST00000000 in config");
        assert!(!scrubbed.contains("AKIAFAKETEST00000000"), "AWS key must be redacted");
        assert!(scrubbed.contains("<REDACTED>"));
    }

    #[test]
    fn redacts_github_token() {
        let scrubbed = redact_str("token=ghp_abcdefghijklmnopqrstuvwxyz012345");
        assert!(!scrubbed.contains("abcdefghijklmnopqrstuvwxyz012345"));
        assert!(scrubbed.contains("ghp_<REDACTED>"));
    }

    #[test]
    fn redacts_password_kv() {
        let scrubbed =
            redact_str("url=postgres://user:pass@host/db password=super_secret_12345");
        assert!(!scrubbed.contains("super_secret_12345"));
    }

    #[test]
    fn redacts_bearer_token() {
        let scrubbed = redact_str("Authorization: Bearer eyJhbGciOiJIUzI1NiJ9.xyz.sig");
        assert!(!scrubbed.contains("eyJhbGciOiJIUzI1NiJ9"));
        assert!(scrubbed.contains("Bearer <REDACTED>"));
    }

    #[test]
    fn passthrough_clean_bytes() {
        // Leading bytes are invalid UTF-8; the readable text must still survive.
        let needle: &[u8] = b"normal text";
        let scrubbed = redact(b"\x80\x81 normal text here");
        assert!(scrubbed.windows(needle.len()).any(|window| window == needle));
    }

    #[test]
    fn contains_secret_detects_aws() {
        assert!(contains_secret(b"AKIAFAKETEST00000000"));
        assert!(!contains_secret(b"clean output"));
    }

    #[test]
    fn redacts_pem_block() {
        let scrubbed = redact_str(
            "-----BEGIN RSA PRIVATE KEY-----\nMIIEowIBAAKCAQ\n-----END RSA PRIVATE KEY-----",
        );
        assert!(!scrubbed.contains("MIIEowIBAAKCAQ"));
        assert!(scrubbed.contains("<PEM-REDACTED>"));
    }
}
|
||||
12
tests/dynamic_fixtures/python/cmdi_adversarial.py
Normal file
12
tests/dynamic_fixtures/python/cmdi_adversarial.py
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
"""Command injection — adversarial collision fixture.
|
||||
|
||||
Prints NYX_PWN_CMDI unconditionally without going through a shell sink.
|
||||
Expected verdict: Inconclusive(OracleCollisionSuspected).
|
||||
"""
|
||||
|
||||
|
||||
def process_input(user_input):
|
||||
"""Prints oracle marker without using a command execution sink."""
|
||||
# Coincidental oracle match — not a shell sink.
|
||||
print("NYX_PWN_CMDI")
|
||||
return user_input.upper()
|
||||
22
tests/dynamic_fixtures/python/cmdi_negative.py
Normal file
22
tests/dynamic_fixtures/python/cmdi_negative.py
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
"""Command injection — negative fixture.
|
||||
|
||||
Safe function: uses subprocess list form (no shell=True), preventing injection.
|
||||
Expected verdict: NotConfirmed.
|
||||
"""
|
||||
import subprocess
|
||||
|
||||
|
||||
def run_ping(host):
|
||||
"""Safe: list-form subprocess, no shell expansion."""
|
||||
# Sanitize: only allow alphanumeric and dots
|
||||
safe_host = "".join(c for c in host if c.isalnum() or c == ".")
|
||||
if not safe_host:
|
||||
print("Invalid host")
|
||||
return
|
||||
result = subprocess.run(
|
||||
["ping", "-c", "1", safe_host],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
print(result.stdout)
|
||||
19
tests/dynamic_fixtures/python/cmdi_positive.py
Normal file
19
tests/dynamic_fixtures/python/cmdi_positive.py
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
"""Command injection — positive fixture.
|
||||
|
||||
Vulnerable function: passes user input directly to shell=True subprocess.
|
||||
Expected verdict: Confirmed (payload "; echo NYX_PWN_CMDI" echoes the marker).
|
||||
"""
|
||||
import subprocess
|
||||
|
||||
|
||||
def run_ping(host):
|
||||
"""Vulnerable: user input in shell command."""
|
||||
result = subprocess.run(
|
||||
"ping -c 1 " + host,
|
||||
shell=True,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
print(result.stdout)
|
||||
print(result.stderr, end="")
|
||||
11
tests/dynamic_fixtures/python/cmdi_unsupported.py
Normal file
11
tests/dynamic_fixtures/python/cmdi_unsupported.py
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
"""Command injection — unsupported fixture.
|
||||
|
||||
Low-confidence finding that produces Unsupported(ConfidenceTooLow).
|
||||
Expected verdict: Unsupported(ConfidenceTooLow)
|
||||
"""
|
||||
import subprocess
|
||||
|
||||
|
||||
def process_request(cmd):
|
||||
"""Vulnerable function used in unsupported-confidence test."""
|
||||
subprocess.run(cmd, shell=True)
|
||||
12
tests/dynamic_fixtures/python/fileio_adversarial.py
Normal file
12
tests/dynamic_fixtures/python/fileio_adversarial.py
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
"""File I/O — adversarial collision fixture.
|
||||
|
||||
Prints "root:" unconditionally without reading any file.
|
||||
Expected verdict: Inconclusive(OracleCollisionSuspected).
|
||||
"""
|
||||
|
||||
|
||||
def read_file(path):
|
||||
"""Prints oracle marker without opening any file."""
|
||||
# Coincidental match — not a file I/O sink.
|
||||
print("root: nobody:*:0:0:System Administrator:/var/root:/bin/sh")
|
||||
return path
|
||||
22
tests/dynamic_fixtures/python/fileio_negative.py
Normal file
22
tests/dynamic_fixtures/python/fileio_negative.py
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
"""File I/O — negative fixture.

Safe function: validates path stays within allowed directory.
Expected verdict: NotConfirmed.
"""
import os


def read_file(path):
    """Safe: resolves and validates path is within the real /tmp/safe-uploads/."""
    base_dir = os.path.realpath("/tmp/safe-uploads")  # resolved, so a symlinked /tmp still matches
    os.makedirs(base_dir, exist_ok=True)
    # Resolve to absolute path and check it stays within base_dir.
    abs_path = os.path.realpath(os.path.join(base_dir, path))
    if not abs_path.startswith(base_dir + os.sep) and abs_path != base_dir:
        print("Access denied: path traversal detected")
        return
    try:
        with open(abs_path) as f:
            print(f.read())
    except FileNotFoundError:
        print("File not found")
|
||||
14
tests/dynamic_fixtures/python/fileio_positive.py
Normal file
14
tests/dynamic_fixtures/python/fileio_positive.py
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
"""File I/O — positive fixture.

Vulnerable function: opens a file at a user-controlled path.
Expected verdict: Confirmed (path traversal payload reaches /etc/passwd).
"""


def read_file(path):
    """Vulnerable: the caller-supplied path is opened without any validation."""
    try:
        with open(path) as handle:
            print(handle.read())
    except OSError as err:  # PermissionError is an OSError subclass
        print(f"Error reading {path}: {err}", end="")
|
||||
10
tests/dynamic_fixtures/python/fileio_unsupported.py
Normal file
10
tests/dynamic_fixtures/python/fileio_unsupported.py
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
"""File I/O — unsupported fixture (low confidence).
|
||||
|
||||
Expected verdict: Unsupported(ConfidenceTooLow)
|
||||
"""
|
||||
|
||||
|
||||
def read_config(path):
|
||||
"""Vulnerable function in unsupported-confidence test."""
|
||||
with open(path) as f:
|
||||
return f.read()
|
||||
19
tests/dynamic_fixtures/python/sqli_adversarial.py
Normal file
19
tests/dynamic_fixtures/python/sqli_adversarial.py
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
"""SQL injection — adversarial collision fixture.
|
||||
|
||||
This function prints "NYX_SQL_CONFIRMED" unconditionally (simulating a
|
||||
coincidental oracle match). The sink is a harmless print statement, not
|
||||
an actual SQL execution.
|
||||
|
||||
Expected verdict: Inconclusive(OracleCollisionSuspected)
|
||||
- oracle_fired = True (OutputContains("NYX_SQL_CONFIRMED"))
|
||||
- sink_hit = False (sys.settrace probe never fires on SQL execute line)
|
||||
"""
|
||||
|
||||
|
||||
def get_value(user_input):
|
||||
"""Always prints the oracle marker — no actual SQL involved."""
|
||||
# Coincidental output match — not a real vulnerability.
|
||||
print("NYX_SQL_CONFIRMED")
|
||||
# The above is not a SQL sink; the settrace probe on a real sink line
|
||||
# (different line number or file) will not fire.
|
||||
return user_input
|
||||
18
tests/dynamic_fixtures/python/sqli_negative.py
Normal file
18
tests/dynamic_fixtures/python/sqli_negative.py
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
"""SQL injection — negative fixture.
|
||||
|
||||
Safe function: uses parameterized queries.
|
||||
Expected verdict: NotConfirmed (parameterized query prevents injection).
|
||||
"""
|
||||
import sqlite3
|
||||
|
||||
|
||||
def login(username):
|
||||
"""Safe login: parameterized query prevents SQL injection."""
|
||||
conn = sqlite3.connect(":memory:")
|
||||
conn.execute("CREATE TABLE users (id INTEGER, name TEXT)")
|
||||
conn.execute("INSERT INTO users VALUES (1, 'alice')")
|
||||
# Safe: parameterized query
|
||||
rows = conn.execute("SELECT name FROM users WHERE name=?", (username,)).fetchall()
|
||||
for row in rows:
|
||||
print(row[0])
|
||||
conn.close()
|
||||
27
tests/dynamic_fixtures/python/sqli_positive.py
Normal file
27
tests/dynamic_fixtures/python/sqli_positive.py
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
"""SQL injection — positive fixture.
|
||||
|
||||
Vulnerable function: directly concatenates user input into SQL.
|
||||
Expected verdict: Confirmed (SQLI corpus UNION payload causes "NYX_SQL_CONFIRMED"
|
||||
to appear in output when the fixture prints query results).
|
||||
"""
|
||||
import sqlite3
|
||||
|
||||
|
||||
def login(username):
|
||||
"""Vulnerable login: direct string concatenation in SQL query."""
|
||||
conn = sqlite3.connect(":memory:")
|
||||
conn.execute("CREATE TABLE users (id INTEGER, name TEXT)")
|
||||
conn.execute("INSERT INTO users VALUES (1, 'alice')")
|
||||
conn.execute("INSERT INTO users VALUES (2, 'bob')")
|
||||
# Vulnerable: user input directly concatenated
|
||||
query = "SELECT name FROM users WHERE name='" + username + "'"
|
||||
try:
|
||||
rows = conn.execute(query).fetchall()
|
||||
for row in rows:
|
||||
print(row[0])
|
||||
except sqlite3.OperationalError as e:
|
||||
# Error-based: print query on failure (common in debug mode)
|
||||
print(f"DB query: {query}")
|
||||
print(f"DB error: {e}", end="")
|
||||
finally:
|
||||
conn.close()
|
||||
18
tests/dynamic_fixtures/python/sqli_unsupported.py
Normal file
18
tests/dynamic_fixtures/python/sqli_unsupported.py
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
"""SQL injection — unsupported fixture.
|
||||
|
||||
This file contains a vulnerable class method. The test creates a Diag
|
||||
with `confidence = Low`, which makes `from_finding` return
|
||||
`Err(UnsupportedReason::ConfidenceTooLow)`.
|
||||
|
||||
Expected verdict: Unsupported(ConfidenceTooLow)
|
||||
"""
|
||||
import sqlite3
|
||||
|
||||
|
||||
class UserRepository:
|
||||
"""Vulnerable class method — entry kind unsupported in current milestone."""
|
||||
|
||||
def find_user(self, name):
|
||||
conn = sqlite3.connect(":memory:")
|
||||
query = "SELECT * FROM users WHERE name='" + name + "'"
|
||||
return conn.execute(query).fetchall()
|
||||
28
tests/dynamic_fixtures/python/sqli_with_secret.py
Normal file
28
tests/dynamic_fixtures/python/sqli_with_secret.py
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
"""SQL injection fixture with a secrets file in the project.
|
||||
|
||||
Used for the secret-bearing fixture test: verifies that the AWS key
|
||||
from .env does not appear in cache, telemetry, or repro outcome.json
|
||||
after redaction.
|
||||
|
||||
Expected verdict: result depends on execution; secret must never appear
|
||||
in any output artifact.
|
||||
"""
|
||||
import sqlite3
|
||||
import os
|
||||
|
||||
|
||||
def login(username):
|
||||
"""Vulnerable login — same as sqli_positive but in a project with .env."""
|
||||
conn = sqlite3.connect(":memory:")
|
||||
conn.execute("CREATE TABLE users (id INTEGER, name TEXT)")
|
||||
conn.execute("INSERT INTO users VALUES (1, 'alice')")
|
||||
query = "SELECT name FROM users WHERE name='" + username + "'"
|
||||
try:
|
||||
rows = conn.execute(query).fetchall()
|
||||
for row in rows:
|
||||
print(row[0])
|
||||
except sqlite3.OperationalError as e:
|
||||
print(f"DB query: {query}")
|
||||
print(f"DB error: {e}", end="")
|
||||
finally:
|
||||
conn.close()
|
||||
11
tests/dynamic_fixtures/python/ssrf_adversarial.py
Normal file
11
tests/dynamic_fixtures/python/ssrf_adversarial.py
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
"""SSRF — adversarial collision fixture.
|
||||
|
||||
Prints "daemon:" unconditionally without making any network request.
|
||||
Expected verdict: Inconclusive(OracleCollisionSuspected).
|
||||
"""
|
||||
|
||||
|
||||
def fetch_url(url):
|
||||
"""Prints oracle marker without fetching any URL."""
|
||||
print("daemon:*:1:1:System Services:/var/root:/usr/bin/false")
|
||||
return url
|
||||
33
tests/dynamic_fixtures/python/ssrf_negative.py
Normal file
33
tests/dynamic_fixtures/python/ssrf_negative.py
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
"""SSRF — negative fixture.
|
||||
|
||||
Safe function: validates URL scheme and host against an allowlist.
|
||||
Expected verdict: NotConfirmed.
|
||||
"""
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
|
||||
|
||||
ALLOWED_SCHEMES = {"https"}
|
||||
ALLOWED_HOSTS = {"api.example.com", "data.example.com"}
|
||||
|
||||
|
||||
def fetch_url(url):
|
||||
"""Safe: validates URL before fetching."""
|
||||
try:
|
||||
parsed = urllib.parse.urlparse(url)
|
||||
except Exception:
|
||||
print("Invalid URL")
|
||||
return
|
||||
|
||||
if parsed.scheme not in ALLOWED_SCHEMES:
|
||||
print(f"Scheme not allowed: {parsed.scheme}")
|
||||
return
|
||||
if parsed.hostname not in ALLOWED_HOSTS:
|
||||
print(f"Host not allowed: {parsed.hostname}")
|
||||
return
|
||||
|
||||
try:
|
||||
with urllib.request.urlopen(url, timeout=3) as resp:
|
||||
print(resp.read().decode("utf-8", errors="replace"))
|
||||
except Exception as e:
|
||||
print(f"Fetch error: {e}", end="")
|
||||
16
tests/dynamic_fixtures/python/ssrf_positive.py
Normal file
16
tests/dynamic_fixtures/python/ssrf_positive.py
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
"""SSRF — positive fixture.
|
||||
|
||||
Vulnerable function: fetches a user-controlled URL.
|
||||
Expected verdict: Confirmed (file:// payload reads /etc/passwd → "root:").
|
||||
"""
|
||||
import urllib.request
|
||||
|
||||
|
||||
def fetch_url(url):
|
||||
"""Vulnerable: fetches URL provided by user without validation."""
|
||||
try:
|
||||
with urllib.request.urlopen(url, timeout=3) as resp:
|
||||
content = resp.read().decode("utf-8", errors="replace")
|
||||
print(content)
|
||||
except Exception as e:
|
||||
print(f"Fetch error: {e}", end="")
|
||||
10
tests/dynamic_fixtures/python/ssrf_unsupported.py
Normal file
10
tests/dynamic_fixtures/python/ssrf_unsupported.py
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
"""SSRF — unsupported fixture (low confidence).
|
||||
|
||||
Expected verdict: Unsupported(ConfidenceTooLow)
|
||||
"""
|
||||
import urllib.request
|
||||
|
||||
|
||||
def fetch(url):
|
||||
"""Vulnerable function in unsupported-confidence test."""
|
||||
return urllib.request.urlopen(url).read()
|
||||
13
tests/dynamic_fixtures/python/xss_adversarial.py
Normal file
13
tests/dynamic_fixtures/python/xss_adversarial.py
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
"""XSS — adversarial collision fixture.
|
||||
|
||||
Outputs the XSS marker string unconditionally without it being a real
|
||||
HTML sink (e.g., a test that checks for a string literal).
|
||||
Expected verdict: Inconclusive(OracleCollisionSuspected).
|
||||
"""
|
||||
|
||||
|
||||
def render_comment(user_input):
|
||||
"""Prints oracle marker outside of any HTML rendering context."""
|
||||
# Coincidental match — not an HTML sink.
|
||||
print("<script>NYX_XSS_CONFIRMED</script>")
|
||||
return user_input
|
||||
12
tests/dynamic_fixtures/python/xss_negative.py
Normal file
12
tests/dynamic_fixtures/python/xss_negative.py
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
"""XSS — negative fixture.
|
||||
|
||||
Safe function: uses html.escape() before rendering.
|
||||
Expected verdict: NotConfirmed (script tag escaped to &lt;script&gt;).
|
||||
"""
|
||||
import html
|
||||
|
||||
|
||||
def render_comment(user_input):
|
||||
"""Safe: HTML-escapes user input before rendering."""
|
||||
safe = html.escape(user_input)
|
||||
print(f"<div class='comment'>{safe}</div>")
|
||||
11
tests/dynamic_fixtures/python/xss_positive.py
Normal file
11
tests/dynamic_fixtures/python/xss_positive.py
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
"""XSS — positive fixture.
|
||||
|
||||
Vulnerable function: echoes user input directly into HTML without escaping.
|
||||
Expected verdict: Confirmed (XSS payload echoed verbatim to output).
|
||||
"""
|
||||
|
||||
|
||||
def render_comment(user_input):
|
||||
"""Vulnerable: no HTML escaping."""
|
||||
html = f"<div class='comment'>{user_input}</div>"
|
||||
print(html)
|
||||
9
tests/dynamic_fixtures/python/xss_unsupported.py
Normal file
9
tests/dynamic_fixtures/python/xss_unsupported.py
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
"""XSS — unsupported fixture (low confidence).
|
||||
|
||||
Expected verdict: Unsupported(ConfidenceTooLow)
|
||||
"""
|
||||
|
||||
|
||||
def render(input_text):
|
||||
"""Vulnerable render in unsupported-confidence test."""
|
||||
print(f"<span>{input_text}</span>")
|
||||
5
tests/dynamic_fixtures/secrets/.env
Normal file
5
tests/dynamic_fixtures/secrets/.env
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
AWS_ACCESS_KEY_ID=AKIAFAKETEST00000000
|
||||
AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYFAKEKEY00
|
||||
DATABASE_URL=postgres://user:password123@localhost:5432/mydb
|
||||
API_KEY=sk-proj-fakesecrettoken123456789abcdef0123456789abcdef
|
||||
GITHUB_TOKEN=ghp_fakegithubtoken1234567890abcdefghij
|
||||
|
|
@ -14,6 +14,7 @@
|
|||
//! | `src/main.rs` | binary entry point; wires --features dynamic|
|
||||
//! | `src/lib.rs` | crate root; `#[cfg(feature="dynamic")]` mod|
|
||||
//! | `src/commands/scan.rs` | enrichment loop lives here |
|
||||
//! | `src/commands/mod.rs` | `verify-feedback` subcommand (§21.2) |
|
||||
//! | `src/server/` (any file) | server start_scan verify wiring |
|
||||
|
||||
use std::fs;
|
||||
|
|
@ -25,6 +26,7 @@ const ALLOWED: &[&str] = &[
|
|||
"main.rs",
|
||||
"lib.rs",
|
||||
"commands/scan.rs",
|
||||
"commands/mod.rs",
|
||||
"server/",
|
||||
// The dynamic module itself is obviously allowed.
|
||||
"dynamic/",
|
||||
|
|
|
|||
|
|
@ -86,16 +86,16 @@ mod verify_e2e {
|
|||
}
|
||||
|
||||
/// A finding with a supported cap (SQL_QUERY) and a derivable spec reaches
/// `harness::build`. The finding uses a Rust entry file, so the Python-only
/// harness emitter returns `LangUnsupported`.
#[test]
fn verify_finding_rust_lang_returns_lang_unsupported() {
    let diag = taint_diag_with_cap(Cap::SQL_QUERY);
    let opts = VerifyOptions::default();
    let result = verify_finding(&diag, &opts);

    // Rejected before any payload is attempted: no trigger, no attempts.
    assert_eq!(result.status, VerifyStatus::Unsupported);
    assert_eq!(result.reason, Some(UnsupportedReason::LangUnsupported));
    assert!(result.triggered_payload.is_none());
    assert!(result.attempts.is_empty());
}
|
||||
|
|
@ -127,11 +127,11 @@ mod verify_e2e {
|
|||
assert_eq!(result.reason, Some(UnsupportedReason::ConfidenceTooLow));
|
||||
}
|
||||
|
||||
/// The JSON shape of `VerifyResult` for `BackendUnavailable` matches the
|
||||
/// documented contract: `status`, `reason` present; `triggered_payload`,
|
||||
/// `detail`, `attempts` absent (skipped by serde default).
|
||||
/// The JSON shape of `VerifyResult` for a Rust finding (lang unsupported)
|
||||
/// matches the documented contract: `status`, `reason` present;
|
||||
/// `triggered_payload`, `detail`, `attempts` absent (skipped by serde).
|
||||
#[test]
|
||||
fn verify_result_json_shape_backend_unavailable() {
|
||||
fn verify_result_json_shape_lang_unsupported() {
|
||||
let diag = taint_diag_with_cap(Cap::SQL_QUERY);
|
||||
let opts = VerifyOptions::default();
|
||||
let result = verify_finding(&diag, &opts);
|
||||
|
|
@ -140,7 +140,7 @@ mod verify_e2e {
|
|||
let v: serde_json::Value = serde_json::from_str(&json).expect("must be valid JSON");
|
||||
|
||||
assert_eq!(v["status"], "Unsupported");
|
||||
assert_eq!(v["reason"], "BackendUnavailable");
|
||||
assert_eq!(v["reason"], "LangUnsupported");
|
||||
assert!(v.get("triggered_payload").is_none(), "triggered_payload must be absent");
|
||||
assert!(v.get("detail").is_none(), "detail must be absent");
|
||||
assert!(v.get("attempts").is_none(), "attempts must be absent (empty vec skipped)");
|
||||
|
|
|
|||
222
tests/marker_uniqueness.rs
Normal file
222
tests/marker_uniqueness.rs
Normal file
|
|
@ -0,0 +1,222 @@
|
|||
//! Marker uniqueness test (§4.1, §17.4).
|
||||
//!
|
||||
//! Asserts that no `NYX_PWN_*` marker from one cap's corpus is a substring
|
||||
//! of any other cap's payloads, expected sanitizer outputs, or §17.4
|
||||
//! redactor patterns.
|
||||
//!
|
||||
//! This prevents oracle collisions where a SQLi payload accidentally
|
||||
//! triggers the CMDi oracle (or vice versa), producing false `Confirmed`
|
||||
//! verdicts.
|
||||
//!
|
||||
//! Tests are gated on `#[cfg(feature = "dynamic")]` because the corpus
|
||||
//! module lives under the `dynamic` feature.
|
||||
|
||||
#![cfg(feature = "dynamic")]
|
||||
|
||||
use nyx_scanner::dynamic::corpus::payloads_for;
|
||||
use nyx_scanner::labels::Cap;
|
||||
|
||||
/// All markers extracted from non-benign payload oracles, tagged with the cap
|
||||
/// they came from.
|
||||
fn oracle_markers() -> Vec<(Cap, &'static str, &'static [u8])> {
|
||||
let mut markers = Vec::new();
|
||||
for cap in [
|
||||
Cap::SQL_QUERY,
|
||||
Cap::CODE_EXEC,
|
||||
Cap::FILE_IO,
|
||||
Cap::SSRF,
|
||||
Cap::HTML_ESCAPE,
|
||||
] {
|
||||
for payload in payloads_for(cap).iter().filter(|p| !p.is_benign) {
|
||||
if let nyx_scanner::dynamic::corpus::Oracle::OutputContains(marker) = payload.oracle {
|
||||
markers.push((cap, marker, payload.bytes));
|
||||
}
|
||||
}
|
||||
}
|
||||
markers
|
||||
}
|
||||
|
||||
/// Redactor patterns from §17.4 (the literal strings that trigger redaction).
|
||||
const REDACTOR_PREFIXES: &[&str] = &[
|
||||
"AKIA",
|
||||
"ghp_",
|
||||
"github_pat_",
|
||||
"ghs_",
|
||||
"ghr_",
|
||||
"xoxa-",
|
||||
"xoxb-",
|
||||
"xoxp-",
|
||||
"xoxr-",
|
||||
"sk-",
|
||||
"-----BEGIN",
|
||||
"password=",
|
||||
"api_key=",
|
||||
"api_token=",
|
||||
"secret=",
|
||||
"Bearer ",
|
||||
];
|
||||
|
||||
/// Expected sanitizer outputs (strings that appear after correct sanitization).
/// These must NOT appear in any payload oracle marker.
///
/// Every entry is the *escaped* form a sanitizer emits (HTML entities,
/// percent-encoding, JSON unicode escapes) — never the raw character, which
/// would trivially overlap with any marker that contains markup.
const EXPECTED_SANITIZED_OUTPUTS: &[&str] = &[
    "&lt;script&gt;",
    "&gt;",
    "&lt;",
    "&amp;",
    "&#x27;",
    "%27",
    "\\u003c",
    "\\u003e",
];
|
||||
|
||||
/// No oracle marker owned by one cap may occur inside a non-benign payload of
/// a different cap.
#[test]
fn no_marker_is_substring_of_another_caps_payload() {
    let all_caps = [
        Cap::SQL_QUERY,
        Cap::CODE_EXEC,
        Cap::FILE_IO,
        Cap::SSRF,
        Cap::HTML_ESCAPE,
    ];
    let markers = oracle_markers();
    let mut violations: Vec<String> = Vec::new();

    for (owner, marker, _) in &markers {
        let needle = marker.as_bytes();

        // §4.1 allows a marker inside its own cap's payloads, so only the
        // other caps are scanned for a collision.
        for other in all_caps.iter().copied().filter(|cap| cap != owner) {
            violations.extend(
                payloads_for(other)
                    .iter()
                    .filter(|payload| !payload.is_benign)
                    .filter(|payload| {
                        payload.bytes.windows(needle.len()).any(|window| window == needle)
                    })
                    .map(|payload| {
                        format!(
                            "marker {:?} (from cap {:?}) appears as substring in payload {:?} (cap {:?})",
                            marker, owner, payload.label, other,
                        )
                    }),
            );
        }
    }

    assert!(
        violations.is_empty(),
        "Marker uniqueness violation(s):\n{}",
        violations.join("\n")
    );
}
|
||||
|
||||
/// A marker and an expected sanitizer output must not contain one another,
/// in either direction.
#[test]
fn no_marker_is_substring_of_sanitized_output() {
    let markers = oracle_markers();
    let mut violations: Vec<String> = Vec::new();

    for &(_, marker, _) in &markers {
        for &sanitized in EXPECTED_SANITIZED_OUTPUTS {
            let overlaps = marker.contains(sanitized) || sanitized.contains(marker);
            if overlaps {
                violations.push(format!(
                    "marker {:?} overlaps with expected sanitized output {:?}",
                    marker, sanitized
                ));
            }
        }
    }

    assert!(
        violations.is_empty(),
        "Marker/sanitizer overlap violation(s):\n{}",
        violations.join("\n")
    );
}
|
||||
|
||||
/// No oracle marker may contain a redactor pattern longer than three bytes.
#[test]
fn no_marker_is_substring_of_redactor_patterns() {
    let markers = oracle_markers();
    let mut violations: Vec<String> = Vec::new();

    for &(_, marker, _) in &markers {
        // Only "marker contains pattern" is checked. Patterns of three bytes
        // or fewer (currently just "sk-") are exempt from the check.
        let hits = REDACTOR_PREFIXES
            .iter()
            .copied()
            .filter(|pattern| pattern.len() > 3 && marker.contains(*pattern));

        violations.extend(hits.map(|pattern| {
            format!(
                "marker {:?} contains redactor pattern {:?}",
                marker, pattern
            )
        }));
    }

    assert!(
        violations.is_empty(),
        "Marker/redactor overlap violation(s):\n{}",
        violations.join("\n")
    );
}
|
||||
|
||||
/// Per §4.1 a marker may be shared by several payloads of one cap, but the
/// same marker string must never be used by two different caps.
#[test]
fn markers_are_unique_across_caps() {
    use std::collections::{HashMap, HashSet};

    let markers = oracle_markers();

    // `Cap` is keyed by its raw bits because it is not used as a hash key
    // directly here.
    let mut caps_by_marker: HashMap<&str, HashSet<u32>> = HashMap::new();
    for (cap, marker, _) in &markers {
        caps_by_marker.entry(*marker).or_default().insert(cap.bits());
    }

    let cross_cap: Vec<(&str, HashSet<u32>)> = caps_by_marker
        .iter()
        .filter_map(|(marker, caps)| (caps.len() > 1).then(|| (*marker, caps.clone())))
        .collect();

    assert!(
        cross_cap.is_empty(),
        "Oracle marker(s) reused across caps (collision risk): {:?}\n\
         Each cap must use a marker that does not appear in any other cap.",
        cross_cap
    );
}
|
||||
|
||||
/// Every non-benign payload with an `OutputContains` oracle must carry a
/// marker that is non-empty and at least four bytes long.
#[test]
fn all_vuln_payloads_have_non_empty_oracle_marker() {
    use nyx_scanner::dynamic::corpus::Oracle;

    let caps = [
        Cap::SQL_QUERY,
        Cap::CODE_EXEC,
        Cap::FILE_IO,
        Cap::SSRF,
        Cap::HTML_ESCAPE,
    ];

    for cap in caps {
        for payload in payloads_for(cap).iter() {
            if payload.is_benign {
                continue;
            }
            // Other oracle kinds have no marker to validate.
            let Oracle::OutputContains(marker) = payload.oracle else {
                continue;
            };

            assert!(
                !marker.is_empty(),
                "payload {:?} for {cap:?} has empty OutputContains marker",
                payload.label
            );
            assert!(
                marker.len() >= 4,
                "payload {:?} for {cap:?} has very short marker {:?} (< 4 chars) — collision risk",
                payload.label, marker
            );
        }
    }
}
|
||||
470
tests/python_fixtures.rs
Normal file
470
tests/python_fixtures.rs
Normal file
|
|
@ -0,0 +1,470 @@
|
|||
//! Python fixture integration tests (§15 Pillar B acceptance gate).
|
||||
//!
|
||||
//! Runs the dynamic verification pipeline against each Python fixture and
|
||||
//! asserts the expected verdict. Requires `--features dynamic` and Python3
|
||||
//! to be available on PATH.
|
||||
//!
|
||||
//! Verdicts under test:
|
||||
//! - positive → Confirmed
|
||||
//! - negative → NotConfirmed
|
||||
//! - unsupported → Unsupported(ConfidenceTooLow) [spec-level rejection]
|
||||
//! - adversarial → Inconclusive(OracleCollisionSuspected)
|
||||
//!
|
||||
//! Tests are skipped when Python3 is not available.
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
mod python_fixture_tests {
|
||||
use nyx_scanner::commands::scan::Diag;
|
||||
use nyx_scanner::dynamic::verify::{verify_finding, VerifyOptions};
|
||||
use nyx_scanner::evidence::{
|
||||
Confidence, Evidence, FlowStep, FlowStepKind, InconclusiveReason, UnsupportedReason,
|
||||
VerifyStatus,
|
||||
};
|
||||
use nyx_scanner::labels::Cap;
|
||||
use nyx_scanner::patterns::{FindingCategory, Severity};
|
||||
use std::path::{Path, PathBuf};
|
||||
use tempfile::TempDir;
|
||||
|
||||
/// Reports whether `python3 --version` can be spawned and exits successfully.
fn python3_available() -> bool {
    matches!(
        std::process::Command::new("python3").arg("--version").output(),
        Ok(output) if output.status.success()
    )
}
|
||||
|
||||
fn fixture_path(name: &str) -> PathBuf {
|
||||
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.join("tests/dynamic_fixtures/python")
|
||||
.join(name)
|
||||
}
|
||||
|
||||
/// Run a fixture and return the verdict.
|
||||
fn run_fixture(fixture: &str, func: &str, cap: Cap, sink_line: u32) -> nyx_scanner::evidence::VerifyResult {
|
||||
let path = fixture_path(fixture);
|
||||
// Copy fixture to a temp dir so the harness can import it.
|
||||
let tmp = TempDir::new().unwrap();
|
||||
let dst = tmp.path().join(Path::new(fixture).file_name().unwrap());
|
||||
std::fs::copy(&path, &dst).expect("fixture file must exist");
|
||||
|
||||
// Set up repro and telemetry to temp dirs to avoid side effects.
|
||||
unsafe {
|
||||
std::env::set_var("NYX_REPRO_BASE", tmp.path().join("repro").to_str().unwrap());
|
||||
std::env::set_var("NYX_TELEMETRY_PATH", tmp.path().join("events.jsonl").to_str().unwrap());
|
||||
}
|
||||
|
||||
// Use the temp dir copy as the fixture path.
|
||||
let diag = make_diag(&dst, func, cap, sink_line);
|
||||
|
||||
// Change CWD to the temp dir so the harness can find the module.
|
||||
let original_dir = std::env::current_dir().ok();
|
||||
let _ = std::env::set_current_dir(tmp.path());
|
||||
|
||||
let opts = VerifyOptions::default();
|
||||
let result = verify_finding(&diag, &opts);
|
||||
|
||||
if let Some(dir) = original_dir {
|
||||
let _ = std::env::set_current_dir(dir);
|
||||
}
|
||||
|
||||
unsafe {
|
||||
std::env::remove_var("NYX_REPRO_BASE");
|
||||
std::env::remove_var("NYX_TELEMETRY_PATH");
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
// ── SQLi fixtures ────────────────────────────────────────────────────────
|
||||
|
||||
/// The string-concatenating SQL fixture must be confirmed exploitable.
#[test]
fn sqli_positive_is_confirmed() {
    if !python3_available() {
        eprintln!("SKIP: python3 not available");
        return;
    }

    let verdict = run_fixture("sqli_positive.py", "login", Cap::SQL_QUERY, 17);
    assert_eq!(
        verdict.status,
        VerifyStatus::Confirmed,
        "sqli_positive must be Confirmed; got {:?} (detail: {:?})",
        verdict.status,
        verdict.detail
    );
}
|
||||
|
||||
/// The parameterized-query SQL fixture must not be confirmed.
#[test]
fn sqli_negative_is_not_confirmed() {
    if !python3_available() {
        eprintln!("SKIP: python3 not available");
        return;
    }

    let verdict = run_fixture("sqli_negative.py", "login", Cap::SQL_QUERY, 12);
    assert_eq!(
        verdict.status,
        VerifyStatus::NotConfirmed,
        "sqli_negative must be NotConfirmed; got {:?}",
        verdict.status
    );
}
|
||||
|
||||
/// A Low-confidence SQL finding is reported as
/// `Unsupported(ConfidenceTooLow)`; no python3 guard is used here.
#[test]
fn sqli_unsupported_is_unsupported() {
    let fixture = fixture_path("sqli_unsupported.py");
    let mut diag = make_diag(&fixture, "find_user", Cap::SQL_QUERY, 10);
    diag.confidence = Some(Confidence::Low);

    let verdict = verify_finding(&diag, &VerifyOptions::default());

    assert_eq!(verdict.status, VerifyStatus::Unsupported);
    assert_eq!(verdict.reason, Some(UnsupportedReason::ConfidenceTooLow));
}
|
||||
|
||||
/// The adversarial SQL fixture prints the oracle marker without any SQL sink,
/// so the verdict must be `Inconclusive(OracleCollisionSuspected)`.
#[test]
fn sqli_adversarial_is_inconclusive_collision() {
    if !python3_available() {
        eprintln!("SKIP: python3 not available");
        return;
    }

    // Sink line 999 does not exist in the fixture, so the sink probe cannot
    // fire even though the output oracle does ("NYX_SQL_CONFIRMED" is printed).
    let verdict = run_fixture("sqli_adversarial.py", "get_value", Cap::SQL_QUERY, 999);

    assert_eq!(
        verdict.status,
        VerifyStatus::Inconclusive,
        "sqli_adversarial must be Inconclusive; got {:?}",
        verdict.status
    );
    assert_eq!(
        verdict.inconclusive_reason,
        Some(InconclusiveReason::OracleCollisionSuspected),
        "adversarial must be OracleCollisionSuspected"
    );
}
|
||||
|
||||
// ── Command injection fixtures ───────────────────────────────────────────
|
||||
|
||||
/// The `shell=True` command-injection fixture must be confirmed exploitable.
#[test]
fn cmdi_positive_is_confirmed() {
    if !python3_available() {
        eprintln!("SKIP: python3 not available");
        return;
    }

    let verdict = run_fixture("cmdi_positive.py", "run_ping", Cap::CODE_EXEC, 13);
    assert_eq!(
        verdict.status,
        VerifyStatus::Confirmed,
        "cmdi_positive must be Confirmed; got {:?} (detail: {:?})",
        verdict.status,
        verdict.detail
    );
}
|
||||
|
||||
/// The command-injection negative fixture (`run_ping`, sink line 17) must be
/// `NotConfirmed`.
#[test]
fn cmdi_negative_is_not_confirmed() {
    let python_missing = !python3_available();
    if python_missing {
        eprintln!("SKIP: python3 not available");
        return;
    }

    let verdict = run_fixture("cmdi_negative.py", "run_ping", Cap::CODE_EXEC, 17);
    let status = &verdict.status;
    assert_eq!(
        *status,
        VerifyStatus::NotConfirmed,
        "cmdi_negative must be NotConfirmed; got {:?}",
        status
    );
}
|
||||
|
||||
/// A low-confidence command-injection diag must come back as `Unsupported`
/// with reason `ConfidenceTooLow`.
#[test]
fn cmdi_unsupported_is_unsupported() {
    // `make_diag` yields a high-confidence diag; override only the confidence.
    let low_conf_diag = Diag {
        confidence: Some(Confidence::Low),
        ..make_diag(
            &fixture_path("cmdi_unsupported.py"),
            "process_request",
            Cap::CODE_EXEC,
            9,
        )
    };

    let verdict = verify_finding(&low_conf_diag, &VerifyOptions::default());

    assert_eq!(verdict.status, VerifyStatus::Unsupported);
    assert_eq!(verdict.reason, Some(UnsupportedReason::ConfidenceTooLow));
}
|
||||
|
||||
/// The adversarial command-injection fixture must be classified
/// `Inconclusive` with `OracleCollisionSuspected`.
#[test]
fn cmdi_adversarial_is_inconclusive_collision() {
    if !python3_available() {
        eprintln!("SKIP: python3 not available");
        return;
    }

    // Sink line 999 is presumably past the end of the fixture, so the sink
    // probe cannot fire (same setup as the SQLi adversarial test).
    // NOTE(review): confirm the fixture is shorter than 999 lines.
    let result = run_fixture("cmdi_adversarial.py", "process_input", Cap::CODE_EXEC, 999);

    // Failure messages carry status and detail so a regression is diagnosable
    // from the test log alone.
    assert_eq!(
        result.status,
        VerifyStatus::Inconclusive,
        "cmdi_adversarial must be Inconclusive; got {:?} (detail: {:?})",
        result.status,
        result.detail
    );
    assert_eq!(
        result.inconclusive_reason,
        Some(InconclusiveReason::OracleCollisionSuspected),
        "cmdi_adversarial must be OracleCollisionSuspected"
    );
}
|
||||
|
||||
// ── File I/O fixtures ────────────────────────────────────────────────────
|
||||
|
||||
/// The file-I/O positive fixture (`read_file`, sink line 11) must be
/// `Confirmed`.
#[test]
fn fileio_positive_is_confirmed() {
    // Skipped with a note on stderr when `python3_available()` reports false.
    if python3_available() {
        let verdict = run_fixture("fileio_positive.py", "read_file", Cap::FILE_IO, 11);
        assert_eq!(
            verdict.status,
            VerifyStatus::Confirmed,
            "fileio_positive must be Confirmed; got {:?} (detail: {:?})",
            verdict.status,
            verdict.detail
        );
    } else {
        eprintln!("SKIP: python3 not available");
    }
}
|
||||
|
||||
/// The file-I/O negative fixture (`read_file`, sink line 18) must be
/// `NotConfirmed`.
#[test]
fn fileio_negative_is_not_confirmed() {
    let python_missing = !python3_available();
    if python_missing {
        eprintln!("SKIP: python3 not available");
        return;
    }

    let verdict = run_fixture("fileio_negative.py", "read_file", Cap::FILE_IO, 18);
    let status = &verdict.status;
    assert_eq!(
        *status,
        VerifyStatus::NotConfirmed,
        "fileio_negative must be NotConfirmed; got {:?}",
        status
    );
}
|
||||
|
||||
/// A low-confidence file-I/O diag must come back as `Unsupported` with reason
/// `ConfidenceTooLow`.
#[test]
fn fileio_unsupported_is_unsupported() {
    let fixture = fixture_path("fileio_unsupported.py");

    // `make_diag` yields a high-confidence diag; override only the confidence.
    let low_conf_diag = Diag {
        confidence: Some(Confidence::Low),
        ..make_diag(&fixture, "read_config", Cap::FILE_IO, 7)
    };

    let verdict = verify_finding(&low_conf_diag, &VerifyOptions::default());

    assert_eq!(verdict.status, VerifyStatus::Unsupported);
    assert_eq!(verdict.reason, Some(UnsupportedReason::ConfidenceTooLow));
}
|
||||
|
||||
/// The adversarial file-I/O fixture must be classified `Inconclusive` with
/// `OracleCollisionSuspected`.
#[test]
fn fileio_adversarial_is_inconclusive_collision() {
    if !python3_available() {
        eprintln!("SKIP: python3 not available");
        return;
    }

    // Sink line 999 is presumably past the end of the fixture, so the sink
    // probe cannot fire (same setup as the SQLi adversarial test).
    // NOTE(review): confirm the fixture is shorter than 999 lines.
    let result = run_fixture("fileio_adversarial.py", "read_file", Cap::FILE_IO, 999);

    // Failure messages carry status and detail so a regression is diagnosable
    // from the test log alone.
    assert_eq!(
        result.status,
        VerifyStatus::Inconclusive,
        "fileio_adversarial must be Inconclusive; got {:?} (detail: {:?})",
        result.status,
        result.detail
    );
    assert_eq!(
        result.inconclusive_reason,
        Some(InconclusiveReason::OracleCollisionSuspected),
        "fileio_adversarial must be OracleCollisionSuspected"
    );
}
|
||||
|
||||
// ── SSRF fixtures ────────────────────────────────────────────────────────
|
||||
|
||||
/// The SSRF positive fixture (`fetch_url`, sink line 11) must be `Confirmed`.
#[test]
fn ssrf_positive_is_confirmed() {
    // Skipped with a note on stderr when `python3_available()` reports false.
    if python3_available() {
        let verdict = run_fixture("ssrf_positive.py", "fetch_url", Cap::SSRF, 11);
        assert_eq!(
            verdict.status,
            VerifyStatus::Confirmed,
            "ssrf_positive must be Confirmed; got {:?} (detail: {:?})",
            verdict.status,
            verdict.detail
        );
    } else {
        eprintln!("SKIP: python3 not available");
    }
}
|
||||
|
||||
/// The SSRF negative fixture (`fetch_url`, sink line 26) must be
/// `NotConfirmed`.
#[test]
fn ssrf_negative_is_not_confirmed() {
    let python_missing = !python3_available();
    if python_missing {
        eprintln!("SKIP: python3 not available");
        return;
    }

    let verdict = run_fixture("ssrf_negative.py", "fetch_url", Cap::SSRF, 26);

    // Blocked by host validation — oracle won't fire.
    let status = &verdict.status;
    assert_eq!(
        *status,
        VerifyStatus::NotConfirmed,
        "ssrf_negative must be NotConfirmed; got {:?}",
        status
    );
}
|
||||
|
||||
/// A low-confidence SSRF diag must come back as `Unsupported` with reason
/// `ConfidenceTooLow`.
#[test]
fn ssrf_unsupported_is_unsupported() {
    let fixture = fixture_path("ssrf_unsupported.py");

    // `make_diag` yields a high-confidence diag; override only the confidence.
    let low_conf_diag = Diag {
        confidence: Some(Confidence::Low),
        ..make_diag(&fixture, "fetch", Cap::SSRF, 9)
    };

    let verdict = verify_finding(&low_conf_diag, &VerifyOptions::default());

    assert_eq!(verdict.status, VerifyStatus::Unsupported);
    assert_eq!(verdict.reason, Some(UnsupportedReason::ConfidenceTooLow));
}
|
||||
|
||||
/// The adversarial SSRF fixture must be classified `Inconclusive` with
/// `OracleCollisionSuspected`.
#[test]
fn ssrf_adversarial_is_inconclusive_collision() {
    if !python3_available() {
        eprintln!("SKIP: python3 not available");
        return;
    }

    // Sink line 999 is presumably past the end of the fixture, so the sink
    // probe cannot fire (same setup as the SQLi adversarial test).
    // NOTE(review): confirm the fixture is shorter than 999 lines.
    let result = run_fixture("ssrf_adversarial.py", "fetch_url", Cap::SSRF, 999);

    // Failure messages carry status and detail so a regression is diagnosable
    // from the test log alone.
    assert_eq!(
        result.status,
        VerifyStatus::Inconclusive,
        "ssrf_adversarial must be Inconclusive; got {:?} (detail: {:?})",
        result.status,
        result.detail
    );
    assert_eq!(
        result.inconclusive_reason,
        Some(InconclusiveReason::OracleCollisionSuspected),
        "ssrf_adversarial must be OracleCollisionSuspected"
    );
}
|
||||
|
||||
// ── XSS fixtures ─────────────────────────────────────────────────────────
|
||||
|
||||
/// The XSS positive fixture (`render_comment`, sink line 9) must be
/// `Confirmed`.
#[test]
fn xss_positive_is_confirmed() {
    // Skipped with a note on stderr when `python3_available()` reports false.
    if python3_available() {
        let verdict = run_fixture("xss_positive.py", "render_comment", Cap::HTML_ESCAPE, 9);
        assert_eq!(
            verdict.status,
            VerifyStatus::Confirmed,
            "xss_positive must be Confirmed; got {:?} (detail: {:?})",
            verdict.status,
            verdict.detail
        );
    } else {
        eprintln!("SKIP: python3 not available");
    }
}
|
||||
|
||||
/// The XSS negative fixture (`render_comment`, sink line 11) must be
/// `NotConfirmed`.
#[test]
fn xss_negative_is_not_confirmed() {
    let python_missing = !python3_available();
    if python_missing {
        eprintln!("SKIP: python3 not available");
        return;
    }

    let verdict = run_fixture("xss_negative.py", "render_comment", Cap::HTML_ESCAPE, 11);
    let status = &verdict.status;
    assert_eq!(
        *status,
        VerifyStatus::NotConfirmed,
        "xss_negative must be NotConfirmed; got {:?}",
        status
    );
}
|
||||
|
||||
/// A low-confidence XSS diag must come back as `Unsupported` with reason
/// `ConfidenceTooLow`.
#[test]
fn xss_unsupported_is_unsupported() {
    let fixture = fixture_path("xss_unsupported.py");

    // `make_diag` yields a high-confidence diag; override only the confidence.
    let low_conf_diag = Diag {
        confidence: Some(Confidence::Low),
        ..make_diag(&fixture, "render", Cap::HTML_ESCAPE, 7)
    };

    let verdict = verify_finding(&low_conf_diag, &VerifyOptions::default());

    assert_eq!(verdict.status, VerifyStatus::Unsupported);
    assert_eq!(verdict.reason, Some(UnsupportedReason::ConfidenceTooLow));
}
|
||||
|
||||
/// The adversarial XSS fixture must be classified `Inconclusive` with
/// `OracleCollisionSuspected`.
#[test]
fn xss_adversarial_is_inconclusive_collision() {
    if !python3_available() {
        eprintln!("SKIP: python3 not available");
        return;
    }

    // Sink line 999 is presumably past the end of the fixture, so the sink
    // probe cannot fire (same setup as the SQLi adversarial test).
    // NOTE(review): confirm the fixture is shorter than 999 lines.
    let result = run_fixture("xss_adversarial.py", "render_comment", Cap::HTML_ESCAPE, 999);

    // Failure messages carry status and detail so a regression is diagnosable
    // from the test log alone.
    assert_eq!(
        result.status,
        VerifyStatus::Inconclusive,
        "xss_adversarial must be Inconclusive; got {:?} (detail: {:?})",
        result.status,
        result.detail
    );
    assert_eq!(
        result.inconclusive_reason,
        Some(InconclusiveReason::OracleCollisionSuspected),
        "xss_adversarial must be OracleCollisionSuspected"
    );
}
|
||||
|
||||
// ── Secrets fixture ───────────────────────────────────────────────────────
|
||||
|
||||
/// Runs verification on a copy of `sqli_positive.py` with `NYX_REPRO_BASE` and
/// `NYX_TELEMETRY_PATH` redirected into a temp dir, then asserts that the
/// telemetry file (if one was written) does not contain the fake AWS key
/// `AKIAFAKETEST00000000`.
///
/// NOTE(review): the assertion is vacuous when no telemetry file is produced;
/// confirm whether `verify_finding` is expected to always write one.
#[test]
fn secret_not_in_telemetry_after_verify() {
    /// Undoes the process-global changes made by this test (working directory
    /// and `NYX_*` env vars) when dropped, including during a panic unwind.
    struct RestoreGlobals {
        original_dir: Option<PathBuf>,
    }

    impl Drop for RestoreGlobals {
        fn drop(&mut self) {
            if let Some(dir) = self.original_dir.take() {
                // Best effort: the original directory may no longer exist.
                let _ = std::env::set_current_dir(dir);
            }
            // SAFETY: `remove_var` is only sound while no other thread accesses
            // the environment. NOTE(review): the test harness runs tests on
            // multiple threads by default, so this relies on no concurrent env
            // access from sibling tests.
            unsafe {
                std::env::remove_var("NYX_REPRO_BASE");
                std::env::remove_var("NYX_TELEMETRY_PATH");
            }
        }
    }

    if !python3_available() {
        eprintln!("SKIP: python3 not available");
        return;
    }

    let tmp = TempDir::new().unwrap();
    let telemetry_path = tmp.path().join("events.jsonl");

    // Everything that touches global state lives in this scope so that cleanup
    // happens before the assertion below and cannot be skipped by a failure.
    let telemetry = {
        let _restore = RestoreGlobals {
            original_dir: std::env::current_dir().ok(),
        };

        // SAFETY: see the note in `RestoreGlobals::drop`; the same caveat about
        // concurrent environment access applies to `set_var`.
        unsafe {
            // Paths are passed as OS strings, so non-UTF-8 temp paths work too.
            std::env::set_var("NYX_REPRO_BASE", tmp.path().join("repro"));
            std::env::set_var("NYX_TELEMETRY_PATH", &telemetry_path);
        }

        let fixture = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("tests/dynamic_fixtures/python/sqli_positive.py");
        let tmp_fix = tmp.path().join("sqli_positive.py");
        // Without the fixture the verification below would be meaningless, so
        // fail loudly instead of ignoring the copy error.
        std::fs::copy(&fixture, &tmp_fix).expect("fixture must be copyable into the temp dir");

        // Best effort, as in the original test; verification still runs against
        // the absolute fixture path if the directory change fails.
        let _ = std::env::set_current_dir(tmp.path());

        let diag = make_diag(&tmp_fix, "login", Cap::SQL_QUERY, 17);
        let opts = VerifyOptions::default();
        let _ = verify_finding(&diag, &opts);

        telemetry_path
            .exists()
            .then(|| std::fs::read_to_string(&telemetry_path).unwrap_or_default())
    };

    // Telemetry must not contain the fake AWS key.
    if let Some(content) = telemetry {
        assert!(
            !content.contains("AKIAFAKETEST00000000"),
            "telemetry must not contain fake AWS key; got: {content}"
        );
    }
}
|
||||
|
||||
fn make_diag(path: &Path, func: &str, cap: Cap, sink_line: u32) -> Diag {
|
||||
let path_str = path.to_string_lossy().into_owned();
|
||||
let evidence = Evidence {
|
||||
flow_steps: vec![
|
||||
FlowStep {
|
||||
step: 1,
|
||||
kind: FlowStepKind::Source,
|
||||
file: path_str.clone(),
|
||||
line: 1,
|
||||
col: 0,
|
||||
snippet: None,
|
||||
variable: Some("payload".into()),
|
||||
callee: None,
|
||||
function: Some(func.to_owned()),
|
||||
is_cross_file: false,
|
||||
},
|
||||
FlowStep {
|
||||
step: 2,
|
||||
kind: FlowStepKind::Sink,
|
||||
file: path_str.clone(),
|
||||
line: sink_line,
|
||||
col: 4,
|
||||
snippet: None,
|
||||
variable: None,
|
||||
callee: None,
|
||||
function: None,
|
||||
is_cross_file: false,
|
||||
},
|
||||
],
|
||||
sink_caps: cap.bits(),
|
||||
..Default::default()
|
||||
};
|
||||
Diag {
|
||||
path: path_str,
|
||||
line: sink_line as usize,
|
||||
col: 0,
|
||||
severity: Severity::High,
|
||||
id: "taint-unsanitised-flow".into(),
|
||||
category: FindingCategory::Security,
|
||||
path_validated: false,
|
||||
guard_kind: None,
|
||||
message: None,
|
||||
labels: vec![],
|
||||
confidence: Some(Confidence::High),
|
||||
evidence: Some(evidence),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
rollup: None,
|
||||
finding_id: String::new(),
|
||||
alternative_finding_ids: vec![],
|
||||
stable_hash: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
175
tests/repro_determinism.rs
Normal file
175
tests/repro_determinism.rs
Normal file
|
|
@ -0,0 +1,175 @@
|
|||
//! Repro determinism test (§18.2).
|
||||
//!
|
||||
//! For every `Confirmed` fixture: the repro artifact `expected/outcome.json`
|
||||
//! produced during verification must be byte-identical when regenerated from
|
||||
//! the repro bundle.
|
||||
//!
|
||||
//! Tests are gated on `#[cfg(feature = "dynamic")]` and Python availability.
|
||||
//! They are also skipped if no `Confirmed` fixtures have been produced yet
|
||||
//! (trivially passes — zero assertions).
|
||||
|
||||
#[cfg(feature = "dynamic")]
mod repro_determinism_tests {
    use nyx_scanner::dynamic::repro;
    use nyx_scanner::dynamic::sandbox::{SandboxOptions, SandboxOutcome};
    use nyx_scanner::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot};
    use nyx_scanner::evidence::{AttemptSummary, VerifyResult, VerifyStatus};
    use nyx_scanner::labels::Cap;
    use nyx_scanner::symbol::Lang;
    use std::sync::{Mutex, MutexGuard};
    use std::time::Duration;
    use tempfile::TempDir;

    /// Serializes the tests in this module: they all mutate the process-wide
    /// `NYX_REPRO_BASE` variable, and the test harness runs tests in parallel.
    static REPRO_ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Points `NYX_REPRO_BASE` at a fresh temp dir while alive.
    ///
    /// Dropping the guard removes the variable, deletes the temp dir and then
    /// releases the lock. This also happens when a test panics, so a failing
    /// test cannot leak the override into the next one.
    struct ReproBase {
        _dir: TempDir,
        _lock: MutexGuard<'static, ()>,
    }

    impl ReproBase {
        fn new() -> Self {
            // A test that panicked while holding the lock poisons it; the
            // guarded value is `()`, so recovering the guard is always fine.
            let lock = REPRO_ENV_LOCK
                .lock()
                .unwrap_or_else(|poisoned| poisoned.into_inner());
            let dir = TempDir::new().expect("temp dir for repro base must be creatable");
            // SAFETY: every env mutation in this module happens while holding
            // `REPRO_ENV_LOCK`. NOTE(review): this assumes no other thread in
            // the test binary accesses the environment concurrently.
            unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path()) };
            Self {
                _dir: dir,
                _lock: lock,
            }
        }
    }

    impl Drop for ReproBase {
        fn drop(&mut self) {
            // SAFETY: `_lock` is still held here; fields are dropped only after
            // this method returns.
            unsafe { std::env::remove_var("NYX_REPRO_BASE") };
        }
    }

    /// Builds a Python `login` function spec targeting a SQL sink at
    /// `app.py:10`, with the given `spec_hash`.
    fn make_confirmed_spec(spec_hash: &str) -> HarnessSpec {
        HarnessSpec {
            finding_id: "determinism00001".into(),
            entry_file: "app.py".into(),
            entry_name: "login".into(),
            entry_kind: EntryKind::Function,
            lang: Lang::Python,
            toolchain_id: "python-3".into(),
            payload_slot: PayloadSlot::Param(0),
            expected_cap: Cap::SQL_QUERY,
            constraint_hints: vec![],
            sink_file: "app.py".into(),
            sink_line: 10,
            spec_hash: spec_hash.to_owned(),
        }
    }

    /// Builds a successful sandbox outcome whose stdout contains the
    /// `NYX_SQL_CONFIRMED` marker and whose sink was hit.
    fn make_confirmed_outcome() -> SandboxOutcome {
        SandboxOutcome {
            exit_code: Some(0),
            stdout: b"NYX_SQL_CONFIRMED\nsome extra output".to_vec(),
            stderr: vec![],
            timed_out: false,
            oob_callback_seen: false,
            sink_hit: true,
            duration: Duration::from_millis(150),
        }
    }

    /// Builds a `Confirmed` verdict for `finding_id` with a single triggered
    /// `sqli-union-nyx` attempt.
    fn make_confirmed_verdict(finding_id: &str) -> VerifyResult {
        VerifyResult {
            finding_id: finding_id.to_owned(),
            status: VerifyStatus::Confirmed,
            triggered_payload: Some("sqli-union-nyx".into()),
            reason: None,
            inconclusive_reason: None,
            detail: None,
            attempts: vec![AttemptSummary {
                payload_label: "sqli-union-nyx".into(),
                exit_code: Some(0),
                timed_out: false,
                triggered: true,
                sink_hit: true,
            }],
            toolchain_match: Some("exact".into()),
        }
    }

    /// Writing the same repro bundle twice must produce a byte-identical
    /// `expected/outcome.json`.
    #[test]
    fn confirmed_repro_is_deterministic() {
        // Override repro base to a temp dir for the duration of the test.
        let _repro_base = ReproBase::new();

        let spec = make_confirmed_spec("determ0000000001");
        let opts = SandboxOptions::default();
        let outcome = make_confirmed_outcome();
        let verdict = make_confirmed_verdict("determinism00001");

        // Both writes must use exactly the same inputs, so share one call site.
        let write_bundle = || {
            repro::write(
                &spec,
                &opts,
                &outcome,
                &verdict,
                "# harness source v1\n",
                "def login(x): pass\n",
                b"' UNION SELECT 'NYX_SQL_CONFIRMED'--",
                "sqli-union-nyx",
                None,
            )
        };

        // Write repro bundle (first time).
        let artifact1 = write_bundle().expect("first repro write must succeed");
        let outcome_json_1 =
            std::fs::read_to_string(artifact1.root.join("expected/outcome.json"))
                .expect("outcome.json must exist after first write");

        // Remove the existing bundle first to simulate a fresh run.
        std::fs::remove_dir_all(&artifact1.root).unwrap();

        // Write repro bundle (second time, same inputs).
        let artifact2 = write_bundle().expect("second repro write must succeed");
        let outcome_json_2 =
            std::fs::read_to_string(artifact2.root.join("expected/outcome.json"))
                .expect("outcome.json must exist after second write");

        assert_eq!(
            outcome_json_1, outcome_json_2,
            "outcome.json must be byte-identical across two runs with the same inputs"
        );
    }

    /// Verify that redacted outcome.json does not contain the secret.
    #[test]
    fn outcome_json_secrets_are_redacted() {
        let _repro_base = ReproBase::new();

        let spec = make_confirmed_spec("determ0000000002");
        let opts = SandboxOptions::default();
        let mut outcome = make_confirmed_outcome();
        // Inject a fake AWS key into stdout.
        outcome.stdout = b"AKIAFAKETEST00000000 result ok NYX_SQL_CONFIRMED".to_vec();
        let verdict = make_confirmed_verdict("determinism00002");

        let artifact = repro::write(
            &spec, &opts, &outcome, &verdict,
            "# harness", "# entry", b"payload", "label", None,
        )
        .expect("repro write must succeed");

        let outcome_json =
            std::fs::read_to_string(artifact.root.join("expected/outcome.json")).unwrap();

        assert!(
            !outcome_json.contains("AKIAFAKETEST00000000"),
            "AWS key must be redacted from outcome.json; got: {outcome_json}"
        );
    }

    /// Verify verdict.json parses as JSON and carries the verdict's status and
    /// finding id.
    #[test]
    fn verdict_json_is_valid() {
        let _repro_base = ReproBase::new();

        let spec = make_confirmed_spec("determ0000000003");
        let opts = SandboxOptions::default();
        let outcome = make_confirmed_outcome();
        let verdict = make_confirmed_verdict("determinism00003");

        let artifact = repro::write(
            &spec, &opts, &outcome, &verdict,
            "# harness", "# entry", b"payload", "label", None,
        )
        .expect("repro write must succeed");

        let verdict_json =
            std::fs::read_to_string(artifact.root.join("expected/verdict.json")).unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&verdict_json).unwrap();

        assert_eq!(parsed["status"], "Confirmed");
        assert_eq!(parsed["finding_id"], "determinism00003");
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue