mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
443 lines
14 KiB
Rust
443 lines
14 KiB
Rust
//! Repro artifact writer (§18.1).
|
|
//!
|
|
//! Emits a self-contained repro bundle at:
|
|
//! `~/.cache/nyx/dynamic/repro/{spec_hash}/`
|
|
//!
|
|
//! Layout:
|
|
//! ```text
|
|
//! {spec_hash}/
|
|
//! manifest.json
|
|
//! entry/
|
|
//! extracted_source.{ext}
|
|
//! harness/
|
|
//! harness.py (language-specific)
|
|
//! Dockerfile.harness
|
|
//! payload/
|
|
//! payload.bin
|
|
//! payload.meta.json
|
|
//! sandbox/
|
|
//! options.json
|
|
//! env.allowlist.json
|
|
//! expected/
|
|
//! outcome.json (redacted SandboxOutcome)
|
|
//! verdict.json
|
|
//! reproduce.sh
|
|
//! README.md
|
|
//! ```
|
|
|
|
use crate::dynamic::sandbox::{SandboxOptions, SandboxOutcome};
|
|
use crate::dynamic::spec::HarnessSpec;
|
|
use crate::evidence::VerifyResult;
|
|
use crate::utils::redact;
|
|
use directories::ProjectDirs;
|
|
use std::fs;
|
|
use std::path::{Path, PathBuf};
|
|
|
|
/// Emitted by [`write`] on success.
|
|
#[derive(Debug, Clone)]
|
|
pub struct ReproArtifact {
|
|
/// Absolute path to the repro bundle root.
|
|
pub root: PathBuf,
|
|
/// Relative symlink from the project cache directory.
|
|
pub symlink: Option<PathBuf>,
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub enum ReproError {
|
|
Io(std::io::Error),
|
|
Json(serde_json::Error),
|
|
}
|
|
|
|
impl From<std::io::Error> for ReproError {
|
|
fn from(e: std::io::Error) -> Self {
|
|
ReproError::Io(e)
|
|
}
|
|
}
|
|
|
|
impl From<serde_json::Error> for ReproError {
|
|
fn from(e: serde_json::Error) -> Self {
|
|
ReproError::Json(e)
|
|
}
|
|
}
|
|
|
|
impl std::fmt::Display for ReproError {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
match self {
|
|
ReproError::Io(e) => write!(f, "I/O: {e}"),
|
|
ReproError::Json(e) => write!(f, "JSON: {e}"),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Write the repro bundle for a verified finding.
|
|
///
|
|
/// `harness_source` is the generated harness source code.
|
|
/// `entry_source` is the extracted entry-point source (may be empty).
|
|
pub fn write(
|
|
spec: &HarnessSpec,
|
|
opts: &SandboxOptions,
|
|
outcome: &SandboxOutcome,
|
|
verdict: &VerifyResult,
|
|
harness_source: &str,
|
|
entry_source: &str,
|
|
payload_bytes: &[u8],
|
|
payload_label: &str,
|
|
project_root: Option<&Path>,
|
|
) -> Result<ReproArtifact, ReproError> {
|
|
let root = repro_root(&spec.spec_hash)?;
|
|
|
|
// Create directory tree
|
|
for sub in &["entry", "harness", "payload", "sandbox", "expected"] {
|
|
fs::create_dir_all(root.join(sub))?;
|
|
}
|
|
|
|
// manifest.json
|
|
let manifest = serde_json::json!({
|
|
"spec_hash": spec.spec_hash,
|
|
"finding_id": spec.finding_id,
|
|
"lang": format!("{:?}", spec.lang).to_ascii_lowercase(),
|
|
"toolchain_id": spec.toolchain_id,
|
|
"entry_file": spec.entry_file,
|
|
"entry_name": spec.entry_name,
|
|
"sink_file": spec.sink_file,
|
|
"sink_line": spec.sink_line,
|
|
"spec_format_version": crate::dynamic::spec::SPEC_FORMAT_VERSION,
|
|
"corpus_version": crate::dynamic::corpus::CORPUS_VERSION,
|
|
});
|
|
write_json(&root.join("manifest.json"), &manifest)?;
|
|
|
|
// entry/extracted_source.<ext>
|
|
let ext = source_ext_for_lang(&spec.lang);
|
|
let entry_path = root.join("entry").join(format!("extracted_source.{ext}"));
|
|
fs::write(&entry_path, entry_source.as_bytes())?;
|
|
|
|
// harness/harness.{ext} (or for Rust: harness/src/main.rs)
|
|
use crate::symbol::Lang;
|
|
let harness_path = if matches!(spec.lang, Lang::Rust) {
|
|
let src_dir = root.join("harness").join("src");
|
|
fs::create_dir_all(&src_dir)?;
|
|
// Also write Cargo.toml for Rust repro bundles.
|
|
let cargo_content = crate::dynamic::lang::rust::generate_cargo_toml(spec.expected_cap);
|
|
fs::write(root.join("harness").join("Cargo.toml"), cargo_content.as_bytes())?;
|
|
src_dir.join("main.rs")
|
|
} else {
|
|
root.join("harness").join(format!("harness.{ext}"))
|
|
};
|
|
fs::write(&harness_path, harness_source.as_bytes())?;
|
|
|
|
// harness/Dockerfile.harness
|
|
let dockerfile = dockerfile_for_spec(spec);
|
|
fs::write(root.join("harness").join("Dockerfile.harness"), dockerfile.as_bytes())?;
|
|
|
|
// payload/payload.bin + payload.meta.json
|
|
fs::write(root.join("payload").join("payload.bin"), payload_bytes)?;
|
|
let payload_meta = serde_json::json!({
|
|
"label": payload_label,
|
|
"len": payload_bytes.len(),
|
|
"encoding": "raw",
|
|
});
|
|
write_json(&root.join("payload").join("payload.meta.json"), &payload_meta)?;
|
|
|
|
// sandbox/options.json
|
|
let sandbox_opts = serde_json::json!({
|
|
"timeout_secs": opts.timeout.as_secs_f64(),
|
|
"memory_mib": opts.memory_mib,
|
|
"backend": format!("{:?}", opts.backend),
|
|
});
|
|
write_json(&root.join("sandbox").join("options.json"), &sandbox_opts)?;
|
|
|
|
// sandbox/env.allowlist.json
|
|
let env_list: Vec<&str> = opts.env_passthrough.iter().map(|s| s.as_str()).collect();
|
|
write_json(&root.join("sandbox").join("env.allowlist.json"), &serde_json::json!(env_list))?;
|
|
|
|
// expected/outcome.json — redacted
|
|
let redacted_stdout = redact::redact(&outcome.stdout);
|
|
let redacted_stderr = redact::redact(&outcome.stderr);
|
|
// duration_ms is omitted from the persisted outcome so that outcome.json is
|
|
// byte-identical when regenerated from the repro bundle (§18.2 determinism).
|
|
// Wall-clock timing goes to telemetry only.
|
|
let outcome_json = serde_json::json!({
|
|
"exit_code": outcome.exit_code,
|
|
"stdout": String::from_utf8_lossy(&redacted_stdout),
|
|
"stderr": String::from_utf8_lossy(&redacted_stderr),
|
|
"timed_out": outcome.timed_out,
|
|
"oob_callback_seen": outcome.oob_callback_seen,
|
|
"sink_hit": outcome.sink_hit,
|
|
});
|
|
write_json(&root.join("expected").join("outcome.json"), &outcome_json)?;
|
|
|
|
// expected/verdict.json
|
|
write_json(&root.join("expected").join("verdict.json"), verdict)?;
|
|
|
|
// reproduce.sh
|
|
let reproduce_sh = reproduce_script(spec, payload_label);
|
|
let reproduce_path = root.join("reproduce.sh");
|
|
fs::write(&reproduce_path, reproduce_sh.as_bytes())?;
|
|
#[cfg(unix)]
|
|
{
|
|
use std::os::unix::fs::PermissionsExt;
|
|
fs::set_permissions(&reproduce_path, fs::Permissions::from_mode(0o755))?;
|
|
}
|
|
|
|
// README.md
|
|
let readme = repro_readme(spec, verdict);
|
|
fs::write(root.join("README.md"), readme.as_bytes())?;
|
|
|
|
// Per-project symlink (§12 Q1)
|
|
let symlink = if let Some(proj_root) = project_root {
|
|
let link_dir = proj_root.join(".nyx").join("dynamic-cache").join("symlinks");
|
|
let _ = fs::create_dir_all(&link_dir);
|
|
let link_path = link_dir.join(&spec.spec_hash);
|
|
let _ = create_symlink(&root, &link_path);
|
|
Some(link_path)
|
|
} else {
|
|
None
|
|
};
|
|
|
|
Ok(ReproArtifact { root, symlink })
|
|
}
|
|
|
|
fn repro_root(spec_hash: &str) -> Result<PathBuf, ReproError> {
|
|
// Respect test override.
|
|
let base = if let Ok(p) = std::env::var("NYX_REPRO_BASE") {
|
|
PathBuf::from(p)
|
|
} else {
|
|
let dirs = ProjectDirs::from("", "", "nyx")
|
|
.ok_or_else(|| ReproError::Io(std::io::Error::new(
|
|
std::io::ErrorKind::NotFound,
|
|
"cannot determine cache dir",
|
|
)))?;
|
|
dirs.cache_dir().join("dynamic").join("repro")
|
|
};
|
|
|
|
let root = base.join(spec_hash);
|
|
fs::create_dir_all(&root)?;
|
|
#[cfg(unix)]
|
|
{
|
|
use std::os::unix::fs::PermissionsExt;
|
|
fs::set_permissions(&root, fs::Permissions::from_mode(0o700))?;
|
|
}
|
|
Ok(root)
|
|
}
|
|
|
|
fn write_json(path: &Path, value: &impl serde::Serialize) -> Result<(), ReproError> {
|
|
let json = serde_json::to_string_pretty(value)?;
|
|
fs::write(path, json.as_bytes())?;
|
|
Ok(())
|
|
}
|
|
|
|
fn source_ext_for_lang(lang: &crate::symbol::Lang) -> &'static str {
|
|
use crate::symbol::Lang;
|
|
match lang {
|
|
Lang::Python => "py",
|
|
Lang::JavaScript | Lang::TypeScript => "js",
|
|
Lang::Rust => "rs",
|
|
Lang::Go => "go",
|
|
Lang::Java => "java",
|
|
Lang::Php => "php",
|
|
Lang::Ruby => "rb",
|
|
Lang::C => "c",
|
|
Lang::Cpp => "cpp",
|
|
}
|
|
}
|
|
|
|
fn dockerfile_for_spec(spec: &HarnessSpec) -> String {
|
|
use crate::symbol::Lang;
|
|
match spec.lang {
|
|
Lang::Rust => {
|
|
let toolchain = spec.toolchain_id.strip_prefix("rust-").unwrap_or("stable");
|
|
// Multi-stage: build with Rust, run the binary directly.
|
|
format!(
|
|
"FROM rust:{toolchain}-slim AS builder\n\
|
|
WORKDIR /harness\n\
|
|
COPY Cargo.toml Cargo.lock* ./\n\
|
|
COPY src/ src/\n\
|
|
RUN cargo build --release\n\n\
|
|
FROM debian:bookworm-slim\n\
|
|
WORKDIR /harness\n\
|
|
COPY --from=builder /harness/target/release/nyx_harness .\n\
|
|
CMD [\"/harness/nyx_harness\"]\n"
|
|
)
|
|
}
|
|
Lang::Python => {
|
|
let image = format!("python:{}", spec.toolchain_id.strip_prefix("python-").unwrap_or("3"));
|
|
format!(
|
|
"FROM {image}\nWORKDIR /harness\nCOPY harness.py .\nCMD [\"python3\", \"harness.py\"]\n"
|
|
)
|
|
}
|
|
_ => {
|
|
format!("# Unsupported language: {:?}\nFROM ubuntu:latest\n", spec.lang)
|
|
}
|
|
}
|
|
}
|
|
|
|
fn reproduce_script(spec: &HarnessSpec, payload_label: &str) -> String {
|
|
use crate::symbol::Lang;
|
|
let run_cmd = match spec.lang {
|
|
Lang::Rust => {
|
|
"NYX_PAYLOAD=\"$(cat payload/payload.bin)\" ./harness/nyx_harness".to_owned()
|
|
}
|
|
_ => {
|
|
"NYX_PAYLOAD=\"$(cat payload/payload.bin)\" python3 harness/harness.py".to_owned()
|
|
}
|
|
};
|
|
format!(
|
|
"#!/bin/sh\n\
|
|
# Repro script for finding {finding_id} ({payload_label})\n\
|
|
set -e\n\
|
|
SCRIPT_DIR=\"$(cd \"$(dirname \"$0\")\" && pwd)\"\n\
|
|
cd \"$SCRIPT_DIR\"\n\
|
|
{run_cmd}\n",
|
|
finding_id = spec.finding_id,
|
|
payload_label = payload_label,
|
|
run_cmd = run_cmd,
|
|
)
|
|
}
|
|
|
|
fn repro_readme(spec: &HarnessSpec, verdict: &VerifyResult) -> String {
|
|
format!(
|
|
"# Nyx Dynamic Repro — {finding_id}\n\n\
|
|
**Status**: {status:?} \n\
|
|
**Cap**: {cap} \n\
|
|
**Entry**: `{entry}` \n\n\
|
|
## Reproduce\n\n\
|
|
```sh\n./reproduce.sh\n```\n\n\
|
|
The expected outcome is in `expected/outcome.json`.\n",
|
|
finding_id = spec.finding_id,
|
|
status = verdict.status,
|
|
cap = format!("{:?}", spec.expected_cap),
|
|
entry = spec.entry_name,
|
|
)
|
|
}
|
|
|
|
#[cfg(unix)]
|
|
fn create_symlink(target: &Path, link: &Path) -> std::io::Result<()> {
|
|
if link.exists() {
|
|
fs::remove_file(link)?;
|
|
}
|
|
std::os::unix::fs::symlink(target, link)
|
|
}
|
|
|
|
#[cfg(not(unix))]
|
|
fn create_symlink(_target: &Path, _link: &Path) -> std::io::Result<()> {
|
|
Ok(())
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
use crate::dynamic::sandbox::SandboxBackend;
|
|
use crate::dynamic::spec::{EntryKind, PayloadSlot};
|
|
use crate::evidence::{AttemptSummary, VerifyStatus};
|
|
use crate::labels::Cap;
|
|
use crate::symbol::Lang;
|
|
use std::time::Duration;
|
|
use tempfile::TempDir;
|
|
|
|
fn make_spec() -> HarnessSpec {
|
|
HarnessSpec {
|
|
finding_id: "0000000000000002".into(),
|
|
entry_file: "app.py".into(),
|
|
entry_name: "login".into(),
|
|
entry_kind: EntryKind::Function,
|
|
lang: Lang::Python,
|
|
toolchain_id: "python-3.11".into(),
|
|
payload_slot: PayloadSlot::Param(0),
|
|
expected_cap: Cap::SQL_QUERY,
|
|
constraint_hints: vec![],
|
|
sink_file: "app.py".into(),
|
|
sink_line: 10,
|
|
spec_hash: "cafecafecafe0001".into(),
|
|
}
|
|
}
|
|
|
|
fn make_outcome() -> SandboxOutcome {
|
|
SandboxOutcome {
|
|
exit_code: Some(0),
|
|
stdout: b"__NYX_SINK_HIT__\nquery: SELECT 1=1".to_vec(),
|
|
stderr: vec![],
|
|
timed_out: false,
|
|
oob_callback_seen: false,
|
|
sink_hit: true,
|
|
duration: Duration::from_millis(250),
|
|
}
|
|
}
|
|
|
|
fn make_verdict() -> VerifyResult {
|
|
VerifyResult {
|
|
finding_id: "0000000000000002".into(),
|
|
status: VerifyStatus::Confirmed,
|
|
triggered_payload: Some("sqli-or-1".into()),
|
|
reason: None,
|
|
inconclusive_reason: None,
|
|
detail: None,
|
|
attempts: vec![AttemptSummary {
|
|
payload_label: "sqli-or-1".into(),
|
|
exit_code: Some(0),
|
|
timed_out: false,
|
|
triggered: true,
|
|
sink_hit: true,
|
|
}],
|
|
toolchain_match: Some("exact".into()),
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn write_creates_expected_layout() {
|
|
let dir = TempDir::new().unwrap();
|
|
unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };
|
|
|
|
let spec = make_spec();
|
|
let opts = SandboxOptions {
|
|
backend: SandboxBackend::Process,
|
|
..Default::default()
|
|
};
|
|
let outcome = make_outcome();
|
|
let verdict = make_verdict();
|
|
|
|
let artifact = write(
|
|
&spec,
|
|
&opts,
|
|
&outcome,
|
|
&verdict,
|
|
"import sys\n# harness code\n",
|
|
"def login(x): pass\n",
|
|
b"' OR 1=1-- NYX",
|
|
"sqli-or-1",
|
|
None,
|
|
)
|
|
.unwrap();
|
|
|
|
assert!(artifact.root.join("manifest.json").exists());
|
|
assert!(artifact.root.join("entry/extracted_source.py").exists());
|
|
assert!(artifact.root.join("harness/harness.py").exists());
|
|
assert!(artifact.root.join("payload/payload.bin").exists());
|
|
assert!(artifact.root.join("expected/outcome.json").exists());
|
|
assert!(artifact.root.join("expected/verdict.json").exists());
|
|
assert!(artifact.root.join("reproduce.sh").exists());
|
|
|
|
unsafe { std::env::remove_var("NYX_REPRO_BASE") };
|
|
}
|
|
|
|
#[test]
|
|
fn outcome_json_redacts_secrets() {
|
|
let dir = TempDir::new().unwrap();
|
|
unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };
|
|
|
|
let spec = make_spec();
|
|
let opts = SandboxOptions::default();
|
|
let mut outcome = make_outcome();
|
|
outcome.stdout = b"key=AKIAFAKETEST00000000 result=ok".to_vec();
|
|
let verdict = make_verdict();
|
|
|
|
let artifact = write(
|
|
&spec, &opts, &outcome, &verdict,
|
|
"# harness", "# entry", b"payload", "label", None,
|
|
).unwrap();
|
|
|
|
let outcome_json = std::fs::read_to_string(artifact.root.join("expected/outcome.json")).unwrap();
|
|
assert!(!outcome_json.contains("AKIAFAKETEST00000000"), "AWS key must be redacted in outcome.json");
|
|
|
|
unsafe { std::env::remove_var("NYX_REPRO_BASE") };
|
|
}
|
|
}
|