//! C++ harness emitter. //! //! Phase 16 (Track B Rust + C/C++ vertical) replaces the stub body with //! dispatch over [`CppShape`] — `main(int argc, char *argv[])`, libFuzzer //! `LLVMFuzzerTestOneInput`, and free functions with `(const char*, //! size_t)` or `(const std::string&)` signatures. //! //! File layout in workdir: //! ```text //! main.cpp ← harness entry point (generated, includes entry.cpp) //! entry.cpp ← user entry source (copied from project) //! CMakeLists.txt ← optional, generated for reference //! ``` //! //! Build step: `prepare_cpp()` in `build_sandbox.rs` runs //! `g++ -O0 -std=c++17 -o nyx_harness main.cpp` in the workdir. use crate::dynamic::lang::{ChainStepHarness, HarnessSource, LangEmitter}; use crate::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot}; use crate::evidence::UnsupportedReason; use std::path::PathBuf; /// Zero-sized [`LangEmitter`] handle for C++. pub struct CppEmitter; /// Entry kinds the C++ emitter understands after Phase 16. const SUPPORTED: &[EntryKind] = &[ EntryKind::Function, EntryKind::CliSubcommand, EntryKind::LibraryApi, ]; // ── Phase 16: shape detector ───────────────────────────────────────────────── /// Concrete per-file shape resolved by reading the entry source. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum CppShape { /// `int main(int argc, char *argv[])`. MainArgv, /// libFuzzer-style: `int LLVMFuzzerTestOneInput(const uint8_t *, size_t)`. LibfuzzerEntry, /// Free function with `(const char *, size_t)` or `(const std::string&)` /// signature. 
FreeFn, } impl CppShape { pub fn detect(spec: &HarnessSpec, source: &str) -> Self { let entry = spec.entry_name.as_str(); let kind = spec.entry_kind; let has_main_argv = (source.contains("int main(") || source.contains("int main (")) && (source.contains("argc") || source.contains("char *argv") || source.contains("char* argv") || source.contains("char **argv")); let has_libfuzzer = source.contains("LLVMFuzzerTestOneInput") || entry == "LLVMFuzzerTestOneInput"; if has_libfuzzer { return Self::LibfuzzerEntry; } if entry == "main" || has_main_argv { return Self::MainArgv; } match kind { EntryKind::CliSubcommand => Self::MainArgv, EntryKind::LibraryApi => Self::LibfuzzerEntry, _ => Self::FreeFn, } } } pub fn detect_shape(spec: &HarnessSpec) -> CppShape { let src = read_entry_source(&spec.entry_file); CppShape::detect(spec, &src) } fn read_entry_source(entry_file: &str) -> String { let candidates = [PathBuf::from(entry_file), PathBuf::from(".").join(entry_file)]; for path in &candidates { if let Ok(s) = std::fs::read_to_string(path) { return s; } } String::new() } /// Source of the `__nyx_probe` shim for the (future) C++ harness /// (Phase 06 — Track C.1). Uses `` + variadic templates; the /// JSON-emit format matches [`crate::dynamic::probe::SinkProbe`]. 
pub fn probe_shim() -> &'static str {
    // The returned text is C++17 (fold expression in `__nyx_probe`).  Every
    // value interpolated into a JSON string goes through `__nyx_esc`, which
    // also \u-escapes control characters so each emitted line stays valid
    // JSON even when an argument or environment value carries raw bytes
    // below 0x20.
    r#"
/* ── __nyx_probe shim (Phase 06 — Track C.1, Phase 08 — Track C.4 + C.5) ── */
#include <algorithm>
#include <cctype>
#include <chrono>
#include <csignal>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <unistd.h>

#ifndef __NYX_PAYLOAD_LIMIT
#define __NYX_PAYLOAD_LIMIT (16 * 1024)
#endif
#define __NYX_REDACTED "<redacted>"

extern char **environ;

static const char *__nyx_deny_substrings_cpp[] = {
    "TOKEN","SECRET","PASSWORD","PASSWD","API_KEY","APIKEY","PRIVATE_KEY",
    "CREDENTIAL","SESSION","COOKIE","AUTH","BEARER","AWS_ACCESS","AWS_SESSION",
    "GH_TOKEN","GITHUB_TOKEN","NPM_TOKEN","PYPI_TOKEN","DOCKER_PASS",
};

/* Append `v` to `out` as the body of a JSON string (no surrounding quotes). */
inline void __nyx_esc(std::ostringstream &out, const std::string &v) {
    static const char hex[] = "0123456789abcdef";
    for (char c : v) {
        switch (c) {
            case '"':  out << "\\\""; break;
            case '\\': out << "\\\\"; break;
            case '\n': out << "\\n";  break;
            case '\r': out << "\\r";  break;
            case '\t': out << "\\t";  break;
            default: {
                const unsigned char uc = static_cast<unsigned char>(c);
                if (uc < 0x20) {
                    /* JSON forbids raw control characters inside strings. */
                    out << "\\u00" << hex[uc >> 4] << hex[uc & 0x0F];
                } else {
                    out << c;
                }
            }
        }
    }
}

/* Emit one probe argument as a {"kind":"String","value":...} object. */
inline void __nyx_probe_one(std::ostringstream &out, const std::string &v) {
    out << "{\"kind\":\"String\",\"value\":\"";
    __nyx_esc(out, v);
    out << "\"}";
}

/* Build the witness object: redacted environment snapshot, cwd, payload
 * bytes (capped at __NYX_PAYLOAD_LIMIT), callee name and argument reprs. */
inline std::string __nyx_witness_json(const char *sink_callee,
                                      const std::vector<std::string> &args_repr) {
    std::ostringstream out;
    out << "{\"env_snapshot\":{";
    bool first = true;
    for (char **e = environ; *e; ++e) {
        const char *eq = std::strchr(*e, '=');
        if (!eq) continue;
        std::string k(*e, static_cast<size_t>(eq - *e));
        std::string ku = k;
        std::transform(ku.begin(), ku.end(), ku.begin(),
                       [](unsigned char c){ return (char)std::toupper(c); });
        bool denied = false;
        for (const char *needle : __nyx_deny_substrings_cpp) {
            if (ku.find(needle) != std::string::npos) { denied = true; break; }
        }
        if (!first) out << ',';
        first = false;
        out << '"';
        __nyx_esc(out, k);
        out << "\":\"";
        if (denied) out << __NYX_REDACTED;
        else __nyx_esc(out, std::string(eq + 1));
        out << '"';
    }
    out << "},\"cwd\":\"";
    char cwdbuf[4096];
    if (::getcwd(cwdbuf, sizeof(cwdbuf))) __nyx_esc(out, std::string(cwdbuf));
    out << "\",\"payload_bytes\":[";
    const char *payload = std::getenv("NYX_PAYLOAD");
    if (payload) {
        size_t plen = std::strlen(payload);
        if (plen > __NYX_PAYLOAD_LIMIT) plen = __NYX_PAYLOAD_LIMIT;
        for (size_t i = 0; i < plen; ++i) {
            if (i > 0) out << ',';
            out << static_cast<int>(static_cast<unsigned char>(payload[i]));
        }
    }
    out << "],\"callee\":\"";
    __nyx_esc(out, std::string(sink_callee));
    out << "\",\"args_repr\":[";
    for (size_t i = 0; i < args_repr.size(); ++i) {
        if (i > 0) out << ',';
        out << '"';
        __nyx_esc(out, args_repr[i]);
        out << '"';
    }
    out << "]}";
    return out.str();
}

/* Append one "Normal" probe record to $NYX_PROBE_PATH; no-op when unset. */
template <typename... Args>
inline void __nyx_probe(const char *sink_callee, Args... args) {
    const char *p = std::getenv("NYX_PROBE_PATH");
    if (!p || *p == '\0') return;
    std::ostringstream out;
    out << "{\"sink_callee\":\"";
    __nyx_esc(out, std::string(sink_callee));
    out << "\",\"args\":[";
    bool first = true;
    std::vector<std::string> repr;
    auto emit = [&](const std::string &s) {
        if (!first) out << ',';
        first = false;
        __nyx_probe_one(out, s);
        repr.push_back(s);
    };
    (emit(std::string(args)), ...);
    const char *pid = std::getenv("NYX_PAYLOAD_ID");
    auto now = std::chrono::duration_cast<std::chrono::nanoseconds>(
        std::chrono::system_clock::now().time_since_epoch()
    ).count();
    out << "],\"captured_at_ns\":" << now
        << ",\"payload_id\":\"";
    __nyx_esc(out, std::string(pid ? pid : ""));
    out << "\",";
    out << "\"kind\":{\"kind\":\"Normal\"},\"witness\":"
        << __nyx_witness_json(sink_callee, repr) << "}\n";
    std::ofstream f(p, std::ios::app);
    if (f.is_open()) f << out.str();
}

/* Phase 08: sink-site sigaction handler.  Mirrors the C variant; the
 * captured `sink_callee` is held in a file-scope const char* so the
 * async-signal-unsafe write path can pull it without TLS. */
static const char *__nyx_crash_sink_callee = "";

inline void __nyx_crash_handler(int sig) {
    const char *p = std::getenv("NYX_PROBE_PATH");
    if (p && *p) {
        std::ofstream f(p, std::ios::app);
        if (f.is_open()) {
            const char *name = "SIGABRT";
            switch (sig) {
                case SIGSEGV: name = "SIGSEGV"; break;
                case SIGABRT: name = "SIGABRT"; break;
                case SIGBUS:  name = "SIGBUS";  break;
                case SIGFPE:  name = "SIGFPE";  break;
                case SIGILL:  name = "SIGILL";  break;
            }
            auto now = std::chrono::duration_cast<std::chrono::nanoseconds>(
                std::chrono::system_clock::now().time_since_epoch()
            ).count();
            const char *pid = std::getenv("NYX_PAYLOAD_ID");
            std::ostringstream out;
            out << "{\"sink_callee\":\"";
            __nyx_esc(out, std::string(__nyx_crash_sink_callee));
            out << "\",\"args\":[],\"captured_at_ns\":" << now
                << ",\"payload_id\":\"";
            __nyx_esc(out, std::string(pid ? pid : ""));
            out << "\",\"kind\":{\"kind\":\"Crash\",\"signal\":\"" << name
                << "\"},\"witness\":"
                << __nyx_witness_json(__nyx_crash_sink_callee, {}) << "}\n";
            f << out.str();
        }
    }
    /* Restore the default disposition and re-raise so the process still
     * terminates with the original signal. */
    struct sigaction dfl;
    std::memset(&dfl, 0, sizeof(dfl));
    dfl.sa_handler = SIG_DFL;
    sigaction(sig, &dfl, nullptr);
    raise(sig);
}

inline void __nyx_install_crash_guard(const char *sink_callee) {
    __nyx_crash_sink_callee = sink_callee;
    struct sigaction sa;
    std::memset(&sa, 0, sizeof(sa));
    sa.sa_handler = __nyx_crash_handler;
    sigemptyset(&sa.sa_mask);
    for (int sig : { SIGSEGV, SIGABRT, SIGBUS, SIGFPE, SIGILL }) {
        sigaction(sig, &sa, nullptr);
    }
}
"#
}

/// [`LangEmitter`] glue: every method forwards to the free function or
/// constant of the same purpose defined in this module.
impl LangEmitter for CppEmitter {
    /// Emit the harness for `spec`; see the module-level [`emit`].
    fn emit(&self, spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
        emit(spec)
    }

    /// Entry kinds this emitter accepts.
    fn entry_kinds_supported(&self) -> &'static [EntryKind] {
        SUPPORTED
    }

    /// Human-readable hint naming the supported kinds and the kind that was
    /// attempted.
    fn entry_kind_hint(&self, attempted: EntryKind) -> String {
        format!(
            "cpp emitter supports {SUPPORTED:?}; this finding's enclosing context is `EntryKind::{attempted}` — see Phase 16 shape dispatch (main / libFuzzer / free function)"
        )
    }

    /// Build the chain-step harness; see [`chain_step`].
    fn compose_chain_step(&self, prev_output: Option<&[u8]>) -> ChainStepHarness {
        chain_step(prev_output)
    }
}

/// Phase 26 — C++ chain-step harness.
///
/// The generated `step.cpp` echoes the `NYX_PREV_OUTPUT` environment variable
/// to stdout; `prev_output`, when present, is forwarded (lossily decoded as
/// UTF-8) through `extra_env`.
fn chain_step(prev_output: Option<&[u8]>) -> ChainStepHarness { let source = "#include \n#include \n\nint main() {\n const char *prev = std::getenv(\"NYX_PREV_OUTPUT\");\n if (prev) std::fputs(prev, stdout);\n return 0;\n}\n".to_owned(); ChainStepHarness { source, filename: "step.cpp".to_owned(), command: vec!["c++".to_owned(), "step.cpp".to_owned(), "-o".to_owned(), "step".to_owned()], extra_env: prev_output .map(|bytes| { vec![( ChainStepHarness::PREV_OUTPUT_ENV.to_owned(), String::from_utf8_lossy(bytes).into_owned(), )] }) .unwrap_or_default(), } } /// Emit a C++ harness for `spec`. pub fn emit(spec: &HarnessSpec) -> Result { let shape = detect_shape(spec); match (&spec.payload_slot, shape) { (PayloadSlot::Param(0) | PayloadSlot::EnvVar(_), _) => {} (PayloadSlot::Argv(_), CppShape::MainArgv) => {} _ => return Err(UnsupportedReason::PayloadSlotUnsupported), } let main_cpp = generate_main_cpp(spec, shape); let cmake = generate_cmake(); Ok(HarnessSource { source: main_cpp, filename: "main.cpp".into(), command: vec!["./nyx_harness".into()], extra_files: vec![("CMakeLists.txt".into(), cmake)], entry_subpath: Some("entry.cpp".into()), }) } fn generate_main_cpp(spec: &HarnessSpec, shape: CppShape) -> String { let invocation = invoke_for_shape(spec, shape); format!( r#"// Nyx dynamic harness — auto-generated, do not edit (Phase 16 — CppShape::{shape:?}). #include #include #include #include #include #include #include static std::string nyx_payload(); #include "entry.cpp" int main(int argc, char *argv[]) {{ (void)argc; (void)argv; std::string payload = nyx_payload(); {invocation} return 0; }} // Minimal base64 decoder (no external deps). 
static int nyx_b64_value(unsigned char c) {{ if (c >= 'A' && c <= 'Z') return c - 'A'; if (c >= 'a' && c <= 'z') return c - 'a' + 26; if (c >= '0' && c <= '9') return c - '0' + 52; if (c == '+') return 62; if (c == '/') return 63; return -1; }} static std::string nyx_b64_decode(const std::string &in) {{ std::string out; int buf = 0, bits = 0; for (char c : in) {{ if (c == '\n' || c == '\r' || c == '=') continue; int v = nyx_b64_value(static_cast(c)); if (v < 0) return std::string(); buf = (buf << 6) | v; bits += 6; if (bits >= 8) {{ bits -= 8; out.push_back(static_cast((buf >> bits) & 0xFF)); }} }} return out; }} static std::string nyx_payload() {{ if (const char *v = std::getenv("NYX_PAYLOAD")) {{ if (*v) return std::string(v); }} if (const char *b64 = std::getenv("NYX_PAYLOAD_B64")) {{ if (*b64) return nyx_b64_decode(std::string(b64)); }} return std::string(); }} "#, shape = shape, invocation = invocation, ) } fn invoke_for_shape(spec: &HarnessSpec, shape: CppShape) -> String { let entry_fn = &spec.entry_name; match shape { CppShape::FreeFn => match &spec.payload_slot { PayloadSlot::EnvVar(name) => format!( " setenv({name:?}, payload.c_str(), 1);\n {entry_fn}(payload.c_str(), payload.size());\n", ), _ => format!(" {entry_fn}(payload.c_str(), payload.size());\n"), }, CppShape::LibfuzzerEntry => { format!( " {entry_fn}(reinterpret_cast(payload.data()), payload.size());\n", entry_fn = entry_fn, ) } CppShape::MainArgv => { let pad = match &spec.payload_slot { PayloadSlot::Argv(n) => *n, _ => 0, }; let mut buf = String::from(" std::vector new_argv;\n"); buf.push_str(" std::vector argv_storage;\n"); buf.push_str(" argv_storage.emplace_back(\"nyx_harness\");\n"); for _ in 0..pad { buf.push_str(" argv_storage.emplace_back(\"\");\n"); } buf.push_str(" argv_storage.push_back(payload);\n"); buf.push_str(" for (auto &s : argv_storage) new_argv.push_back(s.data());\n"); buf.push_str(" new_argv.push_back(nullptr);\n"); buf.push_str(&format!( " 
{entry_fn}(static_cast(argv_storage.size()), new_argv.data());\n", )); buf } } } fn generate_cmake() -> String { r#"# Phase 16 — reference CMakeLists.txt, not used by the runner (the build # sandbox calls g++ / clang++ directly). Kept so reproductions can re-build # the harness by hand via `cmake -B build && cmake --build build`. cmake_minimum_required(VERSION 3.10) project(nyx_harness CXX) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) add_executable(nyx_harness main.cpp) "# .to_owned() } #[cfg(test)] mod tests { use super::*; use crate::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot}; use crate::labels::Cap; use crate::symbol::Lang; fn make_spec(payload_slot: PayloadSlot) -> HarnessSpec { HarnessSpec { finding_id: "cpp0000000000001".into(), entry_file: "entry.cpp".into(), entry_name: "run".into(), entry_kind: EntryKind::Function, lang: Lang::Cpp, toolchain_id: "g++-stable".into(), payload_slot, expected_cap: Cap::CODE_EXEC, constraint_hints: vec![], sink_file: "entry.cpp".into(), sink_line: 10, spec_hash: "cpptest00000001".into(), derivation: crate::dynamic::spec::SpecDerivationStrategy::FromFlowSteps, stubs_required: vec![], } } #[test] fn entry_kinds_supported_is_non_empty() { assert!(!CppEmitter.entry_kinds_supported().is_empty()); assert!(CppEmitter.entry_kinds_supported().contains(&EntryKind::Function)); assert!(CppEmitter.entry_kinds_supported().contains(&EntryKind::CliSubcommand)); assert!(CppEmitter.entry_kinds_supported().contains(&EntryKind::LibraryApi)); } #[test] fn entry_kind_hint_names_attempted_and_phase() { let hint = CppEmitter.entry_kind_hint(EntryKind::CliSubcommand); assert!(hint.contains("CliSubcommand")); assert!(hint.contains("Phase 16")); } #[test] fn shape_detect_main_argv() { let src = "int main(int argc, char *argv[]) { return 0; }"; let mut spec = make_spec(PayloadSlot::Argv(0)); spec.entry_kind = EntryKind::CliSubcommand; spec.entry_name = "main".into(); assert_eq!(CppShape::detect(&spec, src), 
CppShape::MainArgv); } #[test] fn shape_detect_libfuzzer() { let src = "extern \"C\" int LLVMFuzzerTestOneInput(const uint8_t* d, size_t n) { return 0; }"; let mut spec = make_spec(PayloadSlot::Param(0)); spec.entry_kind = EntryKind::LibraryApi; spec.entry_name = "LLVMFuzzerTestOneInput".into(); assert_eq!(CppShape::detect(&spec, src), CppShape::LibfuzzerEntry); } #[test] fn shape_detect_free_fn() { let src = "void run(const char *s, size_t n) { (void)s; (void)n; }"; let spec = make_spec(PayloadSlot::Param(0)); assert_eq!(CppShape::detect(&spec, src), CppShape::FreeFn); } #[test] fn emit_produces_source() { let spec = make_spec(PayloadSlot::Param(0)); let h = emit(&spec).unwrap(); assert_eq!(h.filename, "main.cpp"); assert!(h.source.contains("#include \"entry.cpp\"")); assert!(h.source.contains("run(payload.c_str(), payload.size())")); assert_eq!(h.command, vec!["./nyx_harness"]); assert_eq!(h.entry_subpath, Some("entry.cpp".to_string())); } #[test] fn emit_libfuzzer_shape_passes_bytes() { let mut spec = make_spec(PayloadSlot::Param(0)); spec.entry_kind = EntryKind::LibraryApi; spec.entry_name = "LLVMFuzzerTestOneInput".into(); let h = emit(&spec).unwrap(); assert!(h.source.contains("LLVMFuzzerTestOneInput(reinterpret_cast(payload.data()), payload.size())")); } #[test] fn emit_main_argv_shape_builds_argv() { let mut spec = make_spec(PayloadSlot::Argv(0)); spec.entry_kind = EntryKind::CliSubcommand; spec.entry_name = "nyx_entry_main".into(); let h = emit(&spec).unwrap(); assert!(h.source.contains("argv_storage.push_back(payload)")); assert!(h.source.contains("nyx_entry_main(static_cast(argv_storage.size()), new_argv.data())")); } #[test] fn emit_cmake_in_extra_files() { let spec = make_spec(PayloadSlot::Param(0)); let h = emit(&spec).unwrap(); let mk = h.extra_files.iter().find(|(n, _)| n == "CMakeLists.txt").expect("CMakeLists.txt must be staged"); assert!(mk.1.contains("add_executable(nyx_harness main.cpp)")); } }