//! C harness emitter. //! //! Phase 16 (Track B Rust + C/C++ vertical) replaces the stub body with //! dispatch over [`CShape`] — the cross product of [`EntryKind`] and a //! lightweight per-file shape detector that inspects the entry file for //! `main(int argc, char *argv[])`, libFuzzer's `LLVMFuzzerTestOneInput`, //! and free functions with `(const char*, size_t)` signatures. //! //! Each shape emits a single `main.c` that: //! 1. Reads the payload from `NYX_PAYLOAD` / `NYX_PAYLOAD_B64` env vars. //! 2. `#include`s `entry.c` (the user's vulnerable code) and dispatches //! via the per-shape adapter. //! //! Build step: `prepare_c()` in `build_sandbox.rs` runs //! `cc -O0 -o nyx_harness main.c` in the workdir. //! //! File layout in workdir: //! ```text //! main.c ← harness entry point (generated, includes entry.c) //! entry.c ← user entry source (copied from project) //! Makefile ← optional, generated for reference //! ``` //! //! Payload slot support: //! - `PayloadSlot::Param(0)` — pass payload as the first parameter (string //! or `(buf, len)` pair depending on shape). //! - `PayloadSlot::EnvVar(name)` — set env var before invoking entry. //! - `PayloadSlot::Argv(n)` — `main(argc, argv)` shape: appended to argv. use crate::dynamic::lang::{ChainStepHarness, HarnessSource, LangEmitter}; use crate::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot}; use crate::evidence::UnsupportedReason; use std::path::PathBuf; /// Zero-sized [`LangEmitter`] handle for C. pub struct CEmitter; /// Entry kinds the C emitter understands after Phase 16. /// /// `Function` covers free functions (libfuzzer-style + plain (const /// char*, size_t)). `CliSubcommand` covers `main(argc, argv)`. /// `LibraryApi` covers libFuzzer `LLVMFuzzerTestOneInput`. const SUPPORTED: &[EntryKind] = &[ EntryKind::Function, EntryKind::CliSubcommand, EntryKind::LibraryApi, ]; // ── Phase 16: shape detector ───────────────────────────────────────────────── /// Concrete per-file shape resolved by reading the entry source. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum CShape { /// `int main(int argc, char *argv[])`. Harness embeds payload into /// argv and calls `main(argc, argv)` directly. MainArgv, /// libFuzzer-style: `int LLVMFuzzerTestOneInput(const uint8_t *data, /// size_t size)`. Harness invokes with `payload` bytes + length. LibfuzzerEntry, /// Free function with `(const char *, size_t)` or `(const char *)` /// signature. Harness invokes directly. FreeFn, } impl CShape { /// Detect the shape from `(spec, source)`. pub fn detect(spec: &HarnessSpec, source: &str) -> Self { let entry = spec.entry_name.as_str(); let kind = spec.entry_kind; let has_main_argv = (source.contains("int main(") || source.contains("int main (")) && (source.contains("argc") || source.contains("char *argv") || source.contains("char* argv") || source.contains("char **argv")); let has_libfuzzer = source.contains("LLVMFuzzerTestOneInput") || entry == "LLVMFuzzerTestOneInput"; if has_libfuzzer { return Self::LibfuzzerEntry; } if entry == "main" || has_main_argv { return Self::MainArgv; } match kind { EntryKind::CliSubcommand => Self::MainArgv, EntryKind::LibraryApi => Self::LibfuzzerEntry, _ => Self::FreeFn, } } } /// Public wrapper: detect the shape for a finalised `HarnessSpec`, reading /// the entry file from disk. pub fn detect_shape(spec: &HarnessSpec) -> CShape { let src = read_entry_source(&spec.entry_file); CShape::detect(spec, &src) } fn read_entry_source(entry_file: &str) -> String { let candidates = [PathBuf::from(entry_file), PathBuf::from(".").join(entry_file)]; for path in &candidates { if let Ok(s) = std::fs::read_to_string(path) { return s; } } String::new() } /// Source of the `__nyx_probe` shim for the (future) C harness (Phase 06 — /// Track C.1). Variadic over `const char *` args; hand-rolled JSON keeps /// the only dep on libc / stdio. pub fn probe_shim() -> &'static str { r#" /* ── __nyx_probe shim (Phase 06 — Track C.1, Phase 08 — Track C.4 + C.5) ── */ #include #include #include #include #include #include #include #ifndef __NYX_PAYLOAD_LIMIT #define __NYX_PAYLOAD_LIMIT (16 * 1024) #endif #define __NYX_REDACTED "" extern char **environ; static const char *__nyx_deny[] = { "TOKEN","SECRET","PASSWORD","PASSWD","API_KEY","APIKEY","PRIVATE_KEY", "CREDENTIAL","SESSION","COOKIE","AUTH","BEARER","AWS_ACCESS","AWS_SESSION", "GH_TOKEN","GITHUB_TOKEN","NPM_TOKEN","PYPI_TOKEN","DOCKER_PASS", NULL, }; static int __nyx_is_denied_upper(const char *k_upper) { for (int i = 0; __nyx_deny[i]; ++i) { if (strstr(k_upper, __nyx_deny[i])) return 1; } return 0; } static void __nyx_write_witness(FILE *f, const char *sink_callee, int nargs, const char **args) { fputs("{\"env_snapshot\":{", f); int first = 1; for (char **e = environ; *e; ++e) { const char *eq = strchr(*e, '='); if (!eq) continue; size_t klen = (size_t)(eq - *e); char *kup = (char *)malloc(klen + 1); if (!kup) continue; for (size_t i = 0; i < klen; ++i) { char c = (*e)[i]; if (c >= 'a' && c <= 'z') c -= 32; kup[i] = c; } kup[klen] = '\0'; int denied = __nyx_is_denied_upper(kup); if (!first) fputc(',', f); first = 0; fputc('"', f); fwrite(*e, 1, klen, f); fputs("\":\"", f); if (denied) { fputs(__NYX_REDACTED, f); } else { const char *v = eq + 1; for (; *v; ++v) { switch (*v) { case '"': fputs("\\\"", f); break; case '\\': fputs("\\\\", f); break; case '\n': fputs("\\n", f); break; case '\r': fputs("\\r", f); break; case '\t': fputs("\\t", f); break; default: fputc(*v, f); } } } fputc('"', f); free(kup); } fputs("},\"cwd\":\"", f); char cwdbuf[4096]; if (getcwd(cwdbuf, sizeof(cwdbuf))) { fputs(cwdbuf, f); } fputs("\",\"payload_bytes\":[", f); const char *payload = getenv("NYX_PAYLOAD"); if (payload) { size_t plen = strlen(payload); if (plen > __NYX_PAYLOAD_LIMIT) plen = __NYX_PAYLOAD_LIMIT; for (size_t i = 0; i < plen; ++i) { if (i > 0) fputc(',', f); fprintf(f, "%d", (unsigned char)payload[i]); } } fputs("],\"callee\":\"", f); fputs(sink_callee, f); fputs("\",\"args_repr\":[", f); for (int i = 0; i < nargs; ++i) { if (i > 0) fputc(',', f); fputc('"', f); if (args && args[i]) fputs(args[i], f); fputc('"', f); } fputs("]}", f); } static void __nyx_probe(const char *sink_callee, int nargs, ...) { const char *p = getenv("NYX_PROBE_PATH"); if (!p || *p == '\0') return; FILE *f = fopen(p, "a"); if (!f) return; struct timespec ts; clock_gettime(CLOCK_REALTIME, &ts); unsigned long long ns = (unsigned long long)ts.tv_sec * 1000000000ULL + (unsigned long long)ts.tv_nsec; const char *pid = getenv("NYX_PAYLOAD_ID"); if (!pid) pid = ""; fprintf(f, "{\"sink_callee\":\"%s\",\"args\":[", sink_callee); va_list ap; va_start(ap, nargs); const char *args_arr[32]; int captured = nargs > 32 ? 32 : nargs; for (int i = 0; i < nargs; ++i) { const char *arg = va_arg(ap, const char *); if (!arg) arg = ""; if (i < captured) args_arr[i] = arg; if (i > 0) fputc(',', f); fprintf(f, "{\"kind\":\"String\",\"value\":\"%s\"}", arg); } va_end(ap); fprintf(f, "],\"captured_at_ns\":%llu,\"payload_id\":\"%s\",", ns, pid); fputs("\"kind\":{\"kind\":\"Normal\"},\"witness\":", f); __nyx_write_witness(f, sink_callee, captured, args_arr); fputs("}\n", f); fclose(f); } /* Phase 08: sink-site signal handler. __nyx_install_crash_guard sets a * sigaction(2) handler over SIGSEGV / SIGABRT / SIGBUS / SIGFPE / SIGILL * that writes a Crash probe with witness before restoring SIG_DFL and * re-raising the signal — the process still dies with the same exit * code, but the probe channel now carries the forensic record. */ static const char *__nyx_crash_sink_callee = ""; static void __nyx_crash_handler(int sig) { const char *p = getenv("NYX_PROBE_PATH"); if (p && *p) { FILE *f = fopen(p, "a"); if (f) { const char *name = "SIGABRT"; switch (sig) { case SIGSEGV: name = "SIGSEGV"; break; case SIGABRT: name = "SIGABRT"; break; case SIGBUS: name = "SIGBUS"; break; case SIGFPE: name = "SIGFPE"; break; case SIGILL: name = "SIGILL"; break; } struct timespec ts; clock_gettime(CLOCK_REALTIME, &ts); unsigned long long ns = (unsigned long long)ts.tv_sec * 1000000000ULL + (unsigned long long)ts.tv_nsec; const char *pid = getenv("NYX_PAYLOAD_ID"); if (!pid) pid = ""; fprintf(f, "{\"sink_callee\":\"%s\",\"args\":[],\"captured_at_ns\":%llu," "\"payload_id\":\"%s\",\"kind\":{\"kind\":\"Crash\",\"signal\":\"%s\"}," "\"witness\":", __nyx_crash_sink_callee, ns, pid, name); __nyx_write_witness(f, __nyx_crash_sink_callee, 0, NULL); fputs("}\n", f); fclose(f); } } struct sigaction dfl; memset(&dfl, 0, sizeof(dfl)); dfl.sa_handler = SIG_DFL; sigaction(sig, &dfl, NULL); raise(sig); } static void __nyx_install_crash_guard(const char *sink_callee) { __nyx_crash_sink_callee = sink_callee; struct sigaction sa; memset(&sa, 0, sizeof(sa)); sa.sa_handler = __nyx_crash_handler; sigemptyset(&sa.sa_mask); int sigs[] = { SIGSEGV, SIGABRT, SIGBUS, SIGFPE, SIGILL }; for (size_t i = 0; i < sizeof(sigs)/sizeof(sigs[0]); ++i) { sigaction(sigs[i], &sa, NULL); } } "# } impl LangEmitter for CEmitter { fn emit(&self, spec: &HarnessSpec) -> Result { emit(spec) } fn entry_kinds_supported(&self) -> &'static [EntryKind] { SUPPORTED } fn entry_kind_hint(&self, attempted: EntryKind) -> String { format!( "c emitter supports {SUPPORTED:?}; this finding's enclosing context is `EntryKind::{attempted}` — see Phase 16 shape dispatch (main / libFuzzer / free function)" ) } fn compose_chain_step(&self, prev_output: Option<&[u8]>) -> ChainStepHarness { chain_step(prev_output) } } /// Phase 26 — C chain-step harness. fn chain_step(prev_output: Option<&[u8]>) -> ChainStepHarness { let source = "#include \n#include \n\nint main(void) {\n const char *prev = getenv(\"NYX_PREV_OUTPUT\");\n if (prev) fputs(prev, stdout);\n return 0;\n}\n".to_owned(); ChainStepHarness { source, filename: "step.c".to_owned(), command: vec!["cc".to_owned(), "step.c".to_owned(), "-o".to_owned(), "step".to_owned()], extra_env: prev_output .map(|bytes| { vec![( ChainStepHarness::PREV_OUTPUT_ENV.to_owned(), String::from_utf8_lossy(bytes).into_owned(), )] }) .unwrap_or_default(), } } /// Emit a C harness for `spec`. pub fn emit(spec: &HarnessSpec) -> Result { let shape = detect_shape(spec); match (&spec.payload_slot, shape) { (PayloadSlot::Param(0) | PayloadSlot::EnvVar(_), _) => {} (PayloadSlot::Argv(_), CShape::MainArgv) => {} _ => return Err(UnsupportedReason::PayloadSlotUnsupported), } let main_c = generate_main_c(spec, shape); let makefile = generate_makefile(); Ok(HarnessSource { source: main_c, filename: "main.c".into(), command: vec!["./nyx_harness".into()], extra_files: vec![("Makefile".into(), makefile)], entry_subpath: Some("entry.c".into()), }) } /// Generate the harness `main.c` for the resolved shape. fn generate_main_c(spec: &HarnessSpec, shape: CShape) -> String { let invocation = invoke_for_shape(spec, shape); format!( r#"/* Nyx dynamic harness — auto-generated, do not edit (Phase 16 — CShape::{shape:?}). */ #include #include #include #include #include /* Forward declarations: the entry file is appended below via `#include` * so the harness can call user-defined functions without a separate * compilation unit. */ static char *nyx_payload(void); #include "entry.c" int main(int argc, char *argv[]) {{ (void)argc; (void)argv; char *payload = nyx_payload(); if (!payload) payload = (char*)""; {invocation} /* Intentionally no free(payload): payload is either a strdup/b64_decode * heap pointer or a string literal substituted above when allocation * failed. free() on the literal is UB; the process exits immediately * so the kernel reclaims the heap copy. */ return 0; }} /* Minimal base64 decoder (no external deps). */ static int nyx_b64_value(unsigned char c) {{ if (c >= 'A' && c <= 'Z') return c - 'A'; if (c >= 'a' && c <= 'z') return c - 'a' + 26; if (c >= '0' && c <= '9') return c - '0' + 52; if (c == '+') return 62; if (c == '/') return 63; return -1; }} static char *nyx_b64_decode(const char *in) {{ size_t n = strlen(in); char *out = (char *)malloc(n + 1); if (!out) return NULL; size_t outi = 0; int buf = 0, bits = 0; for (size_t i = 0; i < n; ++i) {{ if (in[i] == '\n' || in[i] == '\r' || in[i] == '=') continue; int v = nyx_b64_value((unsigned char)in[i]); if (v < 0) {{ free(out); return NULL; }} buf = (buf << 6) | v; bits += 6; if (bits >= 8) {{ bits -= 8; out[outi++] = (char)((buf >> bits) & 0xFF); }} }} out[outi] = '\0'; return out; }} static char *nyx_payload(void) {{ const char *v = getenv("NYX_PAYLOAD"); if (v && *v) {{ return strdup(v); }} const char *b64 = getenv("NYX_PAYLOAD_B64"); if (b64 && *b64) {{ return nyx_b64_decode(b64); }} return strdup(""); }} "#, shape = shape, invocation = invocation, ) } fn invoke_for_shape(spec: &HarnessSpec, shape: CShape) -> String { let entry_fn = &spec.entry_name; match shape { CShape::FreeFn => match &spec.payload_slot { PayloadSlot::EnvVar(name) => format!( " setenv({name:?}, payload, 1);\n {entry_fn}(payload, strlen(payload));\n", ), _ => format!(" {entry_fn}(payload, strlen(payload));\n"), }, CShape::LibfuzzerEntry => { // libFuzzer: `int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)`. format!( " {entry_fn}((const uint8_t *)payload, strlen(payload));\n", entry_fn = entry_fn, ) } CShape::MainArgv => { // Rename the user-supplied entry to `nyx_entry_main` via macro so // it does not collide with the harness `main` symbol when the // entry source defines `int main(...)`. Fixture authors should // expose the entry as a function named in `spec.entry_name`. // // Heap-allocate `new_argv` so a future `PayloadSlot::Argv(n)` with // `n >= 6` cannot overrun a fixed stack array. Slots: 1 // ("nyx_harness") + pad + 1 (payload) + 1 (NULL terminator). let pad = match &spec.payload_slot { PayloadSlot::Argv(n) => *n, _ => 0, }; let slots = pad + 3; let mut buf = String::new(); buf.push_str(&format!( " char **new_argv = (char**)calloc({slots}, sizeof(char*));\n", )); buf.push_str(" if (!new_argv) return 1;\n"); buf.push_str(" int new_argc = 0;\n"); buf.push_str(" new_argv[new_argc++] = (char*)\"nyx_harness\";\n"); for _ in 0..pad { buf.push_str(" new_argv[new_argc++] = (char*)\"\";\n"); } buf.push_str(" new_argv[new_argc++] = payload;\n"); buf.push_str(" new_argv[new_argc] = NULL;\n"); buf.push_str(&format!(" {entry_fn}(new_argc, new_argv);\n")); buf.push_str(" free(new_argv);\n"); buf } } } fn generate_makefile() -> String { r#"# Phase 16 — reference Makefile, not used by the runner (the build sandbox # calls cc directly). Kept so reproductions can re-build the harness by hand. CC ?= cc CFLAGS ?= -O0 -g all: nyx_harness nyx_harness: main.c entry.c $(CC) $(CFLAGS) -o nyx_harness main.c clean: rm -f nyx_harness "# .to_owned() } #[cfg(test)] mod tests { use super::*; use crate::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot}; use crate::labels::Cap; use crate::symbol::Lang; fn make_spec(payload_slot: PayloadSlot) -> HarnessSpec { HarnessSpec { finding_id: "c00000000000001".into(), entry_file: "entry.c".into(), entry_name: "run".into(), entry_kind: EntryKind::Function, lang: Lang::C, toolchain_id: "gcc-stable".into(), payload_slot, expected_cap: Cap::CODE_EXEC, constraint_hints: vec![], sink_file: "entry.c".into(), sink_line: 10, spec_hash: "ctest0000000001".into(), derivation: crate::dynamic::spec::SpecDerivationStrategy::FromFlowSteps, stubs_required: vec![], } } #[test] fn entry_kinds_supported_is_non_empty() { assert!(!CEmitter.entry_kinds_supported().is_empty()); assert!(CEmitter.entry_kinds_supported().contains(&EntryKind::Function)); assert!(CEmitter.entry_kinds_supported().contains(&EntryKind::CliSubcommand)); assert!(CEmitter.entry_kinds_supported().contains(&EntryKind::LibraryApi)); } #[test] fn entry_kind_hint_names_attempted_and_phase() { let hint = CEmitter.entry_kind_hint(EntryKind::LibraryApi); assert!(hint.contains("LibraryApi")); assert!(hint.contains("Phase 16")); } #[test] fn shape_detect_main_argv() { let src = "int main(int argc, char *argv[]) { return 0; }"; let mut spec = make_spec(PayloadSlot::Argv(0)); spec.entry_kind = EntryKind::CliSubcommand; spec.entry_name = "main".into(); assert_eq!(CShape::detect(&spec, src), CShape::MainArgv); } #[test] fn shape_detect_libfuzzer_entry() { let src = "int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { return 0; }"; let mut spec = make_spec(PayloadSlot::Param(0)); spec.entry_kind = EntryKind::LibraryApi; spec.entry_name = "LLVMFuzzerTestOneInput".into(); assert_eq!(CShape::detect(&spec, src), CShape::LibfuzzerEntry); } #[test] fn shape_detect_free_fn() { let src = "void run(const char *s, size_t n) { (void)s; (void)n; }"; let spec = make_spec(PayloadSlot::Param(0)); assert_eq!(CShape::detect(&spec, src), CShape::FreeFn); } #[test] fn emit_produces_source() { let spec = make_spec(PayloadSlot::Param(0)); let h = emit(&spec).unwrap(); assert_eq!(h.filename, "main.c"); assert!(h.source.contains("#include \"entry.c\"")); assert!(h.source.contains("run(payload, strlen(payload))")); assert_eq!(h.command, vec!["./nyx_harness"]); assert_eq!(h.entry_subpath, Some("entry.c".to_string())); } #[test] fn emit_main_argv_shape_routes_through_new_argv() { let mut spec = make_spec(PayloadSlot::Argv(0)); spec.entry_kind = EntryKind::CliSubcommand; spec.entry_name = "nyx_entry_main".into(); let h = emit(&spec).unwrap(); assert!(h.source.contains("new_argv[new_argc++] = payload")); assert!(h.source.contains("nyx_entry_main(new_argc, new_argv)")); } #[test] fn emit_main_argv_uses_heap_allocation_sized_for_pad() { // Phase 16 follow-up: heap-allocate `new_argv` so deep `Argv(n)` slots // cannot overrun a fixed stack array. Slots = pad + 3 // (nyx_harness + pad + payload + NULL). let mut spec = make_spec(PayloadSlot::Argv(0)); spec.entry_kind = EntryKind::CliSubcommand; spec.entry_name = "nyx_entry_main".into(); let h = emit(&spec).unwrap(); assert!( !h.source.contains("char *new_argv[8]"), "fixed-size stack array must be gone — Argv(n>=6) used to overrun", ); assert!(h.source.contains("char **new_argv = (char**)calloc(3, sizeof(char*))")); assert!(h.source.contains("free(new_argv);")); let mut spec6 = make_spec(PayloadSlot::Argv(6)); spec6.entry_kind = EntryKind::CliSubcommand; spec6.entry_name = "nyx_entry_main".into(); let h6 = emit(&spec6).unwrap(); assert!(h6.source.contains("char **new_argv = (char**)calloc(9, sizeof(char*))")); assert!(h6.source.contains("free(new_argv);")); } #[test] fn emit_libfuzzer_shape_passes_bytes() { let mut spec = make_spec(PayloadSlot::Param(0)); spec.entry_kind = EntryKind::LibraryApi; spec.entry_name = "LLVMFuzzerTestOneInput".into(); let h = emit(&spec).unwrap(); assert!(h.source.contains("LLVMFuzzerTestOneInput((const uint8_t *)payload, strlen(payload))")); } #[test] fn emit_makefile_in_extra_files() { let spec = make_spec(PayloadSlot::Param(0)); let h = emit(&spec).unwrap(); let mk = h.extra_files.iter().find(|(n, _)| n == "Makefile").expect("Makefile must be staged"); assert!(mk.1.contains("nyx_harness: main.c entry.c")); } }