From 93eb98edda87b88e3ef7da4f9554db94f54664ea Mon Sep 17 00:00:00 2001 From: pitboss Date: Thu, 14 May 2026 13:10:22 -0500 Subject: [PATCH] =?UTF-8?q?[pitboss]=20phase=2008:=20Track=20C.4=20+=20C.5?= =?UTF-8?q?=20=E2=80=94=20SinkCrash=20oracle=20+=20per-probe=20witness=20c?= =?UTF-8?q?apture?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/dynamic/differential.rs | 6 +- src/dynamic/lang/c.rs | 154 +++++++++++++++++- src/dynamic/lang/cpp.rs | 134 +++++++++++++++- src/dynamic/lang/go.rs | 131 ++++++++++++++-- src/dynamic/lang/java.rs | 107 +++++++++++-- src/dynamic/lang/javascript.rs | 98 +++++++++++- src/dynamic/lang/php.rs | 108 +++++++++++-- src/dynamic/lang/python.rs | 103 +++++++++++- src/dynamic/lang/ruby.rs | 74 ++++++++- src/dynamic/lang/rust.rs | 215 ++++++++++++++++++++----- src/dynamic/mod.rs | 1 + src/dynamic/oracle.rs | 258 +++++++++++++++++++++++++++++- src/dynamic/policy.rs | 192 +++++++++++++++++++++++ src/dynamic/probe.rs | 178 +++++++++++++++++++++ src/dynamic/runner.rs | 27 +++- src/dynamic/verify.rs | 19 +++ src/evidence.rs | 8 + src/fmt.rs | 1 + tests/oracle_differential.rs | 4 +- tests/oracle_sink_crash.rs | 279 +++++++++++++++++++++++++++++++++ tests/oracle_sink_probe.rs | 6 +- 21 files changed, 1988 insertions(+), 115 deletions(-) create mode 100644 src/dynamic/policy.rs create mode 100644 tests/oracle_sink_crash.rs diff --git a/src/dynamic/differential.rs b/src/dynamic/differential.rs index 2c4f0ec3..460aca59 100644 --- a/src/dynamic/differential.rs +++ b/src/dynamic/differential.rs @@ -110,18 +110,22 @@ mod tests { #[test] fn build_outcome_carries_both_traces() { - use crate::dynamic::probe::{ProbeArg, SinkProbe}; + use crate::dynamic::probe::{ProbeArg, ProbeKind, ProbeWitness, SinkProbe}; let vuln = vec![SinkProbe { sink_callee: "os.system".into(), args: vec![ProbeArg::String("; echo X".into())], captured_at_ns: 1, payload_id: "cmdi-echo-marker".into(), + kind: ProbeKind::Normal, + witness: 
ProbeWitness::empty(), }]; let benign = vec![SinkProbe { sink_callee: "os.system".into(), args: vec![ProbeArg::String("safe".into())], captured_at_ns: 2, payload_id: "cmdi-benign".into(), + kind: ProbeKind::Normal, + witness: ProbeWitness::empty(), }]; let outcome = build_outcome( "cmdi-echo-marker", diff --git a/src/dynamic/lang/c.rs b/src/dynamic/lang/c.rs index 96dbf3a7..4797d00b 100644 --- a/src/dynamic/lang/c.rs +++ b/src/dynamic/lang/c.rs @@ -23,12 +23,101 @@ const SUPPORTED: &[EntryKind] = &[EntryKind::Function]; /// the only dep on libc / stdio. pub fn probe_shim() -> &'static str { r#" -/* ── __nyx_probe shim (Phase 06 — Track C.1) ─────────────────────────────── */ +/* ── __nyx_probe shim (Phase 06 — Track C.1, Phase 08 — Track C.4 + C.5) ── */ +#include #include #include #include #include #include +#include + +#ifndef __NYX_PAYLOAD_LIMIT +#define __NYX_PAYLOAD_LIMIT (16 * 1024) +#endif +#define __NYX_REDACTED "" + +extern char **environ; + +static const char *__nyx_deny[] = { + "TOKEN","SECRET","PASSWORD","PASSWD","API_KEY","APIKEY","PRIVATE_KEY", + "CREDENTIAL","SESSION","COOKIE","AUTH","BEARER","AWS_ACCESS","AWS_SESSION", + "GH_TOKEN","GITHUB_TOKEN","NPM_TOKEN","PYPI_TOKEN","DOCKER_PASS", + NULL, +}; + +static int __nyx_is_denied_upper(const char *k_upper) { + for (int i = 0; __nyx_deny[i]; ++i) { + if (strstr(k_upper, __nyx_deny[i])) return 1; + } + return 0; +} + +static void __nyx_write_witness(FILE *f, const char *sink_callee, int nargs, const char **args) { + fputs("{\"env_snapshot\":{", f); + int first = 1; + for (char **e = environ; *e; ++e) { + const char *eq = strchr(*e, '='); + if (!eq) continue; + size_t klen = (size_t)(eq - *e); + char *kup = (char *)malloc(klen + 1); + if (!kup) continue; + for (size_t i = 0; i < klen; ++i) { + char c = (*e)[i]; + if (c >= 'a' && c <= 'z') c -= 32; + kup[i] = c; + } + kup[klen] = '\0'; + int denied = __nyx_is_denied_upper(kup); + if (!first) fputc(',', f); + first = 0; + fputc('"', f); + fwrite(*e, 1, 
klen, f); + fputs("\":\"", f); + if (denied) { + fputs(__NYX_REDACTED, f); + } else { + const char *v = eq + 1; + for (; *v; ++v) { + switch (*v) { + case '"': fputs("\\\"", f); break; + case '\\': fputs("\\\\", f); break; + case '\n': fputs("\\n", f); break; + case '\r': fputs("\\r", f); break; + case '\t': fputs("\\t", f); break; + default: fputc(*v, f); + } + } + } + fputc('"', f); + free(kup); + } + fputs("},\"cwd\":\"", f); + char cwdbuf[4096]; + if (getcwd(cwdbuf, sizeof(cwdbuf))) { + fputs(cwdbuf, f); + } + fputs("\",\"payload_bytes\":[", f); + const char *payload = getenv("NYX_PAYLOAD"); + if (payload) { + size_t plen = strlen(payload); + if (plen > __NYX_PAYLOAD_LIMIT) plen = __NYX_PAYLOAD_LIMIT; + for (size_t i = 0; i < plen; ++i) { + if (i > 0) fputc(',', f); + fprintf(f, "%d", (unsigned char)payload[i]); + } + } + fputs("],\"callee\":\"", f); + fputs(sink_callee, f); + fputs("\",\"args_repr\":[", f); + for (int i = 0; i < nargs; ++i) { + if (i > 0) fputc(',', f); + fputc('"', f); + if (args && args[i]) fputs(args[i], f); + fputc('"', f); + } + fputs("]}", f); +} static void __nyx_probe(const char *sink_callee, int nargs, ...) { const char *p = getenv("NYX_PROBE_PATH"); @@ -44,16 +133,77 @@ static void __nyx_probe(const char *sink_callee, int nargs, ...) { fprintf(f, "{\"sink_callee\":\"%s\",\"args\":[", sink_callee); va_list ap; va_start(ap, nargs); + const char *args_arr[32]; + int captured = nargs > 32 ? 
32 : nargs; for (int i = 0; i < nargs; ++i) { const char *arg = va_arg(ap, const char *); if (!arg) arg = ""; + if (i < captured) args_arr[i] = arg; if (i > 0) fputc(',', f); fprintf(f, "{\"kind\":\"String\",\"value\":\"%s\"}", arg); } va_end(ap); - fprintf(f, "],\"captured_at_ns\":%llu,\"payload_id\":\"%s\"}\n", ns, pid); + fprintf(f, "],\"captured_at_ns\":%llu,\"payload_id\":\"%s\",", ns, pid); + fputs("\"kind\":{\"kind\":\"Normal\"},\"witness\":", f); + __nyx_write_witness(f, sink_callee, captured, args_arr); + fputs("}\n", f); fclose(f); } + +/* Phase 08: sink-site signal handler. __nyx_install_crash_guard sets a + * sigaction(2) handler over SIGSEGV / SIGABRT / SIGBUS / SIGFPE / SIGILL + * that writes a Crash probe with witness before restoring SIG_DFL and + * re-raising the signal — the process still dies with the same exit + * code, but the probe channel now carries the forensic record. */ +static const char *__nyx_crash_sink_callee = ""; + +static void __nyx_crash_handler(int sig) { + const char *p = getenv("NYX_PROBE_PATH"); + if (p && *p) { + FILE *f = fopen(p, "a"); + if (f) { + const char *name = "SIGABRT"; + switch (sig) { + case SIGSEGV: name = "SIGSEGV"; break; + case SIGABRT: name = "SIGABRT"; break; + case SIGBUS: name = "SIGBUS"; break; + case SIGFPE: name = "SIGFPE"; break; + case SIGILL: name = "SIGILL"; break; + } + struct timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + unsigned long long ns = (unsigned long long)ts.tv_sec * 1000000000ULL + + (unsigned long long)ts.tv_nsec; + const char *pid = getenv("NYX_PAYLOAD_ID"); + if (!pid) pid = ""; + fprintf(f, + "{\"sink_callee\":\"%s\",\"args\":[],\"captured_at_ns\":%llu," + "\"payload_id\":\"%s\",\"kind\":{\"kind\":\"Crash\",\"signal\":\"%s\"}," + "\"witness\":", + __nyx_crash_sink_callee, ns, pid, name); + __nyx_write_witness(f, __nyx_crash_sink_callee, 0, NULL); + fputs("}\n", f); + fclose(f); + } + } + struct sigaction dfl; + memset(&dfl, 0, sizeof(dfl)); + dfl.sa_handler = SIG_DFL; + 
sigaction(sig, &dfl, NULL); + raise(sig); +} + +static void __nyx_install_crash_guard(const char *sink_callee) { + __nyx_crash_sink_callee = sink_callee; + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = __nyx_crash_handler; + sigemptyset(&sa.sa_mask); + int sigs[] = { SIGSEGV, SIGABRT, SIGBUS, SIGFPE, SIGILL }; + for (size_t i = 0; i < sizeof(sigs)/sizeof(sigs[0]); ++i) { + sigaction(sigs[i], &sa, NULL); + } +} "# } diff --git a/src/dynamic/lang/cpp.rs b/src/dynamic/lang/cpp.rs index f825a086..cec881f1 100644 --- a/src/dynamic/lang/cpp.rs +++ b/src/dynamic/lang/cpp.rs @@ -23,12 +23,31 @@ const SUPPORTED: &[EntryKind] = &[EntryKind::Function]; /// JSON-emit format matches [`crate::dynamic::probe::SinkProbe`]. pub fn probe_shim() -> &'static str { r#" -/* ── __nyx_probe shim (Phase 06 — Track C.1) ─────────────────────────────── */ +/* ── __nyx_probe shim (Phase 06 — Track C.1, Phase 08 — Track C.4 + C.5) ── */ +#include +#include #include +#include #include +#include #include #include #include +#include +#include + +#ifndef __NYX_PAYLOAD_LIMIT +#define __NYX_PAYLOAD_LIMIT (16 * 1024) +#endif +#define __NYX_REDACTED "" + +extern char **environ; + +static const char *__nyx_deny_substrings_cpp[] = { + "TOKEN","SECRET","PASSWORD","PASSWD","API_KEY","APIKEY","PRIVATE_KEY", + "CREDENTIAL","SESSION","COOKIE","AUTH","BEARER","AWS_ACCESS","AWS_SESSION", + "GH_TOKEN","GITHUB_TOKEN","NPM_TOKEN","PYPI_TOKEN","DOCKER_PASS", +}; inline void __nyx_probe_one(std::ostringstream &out, const std::string &v) { out << "{\"kind\":\"String\",\"value\":\""; @@ -45,6 +64,63 @@ inline void __nyx_probe_one(std::ostringstream &out, const std::string &v) { out << "\"}"; } +inline void __nyx_esc(std::ostringstream &out, const std::string &v) { + for (char c : v) { + switch (c) { + case '"': out << "\\\""; break; + case '\\': out << "\\\\"; break; + case '\n': out << "\\n"; break; + case '\r': out << "\\r"; break; + case '\t': out << "\\t"; break; + default: out << c; + } + 
} +} + +inline std::string __nyx_witness_json(const char *sink_callee, const std::vector &args_repr) { + std::ostringstream out; + out << "{\"env_snapshot\":{"; + bool first = true; + for (char **e = environ; *e; ++e) { + const char *eq = std::strchr(*e, '='); + if (!eq) continue; + std::string k(*e, static_cast(eq - *e)); + std::string ku = k; + std::transform(ku.begin(), ku.end(), ku.begin(), [](unsigned char c){ return (char)std::toupper(c); }); + bool denied = false; + for (const char *needle : __nyx_deny_substrings_cpp) { + if (ku.find(needle) != std::string::npos) { denied = true; break; } + } + if (!first) out << ','; + first = false; + out << '"'; __nyx_esc(out, k); out << "\":\""; + if (denied) out << __NYX_REDACTED; + else __nyx_esc(out, std::string(eq + 1)); + out << '"'; + } + out << "},\"cwd\":\""; + char cwdbuf[4096]; + if (::getcwd(cwdbuf, sizeof(cwdbuf))) __nyx_esc(out, std::string(cwdbuf)); + out << "\",\"payload_bytes\":["; + const char *payload = std::getenv("NYX_PAYLOAD"); + if (payload) { + size_t plen = std::strlen(payload); + if (plen > __NYX_PAYLOAD_LIMIT) plen = __NYX_PAYLOAD_LIMIT; + for (size_t i = 0; i < plen; ++i) { + if (i > 0) out << ','; + out << static_cast(static_cast(payload[i])); + } + } + out << "],\"callee\":\""; __nyx_esc(out, std::string(sink_callee)); + out << "\",\"args_repr\":["; + for (size_t i = 0; i < args_repr.size(); ++i) { + if (i > 0) out << ','; + out << '"'; __nyx_esc(out, args_repr[i]); out << '"'; + } + out << "]}"; + return out.str(); +} + template inline void __nyx_probe(const char *sink_callee, Args... args) { const char *p = std::getenv("NYX_PROBE_PATH"); @@ -52,10 +128,12 @@ inline void __nyx_probe(const char *sink_callee, Args... 
args) { std::ostringstream out; out << "{\"sink_callee\":\"" << sink_callee << "\",\"args\":["; bool first = true; + std::vector repr; auto emit = [&](const std::string &s) { if (!first) out << ','; first = false; __nyx_probe_one(out, s); + repr.push_back(s); }; (emit(std::string(args)), ...); const char *pid = std::getenv("NYX_PAYLOAD_ID"); @@ -63,10 +141,62 @@ inline void __nyx_probe(const char *sink_callee, Args... args) { std::chrono::system_clock::now().time_since_epoch() ).count(); out << "],\"captured_at_ns\":" << now << ",\"payload_id\":\"" - << (pid ? pid : "") << "\"}\n"; + << (pid ? pid : "") << "\","; + out << "\"kind\":{\"kind\":\"Normal\"},\"witness\":" + << __nyx_witness_json(sink_callee, repr) << "}\n"; std::ofstream f(p, std::ios::app); if (f.is_open()) f << out.str(); } + +/* Phase 08: sink-site sigaction handler. Mirrors the C variant; the + * captured `sink_callee` is held in a file-scope const char* so the + * async-signal-unsafe write path can pull it without TLS. */ +static const char *__nyx_crash_sink_callee = ""; + +inline void __nyx_crash_handler(int sig) { + const char *p = std::getenv("NYX_PROBE_PATH"); + if (p && *p) { + std::ofstream f(p, std::ios::app); + if (f.is_open()) { + const char *name = "SIGABRT"; + switch (sig) { + case SIGSEGV: name = "SIGSEGV"; break; + case SIGABRT: name = "SIGABRT"; break; + case SIGBUS: name = "SIGBUS"; break; + case SIGFPE: name = "SIGFPE"; break; + case SIGILL: name = "SIGILL"; break; + } + auto now = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch() + ).count(); + const char *pid = std::getenv("NYX_PAYLOAD_ID"); + std::ostringstream out; + out << "{\"sink_callee\":\"" << __nyx_crash_sink_callee + << "\",\"args\":[],\"captured_at_ns\":" << now + << ",\"payload_id\":\"" << (pid ? 
pid : "") + << "\",\"kind\":{\"kind\":\"Crash\",\"signal\":\"" << name + << "\"},\"witness\":" + << __nyx_witness_json(__nyx_crash_sink_callee, {}) << "}\n"; + f << out.str(); + } + } + struct sigaction dfl; + std::memset(&dfl, 0, sizeof(dfl)); + dfl.sa_handler = SIG_DFL; + sigaction(sig, &dfl, nullptr); + raise(sig); +} + +inline void __nyx_install_crash_guard(const char *sink_callee) { + __nyx_crash_sink_callee = sink_callee; + struct sigaction sa; + std::memset(&sa, 0, sizeof(sa)); + sa.sa_handler = __nyx_crash_handler; + sigemptyset(&sa.sa_mask); + for (int sig : { SIGSEGV, SIGABRT, SIGBUS, SIGFPE, SIGILL }) { + sigaction(sig, &sa, nullptr); + } +} "# } diff --git a/src/dynamic/lang/go.rs b/src/dynamic/lang/go.rs index d53e81f2..2b04d64e 100644 --- a/src/dynamic/lang/go.rs +++ b/src/dynamic/lang/go.rs @@ -58,12 +58,71 @@ impl LangEmitter for GoEmitter { /// captured args at the sink site. pub fn probe_shim() -> &'static str { r#" -// ── __nyx_probe shim (Phase 06 — Track C.1) ────────────────────────────────── -func __nyx_probe(sinkCallee string, args ...string) { - p := os.Getenv("NYX_PROBE_PATH") - if p == "" { - return +// ── __nyx_probe shim (Phase 06 — Track C.1, Phase 08 — Track C.4 + C.5) ────── +var __nyx_deny_substrings = []string{ + "TOKEN","SECRET","PASSWORD","PASSWD","API_KEY","APIKEY","PRIVATE_KEY", + "CREDENTIAL","SESSION","COOKIE","AUTH","BEARER","AWS_ACCESS","AWS_SESSION", + "GH_TOKEN","GITHUB_TOKEN","NPM_TOKEN","PYPI_TOKEN","DOCKER_PASS", +} + +const __nyx_payload_limit = 16 * 1024 +const __nyx_redacted = "" + +func __nyx_scrub_env() map[string]string { + out := map[string]string{} + for _, e := range os.Environ() { + idx := -1 + for i, c := range e { + if c == '=' { idx = i; break } + } + if idx < 0 { continue } + k := e[:idx] + v := e[idx+1:] + ku := strings.ToUpper(k) + denied := false + for _, n := range __nyx_deny_substrings { + if strings.Contains(ku, n) { denied = true; break } + } + if denied { + out[k] = __nyx_redacted + } else { + 
out[k] = v + } } + return out +} + +func __nyx_witness(sinkCallee string, args []string) map[string]interface{} { + payload := os.Getenv("NYX_PAYLOAD") + pb := []byte(payload) + if len(pb) > __nyx_payload_limit { pb = pb[:__nyx_payload_limit] } + repr := make([]string, len(args)) + for i, a := range args { repr[i] = a } + cwd, _ := os.Getwd() + bytes_int := make([]int, len(pb)) + for i, b := range pb { bytes_int[i] = int(b) } + return map[string]interface{}{ + "env_snapshot": __nyx_scrub_env(), + "cwd": cwd, + "payload_bytes": bytes_int, + "callee": sinkCallee, + "args_repr": repr, + } +} + +func __nyx_emit(rec map[string]interface{}) { + p := os.Getenv("NYX_PROBE_PATH") + if p == "" { return } + b, err := json.Marshal(rec) + if err != nil { return } + f, err := os.OpenFile(p, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { return } + defer f.Close() + f.Write(b) + f.Write([]byte("\n")) +} + +func __nyx_probe(sinkCallee string, args ...string) { serArgs := make([]map[string]interface{}, 0, len(args)) for _, a := range args { serArgs = append(serArgs, map[string]interface{}{ @@ -71,23 +130,61 @@ func __nyx_probe(sinkCallee string, args ...string) { "value": a, }) } - rec := map[string]interface{}{ + __nyx_emit(map[string]interface{}{ "sink_callee": sinkCallee, "args": serArgs, "captured_at_ns": uint64(time.Now().UnixNano()), "payload_id": os.Getenv("NYX_PAYLOAD_ID"), + "kind": map[string]interface{}{"kind": "Normal"}, + "witness": __nyx_witness(sinkCallee, args), + }) +} + +// Phase 08: install a sink-site signal listener via `signal.Notify`. Go +// can intercept SIGABRT but not SIGSEGV (the Go runtime panics on +// memory faults before user handlers see them); for SIGSEGV we rely on +// the runtime's panic catch via `recover()` inside __nyx_run_sink. 
+func __nyx_install_crash_guard(sinkCallee string) { + ch := make(chan os.Signal, 1) + signal.Notify(ch, syscall.SIGABRT, syscall.SIGBUS, syscall.SIGFPE, syscall.SIGILL) + go func() { + sig := <-ch + name := "SIGABRT" + switch sig { + case syscall.SIGBUS: name = "SIGBUS" + case syscall.SIGFPE: name = "SIGFPE" + case syscall.SIGILL: name = "SIGILL" + } + __nyx_emit(map[string]interface{}{ + "sink_callee": sinkCallee, + "args": []interface{}{}, + "captured_at_ns": uint64(time.Now().UnixNano()), + "payload_id": os.Getenv("NYX_PAYLOAD_ID"), + "kind": map[string]interface{}{"kind": "Crash", "signal": name}, + "witness": __nyx_witness(sinkCallee, nil), + }) + signal.Reset(sig) + syscall.Kill(syscall.Getpid(), sig.(syscall.Signal)) + }() +} + +// Phase 08: panic-recover hook for Go runtime-caught faults (SIGSEGV nil- +// deref, divide-by-zero treated as panic). Call as `defer __nyx_recover_crash("callee")()` +// around the instrumented sink invocation. +func __nyx_recover_crash(sinkCallee string) func() { + return func() { + if r := recover(); r != nil { + __nyx_emit(map[string]interface{}{ + "sink_callee": sinkCallee, + "args": []interface{}{}, + "captured_at_ns": uint64(time.Now().UnixNano()), + "payload_id": os.Getenv("NYX_PAYLOAD_ID"), + "kind": map[string]interface{}{"kind": "Crash", "signal": "SIGSEGV"}, + "witness": __nyx_witness(sinkCallee, nil), + }) + panic(r) + } } - b, err := json.Marshal(rec) - if err != nil { - return - } - f, err := os.OpenFile(p, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) - if err != nil { - return - } - defer f.Close() - f.Write(b) - f.Write([]byte("\n")) } "# } diff --git a/src/dynamic/lang/java.rs b/src/dynamic/lang/java.rs index 2ebdd1da..fd758123 100644 --- a/src/dynamic/lang/java.rs +++ b/src/dynamic/lang/java.rs @@ -64,16 +64,78 @@ impl LangEmitter for JavaEmitter { /// [`crate::dynamic::probe::SinkProbe`] wire format. 
pub fn probe_shim() -> &'static str { r#" - // ── __nyx_probe shim (Phase 06 — Track C.1) ────────────────────────────────── - static void __nyx_probe(String sinkCallee, String... args) { - String p = System.getenv("NYX_PROBE_PATH"); - if (p == null || p.isEmpty()) { - return; + // ── __nyx_probe shim (Phase 06 — Track C.1, Phase 08 — Track C.4 + C.5) ── + private static final String[] __NYX_DENY = { + "TOKEN","SECRET","PASSWORD","PASSWD","API_KEY","APIKEY","PRIVATE_KEY", + "CREDENTIAL","SESSION","COOKIE","AUTH","BEARER","AWS_ACCESS","AWS_SESSION", + "GH_TOKEN","GITHUB_TOKEN","NPM_TOKEN","PYPI_TOKEN","DOCKER_PASS" + }; + private static final int __NYX_PAYLOAD_LIMIT = 16 * 1024; + private static final String __NYX_REDACTED = ""; + + private static boolean nyxIsDeniedKey(String k) { + String ku = k.toUpperCase(); + for (String n : __NYX_DENY) { + if (ku.contains(n)) return true; } + return false; + } + + private static String nyxWitnessJson(String sinkCallee, String[] args) { + StringBuilder out = new StringBuilder(256); + out.append("{\"env_snapshot\":{"); + boolean first = true; + java.util.TreeMap envSorted = new java.util.TreeMap<>(System.getenv()); + for (java.util.Map.Entry e : envSorted.entrySet()) { + if (!first) out.append(','); + first = false; + out.append('"'); nyxJsonEscape(e.getKey(), out); out.append("\":\""); + if (nyxIsDeniedKey(e.getKey())) { + out.append(__NYX_REDACTED); + } else { + nyxJsonEscape(e.getValue() == null ? 
"" : e.getValue(), out); + } + out.append('"'); + } + out.append("},\"cwd\":\""); + nyxJsonEscape(System.getProperty("user.dir", ""), out); + out.append("\",\"payload_bytes\":["); + String payload = System.getenv("NYX_PAYLOAD"); + if (payload != null) { + byte[] pb = payload.getBytes(java.nio.charset.StandardCharsets.UTF_8); + int cap = Math.min(pb.length, __NYX_PAYLOAD_LIMIT); + for (int i = 0; i < cap; i++) { + if (i > 0) out.append(','); + out.append(((int) pb[i]) & 0xff); + } + } + out.append("],\"callee\":\""); nyxJsonEscape(sinkCallee, out); + out.append("\",\"args_repr\":["); + if (args != null) { + for (int i = 0; i < args.length; i++) { + if (i > 0) out.append(','); + out.append('"'); nyxJsonEscape(args[i] == null ? "" : args[i], out); out.append('"'); + } + } + out.append("]}"); + return out.toString(); + } + + private static void nyxEmit(String line) { + String p = System.getenv("NYX_PROBE_PATH"); + if (p == null || p.isEmpty()) return; + try (java.io.FileWriter fw = new java.io.FileWriter(p, true)) { + fw.write(line); + } catch (java.io.IOException e) { + // best-effort + } + } + + static void __nyx_probe(String sinkCallee, String... 
args) { long now = System.nanoTime(); String payloadId = System.getenv("NYX_PAYLOAD_ID"); if (payloadId == null) payloadId = ""; - StringBuilder line = new StringBuilder(128); + StringBuilder line = new StringBuilder(256); line.append("{\"sink_callee\":\""); nyxJsonEscape(sinkCallee, line); line.append("\",\"args\":["); @@ -85,12 +147,33 @@ pub fn probe_shim() -> &'static str { } line.append("],\"captured_at_ns\":").append(now).append(",\"payload_id\":\""); nyxJsonEscape(payloadId, line); - line.append("\"}\n"); - try (java.io.FileWriter fw = new java.io.FileWriter(p, true)) { - fw.write(line.toString()); - } catch (java.io.IOException e) { - // best-effort - } + line.append("\",\"kind\":{\"kind\":\"Normal\"},\"witness\":"); + line.append(nyxWitnessJson(sinkCallee, args)); + line.append("}\n"); + nyxEmit(line.toString()); + } + + // Phase 08: install a sink-site Throwable handler. Java cannot catch + // SIGSEGV / SIGFPE directly (JVM aborts), but it can intercept the + // uncaught-exception path which fires for any Error / RuntimeException + // escaping the sink call. Map them onto SIGABRT for the oracle. 
+ static void __nyx_install_crash_guard(String sinkCallee) { + Thread.setDefaultUncaughtExceptionHandler((t, e) -> { + long now = System.nanoTime(); + String payloadId = System.getenv("NYX_PAYLOAD_ID"); + if (payloadId == null) payloadId = ""; + StringBuilder line = new StringBuilder(256); + line.append("{\"sink_callee\":\""); + nyxJsonEscape(sinkCallee, line); + line.append("\",\"args\":[],\"captured_at_ns\":").append(now) + .append(",\"payload_id\":\""); + nyxJsonEscape(payloadId, line); + line.append("\",\"kind\":{\"kind\":\"Crash\",\"signal\":\"SIGABRT\"},\"witness\":"); + line.append(nyxWitnessJson(sinkCallee, new String[0])); + line.append("}\n"); + nyxEmit(line.toString()); + System.exit(134); + }); } private static void nyxJsonEscape(String s, StringBuilder out) { diff --git a/src/dynamic/lang/javascript.rs b/src/dynamic/lang/javascript.rs index f4165b42..5e13291a 100644 --- a/src/dynamic/lang/javascript.rs +++ b/src/dynamic/lang/javascript.rs @@ -58,11 +58,62 @@ impl LangEmitter for JavaScriptEmitter { /// unset. 
pub fn probe_shim() -> &'static str { r#" -// ── __nyx_probe shim (Phase 06 — Track C.1) ────────────────────────────────── -function __nyx_probe(sinkCallee, ...args) { +// ── __nyx_probe shim (Phase 06 — Track C.1, Phase 08 — Track C.4 + C.5) ────── +const _NYX_DENY_SUBSTRINGS = [ + 'TOKEN','SECRET','PASSWORD','PASSWD','API_KEY','APIKEY','PRIVATE_KEY', + 'CREDENTIAL','SESSION','COOKIE','AUTH','BEARER','AWS_ACCESS','AWS_SESSION', + 'GH_TOKEN','GITHUB_TOKEN','NPM_TOKEN','PYPI_TOKEN','DOCKER_PASS' +]; +const _NYX_PAYLOAD_LIMIT = 16 * 1024; +const _NYX_REDACTED = ''; + +function __nyx_scrub_env() { + const out = {}; + const env = process.env || {}; + for (const k of Object.keys(env)) { + const ku = String(k).toUpperCase(); + if (_NYX_DENY_SUBSTRINGS.some((n) => ku.indexOf(n) !== -1)) { + out[k] = _NYX_REDACTED; + } else { + out[k] = env[k]; + } + } + return out; +} + +function __nyx_witness(sinkCallee, args) { + let payload = process.env.NYX_PAYLOAD || ''; + let buf = Buffer.from(String(payload), 'utf8'); + if (buf.length > _NYX_PAYLOAD_LIMIT) buf = buf.slice(0, _NYX_PAYLOAD_LIMIT); + const argsRepr = args.map(function (a) { + if (a && typeof a === 'object' && (a instanceof Buffer || a instanceof Uint8Array)) { + return ''; + } + return String(a); + }); + let cwd = ''; + try { cwd = process.cwd(); } catch (e) {} + return { + env_snapshot: __nyx_scrub_env(), + cwd: cwd, + payload_bytes: Array.from(buf), + callee: String(sinkCallee), + args_repr: argsRepr, + }; +} + +function __nyx_emit(rec) { const _fs = require('fs'); const _p = process.env.NYX_PROBE_PATH; if (!_p) return; + try { + _fs.appendFileSync(_p, JSON.stringify(rec) + '\n'); + } catch (e) { + // best-effort: probe channel write failure is non-fatal. 
+ } +} + +function __nyx_probe(sinkCallee, ...args) { const _ser = args.map(function (a) { if (a && typeof a === 'object' && (a instanceof Buffer || a instanceof Uint8Array)) { return { kind: 'Bytes', value: Array.from(a) }; @@ -75,16 +126,49 @@ function __nyx_probe(sinkCallee, ...args) { } return { kind: 'String', value: String(a) }; }); - const _rec = { + __nyx_emit({ sink_callee: String(sinkCallee), args: _ser, captured_at_ns: Number(process.hrtime.bigint()), payload_id: String(process.env.NYX_PAYLOAD_ID || ''), + kind: { kind: 'Normal' }, + witness: __nyx_witness(sinkCallee, args), + }); +} + +// Phase 08: V8 cannot catch native SIGSEGV in pure JS, but it can intercept +// `uncaughtException` / `unhandledRejection` plus the synchronously +// deliverable signals (SIGABRT via process.kill). __nyx_install_crash_guard +// registers both: the uncaught path maps Error-shaped failures to a SIGABRT +// crash probe; explicit process.on('SIG*') registers the others where the +// runtime exposes them. Re-raise via process.exit(134) so the outcome's +// exit_code still reflects an abort-style death. +function __nyx_install_crash_guard(sinkCallee) { + const _emit_crash = function (signalName) { + __nyx_emit({ + sink_callee: String(sinkCallee), + args: [], + captured_at_ns: Number(process.hrtime.bigint()), + payload_id: String(process.env.NYX_PAYLOAD_ID || ''), + kind: { kind: 'Crash', signal: signalName }, + witness: __nyx_witness(sinkCallee, []), + }); }; - try { - _fs.appendFileSync(_p, JSON.stringify(_rec) + '\n'); - } catch (e) { - // best-effort: probe channel write failure is non-fatal. + process.on('uncaughtException', function (_err) { + _emit_crash('SIGABRT'); + process.exit(134); + }); + process.on('unhandledRejection', function (_reason) { + _emit_crash('SIGABRT'); + process.exit(134); + }); + for (const nm of ['SIGSEGV','SIGABRT','SIGBUS','SIGFPE','SIGILL']) { + try { + process.on(nm, function () { + _emit_crash(nm); + process.exit(128 + (nm === 'SIGABRT' ? 
6 : 11)); + }); + } catch (e) { /* runtime refused signal handler */ } } } "# diff --git a/src/dynamic/lang/php.rs b/src/dynamic/lang/php.rs index 0a4bb45c..8368a5d0 100644 --- a/src/dynamic/lang/php.rs +++ b/src/dynamic/lang/php.rs @@ -51,12 +51,53 @@ impl LangEmitter for PhpEmitter { /// Track C.1). pub fn probe_shim() -> &'static str { r#" -// ── __nyx_probe shim (Phase 06 — Track C.1) ────────────────────────────────── -function __nyx_probe(string $sinkCallee, ...$args): void { - $p = getenv('NYX_PROBE_PATH'); - if ($p === false || $p === '') { - return; +// ── __nyx_probe shim (Phase 06 — Track C.1, Phase 08 — Track C.4 + C.5) ────── +const __NYX_DENY_SUBSTRINGS = [ + 'TOKEN','SECRET','PASSWORD','PASSWD','API_KEY','APIKEY','PRIVATE_KEY', + 'CREDENTIAL','SESSION','COOKIE','AUTH','BEARER','AWS_ACCESS','AWS_SESSION', + 'GH_TOKEN','GITHUB_TOKEN','NPM_TOKEN','PYPI_TOKEN','DOCKER_PASS', +]; +const __NYX_PAYLOAD_LIMIT = 16 * 1024; +const __NYX_REDACTED = ''; + +function __nyx_is_denied_key(string $k): bool { + $ku = strtoupper($k); + foreach (__NYX_DENY_SUBSTRINGS as $n) { + if (strpos($ku, $n) !== false) return true; } + return false; +} + +function __nyx_witness(string $sinkCallee, array $args): array { + $env = []; + foreach ($_ENV as $k => $v) { + $env[(string)$k] = __nyx_is_denied_key((string)$k) ? __NYX_REDACTED : (string)$v; + } + // Sort for deterministic output. + ksort($env); + $payload = (string) (getenv('NYX_PAYLOAD') ?: ''); + $pb = substr($payload, 0, __NYX_PAYLOAD_LIMIT); + $bytes = []; + for ($i = 0; $i < strlen($pb); $i++) $bytes[] = ord($pb[$i]); + $repr = []; + foreach ($args as $a) $repr[] = is_string($a) ? $a : (string) $a; + return [ + 'env_snapshot' => $env, + 'cwd' => @getcwd() ?: '', + 'payload_bytes' => $bytes, + 'callee' => $sinkCallee, + 'args_repr' => $repr, + ]; +} + +function __nyx_emit(array $rec): void { + $p = getenv('NYX_PROBE_PATH'); + if ($p === false || $p === '') return; + $line = json_encode($rec) . 
"\n"; + @file_put_contents($p, $line, FILE_APPEND); +} + +function __nyx_probe(string $sinkCallee, ...$args): void { $ser = []; foreach ($args as $a) { if (is_int($a)) { @@ -65,14 +106,57 @@ function __nyx_probe(string $sinkCallee, ...$args): void { $ser[] = ['kind' => 'String', 'value' => (string) $a]; } } - $rec = [ - 'sink_callee' => $sinkCallee, - 'args' => $ser, + __nyx_emit([ + 'sink_callee' => $sinkCallee, + 'args' => $ser, 'captured_at_ns' => (int) (microtime(true) * 1e9), - 'payload_id' => (string) (getenv('NYX_PAYLOAD_ID') ?: ''), - ]; - $line = json_encode($rec) . "\n"; - @file_put_contents($p, $line, FILE_APPEND); + 'payload_id' => (string) (getenv('NYX_PAYLOAD_ID') ?: ''), + 'kind' => ['kind' => 'Normal'], + 'witness' => __nyx_witness($sinkCallee, $args), + ]); +} + +// Phase 08: PHP cannot catch SIGSEGV from userland, but pcntl_signal and +// register_shutdown_function intercept SIGABRT-class fatal errors. +function __nyx_install_crash_guard(string $sinkCallee): void { + $emit_crash = function (string $signalName) use ($sinkCallee) { + __nyx_emit([ + 'sink_callee' => $sinkCallee, + 'args' => [], + 'captured_at_ns' => (int) (microtime(true) * 1e9), + 'payload_id' => (string) (getenv('NYX_PAYLOAD_ID') ?: ''), + 'kind' => ['kind' => 'Crash', 'signal' => $signalName], + 'witness' => __nyx_witness($sinkCallee, []), + ]); + }; + set_error_handler(function ($errno, $errstr) use ($emit_crash) { + if ($errno & (E_ERROR | E_PARSE | E_CORE_ERROR | E_COMPILE_ERROR | E_USER_ERROR)) { + $emit_crash('SIGABRT'); + } + return false; + }); + register_shutdown_function(function () use ($emit_crash) { + $err = error_get_last(); + if ($err && ($err['type'] & (E_ERROR | E_PARSE | E_CORE_ERROR | E_COMPILE_ERROR))) { + $emit_crash('SIGABRT'); + } + }); + if (function_exists('pcntl_signal') && function_exists('pcntl_async_signals')) { + pcntl_async_signals(true); + foreach ([SIGABRT, SIGBUS ?? null, SIGFPE ?? null, SIGILL ?? 
null] as $sig) { + if ($sig === null) continue; + pcntl_signal($sig, function ($s) use ($emit_crash) { + $name = 'SIGABRT'; + if (defined('SIGABRT') && $s === SIGABRT) $name = 'SIGABRT'; + if (defined('SIGBUS') && $s === SIGBUS) $name = 'SIGBUS'; + if (defined('SIGFPE') && $s === SIGFPE) $name = 'SIGFPE'; + if (defined('SIGILL') && $s === SIGILL) $name = 'SIGILL'; + $emit_crash($name); + pcntl_signal($s, SIG_DFL); + posix_kill(posix_getpid(), $s); + }); + } + } } "# } diff --git a/src/dynamic/lang/python.rs b/src/dynamic/lang/python.rs index 67d54473..d0306574 100644 --- a/src/dynamic/lang/python.rs +++ b/src/dynamic/lang/python.rs @@ -51,12 +51,66 @@ impl LangEmitter for PythonEmitter { /// configured a probe channel. pub fn probe_shim() -> &'static str { r#" -# ── __nyx_probe shim (Phase 06 — Track C.1) ────────────────────────────────── -def __nyx_probe(sink_callee, *args): - import os, time, json +# ── __nyx_probe shim (Phase 06 — Track C.1, Phase 08 — Track C.4 + C.5) ────── +# Deny-substring list mirrors crate::dynamic::policy::DENY_KEY_SUBSTRINGS; keep +# in sync when the host-side policy gains new entries. 
+_NYX_DENY_SUBSTRINGS = ( + "TOKEN", "SECRET", "PASSWORD", "PASSWD", "API_KEY", "APIKEY", + "PRIVATE_KEY", "CREDENTIAL", "SESSION", "COOKIE", "AUTH", "BEARER", + "AWS_ACCESS", "AWS_SESSION", "GH_TOKEN", "GITHUB_TOKEN", "NPM_TOKEN", + "PYPI_TOKEN", "DOCKER_PASS", +) +_NYX_PAYLOAD_LIMIT = 16 * 1024 +_NYX_REDACTED = "" + +def __nyx_scrub_env(): + import os + out = {} + for k, v in os.environ.items(): + ku = str(k).upper() + if any(n in ku for n in _NYX_DENY_SUBSTRINGS): + out[k] = _NYX_REDACTED + else: + out[k] = v + return out + +def __nyx_witness(sink_callee, args): + import os + payload = os.environ.get("NYX_PAYLOAD", "") + payload_bytes = payload.encode("utf-8", "replace") if isinstance(payload, str) else bytes(payload) + if len(payload_bytes) > _NYX_PAYLOAD_LIMIT: + payload_bytes = payload_bytes[:_NYX_PAYLOAD_LIMIT] + args_repr = [] + for a in args: + if isinstance(a, (bytes, bytearray)): + args_repr.append("" % len(a)) + else: + args_repr.append(str(a)) + try: + cwd = os.getcwd() + except OSError: + cwd = "" + return { + "env_snapshot": __nyx_scrub_env(), + "cwd": cwd, + "payload_bytes": list(payload_bytes), + "callee": str(sink_callee), + "args_repr": args_repr, + } + +def __nyx_emit(rec): + import os, json p = os.environ.get("NYX_PROBE_PATH") if not p: return + try: + with open(p, "a") as _f: + _f.write(json.dumps(rec) + "\n") + except OSError: + pass + +def __nyx_probe(sink_callee, *args): + import os, time serialised = [] for a in args: if isinstance(a, (bytes, bytearray)): @@ -72,12 +126,45 @@ def __nyx_probe(sink_callee, *args): "args": serialised, "captured_at_ns": time.time_ns(), "payload_id": os.environ.get("NYX_PAYLOAD_ID", ""), + "kind": {"kind": "Normal"}, + "witness": __nyx_witness(sink_callee, args), } - try: - with open(p, "a") as _f: - _f.write(json.dumps(rec) + "\n") - except OSError: - pass + __nyx_emit(rec) + +# Phase 08: sink-site signal handler. 
Call __nyx_install_crash_guard before +# invoking the instrumented sink so a SIGSEGV / SIGABRT / etc. is captured as +# a Crash probe (with witness) before the process aborts. The shim re-raises +# the signal on the default handler after writing so process-level outcome +# observers (exit_code) still see the death. +_NYX_SIGNAL_NAMES = {} + +def __nyx_install_crash_guard(sink_callee): + import signal, os, time + catchable = [] + for nm in ("SIGSEGV", "SIGABRT", "SIGBUS", "SIGFPE", "SIGILL"): + s = getattr(signal, nm, None) + if s is not None: + catchable.append((nm, s)) + _NYX_SIGNAL_NAMES[s] = nm + def _handler(signum, frame): + nm = _NYX_SIGNAL_NAMES.get(signum, "SIG?") + rec = { + "sink_callee": str(sink_callee), + "args": [], + "captured_at_ns": time.time_ns(), + "payload_id": os.environ.get("NYX_PAYLOAD_ID", ""), + "kind": {"kind": "Crash", "signal": nm}, + "witness": __nyx_witness(sink_callee, []), + } + __nyx_emit(rec) + # Reset to default and re-raise so the process actually dies. + signal.signal(signum, signal.SIG_DFL) + os.kill(os.getpid(), signum) + for _nm, s in catchable: + try: + signal.signal(s, _handler) + except (OSError, ValueError): + pass "# } diff --git a/src/dynamic/lang/ruby.rs b/src/dynamic/lang/ruby.rs index a546b1ac..4111ce0c 100644 --- a/src/dynamic/lang/ruby.rs +++ b/src/dynamic/lang/ruby.rs @@ -25,11 +25,50 @@ const SUPPORTED: &[EntryKind] = &[EntryKind::Function]; /// even though `emit` returns `LangUnsupported` until Phase 15 lands. 
pub fn probe_shim() -> &'static str { r#" -# ── __nyx_probe shim (Phase 06 — Track C.1) ────────────────────────────────── -def __nyx_probe(sink_callee, *args) +# ── __nyx_probe shim (Phase 06 — Track C.1, Phase 08 — Track C.4 + C.5) ────── +__NYX_DENY_SUBSTRINGS = %w[ + TOKEN SECRET PASSWORD PASSWD API_KEY APIKEY PRIVATE_KEY CREDENTIAL SESSION + COOKIE AUTH BEARER AWS_ACCESS AWS_SESSION GH_TOKEN GITHUB_TOKEN NPM_TOKEN + PYPI_TOKEN DOCKER_PASS +].freeze +__NYX_PAYLOAD_LIMIT = 16 * 1024 +__NYX_REDACTED = '' + +def __nyx_is_denied_key(k) + ku = k.to_s.upcase + __NYX_DENY_SUBSTRINGS.any? { |n| ku.include?(n) } +end + +def __nyx_witness(sink_callee, args) + env_snapshot = {} + ENV.each do |k, v| + env_snapshot[k] = __nyx_is_denied_key(k) ? __NYX_REDACTED : v + end + payload = ENV['NYX_PAYLOAD'] || '' + pb = payload.bytes + pb = pb[0, __NYX_PAYLOAD_LIMIT] if pb.length > __NYX_PAYLOAD_LIMIT + repr = args.map { |a| a.is_a?(String) ? a : a.to_s } + cwd = (Dir.pwd rescue '') + { + env_snapshot: env_snapshot, + cwd: cwd, + payload_bytes: pb, + callee: sink_callee.to_s, + args_repr: repr, + } +end + +def __nyx_emit(rec) require 'json' p = ENV['NYX_PROBE_PATH'] return if p.nil? || p.empty? + begin + File.open(p, 'a') { |f| f.puts(rec.to_json) } + rescue StandardError + end +end + +def __nyx_probe(sink_callee, *args) ser = args.map do |a| case a when Integer then { kind: 'Int', value: a } @@ -37,15 +76,36 @@ def __nyx_probe(sink_callee, *args) else { kind: 'String', value: a.to_s } end end - rec = { + __nyx_emit({ sink_callee: sink_callee.to_s, args: ser, captured_at_ns: (Process.clock_gettime(Process::CLOCK_REALTIME, :nanosecond)), payload_id: (ENV['NYX_PAYLOAD_ID'] || ''), - } - begin - File.open(p, 'a') { |f| f.puts(rec.to_json) } - rescue StandardError + kind: { kind: 'Normal' }, + witness: __nyx_witness(sink_callee, args), + }) +end + +# Phase 08: install a sink-site signal trap. 
Ruby traps run in interrupt +# context but can write to a file before re-raising via Process.kill. +def __nyx_install_crash_guard(sink_callee) + %w[SEGV ABRT BUS FPE ILL].each do |nm| + begin + Signal.trap(nm) do + __nyx_emit({ + sink_callee: sink_callee.to_s, + args: [], + captured_at_ns: (Process.clock_gettime(Process::CLOCK_REALTIME, :nanosecond)), + payload_id: (ENV['NYX_PAYLOAD_ID'] || ''), + kind: { kind: 'Crash', signal: "SIG#{nm}" }, + witness: __nyx_witness(sink_callee, []), + }) + Signal.trap(nm, 'DEFAULT') + Process.kill(nm, Process.pid) + end + rescue ArgumentError, Errno::EINVAL + # signal not supported on this platform + end end end "# diff --git a/src/dynamic/lang/rust.rs b/src/dynamic/lang/rust.rs index a36de567..e3120b1d 100644 --- a/src/dynamic/lang/rust.rs +++ b/src/dynamic/lang/rust.rs @@ -61,58 +61,197 @@ impl LangEmitter for RustEmitter { /// [`crate::dynamic::probe::SinkProbe`] wire format. pub fn probe_shim() -> &'static str { r#" -// ── __nyx_probe shim (Phase 06 — Track C.1) ────────────────────────────────── +// ── __nyx_probe shim (Phase 06 — Track C.1, Phase 08 — Track C.4 + C.5) ────── #[allow(dead_code)] -fn __nyx_probe(sink_callee: &str, args: &[&str]) { +const __NYX_DENY_SUBSTRINGS: &[&str] = &[ + "TOKEN","SECRET","PASSWORD","PASSWD","API_KEY","APIKEY","PRIVATE_KEY", + "CREDENTIAL","SESSION","COOKIE","AUTH","BEARER","AWS_ACCESS","AWS_SESSION", + "GH_TOKEN","GITHUB_TOKEN","NPM_TOKEN","PYPI_TOKEN","DOCKER_PASS", +]; +#[allow(dead_code)] +const __NYX_PAYLOAD_LIMIT: usize = 16 * 1024; +#[allow(dead_code)] +const __NYX_REDACTED: &str = ""; + +#[allow(dead_code)] +fn __nyx_esc(s: &str, out: &mut String) { + for ch in s.chars() { + match ch { + '"' => out.push_str("\\\""), + '\\' => out.push_str("\\\\"), + '\n' => out.push_str("\\n"), + '\r' => out.push_str("\\r"), + '\t' => out.push_str("\\t"), + c if (c as u32) < 0x20 => out.push_str(&format!("\\u{:04x}", c as u32)), + c => out.push(c), + } + } +} + +#[allow(dead_code)] +fn 
__nyx_witness_json(sink_callee: &str, args: &[&str]) -> String { + let mut out = String::with_capacity(256); + out.push_str("{\"env_snapshot\":{"); + let mut first = true; + let mut keys: Vec<(String, String)> = std::env::vars().collect(); + keys.sort(); + for (k, v) in keys { + let ku = k.to_ascii_uppercase(); + let denied = __NYX_DENY_SUBSTRINGS.iter().any(|n| ku.contains(n)); + let val = if denied { __NYX_REDACTED } else { v.as_str() }; + if !first { out.push(','); } + first = false; + out.push('"'); + __nyx_esc(&k, &mut out); + out.push_str("\":\""); + __nyx_esc(val, &mut out); + out.push('"'); + } + out.push_str("},\"cwd\":\""); + let cwd = std::env::current_dir() + .map(|p| p.to_string_lossy().into_owned()) + .unwrap_or_default(); + __nyx_esc(&cwd, &mut out); + out.push_str("\",\"payload_bytes\":["); + let payload = std::env::var("NYX_PAYLOAD").unwrap_or_default(); + let bytes = payload.as_bytes(); + let cap = bytes.len().min(__NYX_PAYLOAD_LIMIT); + for i in 0..cap { + if i > 0 { out.push(','); } + out.push_str(&format!("{}", bytes[i])); + } + out.push_str("],\"callee\":\""); + __nyx_esc(sink_callee, &mut out); + out.push_str("\",\"args_repr\":["); + for (i, a) in args.iter().enumerate() { + if i > 0 { out.push(','); } + out.push('"'); + __nyx_esc(a, &mut out); + out.push('"'); + } + out.push_str("]}"); + out +} + +#[allow(dead_code)] +fn __nyx_emit(line: &str) { use std::io::Write; let p = match std::env::var("NYX_PROBE_PATH") { Ok(v) => v, Err(_) => return, }; - let now = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_nanos() as u64) - .unwrap_or(0); - let payload_id = std::env::var("NYX_PAYLOAD_ID").unwrap_or_default(); - fn esc(s: &str, out: &mut String) { - for ch in s.chars() { - match ch { - '"' => out.push_str("\\\""), - '\\' => out.push_str("\\\\"), - '\n' => out.push_str("\\n"), - '\r' => out.push_str("\\r"), - '\t' => out.push_str("\\t"), - c if (c as u32) < 0x20 => out.push_str(&format!("\\u{:04x}", c as 
u32)), - c => out.push(c), - } - } - } - let mut line = String::with_capacity(128); - line.push_str("{\"sink_callee\":\""); - esc(sink_callee, &mut line); - line.push_str("\",\"args\":["); - for (i, a) in args.iter().enumerate() { - if i > 0 { - line.push(','); - } - line.push_str("{\"kind\":\"String\",\"value\":\""); - esc(a, &mut line); - line.push_str("\"}"); - } - line.push_str(&format!( - "],\"captured_at_ns\":{},\"payload_id\":\"", - now - )); - esc(&payload_id, &mut line); - line.push_str("\"}\n"); if let Ok(mut f) = std::fs::OpenOptions::new() .create(true) .append(true) .open(&p) { let _ = f.write_all(line.as_bytes()); + let _ = f.write_all(b"\n"); } } + +#[allow(dead_code)] +fn __nyx_probe(sink_callee: &str, args: &[&str]) { + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos() as u64) + .unwrap_or(0); + let payload_id = std::env::var("NYX_PAYLOAD_ID").unwrap_or_default(); + let mut line = String::with_capacity(256); + line.push_str("{\"sink_callee\":\""); + __nyx_esc(sink_callee, &mut line); + line.push_str("\",\"args\":["); + for (i, a) in args.iter().enumerate() { + if i > 0 { line.push(','); } + line.push_str("{\"kind\":\"String\",\"value\":\""); + __nyx_esc(a, &mut line); + line.push_str("\"}"); + } + line.push_str(&format!( + "],\"captured_at_ns\":{},\"payload_id\":\"", + now + )); + __nyx_esc(&payload_id, &mut line); + line.push_str("\",\"kind\":{\"kind\":\"Normal\"},\"witness\":"); + line.push_str(&__nyx_witness_json(sink_callee, args)); + line.push('}'); + __nyx_emit(&line); +} + +// Phase 08: install a sink-site signal handler via `libc::sigaction` so a +// SIGSEGV / SIGABRT / etc. inside the sink call is captured as a Crash +// probe before the kernel re-delivers it via SIG_DFL. The shim is +// no-op on non-Unix targets (the dynamic-verification supported set is +// Unix-only) so consumers can splice it unconditionally. 
+#[cfg(unix)] +#[allow(dead_code)] +fn __nyx_install_crash_guard(sink_callee: &'static str) { + use std::sync::atomic::{AtomicPtr, Ordering}; + static SINK_CALLEE: AtomicPtr = AtomicPtr::new(std::ptr::null_mut()); + SINK_CALLEE.store(sink_callee.as_ptr() as *mut u8, Ordering::SeqCst); + let len = sink_callee.len(); + static CALLEE_LEN: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0); + CALLEE_LEN.store(len, Ordering::SeqCst); + extern "C" fn handler(sig: i32) { + // async-signal-unsafe code is unavoidable here (file I/O); we + // accept the risk because the process is already dying and we + // need the forensic record. + let name = match sig { + libc::SIGSEGV => "SIGSEGV", + libc::SIGABRT => "SIGABRT", + libc::SIGBUS => "SIGBUS", + libc::SIGFPE => "SIGFPE", + libc::SIGILL => "SIGILL", + _ => "SIGABRT", + }; + let p = SINK_CALLEE.load(Ordering::SeqCst); + let len = CALLEE_LEN.load(Ordering::SeqCst); + let sink_callee: &str = unsafe { + if p.is_null() { + "" + } else { + let slice = std::slice::from_raw_parts(p as *const u8, len); + std::str::from_utf8_unchecked(slice) + } + }; + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos() as u64) + .unwrap_or(0); + let payload_id = std::env::var("NYX_PAYLOAD_ID").unwrap_or_default(); + let mut line = String::with_capacity(256); + line.push_str("{\"sink_callee\":\""); + __nyx_esc(sink_callee, &mut line); + line.push_str("\",\"args\":[],\"captured_at_ns\":"); + line.push_str(&format!("{now},\"payload_id\":\"")); + __nyx_esc(&payload_id, &mut line); + line.push_str("\",\"kind\":{\"kind\":\"Crash\",\"signal\":\""); + line.push_str(name); + line.push_str("\"},\"witness\":"); + line.push_str(&__nyx_witness_json(sink_callee, &[])); + line.push('}'); + __nyx_emit(&line); + // Restore default handler and re-raise so process actually dies. 
+ unsafe { + let mut sa: libc::sigaction = std::mem::zeroed(); + sa.sa_sigaction = libc::SIG_DFL; + libc::sigaction(sig, &sa, std::ptr::null_mut()); + libc::raise(sig); + } + } + unsafe { + let mut sa: libc::sigaction = std::mem::zeroed(); + sa.sa_sigaction = handler as usize; + libc::sigemptyset(&mut sa.sa_mask); + for sig in [libc::SIGSEGV, libc::SIGABRT, libc::SIGBUS, libc::SIGFPE, libc::SIGILL] { + libc::sigaction(sig, &sa, std::ptr::null_mut()); + } + } +} + +#[cfg(not(unix))] +#[allow(dead_code)] +fn __nyx_install_crash_guard(_sink_callee: &'static str) {} "# } diff --git a/src/dynamic/mod.rs b/src/dynamic/mod.rs index 35b2bc64..90032ccd 100644 --- a/src/dynamic/mod.rs +++ b/src/dynamic/mod.rs @@ -73,6 +73,7 @@ pub mod lang; pub mod mount_filter; pub mod oob; pub mod oracle; +pub mod policy; pub mod probe; pub mod repro; pub mod report; diff --git a/src/dynamic/oracle.rs b/src/dynamic/oracle.rs index 7ed3488c..628ee091 100644 --- a/src/dynamic/oracle.rs +++ b/src/dynamic/oracle.rs @@ -7,12 +7,145 @@ //! evaluates the predicates against the captured arguments. A run is //! Confirmed iff at least one drained record satisfies *every* predicate. //! -//! The legacy [`Oracle::OutputContains`] path is retained for fixtures that -//! pre-date Phase 06 and migrated downstream; it is marked -//! `#[deprecated]` so the compiler nags every new use-site. +//! Phase 08 (Track C.4) replaces the coarse [`Oracle::Crash`] with +//! [`Oracle::SinkCrash`]. The new variant only confirms when a probe +//! observation in the channel carries +//! [`crate::dynamic::probe::ProbeKind::Crash { signal }`] *and* the captured +//! signal is present in the payload's [`SignalSet`] — i.e. the SIGSEGV / +//! SIGABRT / etc. must have been caught by a sink-site signal handler, not +//! by random crashing setup code. A process-level abort that escapes the +//! sink handler leaves no Crash probe, the oracle does not fire, and the +//! runner downgrades the verdict to +//! 
[`crate::evidence::InconclusiveReason::UnrelatedCrash`] instead of +//! stamping `Confirmed`. +//! +//! The legacy [`Oracle::OutputContains`] and [`Oracle::Crash`] paths are +//! retained for fixtures that pre-date Phase 06 / Phase 08 and migrated +//! downstream; both are marked `#[deprecated]` so the compiler nags every +//! new use-site. -use crate::dynamic::probe::SinkProbe; +use crate::dynamic::probe::{ProbeKind, SinkProbe}; use crate::dynamic::sandbox::SandboxOutcome; +use serde::{Deserialize, Serialize}; + +/// POSIX-style signal name carried inside [`ProbeKind::Crash`] and the +/// [`Oracle::SinkCrash`] match set. +/// +/// Restricted to the signals a sink-site handler can plausibly catch and +/// route back through the probe channel. Anything outside this enum (e.g. +/// `SIGKILL`, `SIGSTOP`) cannot be caught by a userspace handler and is +/// therefore not modellable as a confirmable crash signal. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub enum Signal { + /// Segmentation fault. + #[serde(rename = "SIGSEGV", alias = "Sigsegv", alias = "SEGV")] + Sigsegv, + /// Abort (typically from `abort(3)` or `assert(3)`). + #[serde(rename = "SIGABRT", alias = "Sigabrt", alias = "ABRT")] + Sigabrt, + /// Bus error (misaligned access, mmap fault). + #[serde(rename = "SIGBUS", alias = "Sigbus", alias = "BUS")] + Sigbus, + /// Floating-point exception (incl. integer divide-by-zero on x86). + #[serde(rename = "SIGFPE", alias = "Sigfpe", alias = "FPE")] + Sigfpe, + /// Illegal instruction. + #[serde(rename = "SIGILL", alias = "Sigill", alias = "ILL")] + Sigill, +} + +impl Signal { + /// Bit position of `self` inside a [`SignalSet`]. Stable across builds + /// so the wire format of a serialised [`SignalSet`] stays compatible. 
+ pub const fn bit(self) -> u8 { + match self { + Signal::Sigsegv => 0, + Signal::Sigabrt => 1, + Signal::Sigbus => 2, + Signal::Sigfpe => 3, + Signal::Sigill => 4, + } + } + + /// Render a [`Signal`] as the conventional uppercase POSIX name (e.g. + /// `"SIGSEGV"`). Used by the per-language probe shims so their + /// captured `signal` strings are identical to what the host-side + /// [`Signal::from_name`] decoder expects. + pub const fn as_name(self) -> &'static str { + match self { + Signal::Sigsegv => "SIGSEGV", + Signal::Sigabrt => "SIGABRT", + Signal::Sigbus => "SIGBUS", + Signal::Sigfpe => "SIGFPE", + Signal::Sigill => "SIGILL", + } + } + + /// Inverse of [`as_name`](Signal::as_name). Matches both the canonical + /// uppercase form and a couple of common variants emitted by language + /// runtimes (`"sigsegv"`, `"Segmentation fault"`). Returns `None` for + /// signals the oracle does not model. + pub fn from_name(s: &str) -> Option { + let upper = s.trim().to_ascii_uppercase(); + match upper.as_str() { + "SIGSEGV" | "SEGV" | "SEGMENTATION FAULT" => Some(Signal::Sigsegv), + "SIGABRT" | "ABRT" | "ABORTED" => Some(Signal::Sigabrt), + "SIGBUS" | "BUS" | "BUS ERROR" => Some(Signal::Sigbus), + "SIGFPE" | "FPE" | "FLOATING POINT EXCEPTION" => Some(Signal::Sigfpe), + "SIGILL" | "ILL" | "ILLEGAL INSTRUCTION" => Some(Signal::Sigill), + _ => None, + } + } +} + +/// Bitset of [`Signal`]s the [`Oracle::SinkCrash`] variant treats as +/// confirmable. Stored as a `u8` so a `const`-declared corpus entry can +/// build the set without runtime allocation. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub struct SignalSet(u8); + +impl SignalSet { + /// Empty set — no signal is confirmable. Mostly useful in tests as a + /// "this oracle should never fire" baseline. + pub const fn empty() -> Self { + Self(0) + } + + /// Set built from a slice of [`Signal`]s, callable from `const` + /// context. Order-independent; duplicates are collapsed. 
+ pub const fn from_slice(sigs: &[Signal]) -> Self { + let mut bits = 0u8; + let mut i = 0; + while i < sigs.len() { + bits |= 1 << sigs[i].bit(); + i += 1; + } + Self(bits) + } + + /// `SignalSet` containing every modelled signal. Default for payloads + /// whose crash-on-arbitrary-input is the actual vulnerability (e.g. C + /// memory corruption fuzzed via libFuzzer). + pub const fn all() -> Self { + Self::from_slice(&[ + Signal::Sigsegv, + Signal::Sigabrt, + Signal::Sigbus, + Signal::Sigfpe, + Signal::Sigill, + ]) + } + + /// True iff `sig` is in the set. + pub const fn contains(self, sig: Signal) -> bool { + (self.0 & (1 << sig.bit())) != 0 + } + + /// True iff the set is empty. + pub const fn is_empty(self) -> bool { + self.0 == 0 + } +} /// Predicate evaluated against a single [`SinkProbe`] when the oracle is /// [`Oracle::SinkProbe`]. @@ -45,6 +178,12 @@ pub enum Oracle { /// `Vec` shape the plan listed because the corpus is /// declared in static memory; a `Vec` would require runtime init). SinkProbe { predicates: &'static [ProbePredicate] }, + /// Phase 08 sink-site crash oracle. Fires iff at least one drained + /// probe has [`ProbeKind::Crash { signal }`] with `signal ∈ signals`. + /// A process-level abort that did not reach the sink handler leaves no + /// matching probe and the run does *not* confirm — the runner maps + /// that case to [`crate::evidence::InconclusiveReason::UnrelatedCrash`]. + SinkCrash { signals: SignalSet }, /// Legacy stdout/stderr substring oracle. Kept for fixtures that /// pre-date Phase 06; new payloads should prefer /// [`Oracle::SinkProbe`] which is robust to oracle collisions. @@ -52,7 +191,15 @@ pub enum Oracle { note = "use Oracle::SinkProbe with ProbePredicate args; OutputContains is brittle to oracle collisions (§16.3)" )] OutputContains(&'static str), - /// Process exited with a crash signal (SIGSEGV, SIGABRT). + /// Process exited with any crash signal (SIGSEGV, SIGABRT). 
+ /// + /// Coarse: fires on *any* uncaught crash, including ones unrelated to + /// the sink (e.g. `abort()` in setup code). Phase 08 introduces + /// [`Oracle::SinkCrash`] which scopes the signal to the sink handler; + /// new payloads should migrate. + #[deprecated( + note = "use Oracle::SinkCrash with a SignalSet; Crash confirms on any process abort, including setup-code failures (Phase 08 §C.4)" + )] Crash, /// Outbound network connection observed at the controlled sink host. OobCallback { host: &'static str }, @@ -71,6 +218,10 @@ pub fn oracle_fired(oracle: &Oracle, outcome: &SandboxOutcome, probes: &[SinkPro Oracle::SinkProbe { predicates } => probes .iter() .any(|p| probe_satisfies_all(p, predicates)), + Oracle::SinkCrash { signals } => probes.iter().any(|p| match p.kind { + ProbeKind::Crash { signal } => signals.contains(signal), + ProbeKind::Normal => false, + }), Oracle::OutputContains(needle) => { let nb = needle.as_bytes(); contains_subslice(&outcome.stdout, nb) || contains_subslice(&outcome.stderr, nb) @@ -122,10 +273,22 @@ fn contains_subslice(hay: &[u8], needle: &[u8]) -> bool { hay.windows(needle.len()).any(|w| w == needle) } +/// Convenience: returns the [`Signal`] captured by a [`SinkProbe`] when +/// its kind is `Crash`, else `None`. Used by the runner to distinguish +/// "process crashed but no matching sink-site probe" (→ +/// `Inconclusive(UnrelatedCrash)`) from "process crashed and a sink-site +/// probe matched" (→ `Confirmed` via `Oracle::SinkCrash`). 
+pub fn probe_crash_signal(probe: &SinkProbe) -> Option { + match probe.kind { + ProbeKind::Crash { signal } => Some(signal), + ProbeKind::Normal => None, + } +} + #[cfg(test)] mod tests { use super::*; - use crate::dynamic::probe::{ProbeArg, SinkProbe}; + use crate::dynamic::probe::{ProbeArg, ProbeKind, ProbeWitness, SinkProbe}; use std::time::Duration; fn outcome() -> SandboxOutcome { @@ -146,6 +309,19 @@ mod tests { args, captured_at_ns: 1, payload_id: "test".into(), + kind: ProbeKind::Normal, + witness: ProbeWitness::empty(), + } + } + + fn crash_probe(callee: &str, signal: Signal) -> SinkProbe { + SinkProbe { + sink_callee: callee.into(), + args: vec![], + captured_at_ns: 1, + payload_id: "test".into(), + kind: ProbeKind::Crash { signal }, + witness: ProbeWitness::empty(), } } @@ -242,4 +418,74 @@ mod tests { assert!(oracle_fired(&oracle, &outcome(), &hit)); assert!(!oracle_fired(&oracle, &outcome(), &miss)); } + + #[test] + fn signal_set_round_trips_via_const_slice() { + const SIGS: SignalSet = SignalSet::from_slice(&[Signal::Sigsegv, Signal::Sigabrt]); + assert!(SIGS.contains(Signal::Sigsegv)); + assert!(SIGS.contains(Signal::Sigabrt)); + assert!(!SIGS.contains(Signal::Sigfpe)); + assert!(!SIGS.is_empty()); + assert!(SignalSet::empty().is_empty()); + } + + #[test] + fn signal_set_all_contains_every_modelled_signal() { + let all = SignalSet::all(); + for s in [ + Signal::Sigsegv, + Signal::Sigabrt, + Signal::Sigbus, + Signal::Sigfpe, + Signal::Sigill, + ] { + assert!(all.contains(s), "SignalSet::all missing {s:?}"); + } + } + + #[test] + fn signal_from_name_matches_canonical_and_lowercase() { + assert_eq!(Signal::from_name("SIGSEGV"), Some(Signal::Sigsegv)); + assert_eq!(Signal::from_name(" sigsegv "), Some(Signal::Sigsegv)); + assert_eq!(Signal::from_name("Aborted"), Some(Signal::Sigabrt)); + assert_eq!(Signal::from_name("nope"), None); + } + + #[test] + fn sink_crash_confirms_only_on_matching_signal_probe() { + let oracle = Oracle::SinkCrash { + signals: 
SignalSet::from_slice(&[Signal::Sigsegv]), + }; + let probes = vec![crash_probe("victim", Signal::Sigsegv)]; + assert!(oracle_fired(&oracle, &outcome(), &probes)); + } + + #[test] + fn sink_crash_ignores_normal_probes() { + let oracle = Oracle::SinkCrash { + signals: SignalSet::all(), + }; + let probes = vec![probe("victim", vec![ProbeArg::String("x".into())])]; + assert!(!oracle_fired(&oracle, &outcome(), &probes)); + } + + #[test] + fn sink_crash_ignores_unrelated_signal() { + let oracle = Oracle::SinkCrash { + signals: SignalSet::from_slice(&[Signal::Sigsegv]), + }; + let probes = vec![crash_probe("victim", Signal::Sigabrt)]; + assert!(!oracle_fired(&oracle, &outcome(), &probes)); + } + + #[test] + fn sink_crash_without_probes_does_not_fire_even_on_process_crash() { + let mut o = outcome(); + o.exit_code = None; + o.timed_out = false; + let oracle = Oracle::SinkCrash { + signals: SignalSet::all(), + }; + assert!(!oracle_fired(&oracle, &o, &[])); + } } diff --git a/src/dynamic/policy.rs b/src/dynamic/policy.rs new file mode 100644 index 00000000..672b23e7 --- /dev/null +++ b/src/dynamic/policy.rs @@ -0,0 +1,192 @@ +//! Track-security cross-cutting policy module (Phase 08 — Track C.4 + C.5). +//! +//! Centralises the deny rules and byte-bound limits that the per-run +//! [`crate::dynamic::probe::ProbeWitness`] construction uses to keep +//! captured forensic data both privacy-safe and bounded in size. +//! +//! Two responsibilities, intentionally kept in one module so the security +//! envelope is auditable in a single file: +//! +//! 1. **Env scrubbing** — [`scrub_env`] redacts the host environment when +//! snapshotted onto a [`crate::dynamic::probe::ProbeWitness`]. Any key +//! matching a [`DENY_KEY_SUBSTRINGS`] entry (case-insensitive substring +//! match against the upper-cased key) has its value replaced with +//! [`REDACTED_VALUE`]. Whitelist semantics (allow-list) were rejected +//! because the harness env is heterogeneous across CI / local / +//! 
container runs; a deny-substring list matches the common-suffix +//! naming used in practice (`*_TOKEN`, `*_KEY`, `*_SECRET`, …) with no +//! false negatives on the cases we have evidence for. +//! 2. **Byte bounds** — [`PAYLOAD_CAPTURE_LIMIT_BYTES`] caps the +//! `payload_bytes` field at 16 KiB so a fuzzer-emitted megabyte payload +//! does not turn the probe file into a memory hog or balloon downstream +//! repro artifacts. [`truncate_payload_bytes`] is the only sanctioned +//! truncation entry point — every probe construction path goes through +//! it so the bound is enforced uniformly. +//! +//! The module deliberately depends on `std` only (no third-party crates) +//! so `cargo deny check` and `cargo doc` both see it as a leaf with no +//! transitive license risk. + +use std::collections::BTreeMap; + +/// Maximum number of bytes retained in +/// [`crate::dynamic::probe::ProbeWitness::payload_bytes`]. +/// +/// 16 KiB is the cap the Phase 08 plan calls for; matches the upper bound +/// any reasonable injection payload will need (the existing curated corpus +/// peaks under 200 B). Anything larger is truncated head-first via +/// [`truncate_payload_bytes`] because that is the prefix the sink actually +/// sees first. +pub const PAYLOAD_CAPTURE_LIMIT_BYTES: usize = 16 * 1024; + +/// Placeholder written in place of a denied environment variable's value +/// when [`scrub_env`] redacts it. Lower-case so it is visually distinct +/// from a real CI env value (which is overwhelmingly upper-snake). +pub const REDACTED_VALUE: &str = ""; + +/// Substrings that mark a key as carrying credential-shaped data. +/// +/// Matched case-insensitively against the upper-cased env var key. Order +/// is not significant — the first match wins because all matches lead to +/// the same redaction. 
+/// +/// The list is intentionally short and high-precision: false-positive +/// redactions just remove a value from a forensic snapshot, but false +/// negatives leak credentials into a probe file that may be persisted as +/// a repro artifact. +pub const DENY_KEY_SUBSTRINGS: &[&str] = &[ + "TOKEN", + "SECRET", + "PASSWORD", + "PASSWD", + "API_KEY", + "APIKEY", + "PRIVATE_KEY", + "CREDENTIAL", + "SESSION", + "COOKIE", + "AUTH", + "BEARER", + // Cloud provider shapes that don't end in TOKEN / SECRET / KEY. + "AWS_ACCESS", + "AWS_SESSION", + "GH_TOKEN", + "GITHUB_TOKEN", + "NPM_TOKEN", + "PYPI_TOKEN", + "DOCKER_PASS", +]; + +/// True iff `key` matches any [`DENY_KEY_SUBSTRINGS`] entry under +/// case-insensitive substring comparison. Exposed as a predicate so +/// [`crate::dynamic::probe`] tests can reason about individual keys +/// without round-tripping through [`scrub_env`]. +pub fn is_denied_env_key(key: &str) -> bool { + let upper = key.to_ascii_uppercase(); + DENY_KEY_SUBSTRINGS + .iter() + .any(|needle| upper.contains(*needle)) +} + +/// Redact denied keys' values in an env iterator and collect into a +/// [`BTreeMap`]. `BTreeMap` rather than `HashMap` so the serialised +/// witness is byte-deterministic across runs — repro reproducibility +/// depends on it. +pub fn scrub_env(iter: I) -> BTreeMap +where + I: IntoIterator, + S: Into, +{ + let mut out = BTreeMap::new(); + for (k, v) in iter { + let k: String = k.into(); + let v: String = v.into(); + if is_denied_env_key(&k) { + out.insert(k, REDACTED_VALUE.to_owned()); + } else { + out.insert(k, v); + } + } + out +} + +/// Truncate `bytes` to at most [`PAYLOAD_CAPTURE_LIMIT_BYTES`]. +/// +/// Head-keeping: the prefix the sink reads first is retained; the tail is +/// dropped. Returns `bytes` unchanged when it already fits the cap so +/// callers can use the return value without allocating in the common case. 
+pub fn truncate_payload_bytes(bytes: &[u8]) -> &[u8] { + if bytes.len() <= PAYLOAD_CAPTURE_LIMIT_BYTES { + bytes + } else { + &bytes[..PAYLOAD_CAPTURE_LIMIT_BYTES] + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn deny_substring_match_is_case_insensitive() { + assert!(is_denied_env_key("AWS_SECRET_ACCESS_KEY")); + assert!(is_denied_env_key("aws_secret_access_key")); + assert!(is_denied_env_key("MyToken")); + assert!(is_denied_env_key("DATABASE_PASSWORD")); + } + + #[test] + fn non_credential_keys_pass_through() { + assert!(!is_denied_env_key("PATH")); + assert!(!is_denied_env_key("HOME")); + assert!(!is_denied_env_key("NYX_PAYLOAD")); + } + + #[test] + fn scrub_redacts_denied_keys_and_keeps_others() { + let env = vec![ + ("PATH".to_owned(), "/usr/bin".to_owned()), + ("AWS_SECRET_ACCESS_KEY".to_owned(), "AKIA...".to_owned()), + ("HOME".to_owned(), "/home/x".to_owned()), + ]; + let scrubbed = scrub_env(env); + assert_eq!(scrubbed.get("PATH").map(String::as_str), Some("/usr/bin")); + assert_eq!(scrubbed.get("HOME").map(String::as_str), Some("/home/x")); + assert_eq!( + scrubbed.get("AWS_SECRET_ACCESS_KEY").map(String::as_str), + Some(REDACTED_VALUE) + ); + } + + #[test] + fn truncate_keeps_short_payloads_unchanged() { + let bytes = b"short payload"; + assert_eq!(truncate_payload_bytes(bytes), bytes); + } + + #[test] + fn truncate_caps_long_payloads_at_limit() { + let bytes = vec![b'A'; PAYLOAD_CAPTURE_LIMIT_BYTES + 100]; + let truncated = truncate_payload_bytes(&bytes); + assert_eq!(truncated.len(), PAYLOAD_CAPTURE_LIMIT_BYTES); + assert!(truncated.iter().all(|b| *b == b'A')); + } + + #[test] + fn truncate_at_exact_boundary_unchanged() { + let bytes = vec![0u8; PAYLOAD_CAPTURE_LIMIT_BYTES]; + assert_eq!(truncate_payload_bytes(&bytes).len(), PAYLOAD_CAPTURE_LIMIT_BYTES); + } + + #[test] + fn scrub_is_deterministic_btree() { + // Same iterator yields the same map; BTreeMap guarantees iteration order. 
+ let env = vec![ + ("B".to_owned(), "1".to_owned()), + ("A".to_owned(), "2".to_owned()), + ]; + let m = scrub_env(env); + let keys: Vec<&str> = m.keys().map(String::as_str).collect(); + assert_eq!(keys, vec!["A", "B"]); + } +} diff --git a/src/dynamic/probe.rs b/src/dynamic/probe.rs index 48084387..49fdfa5c 100644 --- a/src/dynamic/probe.rs +++ b/src/dynamic/probe.rs @@ -8,6 +8,19 @@ //! [`crate::dynamic::oracle::oracle_fired`]) evaluates a payload's //! [`crate::dynamic::oracle::ProbePredicate`] set against the captured args. //! +//! # Phase 08 extensions (Track C.4 + C.5) +//! +//! - [`ProbeKind`] discriminates a normal sink observation from a crash +//! intercepted by a sink-site signal handler. The handler stamps +//! `ProbeKind::Crash { signal }` onto the probe before re-raising so the +//! oracle can distinguish "the sink crashed under my payload" +//! (Confirmed) from "some unrelated setup code crashed" +//! (Inconclusive(UnrelatedCrash)). +//! - [`ProbeWitness`] carries bounded forensic data — scrubbed env, cwd, +//! payload-bytes prefix, callee, args repr — so downstream repro and +//! chain composition need only the probe file, not a live sandbox. All +//! bounding goes through [`crate::dynamic::policy`]. +//! //! # Channel medium //! //! Currently file-based: one JSON record per line at @@ -22,7 +35,10 @@ //! The runner truncates the file via [`ProbeChannel::clear`] before each //! payload to keep verdicts independent. +use crate::dynamic::oracle::Signal; +use crate::dynamic::policy; use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; use std::fs::{File, OpenOptions}; use std::io::{BufRead, BufReader, Write}; use std::path::{Path, PathBuf}; @@ -87,6 +103,107 @@ impl ProbeArg { } } +/// Discriminator on a [`SinkProbe`] (Phase 08 — Track C.4). 
+/// +/// Distinguishes a probe written from the normal sink-instrumentation +/// path from one written by a sink-site signal handler when the sink +/// invocation crashed under the active payload. The oracle's +/// [`crate::dynamic::oracle::Oracle::SinkCrash`] variant ignores anything +/// other than `Crash { signal }`, so a process-level abort outside the +/// sink no longer satisfies the oracle. +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[serde(tag = "kind")] +pub enum ProbeKind { + /// Standard sink observation: arguments were captured before the sink + /// returned normally (or raised a non-crash exception). + Normal, + /// Sink invocation was interrupted by a fatal signal that the + /// sink-site handler intercepted. The captured `signal` is the one + /// the handler observed; the handler re-raises after writing the + /// probe so the runner's outcome still records the process death. + Crash { + /// Signal that interrupted the sink call. + signal: Signal, + }, +} + +impl Default for ProbeKind { + fn default() -> Self { + ProbeKind::Normal + } +} + +/// Bounded forensic snapshot captured alongside a [`SinkProbe`] +/// (Phase 08 — Track C.5). +/// +/// Every byte that lands in a witness is policed by +/// [`crate::dynamic::policy`]: env keys are scrubbed against +/// [`crate::dynamic::policy::DENY_KEY_SUBSTRINGS`] and payload bytes are +/// truncated at [`crate::dynamic::policy::PAYLOAD_CAPTURE_LIMIT_BYTES`]. +/// All fields are `#[serde(default, skip_serializing_if = "...")]` so +/// host-side probes (which don't carry a witness) and +/// per-language shim-emitted probes (which do) round-trip through the +/// same JSON schema. +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] +pub struct ProbeWitness { + /// Scrubbed snapshot of the harness process environment at probe + /// time. Keys matching a deny substring carry + /// [`crate::dynamic::policy::REDACTED_VALUE`]. 
+ #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + pub env_snapshot: BTreeMap, + /// Current working directory of the harness when the probe fired. + /// Empty when the language shim could not determine it. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub cwd: String, + /// Head-truncated payload bytes routed into the sink, capped at + /// [`crate::dynamic::policy::PAYLOAD_CAPTURE_LIMIT_BYTES`]. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub payload_bytes: Vec, + /// Same callee name as [`SinkProbe::sink_callee`]; retained on the + /// witness so repro tooling can consume the witness in isolation. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub callee: String, + /// Per-arg human-readable repr, parallel to [`SinkProbe::args`]. + /// `String` for textual / numeric args; `""` for binary + /// payloads the shim chose not to inline. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub args_repr: Vec, +} + +impl ProbeWitness { + /// An empty witness — every field at its `Default` value. Used by + /// tests and the host-side [`ProbeChannel::write`] path that does + /// not snapshot any forensic state. + pub fn empty() -> Self { + Self::default() + } + + /// Construct a bounded witness from raw inputs. Goes through + /// [`crate::dynamic::policy::scrub_env`] and + /// [`crate::dynamic::policy::truncate_payload_bytes`] so the + /// host-side constructor cannot accidentally produce an + /// unscrubbed / unbounded witness. + pub fn from_inputs( + env: I, + cwd: impl Into, + payload: &[u8], + callee: impl Into, + args_repr: Vec, + ) -> Self + where + I: IntoIterator, + S: Into, + { + Self { + env_snapshot: policy::scrub_env(env), + cwd: cwd.into(), + payload_bytes: policy::truncate_payload_bytes(payload).to_vec(), + callee: callee.into(), + args_repr, + } + } +} + /// One structured observation written by the harness when the instrumented /// sink fires. 
Serialised as a single JSON object on its own line. #[derive(Debug, Clone, Serialize, Deserialize)] @@ -103,6 +220,16 @@ pub struct SinkProbe { pub captured_at_ns: u64, /// Identifier of the payload in flight when the probe fired. pub payload_id: PayloadId, + /// Phase 08: normal sink observation vs sink-site crash. Defaults to + /// `Normal` so probes written by the Phase 06 shims (no `kind` field + /// on the wire) deserialise as normal observations. + #[serde(default)] + pub kind: ProbeKind, + /// Phase 08: bounded forensic snapshot. Empty when the shim did not + /// capture one — the field stays `default` so older probe files + /// round-trip unchanged. + #[serde(default)] + pub witness: ProbeWitness, } /// Per-run handle on a file-backed [`SinkProbe`] channel. @@ -212,6 +339,8 @@ mod tests { args: vec![ProbeArg::String("ls; whoami".into())], captured_at_ns: 42, payload_id: label.into(), + kind: ProbeKind::Normal, + witness: ProbeWitness::empty(), } } @@ -271,4 +400,53 @@ mod tests { let ch = ProbeChannel::for_workdir(dir.path()).unwrap(); assert!(ch.drain().is_empty()); } + + #[test] + fn probe_kind_defaults_to_normal_when_field_omitted() { + // Legacy probe-line shape (Phase 06) — no `kind` field on the wire. 
+ let line = r#"{"sink_callee":"os.system","args":[],"captured_at_ns":1,"payload_id":"p"}"#; + let p: SinkProbe = serde_json::from_str(line).unwrap(); + assert_eq!(p.kind, ProbeKind::Normal); + assert_eq!(p.witness, ProbeWitness::empty()); + } + + #[test] + fn crash_probe_round_trips_through_channel() { + let dir = TempDir::new().unwrap(); + let ch = ProbeChannel::for_workdir(dir.path()).unwrap(); + let mut p = sample_probe("crash-test"); + p.kind = ProbeKind::Crash { signal: Signal::Sigsegv }; + ch.write(&p).unwrap(); + let drained = ch.drain(); + assert_eq!(drained.len(), 1); + assert!(matches!( + drained[0].kind, + ProbeKind::Crash { signal: Signal::Sigsegv } + )); + } + + #[test] + fn witness_from_inputs_redacts_and_truncates() { + let huge_payload = vec![0xAB; policy::PAYLOAD_CAPTURE_LIMIT_BYTES * 2]; + let env = vec![ + ("PATH".to_owned(), "/bin".to_owned()), + ("AWS_SECRET_ACCESS_KEY".to_owned(), "secret!!!".to_owned()), + ]; + let w = ProbeWitness::from_inputs( + env, + "/tmp/run", + &huge_payload, + "os.system", + vec!["ls; whoami".to_owned()], + ); + assert_eq!(w.cwd, "/tmp/run"); + assert_eq!(w.payload_bytes.len(), policy::PAYLOAD_CAPTURE_LIMIT_BYTES); + assert_eq!(w.env_snapshot.get("PATH").map(String::as_str), Some("/bin")); + assert_eq!( + w.env_snapshot.get("AWS_SECRET_ACCESS_KEY").map(String::as_str), + Some(policy::REDACTED_VALUE) + ); + assert_eq!(w.args_repr, vec!["ls; whoami".to_owned()]); + assert_eq!(w.callee, "os.system"); + } } diff --git a/src/dynamic/runner.rs b/src/dynamic/runner.rs index 5a7e8ac9..ec06825c 100644 --- a/src/dynamic/runner.rs +++ b/src/dynamic/runner.rs @@ -11,7 +11,7 @@ use crate::dynamic::corpus::{ }; use crate::dynamic::differential; use crate::dynamic::harness::{self, HarnessError}; -use crate::dynamic::oracle::oracle_fired; +use crate::dynamic::oracle::{oracle_fired, probe_crash_signal, Oracle}; use crate::dynamic::probe::{ProbeChannel, SinkProbe}; use crate::dynamic::sandbox::{self, SandboxBackend, SandboxError, 
SandboxOptions, SandboxOutcome}; use crate::dynamic::spec::HarnessSpec; @@ -47,6 +47,13 @@ pub struct RunOutcome { /// reference was `None` (or unresolved). The verifier maps this to /// [`crate::evidence::InconclusiveReason::NoBenignControl`]. pub no_benign_control: bool, + /// Phase 08 §C.4: at least one payload's sandbox outcome reported a + /// process-level crash (no exit code, no timeout) but no + /// [`crate::dynamic::probe::ProbeKind::Crash`] record was drained + /// from the channel. The verifier maps this to + /// [`crate::evidence::InconclusiveReason::UnrelatedCrash`] so a + /// setup-code abort cannot impersonate a confirmed sink fire. + pub unrelated_crash: bool, } #[derive(Debug)] @@ -240,6 +247,7 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result = None; for (i, payload) in vuln_payloads.iter().enumerate() { @@ -288,6 +296,22 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result Result String } InconclusiveReason::NoBenignControl => "no benign control payload".to_string(), InconclusiveReason::ReversedDifferential => "reversed differential".to_string(), + InconclusiveReason::UnrelatedCrash => "unrelated crash (not sink-site)".to_string(), } } diff --git a/tests/oracle_differential.rs b/tests/oracle_differential.rs index 9fc01140..210010a6 100644 --- a/tests/oracle_differential.rs +++ b/tests/oracle_differential.rs @@ -14,7 +14,7 @@ #![cfg(feature = "dynamic")] use nyx_scanner::dynamic::differential::{build_outcome, evaluate}; -use nyx_scanner::dynamic::probe::{ProbeArg, SinkProbe}; +use nyx_scanner::dynamic::probe::{ProbeArg, ProbeKind, ProbeWitness, SinkProbe}; use nyx_scanner::evidence::DifferentialVerdict; // ── Rule table ────────────────────────────────────────────────────────────── @@ -74,6 +74,8 @@ fn sample_probe(callee: &str, arg: &str, label: &str) -> SinkProbe { args: vec![ProbeArg::String(arg.into())], captured_at_ns: 1, payload_id: label.into(), + kind: ProbeKind::Normal, + witness: 
ProbeWitness::empty(), } } diff --git a/tests/oracle_sink_crash.rs b/tests/oracle_sink_crash.rs new file mode 100644 index 00000000..46e25bc1 --- /dev/null +++ b/tests/oracle_sink_crash.rs @@ -0,0 +1,279 @@ +//! Phase 08 — Track C.4 + C.5 acceptance tests. +//! +//! The runner-side path is exercised in isolation by the +//! `oracle_differential` tests; here we lock down the synthetic side of +//! Phase 08 — that a sink-site crash probe confirms via +//! [`Oracle::SinkCrash`], that an outside-sink process abort *does not* +//! confirm, and that witness construction stays bounded. +//! +//! Acceptance bullets (`plan.md` phase 08): +//! +//! - (a) sink-site crash → `Confirmed` +//! - (b) crash outside sink → `Inconclusive(UnrelatedCrash)` +//! - (c) bounded witness capture for known payloads + +#![cfg(feature = "dynamic")] + +use nyx_scanner::dynamic::oracle::{ + oracle_fired, probe_crash_signal, Oracle, Signal, SignalSet, +}; +use nyx_scanner::dynamic::policy; +use nyx_scanner::dynamic::probe::{ + ProbeArg, ProbeChannel, ProbeKind, ProbeWitness, SinkProbe, +}; +use nyx_scanner::dynamic::sandbox::SandboxOutcome; +use nyx_scanner::evidence::InconclusiveReason; +use std::time::Duration; +use tempfile::TempDir; + +fn crashed_outcome() -> SandboxOutcome { + // Process-level abort: no exit code, no timeout. 
+ SandboxOutcome { + exit_code: None, + stdout: vec![], + stderr: vec![], + timed_out: false, + oob_callback_seen: false, + sink_hit: false, + duration: Duration::from_millis(1), + } +} + +fn clean_outcome() -> SandboxOutcome { + SandboxOutcome { + exit_code: Some(0), + stdout: vec![], + stderr: vec![], + timed_out: false, + oob_callback_seen: false, + sink_hit: false, + duration: Duration::from_millis(1), + } +} + +fn crash_probe(callee: &str, signal: Signal, witness: ProbeWitness) -> SinkProbe { + SinkProbe { + sink_callee: callee.into(), + args: vec![], + captured_at_ns: 1, + payload_id: "crash-test".into(), + kind: ProbeKind::Crash { signal }, + witness, + } +} + +// ── (a) Sink-site crash → Confirmed ────────────────────────────────────────── + +#[test] +fn case_a_sink_site_crash_confirms() { + // Simulates the per-language signal handler: harness aborted, but + // before re-raising it wrote a Crash probe to the channel. + let dir = TempDir::new().unwrap(); + let channel = ProbeChannel::for_workdir(dir.path()).unwrap(); + let witness = ProbeWitness::from_inputs( + vec![("PATH".to_owned(), "/bin".to_owned())], + "/tmp/run", + b"", + "system", + vec!["".to_owned()], + ); + channel + .write(&crash_probe("system", Signal::Sigsegv, witness)) + .unwrap(); + + let probes = channel.drain(); + assert_eq!(probes.len(), 1); + + let oracle = Oracle::SinkCrash { + signals: SignalSet::from_slice(&[Signal::Sigsegv]), + }; + assert!( + oracle_fired(&oracle, &crashed_outcome(), &probes), + "sink-site Crash probe with matching signal must fire SinkCrash oracle" + ); + + // Helper accessor exposes the signal so the runner can distinguish + // "matching probe present" from "process crashed only". 
+ assert_eq!(probe_crash_signal(&probes[0]), Some(Signal::Sigsegv)); +} + +// ── (b) Crash outside sink → Inconclusive(UnrelatedCrash) ──────────────────── + +#[test] +fn case_b_outside_sink_crash_does_not_fire_and_is_unrelated() { + // The harness was instrumented with Oracle::SinkCrash but the + // process aborted in setup code (e.g. abort() in module init) + // before the sink ran — no Crash probe was written. + let dir = TempDir::new().unwrap(); + let channel = ProbeChannel::for_workdir(dir.path()).unwrap(); + let probes = channel.drain(); + assert!(probes.is_empty(), "no probe written from outside-sink abort"); + + let oracle = Oracle::SinkCrash { + signals: SignalSet::all(), + }; + assert!( + !oracle_fired(&oracle, &crashed_outcome(), &probes), + "process crash without a sink-site probe must NOT fire SinkCrash" + ); + + // The verifier's runner-side condition that promotes this case to + // `Inconclusive(UnrelatedCrash)` is: SinkCrash oracle + crashed + // outcome + no probe with a crash signal. Lock the predicate + // here so the runner's wiring in src/dynamic/runner.rs stays in + // sync with what the test labels expect. + let process_crashed = + crashed_outcome().exit_code.is_none() && !crashed_outcome().timed_out; + let has_sink_crash_probe = probes.iter().any(|p| probe_crash_signal(p).is_some()); + let is_sink_crash_oracle = matches!(oracle, Oracle::SinkCrash { .. }); + assert!(is_sink_crash_oracle && process_crashed && !has_sink_crash_probe); + + // The verdict mapping itself is constructed by the verifier; reference + // the variant so a rename keeps this test honest. + let _reason = InconclusiveReason::UnrelatedCrash; +} + +#[test] +fn case_b_clean_exit_does_not_fire_sink_crash() { + // Sanity: a clean run with no probe is also not Confirmed (and not + // UnrelatedCrash either, since the process did not crash). 
+ let oracle = Oracle::SinkCrash { + signals: SignalSet::all(), + }; + assert!(!oracle_fired(&oracle, &clean_outcome(), &[])); +} + +// ── (c) Bounded witness capture ───────────────────────────────────────────── + +#[test] +fn case_c_witness_capture_is_bounded_and_scrubbed() { + // Construct a witness from intentionally oversized + credential-tainted + // inputs to lock the policy contract: payload truncated at 16 KiB and + // denied env keys redacted. + let huge_payload = vec![0x41u8; policy::PAYLOAD_CAPTURE_LIMIT_BYTES * 4]; + let env = vec![ + ("PATH".to_owned(), "/usr/bin".to_owned()), + ("AWS_SECRET_ACCESS_KEY".to_owned(), "AKIAEXAMPLE".to_owned()), + ("GITHUB_TOKEN".to_owned(), "ghs_fake".to_owned()), + ("HOME".to_owned(), "/home/x".to_owned()), + ]; + let witness = ProbeWitness::from_inputs( + env, + "/tmp/nyx-run-1", + &huge_payload, + "exec", + vec!["arg0".to_owned(), "arg1".to_owned()], + ); + + assert_eq!( + witness.payload_bytes.len(), + policy::PAYLOAD_CAPTURE_LIMIT_BYTES, + "payload must be truncated to the 16 KiB cap" + ); + assert!( + witness.payload_bytes.iter().all(|b| *b == 0x41), + "head-truncation keeps prefix bytes" + ); + + // PATH / HOME unchanged. + assert_eq!( + witness.env_snapshot.get("PATH").map(String::as_str), + Some("/usr/bin"), + ); + assert_eq!( + witness.env_snapshot.get("HOME").map(String::as_str), + Some("/home/x"), + ); + + // Credential-shaped keys redacted. 
+ assert_eq!( + witness + .env_snapshot + .get("AWS_SECRET_ACCESS_KEY") + .map(String::as_str), + Some(policy::REDACTED_VALUE), + ); + assert_eq!( + witness.env_snapshot.get("GITHUB_TOKEN").map(String::as_str), + Some(policy::REDACTED_VALUE), + ); + + assert_eq!(witness.cwd, "/tmp/nyx-run-1"); + assert_eq!(witness.callee, "exec"); + assert_eq!(witness.args_repr, vec!["arg0".to_owned(), "arg1".to_owned()]); +} + +#[test] +fn case_c_witness_round_trips_through_probe_channel() { + // The witness must survive serde round-trip so downstream repro + // tools see what the harness captured. + let dir = TempDir::new().unwrap(); + let channel = ProbeChannel::for_workdir(dir.path()).unwrap(); + let witness = ProbeWitness::from_inputs( + vec![ + ("PATH".to_owned(), "/usr/bin".to_owned()), + ("API_KEY".to_owned(), "live".to_owned()), + ], + "/tmp/run", + b"; rm -rf /", + "system", + vec!["; rm -rf /".to_owned()], + ); + let probe = SinkProbe { + sink_callee: "system".into(), + args: vec![ProbeArg::String("; rm -rf /".into())], + captured_at_ns: 42, + payload_id: "phase08-c".into(), + kind: ProbeKind::Crash { + signal: Signal::Sigabrt, + }, + witness, + }; + channel.write(&probe).unwrap(); + + let drained = channel.drain(); + assert_eq!(drained.len(), 1); + let p = &drained[0]; + assert!(matches!( + p.kind, + ProbeKind::Crash { + signal: Signal::Sigabrt + } + )); + assert_eq!(p.witness.cwd, "/tmp/run"); + assert_eq!( + p.witness.env_snapshot.get("API_KEY").map(String::as_str), + Some(policy::REDACTED_VALUE), + ); + assert_eq!( + p.witness.env_snapshot.get("PATH").map(String::as_str), + Some("/usr/bin"), + ); + assert_eq!(p.witness.payload_bytes, b"; rm -rf /".to_vec()); +} + +#[test] +fn signal_wire_format_accepts_canonical_and_short_aliases() { + // The per-language shims write SIGSEGV / SIGABRT / etc. as the + // signal value; downstream JSON consumers and the host-side oracle + // both need to deserialise the same wire format. 
+ let canonical = + serde_json::from_str::("\"SIGSEGV\"").expect("canonical SIG name"); + assert_eq!(canonical, Signal::Sigsegv); + let short = serde_json::from_str::("\"SEGV\"").expect("short alias"); + assert_eq!(short, Signal::Sigsegv); + let title = + serde_json::from_str::("\"Sigsegv\"").expect("derive-default alias"); + assert_eq!(title, Signal::Sigsegv); +} + +#[test] +fn signal_set_const_construction_is_order_independent() { + const A: SignalSet = SignalSet::from_slice(&[Signal::Sigsegv, Signal::Sigabrt]); + const B: SignalSet = SignalSet::from_slice(&[Signal::Sigabrt, Signal::Sigsegv]); + assert!(A.contains(Signal::Sigsegv)); + assert!(A.contains(Signal::Sigabrt)); + assert!(B.contains(Signal::Sigsegv)); + assert!(B.contains(Signal::Sigabrt)); + assert!(!A.contains(Signal::Sigfpe)); +} diff --git a/tests/oracle_sink_probe.rs b/tests/oracle_sink_probe.rs index fc80ac00..2f288da7 100644 --- a/tests/oracle_sink_probe.rs +++ b/tests/oracle_sink_probe.rs @@ -18,7 +18,9 @@ #![cfg(feature = "dynamic")] use nyx_scanner::dynamic::oracle::{oracle_fired, Oracle, ProbePredicate}; -use nyx_scanner::dynamic::probe::{ProbeArg, ProbeChannel, SinkProbe, PROBE_PATH_ENV}; +use nyx_scanner::dynamic::probe::{ + ProbeArg, ProbeChannel, ProbeKind, ProbeWitness, SinkProbe, PROBE_PATH_ENV, +}; use std::time::Duration; use tempfile::TempDir; @@ -53,6 +55,8 @@ fn synthetic_harness_fires_probe( args: vec![ProbeArg::String(captured_arg.into())], captured_at_ns: 1, payload_id: payload_id.into(), + kind: ProbeKind::Normal, + witness: ProbeWitness::empty(), }; channel.write(&probe).expect("synthetic harness probe write"); }