[pitboss] phase 08: Track C.4 + C.5 — SinkCrash oracle + per-probe witness capture

This commit is contained in:
pitboss 2026-05-14 13:10:22 -05:00
parent 4eccbd48b4
commit 93eb98edda
21 changed files with 1988 additions and 115 deletions

View file

@ -110,18 +110,22 @@ mod tests {
#[test]
fn build_outcome_carries_both_traces() {
use crate::dynamic::probe::{ProbeArg, SinkProbe};
use crate::dynamic::probe::{ProbeArg, ProbeKind, ProbeWitness, SinkProbe};
let vuln = vec![SinkProbe {
sink_callee: "os.system".into(),
args: vec![ProbeArg::String("; echo X".into())],
captured_at_ns: 1,
payload_id: "cmdi-echo-marker".into(),
kind: ProbeKind::Normal,
witness: ProbeWitness::empty(),
}];
let benign = vec![SinkProbe {
sink_callee: "os.system".into(),
args: vec![ProbeArg::String("safe".into())],
captured_at_ns: 2,
payload_id: "cmdi-benign".into(),
kind: ProbeKind::Normal,
witness: ProbeWitness::empty(),
}];
let outcome = build_outcome(
"cmdi-echo-marker",

View file

@ -23,12 +23,101 @@ const SUPPORTED: &[EntryKind] = &[EntryKind::Function];
/// the only dep on libc / stdio.
pub fn probe_shim() -> &'static str {
r#"
/* ── __nyx_probe shim (Phase 06 — Track C.1) ─────────────────────────────── */
/* ── __nyx_probe shim (Phase 06 — Track C.1, Phase 08 — Track C.4 + C.5) ── */
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#ifndef __NYX_PAYLOAD_LIMIT
#define __NYX_PAYLOAD_LIMIT (16 * 1024)
#endif
#define __NYX_REDACTED "<redacted-by-nyx-policy>"
extern char **environ;
/* Upper-case substrings that mark an environment key as sensitive. The table
 * is NULL-terminated so it can be walked without a separate length. */
static const char *__nyx_deny[] = {
    "TOKEN", "SECRET", "PASSWORD", "PASSWD", "API_KEY", "APIKEY", "PRIVATE_KEY",
    "CREDENTIAL", "SESSION", "COOKIE", "AUTH", "BEARER", "AWS_ACCESS", "AWS_SESSION",
    "GH_TOKEN", "GITHUB_TOKEN", "NPM_TOKEN", "PYPI_TOKEN", "DOCKER_PASS",
    NULL,
};

/* Returns 1 when `k_upper` (a key the caller has already upper-cased)
 * contains any deny-list substring, and 0 otherwise. */
static int __nyx_is_denied_upper(const char *k_upper) {
    const char **needle = __nyx_deny;
    while (*needle != NULL) {
        if (strstr(k_upper, *needle) != NULL) {
            return 1;
        }
        ++needle;
    }
    return 0;
}
/* Writes the first `len` bytes of `s` to `f` as the contents of a JSON string
 * literal (without the surrounding quotes). Quote, backslash and every
 * control byte below 0x20 are escaped so the emitted record stays parseable. */
static void __nyx_json_write_escaped(FILE *f, const char *s, size_t len) {
    for (size_t i = 0; i < len; ++i) {
        unsigned char c = (unsigned char)s[i];
        switch (c) {
            case '"':  fputs("\\\"", f); break;
            case '\\': fputs("\\\\", f); break;
            case '\n': fputs("\\n", f); break;
            case '\r': fputs("\\r", f); break;
            case '\t': fputs("\\t", f); break;
            default:
                if (c < 0x20) fprintf(f, "\\u%04x", (unsigned int)c);
                else fputc((int)c, f);
        }
    }
}

/* Writes the witness JSON object for one probe to `f`.
 *
 * Fields, in order: `env_snapshot` (every NAME=VALUE entry of `environ`, with
 * the value replaced by __NYX_REDACTED when the upper-cased name matches the
 * deny list), `cwd` (empty when getcwd fails), `payload_bytes` (the bytes of
 * NYX_PAYLOAD as decimal integers, capped at __NYX_PAYLOAD_LIMIT), `callee`
 * and `args_repr` (the first `nargs` entries of `args`; NULL entries and a
 * NULL `args` produce empty strings).
 *
 * Every string that originates outside this shim (env names and values, cwd,
 * callee, args) is JSON-escaped; writing them raw would let a quote,
 * backslash or control byte corrupt the record. */
static void __nyx_write_witness(FILE *f, const char *sink_callee, int nargs, const char **args) {
    fputs("{\"env_snapshot\":{", f);
    int first = 1;
    for (char **e = environ; e && *e; ++e) {
        const char *eq = strchr(*e, '=');
        if (!eq) continue;
        size_t klen = (size_t)(eq - *e);
        /* Upper-case a private copy of the key; the deny list is upper-case. */
        char *kup = (char *)malloc(klen + 1);
        if (!kup) continue;
        for (size_t i = 0; i < klen; ++i) {
            char c = (*e)[i];
            if (c >= 'a' && c <= 'z') c -= 32;
            kup[i] = c;
        }
        kup[klen] = '\0';
        int denied = __nyx_is_denied_upper(kup);
        free(kup);
        if (!first) fputc(',', f);
        first = 0;
        fputc('"', f);
        __nyx_json_write_escaped(f, *e, klen);
        fputs("\":\"", f);
        if (denied) {
            fputs(__NYX_REDACTED, f);
        } else {
            __nyx_json_write_escaped(f, eq + 1, strlen(eq + 1));
        }
        fputc('"', f);
    }
    fputs("},\"cwd\":\"", f);
    char cwdbuf[4096];
    if (getcwd(cwdbuf, sizeof(cwdbuf))) {
        __nyx_json_write_escaped(f, cwdbuf, strlen(cwdbuf));
    }
    fputs("\",\"payload_bytes\":[", f);
    const char *payload = getenv("NYX_PAYLOAD");
    if (payload) {
        size_t plen = strlen(payload);
        if (plen > (size_t)__NYX_PAYLOAD_LIMIT) plen = (size_t)__NYX_PAYLOAD_LIMIT;
        for (size_t i = 0; i < plen; ++i) {
            if (i > 0) fputc(',', f);
            fprintf(f, "%d", (unsigned char)payload[i]);
        }
    }
    fputs("],\"callee\":\"", f);
    if (sink_callee) {
        __nyx_json_write_escaped(f, sink_callee, strlen(sink_callee));
    }
    fputs("\",\"args_repr\":[", f);
    for (int i = 0; i < nargs; ++i) {
        if (i > 0) fputc(',', f);
        fputc('"', f);
        if (args && args[i]) __nyx_json_write_escaped(f, args[i], strlen(args[i]));
        fputc('"', f);
    }
    fputs("]}", f);
}
static void __nyx_probe(const char *sink_callee, int nargs, ...) {
const char *p = getenv("NYX_PROBE_PATH");
@ -44,16 +133,77 @@ static void __nyx_probe(const char *sink_callee, int nargs, ...) {
fprintf(f, "{\"sink_callee\":\"%s\",\"args\":[", sink_callee);
va_list ap;
va_start(ap, nargs);
const char *args_arr[32];
int captured = nargs > 32 ? 32 : nargs;
for (int i = 0; i < nargs; ++i) {
const char *arg = va_arg(ap, const char *);
if (!arg) arg = "";
if (i < captured) args_arr[i] = arg;
if (i > 0) fputc(',', f);
fprintf(f, "{\"kind\":\"String\",\"value\":\"%s\"}", arg);
}
va_end(ap);
fprintf(f, "],\"captured_at_ns\":%llu,\"payload_id\":\"%s\"}\n", ns, pid);
fprintf(f, "],\"captured_at_ns\":%llu,\"payload_id\":\"%s\",", ns, pid);
fputs("\"kind\":{\"kind\":\"Normal\"},\"witness\":", f);
__nyx_write_witness(f, sink_callee, captured, args_arr);
fputs("}\n", f);
fclose(f);
}
/* Phase 08: sink-site signal handler. __nyx_install_crash_guard sets a
* sigaction(2) handler over SIGSEGV / SIGABRT / SIGBUS / SIGFPE / SIGILL
* that writes a Crash probe with witness before restoring SIG_DFL and
* re-raising the signal, so the process still dies with the same exit
* code, but the probe channel now carries the forensic record. */
/* Name of the sink currently being guarded. Written by
 * __nyx_install_crash_guard and read by the handler below. */
static const char *__nyx_crash_sink_callee = "";
/* Signal handler installed by __nyx_install_crash_guard.
 *
 * When NYX_PROBE_PATH is set and non-empty, appends one JSON line describing
 * the crash: the guarded sink name, an empty args list, a CLOCK_REALTIME
 * timestamp in nanoseconds, NYX_PAYLOAD_ID (empty when unset), the signal
 * name, and a witness object. Whether or not the record could be written, it
 * then restores the default disposition for `sig` and raises it again.
 *
 * NOTE(review): getenv, fopen, fprintf and the malloc inside
 * __nyx_write_witness are presumably not async-signal-safe; confirm this
 * best-effort trade-off is intended. */
static void __nyx_crash_handler(int sig) {
const char *p = getenv("NYX_PROBE_PATH");
if (p && *p) {
FILE *f = fopen(p, "a");
if (f) {
/* Any signal not listed below is reported as SIGABRT. */
const char *name = "SIGABRT";
switch (sig) {
case SIGSEGV: name = "SIGSEGV"; break;
case SIGABRT: name = "SIGABRT"; break;
case SIGBUS: name = "SIGBUS"; break;
case SIGFPE: name = "SIGFPE"; break;
case SIGILL: name = "SIGILL"; break;
}
struct timespec ts;
clock_gettime(CLOCK_REALTIME, &ts);
/* Seconds and nanoseconds folded into a single nanosecond count. */
unsigned long long ns = (unsigned long long)ts.tv_sec * 1000000000ULL +
(unsigned long long)ts.tv_nsec;
const char *pid = getenv("NYX_PAYLOAD_ID");
if (!pid) pid = "";
/* Record prefix; the witness object and closing brace follow. The
 * callee and payload id are inserted verbatim via %s (no JSON escaping). */
fprintf(f,
"{\"sink_callee\":\"%s\",\"args\":[],\"captured_at_ns\":%llu,"
"\"payload_id\":\"%s\",\"kind\":{\"kind\":\"Crash\",\"signal\":\"%s\"},"
"\"witness\":",
__nyx_crash_sink_callee, ns, pid, name);
/* No sink arguments are available at crash time: nargs 0, args NULL. */
__nyx_write_witness(f, __nyx_crash_sink_callee, 0, NULL);
fputs("}\n", f);
fclose(f);
}
}
/* Put the default action back before re-raising so the second delivery
 * is not routed to this handler again. */
struct sigaction dfl;
memset(&dfl, 0, sizeof(dfl));
dfl.sa_handler = SIG_DFL;
sigaction(sig, &dfl, NULL);
raise(sig);
}
static void __nyx_install_crash_guard(const char *sink_callee) {
__nyx_crash_sink_callee = sink_callee;
struct sigaction sa;
memset(&sa, 0, sizeof(sa));
sa.sa_handler = __nyx_crash_handler;
sigemptyset(&sa.sa_mask);
int sigs[] = { SIGSEGV, SIGABRT, SIGBUS, SIGFPE, SIGILL };
for (size_t i = 0; i < sizeof(sigs)/sizeof(sigs[0]); ++i) {
sigaction(sigs[i], &sa, NULL);
}
}
"#
}

View file

@ -23,12 +23,31 @@ const SUPPORTED: &[EntryKind] = &[EntryKind::Function];
/// JSON-emit format matches [`crate::dynamic::probe::SinkProbe`].
pub fn probe_shim() -> &'static str {
r#"
/* ── __nyx_probe shim (Phase 06 — Track C.1) ─────────────────────────────── */
/* ── __nyx_probe shim (Phase 06 — Track C.1, Phase 08 — Track C.4 + C.5) ── */
#include <algorithm>
#include <array>
#include <chrono>
#include <csignal>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <unistd.h>
#ifndef __NYX_PAYLOAD_LIMIT
#define __NYX_PAYLOAD_LIMIT (16 * 1024)
#endif
#define __NYX_REDACTED "<redacted-by-nyx-policy>"
extern char **environ;
static const char *__nyx_deny_substrings_cpp[] = {
"TOKEN","SECRET","PASSWORD","PASSWD","API_KEY","APIKEY","PRIVATE_KEY",
"CREDENTIAL","SESSION","COOKIE","AUTH","BEARER","AWS_ACCESS","AWS_SESSION",
"GH_TOKEN","GITHUB_TOKEN","NPM_TOKEN","PYPI_TOKEN","DOCKER_PASS",
};
inline void __nyx_probe_one(std::ostringstream &out, const std::string &v) {
out << "{\"kind\":\"String\",\"value\":\"";
@ -45,6 +64,63 @@ inline void __nyx_probe_one(std::ostringstream &out, const std::string &v) {
out << "\"}";
}
/* Appends `v` to `out` as the contents of a JSON string literal (without the
 * surrounding quotes). Quote, backslash, newline, carriage return and tab use
 * their short escapes; every other control byte below 0x20 is written as
 * \u00XX, because JSON forbids raw control characters inside strings. All
 * remaining bytes (including UTF-8 sequences) pass through unchanged. */
inline void __nyx_esc(std::ostringstream &out, const std::string &v) {
    static const char hex[] = "0123456789abcdef";
    for (char raw : v) {
        const unsigned char c = static_cast<unsigned char>(raw);
        switch (c) {
            case '"':  out << "\\\""; break;
            case '\\': out << "\\\\"; break;
            case '\n': out << "\\n"; break;
            case '\r': out << "\\r"; break;
            case '\t': out << "\\t"; break;
            default:
                if (c < 0x20) {
                    out << "\\u00" << hex[c >> 4] << hex[c & 0x0f];
                } else {
                    out << raw;
                }
        }
    }
}
/* Builds the witness JSON object for one probe and returns it as a string.
 *
 * Fields, in order: `env_snapshot` (each NAME=VALUE entry of `environ`; the
 * value becomes __NYX_REDACTED when the upper-cased name contains a deny-list
 * substring), `cwd` (empty when getcwd fails), `payload_bytes` (bytes of
 * NYX_PAYLOAD as integers, capped at __NYX_PAYLOAD_LIMIT), `callee`, and
 * `args_repr`. Names, values, cwd, callee and args go through __nyx_esc. */
inline std::string __nyx_witness_json(const char *sink_callee, const std::vector<std::string> &args_repr) {
    std::ostringstream json;

    json << "{\"env_snapshot\":{";
    bool need_comma = false;
    for (char **entry = environ; *entry; ++entry) {
        const char *sep = std::strchr(*entry, '=');
        if (sep == nullptr) continue;

        const std::string key(*entry, static_cast<size_t>(sep - *entry));
        std::string upper(key);
        std::transform(upper.begin(), upper.end(), upper.begin(),
                       [](unsigned char ch) { return (char)std::toupper(ch); });
        const bool redact = std::any_of(
            std::begin(__nyx_deny_substrings_cpp), std::end(__nyx_deny_substrings_cpp),
            [&upper](const char *needle) { return upper.find(needle) != std::string::npos; });

        if (need_comma) json << ',';
        need_comma = true;

        json << '"';
        __nyx_esc(json, key);
        json << "\":\"";
        if (redact) {
            json << __NYX_REDACTED;
        } else {
            __nyx_esc(json, std::string(sep + 1));
        }
        json << '"';
    }

    json << "},\"cwd\":\"";
    char cwdbuf[4096];
    if (::getcwd(cwdbuf, sizeof(cwdbuf))) {
        __nyx_esc(json, std::string(cwdbuf));
    }

    json << "\",\"payload_bytes\":[";
    if (const char *payload = std::getenv("NYX_PAYLOAD")) {
        size_t count = std::strlen(payload);
        if (count > __NYX_PAYLOAD_LIMIT) count = __NYX_PAYLOAD_LIMIT;
        for (size_t pos = 0; pos < count; ++pos) {
            if (pos != 0) json << ',';
            json << static_cast<int>(static_cast<unsigned char>(payload[pos]));
        }
    }

    json << "],\"callee\":\"";
    __nyx_esc(json, std::string(sink_callee));

    json << "\",\"args_repr\":[";
    bool first_arg = true;
    for (const std::string &arg : args_repr) {
        if (!first_arg) json << ',';
        first_arg = false;
        json << '"';
        __nyx_esc(json, arg);
        json << '"';
    }
    json << "]}";

    return json.str();
}
template <typename... Args>
inline void __nyx_probe(const char *sink_callee, Args... args) {
const char *p = std::getenv("NYX_PROBE_PATH");
@ -52,10 +128,12 @@ inline void __nyx_probe(const char *sink_callee, Args... args) {
std::ostringstream out;
out << "{\"sink_callee\":\"" << sink_callee << "\",\"args\":[";
bool first = true;
std::vector<std::string> repr;
auto emit = [&](const std::string &s) {
if (!first) out << ',';
first = false;
__nyx_probe_one(out, s);
repr.push_back(s);
};
(emit(std::string(args)), ...);
const char *pid = std::getenv("NYX_PAYLOAD_ID");
@ -63,10 +141,62 @@ inline void __nyx_probe(const char *sink_callee, Args... args) {
std::chrono::system_clock::now().time_since_epoch()
).count();
out << "],\"captured_at_ns\":" << now << ",\"payload_id\":\""
<< (pid ? pid : "") << "\"}\n";
<< (pid ? pid : "") << "\",";
out << "\"kind\":{\"kind\":\"Normal\"},\"witness\":"
<< __nyx_witness_json(sink_callee, repr) << "}\n";
std::ofstream f(p, std::ios::app);
if (f.is_open()) f << out.str();
}
/* Phase 08: sink-site sigaction handler. Mirrors the C variant; the
* captured `sink_callee` is held in a file-scope const char* so the
* async-signal-unsafe write path can pull it without TLS. */
/* Name of the sink currently being guarded. Written by
 * __nyx_install_crash_guard and read by the handler below. */
static const char *__nyx_crash_sink_callee = "";
/* Signal handler installed by __nyx_install_crash_guard.
 *
 * When NYX_PROBE_PATH is set and non-empty and the file opens for append,
 * writes one JSON line describing the crash: the guarded sink name, an empty
 * args list, a system_clock timestamp in nanoseconds, NYX_PAYLOAD_ID (empty
 * when unset), the signal name, and a witness object built with an empty
 * args_repr. It then restores the default disposition for `sig` and raises
 * it again, whether or not the record was written.
 *
 * NOTE(review): the stream and allocation calls used here are presumably not
 * async-signal-safe; confirm this best-effort trade-off is intended. */
inline void __nyx_crash_handler(int sig) {
const char *p = std::getenv("NYX_PROBE_PATH");
if (p && *p) {
std::ofstream f(p, std::ios::app);
if (f.is_open()) {
/* Any signal not listed below is reported as SIGABRT. */
const char *name = "SIGABRT";
switch (sig) {
case SIGSEGV: name = "SIGSEGV"; break;
case SIGABRT: name = "SIGABRT"; break;
case SIGBUS: name = "SIGBUS"; break;
case SIGFPE: name = "SIGFPE"; break;
case SIGILL: name = "SIGILL"; break;
}
auto now = std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::system_clock::now().time_since_epoch()
).count();
const char *pid = std::getenv("NYX_PAYLOAD_ID");
/* The whole line is assembled in memory and handed to the file in one
 * insertion. The callee and payload id are inserted verbatim (no JSON
 * escaping). */
std::ostringstream out;
out << "{\"sink_callee\":\"" << __nyx_crash_sink_callee
<< "\",\"args\":[],\"captured_at_ns\":" << now
<< ",\"payload_id\":\"" << (pid ? pid : "")
<< "\",\"kind\":{\"kind\":\"Crash\",\"signal\":\"" << name
<< "\"},\"witness\":"
<< __nyx_witness_json(__nyx_crash_sink_callee, {}) << "}\n";
f << out.str();
}
}
/* Put the default action back before re-raising so the second delivery
 * is not routed to this handler again. */
struct sigaction dfl;
std::memset(&dfl, 0, sizeof(dfl));
dfl.sa_handler = SIG_DFL;
sigaction(sig, &dfl, nullptr);
raise(sig);
}
/* Remembers `sink_callee` for the crash handler and installs
 * __nyx_crash_handler for SIGSEGV, SIGABRT, SIGBUS, SIGFPE and SIGILL with an
 * empty signal mask and no flags. sigaction results are ignored. */
inline void __nyx_install_crash_guard(const char *sink_callee) {
    __nyx_crash_sink_callee = sink_callee;

    struct sigaction action;
    std::memset(&action, 0, sizeof(action));
    action.sa_handler = __nyx_crash_handler;
    sigemptyset(&action.sa_mask);

    const std::array<int, 5> guarded = {{ SIGSEGV, SIGABRT, SIGBUS, SIGFPE, SIGILL }};
    for (const int signo : guarded) {
        sigaction(signo, &action, nullptr);
    }
}
"#
}

View file

@ -58,12 +58,71 @@ impl LangEmitter for GoEmitter {
/// captured args at the sink site.
pub fn probe_shim() -> &'static str {
r#"
// ── __nyx_probe shim (Phase 06 — Track C.1) ──────────────────────────────────
func __nyx_probe(sinkCallee string, args ...string) {
p := os.Getenv("NYX_PROBE_PATH")
if p == "" {
return
// ── __nyx_probe shim (Phase 06 — Track C.1, Phase 08 — Track C.4 + C.5) ──────
var __nyx_deny_substrings = []string{
"TOKEN","SECRET","PASSWORD","PASSWD","API_KEY","APIKEY","PRIVATE_KEY",
"CREDENTIAL","SESSION","COOKIE","AUTH","BEARER","AWS_ACCESS","AWS_SESSION",
"GH_TOKEN","GITHUB_TOKEN","NPM_TOKEN","PYPI_TOKEN","DOCKER_PASS",
}
const __nyx_payload_limit = 16 * 1024
const __nyx_redacted = "<redacted-by-nyx-policy>"
// __nyx_scrub_env returns a copy of the process environment as a map. Entries
// without an '=' separator are skipped. A value is replaced by __nyx_redacted
// when the upper-cased variable name contains any deny-list substring.
func __nyx_scrub_env() map[string]string {
	scrubbed := make(map[string]string)
	for _, entry := range os.Environ() {
		sep := strings.IndexByte(entry, '=')
		if sep < 0 {
			continue
		}
		key, val := entry[:sep], entry[sep+1:]
		upper := strings.ToUpper(key)
		// Start from the real value and overwrite it on the first deny hit.
		scrubbed[key] = val
		for _, needle := range __nyx_deny_substrings {
			if strings.Contains(upper, needle) {
				scrubbed[key] = __nyx_redacted
				break
			}
		}
	}
	return scrubbed
}
// __nyx_witness assembles the witness object for one probe: the scrubbed
// environment, the working directory (empty when os.Getwd fails), the bytes
// of NYX_PAYLOAD capped at __nyx_payload_limit, the callee name and a copy of
// the sink arguments.
func __nyx_witness(sinkCallee string, args []string) map[string]interface{} {
	raw := []byte(os.Getenv("NYX_PAYLOAD"))
	if len(raw) > __nyx_payload_limit {
		raw = raw[:__nyx_payload_limit]
	}
	// Widen to []int so the JSON encoder emits an array of numbers; a []byte
	// would be encoded as a base64 string instead.
	payloadInts := make([]int, 0, len(raw))
	for _, b := range raw {
		payloadInts = append(payloadInts, int(b))
	}

	// Always a non-nil slice, so a nil `args` still serialises as [].
	argsRepr := make([]string, len(args))
	copy(argsRepr, args)

	workDir, _ := os.Getwd()

	witness := map[string]interface{}{}
	witness["env_snapshot"] = __nyx_scrub_env()
	witness["cwd"] = workDir
	witness["payload_bytes"] = payloadInts
	witness["callee"] = sinkCallee
	witness["args_repr"] = argsRepr
	return witness
}
// __nyx_emit appends `rec` as one JSON line to the file named by
// NYX_PROBE_PATH. It is best-effort: an unset path, a marshalling failure, or
// an open/write failure all make it return silently.
//
// The record and its trailing newline are written with a single Write call.
// Writing them separately would let another appender (for example the
// crash-guard goroutine or a second process) slip its own line in between and
// corrupt the JSON-lines file.
func __nyx_emit(rec map[string]interface{}) {
	path := os.Getenv("NYX_PROBE_PATH")
	if path == "" {
		return
	}
	encoded, err := json.Marshal(rec)
	if err != nil {
		return
	}
	out, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		return
	}
	defer out.Close()
	out.Write(append(encoded, '\n'))
}
func __nyx_probe(sinkCallee string, args ...string) {
serArgs := make([]map[string]interface{}, 0, len(args))
for _, a := range args {
serArgs = append(serArgs, map[string]interface{}{
@ -71,23 +130,61 @@ func __nyx_probe(sinkCallee string, args ...string) {
"value": a,
})
}
rec := map[string]interface{}{
__nyx_emit(map[string]interface{}{
"sink_callee": sinkCallee,
"args": serArgs,
"captured_at_ns": uint64(time.Now().UnixNano()),
"payload_id": os.Getenv("NYX_PAYLOAD_ID"),
"kind": map[string]interface{}{"kind": "Normal"},
"witness": __nyx_witness(sinkCallee, args),
})
}
// Phase 08: install a sink-site signal listener via `signal.Notify`. Go
// can intercept SIGABRT but not SIGSEGV (the Go runtime panics on
// memory faults before user handlers see them); for SIGSEGV we rely on
// the runtime's panic catch via `recover()` inside __nyx_run_sink.
// __nyx_install_crash_guard starts a goroutine that waits for the first
// SIGABRT, SIGBUS, SIGFPE or SIGILL, emits a Crash probe for sinkCallee, and
// then re-delivers the signal to this process with its handling reset.
func __nyx_install_crash_guard(sinkCallee string) {
// Buffered with capacity 1 so a signal that arrives before the goroutine is
// receiving is still queued.
ch := make(chan os.Signal, 1)
signal.Notify(ch, syscall.SIGABRT, syscall.SIGBUS, syscall.SIGFPE, syscall.SIGILL)
go func() {
// Only the first signal is handled; the goroutine ends after it.
sig := <-ch
// SIGABRT is the default label; the switch overrides it for the others.
name := "SIGABRT"
switch sig {
case syscall.SIGBUS: name = "SIGBUS"
case syscall.SIGFPE: name = "SIGFPE"
case syscall.SIGILL: name = "SIGILL"
}
__nyx_emit(map[string]interface{}{
"sink_callee": sinkCallee,
"args": []interface{}{},
"captured_at_ns": uint64(time.Now().UnixNano()),
"payload_id": os.Getenv("NYX_PAYLOAD_ID"),
"kind": map[string]interface{}{"kind": "Crash", "signal": name},
"witness": __nyx_witness(sinkCallee, nil),
})
// Undo the Notify registration for this signal before sending it to
// ourselves, so the re-delivery is not captured by `ch` again.
signal.Reset(sig)
syscall.Kill(syscall.Getpid(), sig.(syscall.Signal))
}()
}
// Phase 08: panic-recover hook for Go runtime-caught faults (SIGSEGV nil-
// deref, divide-by-zero treated as panic). Call as `defer __nyx_recover_crash("callee")()`
// around the instrumented sink invocation.
func __nyx_recover_crash(sinkCallee string) func() {
return func() {
if r := recover(); r != nil {
__nyx_emit(map[string]interface{}{
"sink_callee": sinkCallee,
"args": []interface{}{},
"captured_at_ns": uint64(time.Now().UnixNano()),
"payload_id": os.Getenv("NYX_PAYLOAD_ID"),
"kind": map[string]interface{}{"kind": "Crash", "signal": "SIGSEGV"},
"witness": __nyx_witness(sinkCallee, nil),
})
panic(r)
}
}
b, err := json.Marshal(rec)
if err != nil {
return
}
f, err := os.OpenFile(p, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
return
}
defer f.Close()
f.Write(b)
f.Write([]byte("\n"))
}
"#
}

View file

@ -64,16 +64,78 @@ impl LangEmitter for JavaEmitter {
/// [`crate::dynamic::probe::SinkProbe`] wire format.
pub fn probe_shim() -> &'static str {
r#"
// ── __nyx_probe shim (Phase 06 — Track C.1) ──────────────────────────────────
static void __nyx_probe(String sinkCallee, String... args) {
String p = System.getenv("NYX_PROBE_PATH");
if (p == null || p.isEmpty()) {
return;
// ── __nyx_probe shim (Phase 06 — Track C.1, Phase 08 — Track C.4 + C.5) ──
private static final String[] __NYX_DENY = {
"TOKEN","SECRET","PASSWORD","PASSWD","API_KEY","APIKEY","PRIVATE_KEY",
"CREDENTIAL","SESSION","COOKIE","AUTH","BEARER","AWS_ACCESS","AWS_SESSION",
"GH_TOKEN","GITHUB_TOKEN","NPM_TOKEN","PYPI_TOKEN","DOCKER_PASS"
};
private static final int __NYX_PAYLOAD_LIMIT = 16 * 1024;
private static final String __NYX_REDACTED = "<redacted-by-nyx-policy>";
/**
 * Reports whether an environment variable name must be redacted.
 *
 * The name is upper-cased with {@code Locale.ROOT} before matching. The
 * no-argument {@code toUpperCase()} follows the JVM default locale, and under
 * a Turkish locale {@code "api_key"} becomes {@code "APİ_KEY"} (dotted capital
 * I), which would not match {@code "API_KEY"} and would leak the value.
 *
 * @param k environment variable name
 * @return true when the upper-cased name contains any deny-list substring
 */
private static boolean nyxIsDeniedKey(String k) {
    String ku = k.toUpperCase(java.util.Locale.ROOT);
    for (String n : __NYX_DENY) {
        if (ku.contains(n)) return true;
    }
    return false;
}
/**
 * Builds the witness JSON object for one probe.
 *
 * Fields, in order: {@code env_snapshot} (environment sorted by name, values
 * of denied names replaced by the redaction marker), {@code cwd} (the
 * {@code user.dir} system property), {@code payload_bytes} (UTF-8 bytes of
 * NYX_PAYLOAD as unsigned integers, capped at the payload limit),
 * {@code callee}, and {@code args_repr} (null entries become empty strings;
 * a null array yields an empty list).
 *
 * @param sinkCallee name of the instrumented sink
 * @param args       string arguments captured at the sink, may be null
 * @return the serialised JSON object
 */
private static String nyxWitnessJson(String sinkCallee, String[] args) {
    StringBuilder json = new StringBuilder(256);

    json.append("{\"env_snapshot\":{");
    java.util.TreeMap<String,String> sortedEnv = new java.util.TreeMap<>(System.getenv());
    String separator = "";
    for (java.util.Map.Entry<String,String> entry : sortedEnv.entrySet()) {
        String name = entry.getKey();
        json.append(separator);
        separator = ",";
        json.append('"');
        nyxJsonEscape(name, json);
        json.append("\":\"");
        if (nyxIsDeniedKey(name)) {
            json.append(__NYX_REDACTED);
        } else {
            String value = entry.getValue();
            nyxJsonEscape(value == null ? "" : value, json);
        }
        json.append('"');
    }

    json.append("},\"cwd\":\"");
    nyxJsonEscape(System.getProperty("user.dir", ""), json);

    json.append("\",\"payload_bytes\":[");
    String payload = System.getenv("NYX_PAYLOAD");
    if (payload != null) {
        byte[] raw = payload.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        int limit = Math.min(raw.length, __NYX_PAYLOAD_LIMIT);
        for (int idx = 0; idx < limit; idx++) {
            if (idx != 0) json.append(',');
            // Mask to 0..255: Java bytes are signed.
            json.append(raw[idx] & 0xff);
        }
    }

    json.append("],\"callee\":\"");
    nyxJsonEscape(sinkCallee, json);

    json.append("\",\"args_repr\":[");
    if (args != null) {
        int idx = 0;
        for (String arg : args) {
            if (idx++ > 0) json.append(',');
            json.append('"');
            nyxJsonEscape(arg == null ? "" : arg, json);
            json.append('"');
        }
    }
    json.append("]}");

    return json.toString();
}
/**
 * Appends one already-serialised probe line to the file named by
 * NYX_PROBE_PATH. Does nothing when the variable is unset or empty, and
 * swallows I/O failures (the probe channel is best-effort).
 *
 * The file is written explicitly as UTF-8. {@code FileWriter(String, boolean)}
 * encodes with the platform default charset, which is not guaranteed to be
 * UTF-8 and would garble non-ASCII text in the JSON line.
 *
 * @param line the complete record, including its trailing newline
 */
private static void nyxEmit(String line) {
    String p = System.getenv("NYX_PROBE_PATH");
    if (p == null || p.isEmpty()) return;
    try (java.io.Writer w = new java.io.OutputStreamWriter(
            new java.io.FileOutputStream(p, true),
            java.nio.charset.StandardCharsets.UTF_8)) {
        w.write(line);
    } catch (java.io.IOException e) {
        // best-effort
    }
}
static void __nyx_probe(String sinkCallee, String... args) {
long now = System.nanoTime();
String payloadId = System.getenv("NYX_PAYLOAD_ID");
if (payloadId == null) payloadId = "";
StringBuilder line = new StringBuilder(128);
StringBuilder line = new StringBuilder(256);
line.append("{\"sink_callee\":\"");
nyxJsonEscape(sinkCallee, line);
line.append("\",\"args\":[");
@ -85,12 +147,33 @@ pub fn probe_shim() -> &'static str {
}
line.append("],\"captured_at_ns\":").append(now).append(",\"payload_id\":\"");
nyxJsonEscape(payloadId, line);
line.append("\"}\n");
try (java.io.FileWriter fw = new java.io.FileWriter(p, true)) {
fw.write(line.toString());
} catch (java.io.IOException e) {
// best-effort
}
line.append("\",\"kind\":{\"kind\":\"Normal\"},\"witness\":");
line.append(nyxWitnessJson(sinkCallee, args));
line.append("}\n");
nyxEmit(line.toString());
}
// Phase 08: install a sink-site Throwable handler. Java cannot catch
// SIGSEGV / SIGFPE directly (JVM aborts), but it can intercept the
// uncaught-exception path which fires for any Error / RuntimeException
// escaping the sink call. Map them onto SIGABRT for the oracle.
/**
 * Installs a default uncaught-exception handler that records a Crash probe
 * (signal reported as SIGABRT, empty args, witness with no arguments) for
 * {@code sinkCallee} and then terminates the JVM with exit status 134.
 *
 * NOTE(review): the timestamp comes from {@code System.nanoTime()}; confirm
 * the host side does not expect wall-clock nanoseconds here.
 *
 * @param sinkCallee name of the sink being guarded
 */
static void __nyx_install_crash_guard(String sinkCallee) {
    Thread.UncaughtExceptionHandler crashReporter = (thread, failure) -> {
        long now = System.nanoTime();
        String envId = System.getenv("NYX_PAYLOAD_ID");
        String payloadId = (envId == null) ? "" : envId;

        StringBuilder record = new StringBuilder(256);
        record.append("{\"sink_callee\":\"");
        nyxJsonEscape(sinkCallee, record);
        record.append("\",\"args\":[],\"captured_at_ns\":");
        record.append(now);
        record.append(",\"payload_id\":\"");
        nyxJsonEscape(payloadId, record);
        record.append("\",\"kind\":{\"kind\":\"Crash\",\"signal\":\"SIGABRT\"},\"witness\":");
        record.append(nyxWitnessJson(sinkCallee, new String[0]));
        record.append("}\n");

        nyxEmit(record.toString());
        System.exit(134);
    };
    Thread.setDefaultUncaughtExceptionHandler(crashReporter);
}
private static void nyxJsonEscape(String s, StringBuilder out) {

View file

@ -58,11 +58,62 @@ impl LangEmitter for JavaScriptEmitter {
/// unset.
pub fn probe_shim() -> &'static str {
r#"
// ── __nyx_probe shim (Phase 06 — Track C.1) ──────────────────────────────────
function __nyx_probe(sinkCallee, ...args) {
// ── __nyx_probe shim (Phase 06 — Track C.1, Phase 08 — Track C.4 + C.5) ──────
const _NYX_DENY_SUBSTRINGS = [
'TOKEN','SECRET','PASSWORD','PASSWD','API_KEY','APIKEY','PRIVATE_KEY',
'CREDENTIAL','SESSION','COOKIE','AUTH','BEARER','AWS_ACCESS','AWS_SESSION',
'GH_TOKEN','GITHUB_TOKEN','NPM_TOKEN','PYPI_TOKEN','DOCKER_PASS'
];
const _NYX_PAYLOAD_LIMIT = 16 * 1024;
const _NYX_REDACTED = '<redacted-by-nyx-policy>';
// Returns a plain-object copy of process.env in which the value of every
// variable whose upper-cased name contains a deny-list substring is replaced
// by the redaction marker.
function __nyx_scrub_env() {
  const env = process.env || {};
  const isDenied = function (name) {
    const upper = String(name).toUpperCase();
    for (const needle of _NYX_DENY_SUBSTRINGS) {
      if (upper.indexOf(needle) !== -1) return true;
    }
    return false;
  };
  return Object.keys(env).reduce(function (scrubbed, name) {
    scrubbed[name] = isDenied(name) ? _NYX_REDACTED : env[name];
    return scrubbed;
  }, {});
}
// Assembles the witness object for one probe: scrubbed environment, working
// directory (empty string when process.cwd() throws), the UTF-8 bytes of
// NYX_PAYLOAD capped at _NYX_PAYLOAD_LIMIT, the callee name, and a string
// representation of each argument (byte buffers are summarised by length).
function __nyx_witness(sinkCallee, args) {
  const rawPayload = String(process.env.NYX_PAYLOAD || '');
  const encoded = Buffer.from(rawPayload, 'utf8');
  const capped = encoded.length > _NYX_PAYLOAD_LIMIT
    ? encoded.slice(0, _NYX_PAYLOAD_LIMIT)
    : encoded;

  const isByteLike = (value) =>
    Boolean(value) && typeof value === 'object' &&
    (value instanceof Buffer || value instanceof Uint8Array);
  const describe = (value) =>
    isByteLike(value) ? '<bytes:' + value.length + '>' : String(value);

  let workDir = '';
  try {
    workDir = process.cwd();
  } catch (e) {
    // keep the empty default
  }

  const witness = {};
  witness.env_snapshot = __nyx_scrub_env();
  witness.cwd = workDir;
  witness.payload_bytes = Array.from(capped);
  witness.callee = String(sinkCallee);
  witness.args_repr = args.map((value) => describe(value));
  return witness;
}
// Appends `rec` as one JSON line to the file named by NYX_PROBE_PATH.
// Does nothing when the variable is unset or empty; serialisation and write
// failures are swallowed because the probe channel is best-effort.
function __nyx_emit(rec) {
  const fsModule = require('fs');
  const probePath = process.env.NYX_PROBE_PATH;
  if (probePath) {
    try {
      const line = JSON.stringify(rec) + '\n';
      fsModule.appendFileSync(probePath, line);
    } catch (e) {
      // best-effort: probe channel write failure is non-fatal.
    }
  }
}
function __nyx_probe(sinkCallee, ...args) {
const _ser = args.map(function (a) {
if (a && typeof a === 'object' && (a instanceof Buffer || a instanceof Uint8Array)) {
return { kind: 'Bytes', value: Array.from(a) };
@ -75,16 +126,49 @@ function __nyx_probe(sinkCallee, ...args) {
}
return { kind: 'String', value: String(a) };
});
const _rec = {
__nyx_emit({
sink_callee: String(sinkCallee),
args: _ser,
captured_at_ns: Number(process.hrtime.bigint()),
payload_id: String(process.env.NYX_PAYLOAD_ID || ''),
kind: { kind: 'Normal' },
witness: __nyx_witness(sinkCallee, args),
});
}
// Phase 08: V8 cannot catch native SIGSEGV in pure JS, but it can intercept
// `uncaughtException` / `unhandledRejection` plus the synchronously
// deliverable signals (SIGABRT via process.kill). __nyx_install_crash_guard
// registers both: the uncaught path maps Error-shaped failures to a SIGABRT
// crash probe; explicit process.on('SIG*') registers the others where the
// runtime exposes them. Re-raise via process.exit(134) so the outcome's
// exit_code still reflects an abort-style death.
function __nyx_install_crash_guard(sinkCallee) {
const _emit_crash = function (signalName) {
__nyx_emit({
sink_callee: String(sinkCallee),
args: [],
captured_at_ns: Number(process.hrtime.bigint()),
payload_id: String(process.env.NYX_PAYLOAD_ID || ''),
kind: { kind: 'Crash', signal: signalName },
witness: __nyx_witness(sinkCallee, []),
});
};
try {
_fs.appendFileSync(_p, JSON.stringify(_rec) + '\n');
} catch (e) {
// best-effort: probe channel write failure is non-fatal.
process.on('uncaughtException', function (_err) {
_emit_crash('SIGABRT');
process.exit(134);
});
process.on('unhandledRejection', function (_reason) {
_emit_crash('SIGABRT');
process.exit(134);
});
for (const nm of ['SIGSEGV','SIGABRT','SIGBUS','SIGFPE','SIGILL']) {
try {
process.on(nm, function () {
_emit_crash(nm);
process.exit(128 + (nm === 'SIGABRT' ? 6 : 11));
});
} catch (e) { /* runtime refused signal handler */ }
}
}
"#

View file

@ -51,12 +51,53 @@ impl LangEmitter for PhpEmitter {
/// Track C.1).
pub fn probe_shim() -> &'static str {
r#"
// ── __nyx_probe shim (Phase 06 — Track C.1) ──────────────────────────────────
function __nyx_probe(string $sinkCallee, ...$args): void {
$p = getenv('NYX_PROBE_PATH');
if ($p === false || $p === '') {
return;
// ── __nyx_probe shim (Phase 06 — Track C.1, Phase 08 — Track C.4 + C.5) ──────
const __NYX_DENY_SUBSTRINGS = [
'TOKEN','SECRET','PASSWORD','PASSWD','API_KEY','APIKEY','PRIVATE_KEY',
'CREDENTIAL','SESSION','COOKIE','AUTH','BEARER','AWS_ACCESS','AWS_SESSION',
'GH_TOKEN','GITHUB_TOKEN','NPM_TOKEN','PYPI_TOKEN','DOCKER_PASS',
];
const __NYX_PAYLOAD_LIMIT = 16 * 1024;
const __NYX_REDACTED = '<redacted-by-nyx-policy>';
// Returns true when the upper-cased environment key contains any substring
// from __NYX_DENY_SUBSTRINGS, false otherwise.
function __nyx_is_denied_key(string $k): bool {
    $upper = strtoupper($k);
    $hit = false;
    foreach (__NYX_DENY_SUBSTRINGS as $needle) {
        if (strpos($upper, $needle) !== false) {
            $hit = true;
            break;
        }
    }
    return $hit;
}
// Builds the witness record for one probe: scrubbed environment, working
// directory, NYX_PAYLOAD bytes (capped at __NYX_PAYLOAD_LIMIT), callee name
// and a string representation of each sink argument.
//
// The environment is read with getenv() as well as $_ENV. $_ENV is only
// populated when the `variables_order` ini setting contains "E", so relying on
// it alone can yield an empty snapshot. The snapshot is cast to an object so
// that an empty environment still encodes as a JSON object ({}) instead of a
// JSON array ([]).
function __nyx_witness(string $sinkCallee, array $args): array {
    $raw = getenv();
    if (!is_array($raw)) {
        $raw = [];
    }
    // Union keeps getenv() entries and adds anything present only in $_ENV.
    $raw = $raw + $_ENV;

    $env = [];
    foreach ($raw as $k => $v) {
        $key = (string) $k;
        if (__nyx_is_denied_key($key)) {
            $env[$key] = __NYX_REDACTED;
        } else {
            $env[$key] = is_scalar($v) ? (string) $v : '';
        }
    }
    // Sort for deterministic output.
    ksort($env);

    $payload = (string) (getenv('NYX_PAYLOAD') ?: '');
    $pb = substr($payload, 0, __NYX_PAYLOAD_LIMIT);
    $bytes = [];
    $plen = strlen($pb);
    for ($i = 0; $i < $plen; $i++) {
        $bytes[] = ord($pb[$i]);
    }

    $repr = [];
    foreach ($args as $a) {
        $repr[] = is_string($a) ? $a : (string) $a;
    }

    return [
        'env_snapshot'  => (object) $env,
        'cwd'           => @getcwd() ?: '',
        'payload_bytes' => $bytes,
        'callee'        => $sinkCallee,
        'args_repr'     => $repr,
    ];
}
// Appends $rec as one JSON line to the file named by NYX_PROBE_PATH.
// Does nothing when the variable is unset or empty; write failures are
// suppressed because the probe channel is best-effort.
//
// json_encode() returns false when a string is not valid UTF-8 (likely for
// attack payloads). Concatenating that false would append a bare newline and
// lose the probe, so invalid sequences are substituted where the runtime
// supports it and nothing is written if encoding still fails.
function __nyx_emit(array $rec): void {
    $p = getenv('NYX_PROBE_PATH');
    if ($p === false || $p === '') return;
    $flags = defined('JSON_INVALID_UTF8_SUBSTITUTE') ? JSON_INVALID_UTF8_SUBSTITUTE : 0;
    $json = json_encode($rec, $flags);
    if ($json === false) return;
    @file_put_contents($p, $json . "\n", FILE_APPEND);
}
function __nyx_probe(string $sinkCallee, ...$args): void {
$ser = [];
foreach ($args as $a) {
if (is_int($a)) {
@ -65,14 +106,57 @@ function __nyx_probe(string $sinkCallee, ...$args): void {
$ser[] = ['kind' => 'String', 'value' => (string) $a];
}
}
$rec = [
'sink_callee' => $sinkCallee,
'args' => $ser,
__nyx_emit([
'sink_callee' => $sinkCallee,
'args' => $ser,
'captured_at_ns' => (int) (microtime(true) * 1e9),
'payload_id' => (string) (getenv('NYX_PAYLOAD_ID') ?: ''),
];
$line = json_encode($rec) . "\n";
@file_put_contents($p, $line, FILE_APPEND);
'payload_id' => (string) (getenv('NYX_PAYLOAD_ID') ?: ''),
'kind' => ['kind' => 'Normal'],
'witness' => __nyx_witness($sinkCallee, $args),
]);
}
// Phase 08: PHP cannot catch SIGSEGV from userland, but pcntl_signal and
// register_shutdown_function intercept SIGABRT-class fatal errors.
// Installs the crash hooks for $sinkCallee:
//   - an error handler that emits a SIGABRT Crash probe for fatal-class error
//     numbers and then defers to PHP's standard handling (returns false);
//   - a shutdown function that emits a SIGABRT Crash probe when the last
//     recorded error is fatal-class;
//   - when pcntl is available, handlers for SIGABRT / SIGBUS / SIGFPE / SIGILL
//     that emit a Crash probe, restore the default disposition and re-deliver
//     the signal.
//
// Signal constants are looked up with defined()/constant(): the `??` operator
// does not guard an undefined constant. posix_kill is only called when the
// posix extension is loaded; otherwise the process exits with 128 + signal.
function __nyx_install_crash_guard(string $sinkCallee): void {
    $emit_crash = function (string $signalName) use ($sinkCallee) {
        __nyx_emit([
            'sink_callee'    => $sinkCallee,
            'args'           => [],
            'captured_at_ns' => (int) (microtime(true) * 1e9),
            'payload_id'     => (string) (getenv('NYX_PAYLOAD_ID') ?: ''),
            'kind'           => ['kind' => 'Crash', 'signal' => $signalName],
            'witness'        => __nyx_witness($sinkCallee, []),
        ]);
    };
    set_error_handler(function ($errno, $errstr) use ($emit_crash) {
        if ($errno & (E_ERROR | E_PARSE | E_CORE_ERROR | E_COMPILE_ERROR | E_USER_ERROR)) {
            $emit_crash('SIGABRT');
        }
        return false;
    });
    register_shutdown_function(function () use ($emit_crash) {
        $err = error_get_last();
        if ($err && ($err['type'] & (E_ERROR | E_PARSE | E_CORE_ERROR | E_COMPILE_ERROR))) {
            $emit_crash('SIGABRT');
        }
    });
    if (function_exists('pcntl_signal') && function_exists('pcntl_async_signals')) {
        pcntl_async_signals(true);
        // Map signal number => name for every signal this build defines.
        $names = [];
        foreach (['SIGABRT', 'SIGBUS', 'SIGFPE', 'SIGILL'] as $nm) {
            if (defined($nm)) {
                $names[constant($nm)] = $nm;
            }
        }
        foreach ($names as $sig => $nm) {
            pcntl_signal($sig, function ($s) use ($emit_crash, $names) {
                $emit_crash(isset($names[$s]) ? $names[$s] : 'SIGABRT');
                // Restore the default action so the re-delivered signal kills
                // the process instead of re-entering this handler.
                pcntl_signal($s, SIG_DFL);
                if (function_exists('posix_kill') && function_exists('posix_getpid')) {
                    posix_kill(posix_getpid(), $s);
                } else {
                    exit(128 + (int) $s);
                }
            });
        }
    }
}
"#
}

View file

@ -51,12 +51,66 @@ impl LangEmitter for PythonEmitter {
/// configured a probe channel.
pub fn probe_shim() -> &'static str {
r#"
# __nyx_probe shim (Phase 06 Track C.1)
def __nyx_probe(sink_callee, *args):
import os, time, json
# __nyx_probe shim (Phase 06 Track C.1, Phase 08 Track C.4 + C.5)
# Deny-substring list mirrors crate::dynamic::policy::DENY_KEY_SUBSTRINGS; keep
# in sync when the host-side policy gains new entries.
_NYX_DENY_SUBSTRINGS = (
"TOKEN", "SECRET", "PASSWORD", "PASSWD", "API_KEY", "APIKEY",
"PRIVATE_KEY", "CREDENTIAL", "SESSION", "COOKIE", "AUTH", "BEARER",
"AWS_ACCESS", "AWS_SESSION", "GH_TOKEN", "GITHUB_TOKEN", "NPM_TOKEN",
"PYPI_TOKEN", "DOCKER_PASS",
)
_NYX_PAYLOAD_LIMIT = 16 * 1024
_NYX_REDACTED = "<redacted-by-nyx-policy>"
def __nyx_scrub_env():
    """Return a dict copy of ``os.environ`` with sensitive values redacted.

    A value is replaced by ``_NYX_REDACTED`` when the upper-cased variable
    name contains any entry of ``_NYX_DENY_SUBSTRINGS``.
    """
    import os

    def _is_denied(name):
        upper = str(name).upper()
        for needle in _NYX_DENY_SUBSTRINGS:
            if needle in upper:
                return True
        return False

    return {
        name: (_NYX_REDACTED if _is_denied(name) else value)
        for name, value in os.environ.items()
    }
def __nyx_witness(sink_callee, args):
    """Build the witness dict for one probe.

    Keys: ``env_snapshot`` (scrubbed environment), ``cwd`` (empty string when
    the working directory cannot be read), ``payload_bytes`` (bytes of
    ``NYX_PAYLOAD`` as a list of ints, capped at ``_NYX_PAYLOAD_LIMIT``),
    ``callee`` and ``args_repr`` (``str()`` of each argument; bytes-like
    arguments are summarised by their length).
    """
    import os

    raw = os.environ.get("NYX_PAYLOAD", "")
    if isinstance(raw, str):
        encoded = raw.encode("utf-8", "replace")
    else:
        encoded = bytes(raw)
    # Slicing is a no-op when the payload is already within the limit.
    encoded = encoded[:_NYX_PAYLOAD_LIMIT]

    def _describe(value):
        if isinstance(value, (bytes, bytearray)):
            return "<bytes:%d>" % len(value)
        return str(value)

    try:
        work_dir = os.getcwd()
    except OSError:
        work_dir = ""

    witness = {}
    witness["env_snapshot"] = __nyx_scrub_env()
    witness["cwd"] = work_dir
    witness["payload_bytes"] = list(encoded)
    witness["callee"] = str(sink_callee)
    witness["args_repr"] = [_describe(value) for value in args]
    return witness
def __nyx_emit(rec):
    """Append ``rec`` as one JSON line to the file named by ``NYX_PROBE_PATH``.

    Does nothing when the variable is unset or empty. ``OSError`` raised while
    opening or writing is swallowed: the probe channel is best-effort.
    """
    import json
    import os

    probe_path = os.environ.get("NYX_PROBE_PATH")
    if probe_path:
        try:
            with open(probe_path, "a") as sink:
                sink.write(json.dumps(rec) + "\n")
        except OSError:
            pass
def __nyx_probe(sink_callee, *args):
import os, time
serialised = []
for a in args:
if isinstance(a, (bytes, bytearray)):
@ -72,12 +126,45 @@ def __nyx_probe(sink_callee, *args):
"args": serialised,
"captured_at_ns": time.time_ns(),
"payload_id": os.environ.get("NYX_PAYLOAD_ID", ""),
"kind": {"kind": "Normal"},
"witness": __nyx_witness(sink_callee, args),
}
try:
with open(p, "a") as _f:
_f.write(json.dumps(rec) + "\n")
except OSError:
pass
__nyx_emit(rec)
# Phase 08: sink-site signal handler. Call __nyx_install_crash_guard before
# invoking the instrumented sink so a SIGSEGV / SIGABRT / etc. is captured as
# a Crash probe (with witness) before the process aborts. The shim re-raises
# the signal on the default handler after writing so process-level outcome
# observers (exit_code) still see the death.
# Signal number -> signal name, filled in by __nyx_install_crash_guard.
_NYX_SIGNAL_NAMES = {}


def __nyx_install_crash_guard(sink_callee):
    """Install a Crash-probe handler for the fatal signals this platform has.

    For each of SIGSEGV, SIGABRT, SIGBUS, SIGFPE and SIGILL that the ``signal``
    module exposes, registers a handler that emits a Crash probe (with
    witness) for ``sink_callee``, restores the default disposition and sends
    the same signal to this process again. Registration failures
    (``OSError`` / ``ValueError``) are ignored.

    NOTE(review): Python-level handlers presumably do not run for a genuine
    native fault such as a real SIGSEGV raised inside C code - confirm this
    guard is only expected to catch signals delivered via kill/raise.
    """
    import signal, os, time

    guarded = []
    for name in ("SIGSEGV", "SIGABRT", "SIGBUS", "SIGFPE", "SIGILL"):
        signo = getattr(signal, name, None)
        if signo is None:
            continue
        guarded.append(signo)
        _NYX_SIGNAL_NAMES[signo] = name

    def _on_fatal_signal(signum, frame):
        record = {
            "sink_callee": str(sink_callee),
            "args": [],
            "captured_at_ns": time.time_ns(),
            "payload_id": os.environ.get("NYX_PAYLOAD_ID", ""),
            "kind": {
                "kind": "Crash",
                "signal": _NYX_SIGNAL_NAMES.get(signum, "SIG?"),
            },
            "witness": __nyx_witness(sink_callee, []),
        }
        __nyx_emit(record)
        # Reset to default and re-raise so the process actually dies.
        signal.signal(signum, signal.SIG_DFL)
        os.kill(os.getpid(), signum)

    for signo in guarded:
        try:
            signal.signal(signo, _on_fatal_signal)
        except (OSError, ValueError):
            pass
"#
}

View file

@ -25,11 +25,50 @@ const SUPPORTED: &[EntryKind] = &[EntryKind::Function];
/// even though `emit` returns `LangUnsupported` until Phase 15 lands.
pub fn probe_shim() -> &'static str {
r#"
# __nyx_probe shim (Phase 06 Track C.1)
def __nyx_probe(sink_callee, *args)
# __nyx_probe shim (Phase 06 Track C.1, Phase 08 Track C.4 + C.5)
# Policy values for the witness snapshot.
#
# These must be real Ruby constants (first character an uppercase letter).
# An identifier that starts with an underscore is parsed as a *local
# variable*; a top-level local is not visible inside `def` bodies, so the
# helpers below would raise NameError on their first use. The
# `unless defined?` guards keep a doubly-spliced shim from printing
# "already initialized constant" warnings.
NYX_DENY_SUBSTRINGS = %w[
  TOKEN SECRET PASSWORD PASSWD API_KEY APIKEY PRIVATE_KEY CREDENTIAL SESSION
  COOKIE AUTH BEARER AWS_ACCESS AWS_SESSION GH_TOKEN GITHUB_TOKEN NPM_TOKEN
  PYPI_TOKEN DOCKER_PASS
].freeze unless defined?(NYX_DENY_SUBSTRINGS)
NYX_PAYLOAD_LIMIT = 16 * 1024 unless defined?(NYX_PAYLOAD_LIMIT)
NYX_REDACTED = '<redacted-by-nyx-policy>' unless defined?(NYX_REDACTED)

# True when the upper-cased key contains any deny substring.
#
# k - environment variable name (anything responding to #to_s).
def __nyx_is_denied_key(k)
  ku = k.to_s.upcase
  NYX_DENY_SUBSTRINGS.any? { |n| ku.include?(n) }
end

# Build the witness hash attached to every probe record.
#
# sink_callee - name of the instrumented sink.
# args        - arguments passed to the sink; non-String values are
#               rendered with #to_s for `args_repr`.
#
# Returns a Hash with :env_snapshot (denied keys redacted), :cwd,
# :payload_bytes (first NYX_PAYLOAD_LIMIT bytes of ENV['NYX_PAYLOAD']),
# :callee and :args_repr.
def __nyx_witness(sink_callee, args)
  env_snapshot = {}
  ENV.each do |k, v|
    env_snapshot[k] = __nyx_is_denied_key(k) ? NYX_REDACTED : v
  end
  payload = ENV['NYX_PAYLOAD'] || ''
  pb = payload.bytes
  # Keep only the head of an oversized payload.
  pb = pb[0, NYX_PAYLOAD_LIMIT] if pb.length > NYX_PAYLOAD_LIMIT
  repr = args.map { |a| a.is_a?(String) ? a : a.to_s }
  # Dir.pwd raises when the working directory has been removed.
  cwd = (Dir.pwd rescue '')
  {
    env_snapshot: env_snapshot,
    cwd: cwd,
    payload_bytes: pb,
    callee: sink_callee.to_s,
    args_repr: repr,
  }
end
# Append `rec` as one JSON line to the file named by NYX_PROBE_PATH.
#
# rec - Hash (or anything responding to #to_json) describing one probe.
#
# Does nothing when NYX_PROBE_PATH is unset or empty. Any StandardError
# raised while opening or writing the file is swallowed: probe emission is
# best-effort and must not disturb the harness.
def __nyx_emit(rec)
  require 'json'
  probe_path = ENV['NYX_PROBE_PATH'].to_s
  unless probe_path.empty?
    begin
      File.open(probe_path, 'a') do |out|
        out.puts(rec.to_json)
      end
    rescue StandardError
      nil
    end
  end
end
def __nyx_probe(sink_callee, *args)
ser = args.map do |a|
case a
when Integer then { kind: 'Int', value: a }
@ -37,15 +76,36 @@ def __nyx_probe(sink_callee, *args)
else { kind: 'String', value: a.to_s }
end
end
rec = {
__nyx_emit({
sink_callee: sink_callee.to_s,
args: ser,
captured_at_ns: (Process.clock_gettime(Process::CLOCK_REALTIME, :nanosecond)),
payload_id: (ENV['NYX_PAYLOAD_ID'] || ''),
}
begin
File.open(p, 'a') { |f| f.puts(rec.to_json) }
rescue StandardError
kind: { kind: 'Normal' },
witness: __nyx_witness(sink_callee, args),
})
end
# Phase 08: install a sink-site signal trap. Ruby traps run in interrupt
# context but can write to a file before re-raising via Process.kill.
#
# sink_callee - name recorded as `sink_callee` on the Crash record.
#
# For each of SEGV / ABRT / BUS / FPE / ILL the trap emits a Crash record
# (empty args, witness taken with no args), restores the default action and
# re-sends the same signal to this process.
#
# NOTE(review): Signal.trap is documented to raise ArgumentError for signals
# the interpreter reserves for itself (SEGV, BUS, ILL, FPE are on that list),
# in which case the rescue below silently skips them and only ABRT ends up
# guarded -- confirm against the Ruby versions the harness targets.
def __nyx_install_crash_guard(sink_callee)
%w[SEGV ABRT BUS FPE ILL].each do |nm|
begin
Signal.trap(nm) do
__nyx_emit({
sink_callee: sink_callee.to_s,
args: [],
captured_at_ns: (Process.clock_gettime(Process::CLOCK_REALTIME, :nanosecond)),
payload_id: (ENV['NYX_PAYLOAD_ID'] || ''),
kind: { kind: 'Crash', signal: "SIG#{nm}" },
witness: __nyx_witness(sink_callee, []),
})
# Restore the default action first so the re-sent signal terminates the
# process instead of re-entering this trap.
Signal.trap(nm, 'DEFAULT')
Process.kill(nm, Process.pid)
end
rescue ArgumentError, Errno::EINVAL
# Signal.trap rejected this signal (unsupported here or not trappable);
# leave its existing disposition untouched and move on.
end
end
end
"#

View file

@ -61,58 +61,197 @@ impl LangEmitter for RustEmitter {
/// [`crate::dynamic::probe::SinkProbe`] wire format.
pub fn probe_shim() -> &'static str {
r#"
// ── __nyx_probe shim (Phase 06 — Track C.1) ──────────────────────────────────
// ── __nyx_probe shim (Phase 06 — Track C.1, Phase 08 — Track C.4 + C.5) ──────
#[allow(dead_code)]
fn __nyx_probe(sink_callee: &str, args: &[&str]) {
const __NYX_DENY_SUBSTRINGS: &[&str] = &[
"TOKEN","SECRET","PASSWORD","PASSWD","API_KEY","APIKEY","PRIVATE_KEY",
"CREDENTIAL","SESSION","COOKIE","AUTH","BEARER","AWS_ACCESS","AWS_SESSION",
"GH_TOKEN","GITHUB_TOKEN","NPM_TOKEN","PYPI_TOKEN","DOCKER_PASS",
];
#[allow(dead_code)]
const __NYX_PAYLOAD_LIMIT: usize = 16 * 1024;
#[allow(dead_code)]
const __NYX_REDACTED: &str = "<redacted-by-nyx-policy>";
/// Append `s` to `out`, escaped for use *inside* a JSON string literal.
///
/// Quote, backslash, newline, carriage return and tab get their two-character
/// escapes; every other character below U+0020 is written as a lowercase
/// four-digit `\u` escape. Everything else is copied through unchanged. The
/// surrounding quotes are the caller's responsibility.
#[allow(dead_code)]
fn __nyx_esc(s: &str, out: &mut String) {
    for ch in s.chars() {
        // Two-character escapes first; an empty string means "no short form".
        let short = match ch {
            '"' => "\\\"",
            '\\' => "\\\\",
            '\n' => "\\n",
            '\r' => "\\r",
            '\t' => "\\t",
            _ => "",
        };
        if !short.is_empty() {
            out.push_str(short);
        } else if u32::from(ch) < 0x20 {
            out.push_str(&format!("\\u{:04x}", u32::from(ch)));
        } else {
            out.push(ch);
        }
    }
}
/// Render the probe `witness` object as a JSON string.
///
/// Fields, in order: `env_snapshot` (sorted by key, values of denied keys
/// replaced by `__NYX_REDACTED`), `cwd`, `payload_bytes` (at most
/// `__NYX_PAYLOAD_LIMIT` leading bytes of the `NYX_PAYLOAD` variable, as a
/// JSON array of numbers), `callee` and `args_repr`.
///
/// The environment is read through the `_os` accessors so that a key or
/// value that is not valid Unicode cannot panic the harness; such strings
/// are converted lossily for the snapshot.
#[allow(dead_code)]
fn __nyx_witness_json(sink_callee: &str, args: &[&str]) -> String {
    let mut out = String::with_capacity(256);

    // `std::env::vars()` panics while iterating if any entry is not valid
    // Unicode, which would abort the instrumented program (or the crash
    // handler) for reasons unrelated to the sink. `vars_os` never panics.
    let mut env: Vec<(String, String)> = std::env::vars_os()
        .map(|(k, v)| {
            (
                k.to_string_lossy().into_owned(),
                v.to_string_lossy().into_owned(),
            )
        })
        .collect();
    // Sorted so the emitted snapshot is byte-deterministic.
    env.sort();

    out.push_str("{\"env_snapshot\":{");
    for (i, (k, v)) in env.iter().enumerate() {
        let ku = k.to_ascii_uppercase();
        let denied = __NYX_DENY_SUBSTRINGS.iter().any(|n| ku.contains(*n));
        let val = if denied { __NYX_REDACTED } else { v.as_str() };
        if i > 0 {
            out.push(',');
        }
        out.push('"');
        __nyx_esc(k, &mut out);
        out.push_str("\":\"");
        __nyx_esc(val, &mut out);
        out.push('"');
    }

    out.push_str("},\"cwd\":\"");
    let cwd = std::env::current_dir()
        .map(|p| p.to_string_lossy().into_owned())
        .unwrap_or_default();
    __nyx_esc(&cwd, &mut out);

    out.push_str("\",\"payload_bytes\":[");
    // `var_os` keeps payloads that are not valid UTF-8; `env::var` would
    // report them as an error and the witness would record no payload at all.
    let payload = std::env::var_os("NYX_PAYLOAD").unwrap_or_default();
    #[cfg(unix)]
    let bytes: Vec<u8> = {
        use std::os::unix::ffi::OsStrExt;
        payload.as_bytes().to_vec()
    };
    #[cfg(not(unix))]
    let bytes: Vec<u8> = payload.to_string_lossy().into_owned().into_bytes();
    // Head-truncate: only the first `__NYX_PAYLOAD_LIMIT` bytes are kept.
    for (i, b) in bytes.iter().take(__NYX_PAYLOAD_LIMIT).enumerate() {
        if i > 0 {
            out.push(',');
        }
        out.push_str(&b.to_string());
    }

    out.push_str("],\"callee\":\"");
    __nyx_esc(sink_callee, &mut out);
    out.push_str("\",\"args_repr\":[");
    for (i, a) in args.iter().enumerate() {
        if i > 0 {
            out.push(',');
        }
        out.push('"');
        __nyx_esc(a, &mut out);
        out.push('"');
    }
    out.push_str("]}");
    out
}
#[allow(dead_code)]
fn __nyx_emit(line: &str) {
use std::io::Write;
let p = match std::env::var("NYX_PROBE_PATH") {
Ok(v) => v,
Err(_) => return,
};
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_nanos() as u64)
.unwrap_or(0);
let payload_id = std::env::var("NYX_PAYLOAD_ID").unwrap_or_default();
fn esc(s: &str, out: &mut String) {
for ch in s.chars() {
match ch {
'"' => out.push_str("\\\""),
'\\' => out.push_str("\\\\"),
'\n' => out.push_str("\\n"),
'\r' => out.push_str("\\r"),
'\t' => out.push_str("\\t"),
c if (c as u32) < 0x20 => out.push_str(&format!("\\u{:04x}", c as u32)),
c => out.push(c),
}
}
}
let mut line = String::with_capacity(128);
line.push_str("{\"sink_callee\":\"");
esc(sink_callee, &mut line);
line.push_str("\",\"args\":[");
for (i, a) in args.iter().enumerate() {
if i > 0 {
line.push(',');
}
line.push_str("{\"kind\":\"String\",\"value\":\"");
esc(a, &mut line);
line.push_str("\"}");
}
line.push_str(&format!(
"],\"captured_at_ns\":{},\"payload_id\":\"",
now
));
esc(&payload_id, &mut line);
line.push_str("\"}\n");
if let Ok(mut f) = std::fs::OpenOptions::new()
.create(true)
.append(true)
.open(&p)
{
let _ = f.write_all(line.as_bytes());
let _ = f.write_all(b"\n");
}
}
/// Emit one `Normal` probe record for a sink invocation.
///
/// The record is a single JSON object with `sink_callee`, `args` (each
/// argument tagged as a `String`), `captured_at_ns` (wall-clock nanoseconds
/// since the Unix epoch, `0` if the clock reads before the epoch),
/// `payload_id` (from `NYX_PAYLOAD_ID`, empty when unset), `kind` and
/// `witness`. It is handed to `__nyx_emit` for writing.
#[allow(dead_code)]
fn __nyx_probe(sink_callee: &str, args: &[&str]) {
    let captured_at_ns = match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos() as u64,
        Err(_) => 0,
    };
    let payload_id = match std::env::var("NYX_PAYLOAD_ID") {
        Ok(id) => id,
        Err(_) => String::new(),
    };

    let mut record = String::with_capacity(256);
    record.push_str("{\"sink_callee\":\"");
    __nyx_esc(sink_callee, &mut record);
    record.push_str("\",\"args\":[");
    // `separator` is empty before the first element and a comma afterwards.
    let mut separator = "";
    for arg in args {
        record.push_str(separator);
        separator = ",";
        record.push_str("{\"kind\":\"String\",\"value\":\"");
        __nyx_esc(arg, &mut record);
        record.push_str("\"}");
    }
    record.push_str("],\"captured_at_ns\":");
    record.push_str(&captured_at_ns.to_string());
    record.push_str(",\"payload_id\":\"");
    __nyx_esc(&payload_id, &mut record);
    record.push_str("\",\"kind\":{\"kind\":\"Normal\"},\"witness\":");
    let witness = __nyx_witness_json(sink_callee, args);
    record.push_str(&witness);
    record.push('}');
    __nyx_emit(&record);
}
// Phase 08: install a sink-site signal handler via `libc::sigaction` so a
// SIGSEGV / SIGABRT / etc. inside the sink call is captured as a Crash
// probe before the kernel re-delivers it via SIG_DFL. The shim is
// no-op on non-Unix targets (the dynamic-verification supported set is
// Unix-only) so consumers can splice it unconditionally.
//
// `sink_callee` must be `'static` because the handler reads it later through
// a raw pointer stashed in a process-wide static. Calling this again simply
// overwrites the stored name and re-installs the same handler.
#[cfg(unix)]
#[allow(dead_code)]
fn __nyx_install_crash_guard(sink_callee: &'static str) {
use std::sync::atomic::{AtomicPtr, Ordering};
// The handler is a plain `extern "C" fn` and cannot capture state, so the
// callee name is published as (pointer, length) in two statics.
// NOTE(review): the two stores are not atomic as a pair; a signal arriving
// between them on a re-install could pair a new pointer with an old length.
static SINK_CALLEE: AtomicPtr<u8> = AtomicPtr::new(std::ptr::null_mut());
SINK_CALLEE.store(sink_callee.as_ptr() as *mut u8, Ordering::SeqCst);
let len = sink_callee.len();
static CALLEE_LEN: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);
CALLEE_LEN.store(len, Ordering::SeqCst);
extern "C" fn handler(sig: i32) {
// async-signal-unsafe code is unavoidable here (file I/O); we
// accept the risk because the process is already dying and we
// need the forensic record.
let name = match sig {
libc::SIGSEGV => "SIGSEGV",
libc::SIGABRT => "SIGABRT",
libc::SIGBUS => "SIGBUS",
libc::SIGFPE => "SIGFPE",
libc::SIGILL => "SIGILL",
// Not expected: the handler is only registered for the five signals
// above. Anything else is reported as SIGABRT.
_ => "SIGABRT",
};
let p = SINK_CALLEE.load(Ordering::SeqCst);
let len = CALLEE_LEN.load(Ordering::SeqCst);
// SAFETY: a non-null `p` and `len` were taken from a `&'static str` in the
// installer, so they describe valid UTF-8 that lives for the rest of the
// process (subject to the pairing caveat noted at the stores).
let sink_callee: &str = unsafe {
if p.is_null() {
""
} else {
let slice = std::slice::from_raw_parts(p as *const u8, len);
std::str::from_utf8_unchecked(slice)
}
};
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_nanos() as u64)
.unwrap_or(0);
let payload_id = std::env::var("NYX_PAYLOAD_ID").unwrap_or_default();
// Crash records carry an empty `args` array; the witness is likewise built
// with no arguments.
let mut line = String::with_capacity(256);
line.push_str("{\"sink_callee\":\"");
__nyx_esc(sink_callee, &mut line);
line.push_str("\",\"args\":[],\"captured_at_ns\":");
line.push_str(&format!("{now},\"payload_id\":\""));
__nyx_esc(&payload_id, &mut line);
line.push_str("\",\"kind\":{\"kind\":\"Crash\",\"signal\":\"");
line.push_str(name);
line.push_str("\"},\"witness\":");
line.push_str(&__nyx_witness_json(sink_callee, &[]));
line.push('}');
__nyx_emit(&line);
// Restore default handler and re-raise so process actually dies.
// SAFETY: `sigaction` is a plain C struct for which all-zero bytes are a
// valid value; `sig` is the signal this handler was invoked for.
unsafe {
let mut sa: libc::sigaction = std::mem::zeroed();
sa.sa_sigaction = libc::SIG_DFL;
libc::sigaction(sig, &sa, std::ptr::null_mut());
libc::raise(sig);
}
}
// SAFETY: `sa` starts zeroed (so `sa_flags` is 0 and the handler is invoked
// with the one-argument signature `handler` has), and `handler` is a
// `'static` function. Return values of `sigaction` are ignored: installation
// is best-effort.
unsafe {
let mut sa: libc::sigaction = std::mem::zeroed();
sa.sa_sigaction = handler as usize;
libc::sigemptyset(&mut sa.sa_mask);
for sig in [libc::SIGSEGV, libc::SIGABRT, libc::SIGBUS, libc::SIGFPE, libc::SIGILL] {
libc::sigaction(sig, &sa, std::ptr::null_mut());
}
}
}
// Non-Unix stub with the same signature so call sites compile everywhere.
#[cfg(not(unix))]
#[allow(dead_code)]
fn __nyx_install_crash_guard(_sink_callee: &'static str) {}
"#
}

View file

@ -73,6 +73,7 @@ pub mod lang;
pub mod mount_filter;
pub mod oob;
pub mod oracle;
pub mod policy;
pub mod probe;
pub mod repro;
pub mod report;

View file

@ -7,12 +7,145 @@
//! evaluates the predicates against the captured arguments. A run is
//! Confirmed iff at least one drained record satisfies *every* predicate.
//!
//! The legacy [`Oracle::OutputContains`] path is retained for fixtures that
//! pre-date Phase 06 and migrated downstream; it is marked
//! `#[deprecated]` so the compiler nags every new use-site.
//! Phase 08 (Track C.4) replaces the coarse [`Oracle::Crash`] with
//! [`Oracle::SinkCrash`]. The new variant only confirms when a probe
//! observation in the channel carries
//! [`crate::dynamic::probe::ProbeKind::Crash { signal }`] *and* the captured
//! signal is present in the payload's [`SignalSet`] — i.e. the SIGSEGV /
//! SIGABRT / etc. must have been caught by a sink-site signal handler, not
//! by random crashing setup code. A process-level abort that escapes the
//! sink handler leaves no Crash probe, the oracle does not fire, and the
//! runner downgrades the verdict to
//! [`crate::evidence::InconclusiveReason::UnrelatedCrash`] instead of
//! stamping `Confirmed`.
//!
//! The legacy [`Oracle::OutputContains`] and [`Oracle::Crash`] paths are
//! retained for fixtures that pre-date Phase 06 / Phase 08 and migrated
//! downstream; both are marked `#[deprecated]` so the compiler nags every
//! new use-site.
use crate::dynamic::probe::SinkProbe;
use crate::dynamic::probe::{ProbeKind, SinkProbe};
use crate::dynamic::sandbox::SandboxOutcome;
use serde::{Deserialize, Serialize};
/// POSIX-style signal name carried inside [`ProbeKind::Crash`] and the
/// [`Oracle::SinkCrash`] match set.
///
/// Restricted to the signals a sink-site handler can plausibly catch and
/// route back through the probe channel. Anything outside this enum (e.g.
/// `SIGKILL`, `SIGSTOP`) cannot be caught by a userspace handler and is
/// therefore not modellable as a confirmable crash signal.
///
/// # Wire format
///
/// Each variant serialises as its canonical `SIG*` name (the `rename`
/// below). On input the listed `alias` spellings are accepted as well.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Signal {
/// Segmentation fault.
#[serde(rename = "SIGSEGV", alias = "Sigsegv", alias = "SEGV")]
Sigsegv,
/// Abort (typically from `abort(3)` or `assert(3)`).
#[serde(rename = "SIGABRT", alias = "Sigabrt", alias = "ABRT")]
Sigabrt,
/// Bus error (misaligned access, mmap fault).
#[serde(rename = "SIGBUS", alias = "Sigbus", alias = "BUS")]
Sigbus,
/// Floating-point exception (incl. integer divide-by-zero on x86).
#[serde(rename = "SIGFPE", alias = "Sigfpe", alias = "FPE")]
Sigfpe,
/// Illegal instruction.
#[serde(rename = "SIGILL", alias = "Sigill", alias = "ILL")]
Sigill,
}
impl Signal {
/// Bit position of `self` inside a [`SignalSet`]. Stable across builds
/// so the wire format of a serialised [`SignalSet`] stays compatible.
pub const fn bit(self) -> u8 {
match self {
Signal::Sigsegv => 0,
Signal::Sigabrt => 1,
Signal::Sigbus => 2,
Signal::Sigfpe => 3,
Signal::Sigill => 4,
}
}
/// Render a [`Signal`] as the conventional uppercase POSIX name (e.g.
/// `"SIGSEGV"`). Used by the per-language probe shims so their
/// captured `signal` strings are identical to what the host-side
/// [`Signal::from_name`] decoder expects.
pub const fn as_name(self) -> &'static str {
match self {
Signal::Sigsegv => "SIGSEGV",
Signal::Sigabrt => "SIGABRT",
Signal::Sigbus => "SIGBUS",
Signal::Sigfpe => "SIGFPE",
Signal::Sigill => "SIGILL",
}
}
/// Inverse of [`as_name`](Signal::as_name). Matches both the canonical
/// uppercase form and a couple of common variants emitted by language
/// runtimes (`"sigsegv"`, `"Segmentation fault"`). Returns `None` for
/// signals the oracle does not model.
pub fn from_name(s: &str) -> Option<Signal> {
let upper = s.trim().to_ascii_uppercase();
match upper.as_str() {
"SIGSEGV" | "SEGV" | "SEGMENTATION FAULT" => Some(Signal::Sigsegv),
"SIGABRT" | "ABRT" | "ABORTED" => Some(Signal::Sigabrt),
"SIGBUS" | "BUS" | "BUS ERROR" => Some(Signal::Sigbus),
"SIGFPE" | "FPE" | "FLOATING POINT EXCEPTION" => Some(Signal::Sigfpe),
"SIGILL" | "ILL" | "ILLEGAL INSTRUCTION" => Some(Signal::Sigill),
_ => None,
}
}
}
/// Set of [`Signal`]s that an [`Oracle::SinkCrash`] accepts as confirming.
///
/// Backed by a single `u8` mask (bit positions come from [`Signal::bit`]),
/// so corpus entries can build one in a `const` without any allocation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct SignalSet(u8);

impl SignalSet {
    /// The set with no members: an oracle using it can never fire. Handy as
    /// a baseline in tests.
    pub const fn empty() -> Self {
        SignalSet(0)
    }

    /// Build a set from `sigs`. Usable in `const` context; the order of
    /// `sigs` does not matter and repeated entries have no extra effect.
    pub const fn from_slice(sigs: &[Signal]) -> Self {
        let mut mask: u8 = 0;
        // Manual countdown loop: iterators are not available in `const fn`.
        let mut remaining = sigs.len();
        while remaining > 0 {
            remaining -= 1;
            mask |= 1u8 << sigs[remaining].bit();
        }
        SignalSet(mask)
    }

    /// The set of every modelled signal. Intended for payloads where a
    /// crash on arbitrary input *is* the vulnerability (e.g. C memory
    /// corruption exercised by a fuzzer).
    pub const fn all() -> Self {
        const EVERY: [Signal; 5] = [
            Signal::Sigsegv,
            Signal::Sigabrt,
            Signal::Sigbus,
            Signal::Sigfpe,
            Signal::Sigill,
        ];
        Self::from_slice(&EVERY)
    }

    /// Whether `sig` is a member of the set.
    pub const fn contains(self, sig: Signal) -> bool {
        (self.0 >> sig.bit()) & 1 == 1
    }

    /// Whether the set has no members.
    pub const fn is_empty(self) -> bool {
        self.0 == Self::empty().0
    }
}
/// Predicate evaluated against a single [`SinkProbe`] when the oracle is
/// [`Oracle::SinkProbe`].
@ -45,6 +178,12 @@ pub enum Oracle {
/// `Vec<ProbePredicate>` shape the plan listed because the corpus is
/// declared in static memory; a `Vec` would require runtime init).
SinkProbe { predicates: &'static [ProbePredicate] },
/// Phase 08 sink-site crash oracle. Fires iff at least one drained
/// probe has [`ProbeKind::Crash { signal }`] with `signal ∈ signals`.
/// A process-level abort that did not reach the sink handler leaves no
/// matching probe and the run does *not* confirm — the runner maps
/// that case to [`crate::evidence::InconclusiveReason::UnrelatedCrash`].
SinkCrash { signals: SignalSet },
/// Legacy stdout/stderr substring oracle. Kept for fixtures that
/// pre-date Phase 06; new payloads should prefer
/// [`Oracle::SinkProbe`] which is robust to oracle collisions.
@ -52,7 +191,15 @@ pub enum Oracle {
note = "use Oracle::SinkProbe with ProbePredicate args; OutputContains is brittle to oracle collisions (§16.3)"
)]
OutputContains(&'static str),
/// Process exited with a crash signal (SIGSEGV, SIGABRT).
/// Process exited with any crash signal (SIGSEGV, SIGABRT).
///
/// Coarse: fires on *any* uncaught crash, including ones unrelated to
/// the sink (e.g. `abort()` in setup code). Phase 08 introduces
/// [`Oracle::SinkCrash`] which scopes the signal to the sink handler;
/// new payloads should migrate.
#[deprecated(
note = "use Oracle::SinkCrash with a SignalSet; Crash confirms on any process abort, including setup-code failures (Phase 08 §C.4)"
)]
Crash,
/// Outbound network connection observed at the controlled sink host.
OobCallback { host: &'static str },
@ -71,6 +218,10 @@ pub fn oracle_fired(oracle: &Oracle, outcome: &SandboxOutcome, probes: &[SinkPro
Oracle::SinkProbe { predicates } => probes
.iter()
.any(|p| probe_satisfies_all(p, predicates)),
Oracle::SinkCrash { signals } => probes.iter().any(|p| match p.kind {
ProbeKind::Crash { signal } => signals.contains(signal),
ProbeKind::Normal => false,
}),
Oracle::OutputContains(needle) => {
let nb = needle.as_bytes();
contains_subslice(&outcome.stdout, nb) || contains_subslice(&outcome.stderr, nb)
@ -122,10 +273,22 @@ fn contains_subslice(hay: &[u8], needle: &[u8]) -> bool {
hay.windows(needle.len()).any(|w| w == needle)
}
/// Extract the crash [`Signal`] from a [`SinkProbe`], if it recorded one.
///
/// Returns `Some(signal)` for a `Crash` probe and `None` for a `Normal`
/// one. The runner uses this to tell "the process crashed but no sink-site
/// probe matched" (→ `Inconclusive(UnrelatedCrash)`) apart from "the
/// process crashed and a sink-site probe matched" (→ `Confirmed` via
/// `Oracle::SinkCrash`).
pub fn probe_crash_signal(probe: &SinkProbe) -> Option<Signal> {
    match &probe.kind {
        ProbeKind::Normal => None,
        ProbeKind::Crash { signal } => Some(*signal),
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::dynamic::probe::{ProbeArg, SinkProbe};
use crate::dynamic::probe::{ProbeArg, ProbeKind, ProbeWitness, SinkProbe};
use std::time::Duration;
fn outcome() -> SandboxOutcome {
@ -146,6 +309,19 @@ mod tests {
args,
captured_at_ns: 1,
payload_id: "test".into(),
kind: ProbeKind::Normal,
witness: ProbeWitness::empty(),
}
}
fn crash_probe(callee: &str, signal: Signal) -> SinkProbe {
SinkProbe {
sink_callee: callee.into(),
args: vec![],
captured_at_ns: 1,
payload_id: "test".into(),
kind: ProbeKind::Crash { signal },
witness: ProbeWitness::empty(),
}
}
@ -242,4 +418,74 @@ mod tests {
assert!(oracle_fired(&oracle, &outcome(), &hit));
assert!(!oracle_fired(&oracle, &outcome(), &miss));
}
#[test]
fn signal_set_round_trips_via_const_slice() {
const SIGS: SignalSet = SignalSet::from_slice(&[Signal::Sigsegv, Signal::Sigabrt]);
assert!(SIGS.contains(Signal::Sigsegv));
assert!(SIGS.contains(Signal::Sigabrt));
assert!(!SIGS.contains(Signal::Sigfpe));
assert!(!SIGS.is_empty());
assert!(SignalSet::empty().is_empty());
}
#[test]
fn signal_set_all_contains_every_modelled_signal() {
let all = SignalSet::all();
for s in [
Signal::Sigsegv,
Signal::Sigabrt,
Signal::Sigbus,
Signal::Sigfpe,
Signal::Sigill,
] {
assert!(all.contains(s), "SignalSet::all missing {s:?}");
}
}
#[test]
fn signal_from_name_matches_canonical_and_lowercase() {
assert_eq!(Signal::from_name("SIGSEGV"), Some(Signal::Sigsegv));
assert_eq!(Signal::from_name(" sigsegv "), Some(Signal::Sigsegv));
assert_eq!(Signal::from_name("Aborted"), Some(Signal::Sigabrt));
assert_eq!(Signal::from_name("nope"), None);
}
#[test]
fn sink_crash_confirms_only_on_matching_signal_probe() {
let oracle = Oracle::SinkCrash {
signals: SignalSet::from_slice(&[Signal::Sigsegv]),
};
let probes = vec![crash_probe("victim", Signal::Sigsegv)];
assert!(oracle_fired(&oracle, &outcome(), &probes));
}
#[test]
fn sink_crash_ignores_normal_probes() {
let oracle = Oracle::SinkCrash {
signals: SignalSet::all(),
};
let probes = vec![probe("victim", vec![ProbeArg::String("x".into())])];
assert!(!oracle_fired(&oracle, &outcome(), &probes));
}
#[test]
fn sink_crash_ignores_unrelated_signal() {
let oracle = Oracle::SinkCrash {
signals: SignalSet::from_slice(&[Signal::Sigsegv]),
};
let probes = vec![crash_probe("victim", Signal::Sigabrt)];
assert!(!oracle_fired(&oracle, &outcome(), &probes));
}
#[test]
fn sink_crash_without_probes_does_not_fire_even_on_process_crash() {
let mut o = outcome();
o.exit_code = None;
o.timed_out = false;
let oracle = Oracle::SinkCrash {
signals: SignalSet::all(),
};
assert!(!oracle_fired(&oracle, &o, &[]));
}
}

192
src/dynamic/policy.rs Normal file
View file

@ -0,0 +1,192 @@
//! Track-security cross-cutting policy module (Phase 08 — Track C.4 + C.5).
//!
//! Centralises the deny rules and byte-bound limits that the per-run
//! [`crate::dynamic::probe::ProbeWitness`] construction uses to keep
//! captured forensic data both privacy-safe and bounded in size.
//!
//! Two responsibilities, intentionally kept in one module so the security
//! envelope is auditable in a single file:
//!
//! 1. **Env scrubbing** — [`scrub_env`] redacts the host environment when
//! snapshotted onto a [`crate::dynamic::probe::ProbeWitness`]. Any key
//! matching a [`DENY_KEY_SUBSTRINGS`] entry (case-insensitive substring
//! match against the upper-cased key) has its value replaced with
//! [`REDACTED_VALUE`]. Whitelist semantics (allow-list) were rejected
//! because the harness env is heterogeneous across CI / local /
//! container runs; a deny-substring list matches the common-suffix
//! naming used in practice (`*_TOKEN`, `*_KEY`, `*_SECRET`, …) with no
//! false negatives on the cases we have evidence for.
//! 2. **Byte bounds** — [`PAYLOAD_CAPTURE_LIMIT_BYTES`] caps the
//! `payload_bytes` field at 16 KiB so a fuzzer-emitted megabyte payload
//! does not turn the probe file into a memory hog or balloon downstream
//! repro artifacts. [`truncate_payload_bytes`] is the only sanctioned
//! truncation entry point — every probe construction path goes through
//! it so the bound is enforced uniformly.
//!
//! The module deliberately depends on `std` only (no third-party crates)
//! so `cargo deny check` and `cargo doc` both see it as a leaf with no
//! transitive license risk.
use std::collections::BTreeMap;
/// Maximum number of bytes retained in
/// [`crate::dynamic::probe::ProbeWitness::payload_bytes`].
///
/// 16 KiB is the cap the Phase 08 plan calls for; it comfortably exceeds
/// what any reasonable injection payload needs (the existing curated corpus
/// peaks under 200 B). Anything larger is cut down to its first 16 KiB via
/// [`truncate_payload_bytes`]: the head is kept because that is the prefix
/// the sink actually sees first, and the tail is dropped.
pub const PAYLOAD_CAPTURE_LIMIT_BYTES: usize = 16 * 1024;
/// Placeholder written in place of a denied environment variable's value
/// when [`scrub_env`] redacts it. Lower-case so it is visually distinct
/// from a real CI env value (which is overwhelmingly upper-snake).
pub const REDACTED_VALUE: &str = "<redacted-by-nyx-policy>";
/// Substrings that flag an environment key as credential-shaped.
///
/// Every entry is uppercase ASCII and is compared against the upper-cased
/// key, which makes the match case-insensitive. Order is irrelevant: any
/// hit leads to the same redaction.
///
/// The list deliberately errs towards over-redaction. A false positive
/// only removes a value from a forensic snapshot, whereas a false negative
/// leaks a credential into a probe file that may be persisted as a repro
/// artifact.
pub const DENY_KEY_SUBSTRINGS: &[&str] = &[
    "TOKEN",
    "SECRET",
    "PASSWORD",
    "PASSWD",
    "API_KEY",
    "APIKEY",
    "PRIVATE_KEY",
    "CREDENTIAL",
    "SESSION",
    "COOKIE",
    "AUTH",
    "BEARER",
    // Provider-specific shapes, listed explicitly so the policy is auditable
    // (some are already covered by the generic entries above).
    "AWS_ACCESS",
    "AWS_SESSION",
    "GH_TOKEN",
    "GITHUB_TOKEN",
    "NPM_TOKEN",
    "PYPI_TOKEN",
    "DOCKER_PASS",
];

/// Report whether `key` contains any [`DENY_KEY_SUBSTRINGS`] entry, ignoring
/// ASCII case.
///
/// Exposed separately from [`scrub_env`] so callers and tests can reason
/// about a single key without building a whole snapshot.
pub fn is_denied_env_key(key: &str) -> bool {
    let normalised = key.to_ascii_uppercase();
    for needle in DENY_KEY_SUBSTRINGS {
        if normalised.contains(*needle) {
            return true;
        }
    }
    false
}
/// Collect an environment iterator into a [`BTreeMap`], replacing the value
/// of every denied key (see [`is_denied_env_key`]) with [`REDACTED_VALUE`].
///
/// A `BTreeMap` is used instead of a `HashMap` so the serialised witness is
/// byte-deterministic across runs, which repro reproducibility relies on.
/// When the same key appears more than once, the last occurrence wins.
pub fn scrub_env<I, S>(iter: I) -> BTreeMap<String, String>
where
    I: IntoIterator<Item = (S, S)>,
    S: Into<String>,
{
    let mut scrubbed = BTreeMap::new();
    scrubbed.extend(iter.into_iter().map(|(raw_key, raw_value)| {
        let key: String = raw_key.into();
        let value: String = raw_value.into();
        let stored = if is_denied_env_key(&key) {
            REDACTED_VALUE.to_owned()
        } else {
            value
        };
        (key, stored)
    }));
    scrubbed
}
/// Truncate `bytes` to at most [`PAYLOAD_CAPTURE_LIMIT_BYTES`].
///
/// Head-keeping: the prefix the sink reads first is retained; the tail is
/// dropped. Returns `bytes` unchanged when it already fits the cap so
/// callers can use the return value without allocating in the common case.
pub fn truncate_payload_bytes(bytes: &[u8]) -> &[u8] {
if bytes.len() <= PAYLOAD_CAPTURE_LIMIT_BYTES {
bytes
} else {
&bytes[..PAYLOAD_CAPTURE_LIMIT_BYTES]
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Any casing of a credential-shaped key is denied.
    #[test]
    fn deny_substring_match_is_case_insensitive() {
        let denied = [
            "AWS_SECRET_ACCESS_KEY",
            "aws_secret_access_key",
            "MyToken",
            "DATABASE_PASSWORD",
        ];
        for key in denied {
            assert!(is_denied_env_key(key), "expected {key} to be denied");
        }
    }

    /// Ordinary keys, including the harness's own `NYX_PAYLOAD`, are kept.
    #[test]
    fn non_credential_keys_pass_through() {
        for key in ["PATH", "HOME", "NYX_PAYLOAD"] {
            assert!(!is_denied_env_key(key), "expected {key} to pass through");
        }
    }

    /// Denied keys keep their name but lose their value; others are untouched.
    #[test]
    fn scrub_redacts_denied_keys_and_keeps_others() {
        let scrubbed = scrub_env(vec![
            ("PATH", "/usr/bin"),
            ("AWS_SECRET_ACCESS_KEY", "AKIA..."),
            ("HOME", "/home/x"),
        ]);
        let value_of = |key: &str| scrubbed.get(key).map(String::as_str);
        assert_eq!(value_of("PATH"), Some("/usr/bin"));
        assert_eq!(value_of("HOME"), Some("/home/x"));
        assert_eq!(value_of("AWS_SECRET_ACCESS_KEY"), Some(REDACTED_VALUE));
    }

    /// Inputs under the cap come back unchanged.
    #[test]
    fn truncate_keeps_short_payloads_unchanged() {
        let short: &[u8] = b"short payload";
        assert_eq!(truncate_payload_bytes(short), short);
    }

    /// Oversized inputs are cut to exactly the cap, keeping the head.
    #[test]
    fn truncate_caps_long_payloads_at_limit() {
        let oversized = vec![b'A'; PAYLOAD_CAPTURE_LIMIT_BYTES + 100];
        let kept = truncate_payload_bytes(&oversized);
        assert_eq!(kept.len(), PAYLOAD_CAPTURE_LIMIT_BYTES);
        assert!(kept.iter().all(|byte| *byte == b'A'));
    }

    /// An input of exactly the cap is not shortened.
    #[test]
    fn truncate_at_exact_boundary_unchanged() {
        let exact = vec![0u8; PAYLOAD_CAPTURE_LIMIT_BYTES];
        assert_eq!(
            truncate_payload_bytes(&exact).len(),
            PAYLOAD_CAPTURE_LIMIT_BYTES
        );
    }

    /// Keys come out in sorted order regardless of input order.
    #[test]
    fn scrub_is_deterministic_btree() {
        let scrubbed = scrub_env(vec![("B", "1"), ("A", "2")]);
        let order: Vec<&str> = scrubbed.keys().map(String::as_str).collect();
        assert_eq!(order, vec!["A", "B"]);
    }
}

View file

@ -8,6 +8,19 @@
//! [`crate::dynamic::oracle::oracle_fired`]) evaluates a payload's
//! [`crate::dynamic::oracle::ProbePredicate`] set against the captured args.
//!
//! # Phase 08 extensions (Track C.4 + C.5)
//!
//! - [`ProbeKind`] discriminates a normal sink observation from a crash
//! intercepted by a sink-site signal handler. The handler stamps
//! `ProbeKind::Crash { signal }` onto the probe before re-raising so the
//! oracle can distinguish "the sink crashed under my payload"
//! (Confirmed) from "some unrelated setup code crashed"
//! (Inconclusive(UnrelatedCrash)).
//! - [`ProbeWitness`] carries bounded forensic data — scrubbed env, cwd,
//! payload-bytes prefix, callee, args repr — so downstream repro and
//! chain composition need only the probe file, not a live sandbox. All
//! bounding goes through [`crate::dynamic::policy`].
//!
//! # Channel medium
//!
//! Currently file-based: one JSON record per line at
@ -22,7 +35,10 @@
//! The runner truncates the file via [`ProbeChannel::clear`] before each
//! payload to keep verdicts independent.
use crate::dynamic::oracle::Signal;
use crate::dynamic::policy;
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use std::fs::{File, OpenOptions};
use std::io::{BufRead, BufReader, Write};
use std::path::{Path, PathBuf};
@ -87,6 +103,107 @@ impl ProbeArg {
}
}
/// Discriminator on a [`SinkProbe`] (Phase 08 — Track C.4).
///
/// Distinguishes a probe written from the normal sink-instrumentation
/// path from one written by a sink-site signal handler when the sink
/// invocation crashed under the active payload. The oracle's
/// [`crate::dynamic::oracle::Oracle::SinkCrash`] variant ignores anything
/// other than `Crash { signal }`, so a process-level abort outside the
/// sink no longer satisfies the oracle.
///
/// Serialised internally tagged (`{"kind": "Normal"}` /
/// `{"kind": "Crash", "signal": ...}`). The `Default` is [`ProbeKind::Normal`],
/// which is what a record without a `kind` field deserialises to.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "kind")]
pub enum ProbeKind {
    /// Standard sink observation: arguments were captured before the sink
    /// returned normally (or raised a non-crash exception).
    #[default]
    Normal,
    /// Sink invocation was interrupted by a fatal signal that the
    /// sink-site handler intercepted. The captured `signal` is the one
    /// the handler observed; the handler re-raises after writing the
    /// probe so the runner's outcome still records the process death.
    Crash {
        /// Signal that interrupted the sink call.
        signal: Signal,
    },
}
/// Bounded forensic snapshot captured alongside a [`SinkProbe`]
/// (Phase 08 — Track C.5).
///
/// Every byte that lands in a witness is policed by
/// [`crate::dynamic::policy`]: env keys are scrubbed against
/// [`crate::dynamic::policy::DENY_KEY_SUBSTRINGS`] and payload bytes are
/// truncated at [`crate::dynamic::policy::PAYLOAD_CAPTURE_LIMIT_BYTES`].
/// Every field is `#[serde(default, skip_serializing_if = "...")]`, so an
/// empty field is omitted on output and a missing field is filled with its
/// default on input. Host-emitted probes (which don't carry a witness) and
/// per-language shim-emitted probes (which do) therefore round-trip
/// through the same JSON schema.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct ProbeWitness {
/// Scrubbed snapshot of the harness process environment at probe
/// time. Keys matching a deny substring carry
/// [`crate::dynamic::policy::REDACTED_VALUE`].
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
pub env_snapshot: BTreeMap<String, String>,
/// Current working directory of the harness when the probe fired.
/// Empty when the language shim could not determine it.
#[serde(default, skip_serializing_if = "String::is_empty")]
pub cwd: String,
/// Head-truncated payload bytes routed into the sink, capped at
/// [`crate::dynamic::policy::PAYLOAD_CAPTURE_LIMIT_BYTES`].
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub payload_bytes: Vec<u8>,
/// Same callee name as [`SinkProbe::sink_callee`]; retained on the
/// witness so repro tooling can consume the witness in isolation.
#[serde(default, skip_serializing_if = "String::is_empty")]
pub callee: String,
/// Per-arg human-readable repr, parallel to [`SinkProbe::args`].
/// `String` for textual / numeric args; `"<bytes:N>"` for binary
/// payloads the shim chose not to inline.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub args_repr: Vec<String>,
}
impl ProbeWitness {
/// An empty witness — every field at its `Default` value. Used by
/// tests and the host-side [`ProbeChannel::write`] path that does
/// not snapshot any forensic state.
pub fn empty() -> Self {
Self::default()
}
/// Construct a bounded witness from raw inputs. Goes through
/// [`crate::dynamic::policy::scrub_env`] and
/// [`crate::dynamic::policy::truncate_payload_bytes`] so the
/// host-side constructor cannot accidentally produce an
/// unscrubbed / unbounded witness.
pub fn from_inputs<I, S>(
env: I,
cwd: impl Into<String>,
payload: &[u8],
callee: impl Into<String>,
args_repr: Vec<String>,
) -> Self
where
I: IntoIterator<Item = (S, S)>,
S: Into<String>,
{
Self {
env_snapshot: policy::scrub_env(env),
cwd: cwd.into(),
payload_bytes: policy::truncate_payload_bytes(payload).to_vec(),
callee: callee.into(),
args_repr,
}
}
}
/// One structured observation written by the harness when the instrumented
/// sink fires. Serialised as a single JSON object on its own line.
#[derive(Debug, Clone, Serialize, Deserialize)]
@ -103,6 +220,16 @@ pub struct SinkProbe {
pub captured_at_ns: u64,
/// Identifier of the payload in flight when the probe fired.
pub payload_id: PayloadId,
/// Phase 08: normal sink observation vs sink-site crash. Defaults to
/// `Normal` so probes written by the Phase 06 shims (no `kind` field
/// on the wire) deserialise as normal observations.
#[serde(default)]
pub kind: ProbeKind,
/// Phase 08: bounded forensic snapshot. Empty when the shim did not
/// capture one — the field stays `default` so older probe files
/// round-trip unchanged.
#[serde(default)]
pub witness: ProbeWitness,
}
/// Per-run handle on a file-backed [`SinkProbe`] channel.
@ -212,6 +339,8 @@ mod tests {
args: vec![ProbeArg::String("ls; whoami".into())],
captured_at_ns: 42,
payload_id: label.into(),
kind: ProbeKind::Normal,
witness: ProbeWitness::empty(),
}
}
@ -271,4 +400,53 @@ mod tests {
let ch = ProbeChannel::for_workdir(dir.path()).unwrap();
assert!(ch.drain().is_empty());
}
#[test]
fn probe_kind_defaults_to_normal_when_field_omitted() {
    // A probe line in the legacy (Phase 06) shape: neither `kind` nor
    // `witness` appears on the wire, so both must fall back to defaults.
    let legacy_json =
        r#"{"sink_callee":"os.system","args":[],"captured_at_ns":1,"payload_id":"p"}"#;
    let probe = serde_json::from_str::<SinkProbe>(legacy_json).unwrap();

    assert_eq!(probe.kind, ProbeKind::Normal);
    assert_eq!(probe.witness, ProbeWitness::empty());
}
#[test]
fn crash_probe_round_trips_through_channel() {
    let workdir = TempDir::new().unwrap();
    let channel = ProbeChannel::for_workdir(workdir.path()).unwrap();

    // Start from the shared sample probe and turn it into a sink-site
    // crash record before writing it to the channel.
    let crash_probe = SinkProbe {
        kind: ProbeKind::Crash { signal: Signal::Sigsegv },
        ..sample_probe("crash-test")
    };
    channel.write(&crash_probe).unwrap();

    // The crash kind (including its signal) must survive the write/drain
    // round trip.
    let observed = channel.drain();
    assert_eq!(observed.len(), 1);
    assert!(matches!(
        observed[0].kind,
        ProbeKind::Crash { signal: Signal::Sigsegv }
    ));
}
#[test]
fn witness_from_inputs_redacts_and_truncates() {
    // Payload twice the capture limit, so truncation must kick in.
    let oversized = vec![0xAB_u8; 2 * policy::PAYLOAD_CAPTURE_LIMIT_BYTES];
    // One harmless variable and one whose key matches a deny substring.
    let raw_env = vec![
        (String::from("PATH"), String::from("/bin")),
        (String::from("AWS_SECRET_ACCESS_KEY"), String::from("secret!!!")),
    ];

    let witness = ProbeWitness::from_inputs(
        raw_env,
        "/tmp/run",
        &oversized,
        "os.system",
        vec!["ls; whoami".to_owned()],
    );

    assert_eq!(witness.cwd, "/tmp/run");
    assert_eq!(
        witness.payload_bytes.len(),
        policy::PAYLOAD_CAPTURE_LIMIT_BYTES
    );

    let path_value = witness.env_snapshot.get("PATH").map(|v| v.as_str());
    assert_eq!(path_value, Some("/bin"));

    let secret_value = witness
        .env_snapshot
        .get("AWS_SECRET_ACCESS_KEY")
        .map(|v| v.as_str());
    assert_eq!(secret_value, Some(policy::REDACTED_VALUE));

    assert_eq!(witness.args_repr, vec!["ls; whoami".to_owned()]);
    assert_eq!(witness.callee, "os.system");
}
}

View file

@ -11,7 +11,7 @@ use crate::dynamic::corpus::{
};
use crate::dynamic::differential;
use crate::dynamic::harness::{self, HarnessError};
use crate::dynamic::oracle::oracle_fired;
use crate::dynamic::oracle::{oracle_fired, probe_crash_signal, Oracle};
use crate::dynamic::probe::{ProbeChannel, SinkProbe};
use crate::dynamic::sandbox::{self, SandboxBackend, SandboxError, SandboxOptions, SandboxOutcome};
use crate::dynamic::spec::HarnessSpec;
@ -47,6 +47,13 @@ pub struct RunOutcome {
/// reference was `None` (or unresolved). The verifier maps this to
/// [`crate::evidence::InconclusiveReason::NoBenignControl`].
pub no_benign_control: bool,
/// Phase 08 §C.4: at least one `Oracle::SinkCrash` payload's sandbox
/// outcome reported a process-level crash (no exit code, no timeout)
/// but no [`crate::dynamic::probe::ProbeKind::Crash`] record was drained
/// from the channel. The verifier maps this to
/// [`crate::evidence::InconclusiveReason::UnrelatedCrash`] so a
/// setup-code abort cannot impersonate a confirmed sink fire.
pub unrelated_crash: bool,
}
#[derive(Debug)]
@ -240,6 +247,7 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
let mut triggered_by = None;
let mut oracle_collision = false;
let mut no_benign_control = false;
let mut unrelated_crash = false;
let mut differential_outcome: Option<DifferentialOutcome> = None;
for (i, payload) in vuln_payloads.iter().enumerate() {
@ -288,6 +296,22 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
let vuln_fired = oracle_fired(&payload.oracle, &outcome, &vuln_probes);
let sink_hit = outcome.sink_hit;
// Phase 08 §C.4: a process-level crash with no matching sink-site
// Crash probe is an "unrelated abort" (setup code, harness build,
// library init). Detect once per payload and surface via
// `unrelated_crash` so the verifier downgrades from `Confirmed`
// to `Inconclusive(UnrelatedCrash)`. Only applies to
// `Oracle::SinkCrash` payloads — other oracles handle crashes
// through their own predicates.
let process_crashed = outcome.exit_code.is_none() && !outcome.timed_out;
let has_sink_crash_probe = vuln_probes.iter().any(|p| probe_crash_signal(p).is_some());
if matches!(payload.oracle, Oracle::SinkCrash { .. })
&& process_crashed
&& !has_sink_crash_probe
{
unrelated_crash = true;
}
// Differential rule (Phase 07, §4.1). Only when the vuln oracle
// fired *and* the in-harness sink-hit sentinel was observed do we
// consult the paired benign control. Oracle-fires-without-sink
@ -361,6 +385,7 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
entry_source,
differential: differential_outcome,
no_benign_control,
unrelated_crash,
})
}

View file

@ -563,6 +563,25 @@ fn build_verdict(
toolchain_match: Some(toolchain_match.to_owned()),
differential: run.differential,
}
} else if run.unrelated_crash {
// Phase 08 §C.4: the harness crashed but the death
// happened outside the instrumented sink (no Crash
// probe was written). Downgrade rather than letting
// a setup-code abort masquerade as a confirmed fire.
VerifyResult {
finding_id: finding_id.to_owned(),
status: VerifyStatus::Inconclusive,
triggered_payload: None,
reason: None,
inconclusive_reason: Some(InconclusiveReason::UnrelatedCrash),
detail: Some(
"process crashed with no sink-site crash probe — likely setup-code abort, not the sink"
.to_owned(),
),
attempts,
toolchain_match: Some(toolchain_match.to_owned()),
differential: None,
}
} else if run.no_benign_control {
// Phase 07 §4.1: vuln oracle + sink-hit fired but the
// paired benign control was missing. Downgrade to