[pitboss] phase 06: Track C.1 — SinkProbe channel + structured oracle observation

This commit is contained in:
pitboss 2026-05-14 05:35:28 -05:00
parent cdbc7f2d21
commit cce07d6c96
18 changed files with 1234 additions and 57 deletions

View file

@ -18,6 +18,45 @@ pub struct CEmitter;
/// Entry kinds the C emitter intends to support once Phase 16 lands.
const SUPPORTED: &[EntryKind] = &[EntryKind::Function];
/// Source of the `__nyx_probe` shim for the (future) C harness (Phase 06 —
/// Track C.1).
///
/// The shim is variadic over `const char *` arguments and depends only on
/// libc / stdio.  When `NYX_PROBE_PATH` is set and non-empty it appends one
/// JSON line per call containing `sink_callee`, `args` (each tagged
/// `"kind":"String"`), `captured_at_ns` (from `CLOCK_REALTIME`) and
/// `payload_id` (from `NYX_PAYLOAD_ID`, `""` when unset).
///
/// Every string is written through `__nyx_json_str`, which escapes quotes,
/// backslashes and control characters.  Captured arguments are attack
/// payloads, so emitting them with a bare `%s` would let a single `"` or
/// newline corrupt the record.  Bytes `>= 0x80` are passed through
/// unchanged.  `NULL` strings are recorded as `""`.
pub fn probe_shim() -> &'static str {
    r#"
/* ── __nyx_probe shim (Phase 06 — Track C.1) ─────────────────────────────── */
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
/* Write `s` to `f` as a quoted JSON string, escaping quotes, backslashes and
 * control characters so a hostile payload cannot break out of the record. */
static void __nyx_json_str(FILE *f, const char *s) {
    fputc('"', f);
    for (; *s; ++s) {
        unsigned char c = (unsigned char)*s;
        switch (c) {
        case '"':  fputs("\\\"", f); break;
        case '\\': fputs("\\\\", f); break;
        case '\n': fputs("\\n", f); break;
        case '\r': fputs("\\r", f); break;
        case '\t': fputs("\\t", f); break;
        default:
            if (c < 0x20) fprintf(f, "\\u%04x", (unsigned)c);
            else fputc((int)c, f);
        }
    }
    fputc('"', f);
}
static void __nyx_probe(const char *sink_callee, int nargs, ...) {
    const char *p = getenv("NYX_PROBE_PATH");
    if (!p || *p == '\0') return;
    FILE *f = fopen(p, "a");
    if (!f) return;
    struct timespec ts;
    clock_gettime(CLOCK_REALTIME, &ts);
    unsigned long long ns = (unsigned long long)ts.tv_sec * 1000000000ULL +
                            (unsigned long long)ts.tv_nsec;
    const char *pid = getenv("NYX_PAYLOAD_ID");
    if (!pid) pid = "";
    fputs("{\"sink_callee\":", f);
    __nyx_json_str(f, sink_callee ? sink_callee : "");
    fputs(",\"args\":[", f);
    va_list ap;
    va_start(ap, nargs);
    for (int i = 0; i < nargs; ++i) {
        const char *arg = va_arg(ap, const char *);
        if (!arg) arg = "";
        if (i > 0) fputc(',', f);
        fputs("{\"kind\":\"String\",\"value\":", f);
        __nyx_json_str(f, arg);
        fputc('}', f);
    }
    va_end(ap);
    fprintf(f, "],\"captured_at_ns\":%llu,\"payload_id\":", ns);
    __nyx_json_str(f, pid);
    fputs("}\n", f);
    fclose(f);
}
"#
}
impl LangEmitter for CEmitter {
fn emit(&self, _spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
Err(UnsupportedReason::LangUnsupported)

View file

@ -18,6 +18,58 @@ pub struct CppEmitter;
/// Entry kinds the C++ emitter intends to support once Phase 16 lands.
const SUPPORTED: &[EntryKind] = &[EntryKind::Function];
/// Source of the `__nyx_probe` shim for the (future) C++ harness
/// (Phase 06 — Track C.1).
///
/// Uses `<fstream>` plus a variadic template (C++17 fold expression).  When
/// `NYX_PROBE_PATH` is set and non-empty, each call appends one JSON line
/// with `sink_callee`, `args` (each tagged `"kind":"String"`),
/// `captured_at_ns` (from `std::chrono::system_clock`) and `payload_id`
/// (from `NYX_PAYLOAD_ID`, `""` when unset); the JSON-emit format matches
/// [`crate::dynamic::probe::SinkProbe`].
///
/// All strings — the callee name and payload id as well as the arguments —
/// go through `__nyx_json_str`, which escapes quotes, backslashes and every
/// control character below `0x20`, so a captured payload cannot produce an
/// unparsable or forged record.  A null `const char *` is recorded as `""`
/// instead of being handed to the `std::string` constructor.
pub fn probe_shim() -> &'static str {
    r#"
/* ── __nyx_probe shim (Phase 06 — Track C.1) ─────────────────────────────── */
#include <chrono>
#include <cstdlib>
#include <fstream>
#include <sstream>
#include <string>
// Append `v` to `out` as a quoted JSON string with all mandatory escapes.
inline void __nyx_json_str(std::ostringstream &out, const std::string &v) {
    static const char hex[] = "0123456789abcdef";
    out << '"';
    for (char c : v) {
        switch (c) {
        case '"':  out << "\\\""; break;
        case '\\': out << "\\\\"; break;
        case '\n': out << "\\n"; break;
        case '\r': out << "\\r"; break;
        case '\t': out << "\\t"; break;
        default: {
            unsigned char uc = static_cast<unsigned char>(c);
            if (uc < 0x20) {
                out << "\\u00" << hex[(uc >> 4) & 0xF] << hex[uc & 0xF];
            } else {
                out << c;
            }
        }
        }
    }
    out << '"';
}
inline void __nyx_probe_one(std::ostringstream &out, const std::string &v) {
    out << "{\"kind\":\"String\",\"value\":";
    __nyx_json_str(out, v);
    out << '}';
}
// Anything a std::string can be built from is accepted; a null C string
// becomes "" rather than undefined behaviour.
template <typename T>
inline std::string __nyx_to_string(const T &v) { return std::string(v); }
inline std::string __nyx_to_string(const char *s) {
    return s ? std::string(s) : std::string();
}
template <typename... Args>
inline void __nyx_probe(const char *sink_callee, Args... args) {
    const char *p = std::getenv("NYX_PROBE_PATH");
    if (!p || *p == '\0') return;
    std::ostringstream out;
    out << "{\"sink_callee\":";
    __nyx_json_str(out, __nyx_to_string(sink_callee));
    out << ",\"args\":[";
    bool first = true;
    auto emit = [&](const std::string &s) {
        if (!first) out << ',';
        first = false;
        __nyx_probe_one(out, s);
    };
    (emit(__nyx_to_string(args)), ...);
    const char *pid = std::getenv("NYX_PAYLOAD_ID");
    auto now = std::chrono::duration_cast<std::chrono::nanoseconds>(
        std::chrono::system_clock::now().time_since_epoch()
    ).count();
    out << "],\"captured_at_ns\":" << now << ",\"payload_id\":";
    __nyx_json_str(out, __nyx_to_string(pid));
    out << "}\n";
    // The record is built in memory first so the file sees one append.
    std::ofstream f(p, std::ios::app);
    if (f.is_open()) f << out.str();
}
"#
}
impl LangEmitter for CppEmitter {
fn emit(&self, _spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
Err(UnsupportedReason::LangUnsupported)

View file

@ -53,6 +53,45 @@ impl LangEmitter for GoEmitter {
}
}
/// Source of the `__nyx_probe` shim for the Go harness (Phase 06 —
/// Track C.1).
///
/// Variadic over `string` so callers can pass any number of captured args
/// at the sink site.  The shim references `os`, `time` and `json`, so the
/// surrounding harness must import those packages.  When `NYX_PROBE_PATH`
/// is non-empty it marshals a record with `sink_callee`, `args` (each
/// tagged `"kind": "String"`), `captured_at_ns` (`time.Now().UnixNano()`)
/// and `payload_id` (`NYX_PAYLOAD_ID`) and appends it as one line.
///
/// The record and its trailing newline are written with a single `Write`
/// on an `O_APPEND` descriptor so that concurrent probes cannot interleave
/// between the JSON body and the line terminator.  Marshal, open and write
/// failures are deliberately ignored: the probe channel is best-effort.
pub fn probe_shim() -> &'static str {
    r#"
// ── __nyx_probe shim (Phase 06 — Track C.1) ──────────────────────────────────
func __nyx_probe(sinkCallee string, args ...string) {
    p := os.Getenv("NYX_PROBE_PATH")
    if p == "" {
        return
    }
    serArgs := make([]map[string]interface{}, 0, len(args))
    for _, a := range args {
        serArgs = append(serArgs, map[string]interface{}{
            "kind":  "String",
            "value": a,
        })
    }
    rec := map[string]interface{}{
        "sink_callee":    sinkCallee,
        "args":           serArgs,
        "captured_at_ns": uint64(time.Now().UnixNano()),
        "payload_id":     os.Getenv("NYX_PAYLOAD_ID"),
    }
    b, err := json.Marshal(rec)
    if err != nil {
        return
    }
    f, err := os.OpenFile(p, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
    if err != nil {
        return
    }
    defer f.Close()
    // One write per record keeps the line contiguous under concurrent appends.
    f.Write(append(b, '\n'))
}
"#
}
/// Emit a Go harness for `spec`.
pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
match &spec.payload_slot {

View file

@ -55,6 +55,65 @@ impl LangEmitter for JavaEmitter {
}
}
/// Source of the `__nyx_probe` shim for the Java harness (Phase 06 —
/// Track C.1).
///
/// Splices into the generated harness class as a `static void __nyx_probe(...)`
/// method plus a private `nyxJsonEscape` helper.  Hand-rolled JSON keeps the
/// shim free of org.json / jackson dependencies; matches the
/// [`crate::dynamic::probe::SinkProbe`] wire format.
///
/// Behaviour of the emitted code:
/// - no-op when `NYX_PROBE_PATH` is unset or empty;
/// - `captured_at_ns` is wall-clock nanoseconds since the Unix epoch taken
///   from `java.time.Instant` — `System.nanoTime()` has an arbitrary origin
///   (and may be negative), which would disagree with the other language
///   shims that all stamp epoch time;
/// - the line is written as UTF-8 regardless of the JVM default charset, so
///   non-ASCII payloads stay valid JSON text;
/// - a `null` callee or argument is recorded as `""`;
/// - I/O failures are swallowed: the probe channel is best-effort.
pub fn probe_shim() -> &'static str {
    r#"
    // ── __nyx_probe shim (Phase 06 — Track C.1) ──────────────────────────────────
    static void __nyx_probe(String sinkCallee, String... args) {
        String p = System.getenv("NYX_PROBE_PATH");
        if (p == null || p.isEmpty()) {
            return;
        }
        // Wall-clock epoch nanoseconds (nanoTime() is not epoch-based).
        java.time.Instant t = java.time.Instant.now();
        long now = t.getEpochSecond() * 1_000_000_000L + t.getNano();
        String payloadId = System.getenv("NYX_PAYLOAD_ID");
        if (payloadId == null) payloadId = "";
        StringBuilder line = new StringBuilder(128);
        line.append("{\"sink_callee\":\"");
        nyxJsonEscape(sinkCallee == null ? "" : sinkCallee, line);
        line.append("\",\"args\":[");
        for (int i = 0; i < args.length; i++) {
            if (i > 0) line.append(',');
            line.append("{\"kind\":\"String\",\"value\":\"");
            nyxJsonEscape(args[i] == null ? "" : args[i], line);
            line.append("\"}");
        }
        line.append("],\"captured_at_ns\":").append(now).append(",\"payload_id\":\"");
        nyxJsonEscape(payloadId, line);
        line.append("\"}\n");
        try (java.io.Writer fw = new java.io.OutputStreamWriter(
                new java.io.FileOutputStream(p, true),
                java.nio.charset.StandardCharsets.UTF_8)) {
            fw.write(line.toString());
        } catch (java.io.IOException e) {
            // best-effort
        }
    }
    private static void nyxJsonEscape(String s, StringBuilder out) {
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '"': out.append("\\\""); break;
                case '\\': out.append("\\\\"); break;
                case '\n': out.append("\\n"); break;
                case '\r': out.append("\\r"); break;
                case '\t': out.append("\\t"); break;
                default:
                    if (c < 0x20) {
                        out.append(String.format("\\u%04x", (int) c));
                    } else {
                        out.append(c);
                    }
            }
        }
    }
"#
}
/// Emit a Java harness for `spec`.
pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
match &spec.payload_slot {

View file

@ -49,6 +49,47 @@ impl LangEmitter for JavaScriptEmitter {
}
}
/// Source of the `__nyx_probe` shim for the Node.js harness.
///
/// Defined once here so both [`JavaScriptEmitter`] and
/// [`crate::dynamic::lang::typescript::TypeScriptEmitter`] reuse the same
/// JSON-emit format. Writes a single [`crate::dynamic::probe::SinkProbe`]
/// JSON line to `NYX_PROBE_PATH` per call; no-op when the env var is
/// unset.
///
/// Argument tagging in the emitted code:
/// - `Buffer` / `Uint8Array` → `Bytes` (array of byte values);
/// - safe integers and booleans → `Int` (booleans as `1` / `0`);
/// - everything else → `String` via `String(a)`.
///
/// Only *safe* integers are tagged `Int`: larger integral doubles are not
/// exact and `JSON.stringify` may render them in exponent form, which is not
/// an integer literal.
///
/// `captured_at_ns` is wall-clock time since the Unix epoch
/// (`Date.now()` scaled to nanoseconds, millisecond resolution), matching the
/// other language shims; `process.hrtime.bigint()` counts from an arbitrary
/// origin and is not comparable across processes.  Write failures are
/// swallowed because the probe channel is best-effort.
pub fn probe_shim() -> &'static str {
    r#"
// ── __nyx_probe shim (Phase 06 — Track C.1) ──────────────────────────────────
function __nyx_probe(sinkCallee, ...args) {
  const _fs = require('fs');
  const _p = process.env.NYX_PROBE_PATH;
  if (!_p) return;
  const _ser = args.map(function (a) {
    if (a && typeof a === 'object' && (a instanceof Buffer || a instanceof Uint8Array)) {
      return { kind: 'Bytes', value: Array.from(a) };
    }
    if (typeof a === 'number' && Number.isSafeInteger(a)) {
      return { kind: 'Int', value: a };
    }
    if (typeof a === 'boolean') {
      return { kind: 'Int', value: a ? 1 : 0 };
    }
    return { kind: 'String', value: String(a) };
  });
  const _rec = {
    sink_callee: String(sinkCallee),
    args: _ser,
    // Epoch nanoseconds (ms resolution); stays an integer literal in JSON.
    captured_at_ns: Date.now() * 1e6,
    payload_id: String(process.env.NYX_PAYLOAD_ID || ''),
  };
  try {
    _fs.appendFileSync(_p, JSON.stringify(_rec) + '\n');
  } catch (e) {
    // best-effort: probe channel write failure is non-fatal.
  }
}
"#
}
/// Emit a Node.js harness for `spec`.
pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
match &spec.payload_slot {
@ -72,10 +113,12 @@ fn generate_source(spec: &HarnessSpec) -> String {
let entry_module = entry_module_name(&spec.entry_file);
let entry_fn = &spec.entry_name;
let (pre_call, call_expr) = build_call(spec, &entry_module, entry_fn);
let probe = probe_shim();
format!(
r#"'use strict';
// Nyx dynamic harness — auto-generated, do not edit.
{probe}
// ── Payload loading ────────────────────────────────────────────────────────────
const _nyx_payload = (() => {{
@ -120,6 +163,7 @@ try {{
entry_module = entry_module,
pre_call = pre_call,
call_expr = call_expr,
probe = probe,
)
}

View file

@ -47,6 +47,36 @@ impl LangEmitter for PhpEmitter {
}
}
/// Source of the `__nyx_probe` shim for the PHP harness (Phase 06 —
/// Track C.1).
///
/// The emitted `__nyx_probe(string $sinkCallee, ...$args)` is a no-op when
/// `NYX_PROBE_PATH` is unset or empty.  Otherwise it appends one JSON line
/// with `sink_callee`, `args`, `captured_at_ns` (`microtime(true)` scaled to
/// nanoseconds) and `payload_id` (`NYX_PAYLOAD_ID`, `''` when unset).
///
/// Argument tagging: integers → `Int`; booleans → `Int` `1` / `0` (the same
/// mapping the Python and Node.js shims use); everything else → `String`
/// via a `(string)` cast.
///
/// Encoding uses `JSON_INVALID_UTF8_SUBSTITUTE` so that payloads carrying
/// bytes that are not valid UTF-8 still produce a record.  If `json_encode`
/// fails anyway the shim returns without writing, rather than appending a
/// bare newline (which is what `false . "\n"` would produce).
pub fn probe_shim() -> &'static str {
    r#"
// ── __nyx_probe shim (Phase 06 — Track C.1) ──────────────────────────────────
function __nyx_probe(string $sinkCallee, ...$args): void {
    $p = getenv('NYX_PROBE_PATH');
    if ($p === false || $p === '') {
        return;
    }
    $ser = [];
    foreach ($args as $a) {
        if (is_int($a)) {
            $ser[] = ['kind' => 'Int', 'value' => $a];
        } elseif (is_bool($a)) {
            $ser[] = ['kind' => 'Int', 'value' => $a ? 1 : 0];
        } else {
            $ser[] = ['kind' => 'String', 'value' => (string) $a];
        }
    }
    $rec = [
        'sink_callee' => $sinkCallee,
        'args' => $ser,
        'captured_at_ns' => (int) (microtime(true) * 1e9),
        'payload_id' => (string) (getenv('NYX_PAYLOAD_ID') ?: ''),
    ];
    // Invalid UTF-8 in a payload must not make the whole record vanish.
    $line = json_encode($rec, JSON_INVALID_UTF8_SUBSTITUTE);
    if ($line === false) {
        return;
    }
    @file_put_contents($p, $line . "\n", FILE_APPEND);
}
"#
}
/// Emit a PHP harness for `spec`.
pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
match &spec.payload_slot {

View file

@ -42,6 +42,45 @@ impl LangEmitter for PythonEmitter {
}
}
/// Source of the `__nyx_probe` shim for the Python harness.
///
/// The shim is callable as `__nyx_probe("sink.callee", arg0, arg1, ...)`.
/// It emits one JSON line per call to `NYX_PROBE_PATH` (when set) in the
/// [`crate::dynamic::probe::SinkProbe`] schema. No-op when the env var
/// is unset or empty, so the shim is safe to inject even when the runner
/// has not configured a probe channel.
///
/// Argument tagging performed by the emitted code, in this order:
/// - `bytes` / `bytearray` → `Bytes` (list of byte values);
/// - `bool` → `Int` `1` / `0` (tested before `int`);
/// - `int` → `Int`;
/// - anything else → `String` via `str(a)`.
///
/// `captured_at_ns` comes from `time.time_ns()` and `payload_id` from
/// `NYX_PAYLOAD_ID` (empty string when unset).  An `OSError` while
/// appending is swallowed.
///
/// The returned text is spliced verbatim into the generated harness, so
/// its bytes (including whitespace) are part of the harness behaviour.
pub fn probe_shim() -> &'static str {
r#"
# __nyx_probe shim (Phase 06 Track C.1)
def __nyx_probe(sink_callee, *args):
import os, time, json
p = os.environ.get("NYX_PROBE_PATH")
if not p:
return
serialised = []
for a in args:
if isinstance(a, (bytes, bytearray)):
serialised.append({"kind": "Bytes", "value": list(a)})
elif isinstance(a, bool):
serialised.append({"kind": "Int", "value": 1 if a else 0})
elif isinstance(a, int):
serialised.append({"kind": "Int", "value": a})
else:
serialised.append({"kind": "String", "value": str(a)})
rec = {
"sink_callee": str(sink_callee),
"args": serialised,
"captured_at_ns": time.time_ns(),
"payload_id": os.environ.get("NYX_PAYLOAD_ID", ""),
}
try:
with open(p, "a") as _f:
_f.write(json.dumps(rec) + "\n")
except OSError:
pass
"#
}
/// Emit a Python harness for `spec`.
pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
// Validate payload slot.
@ -69,6 +108,7 @@ fn generate_source(spec: &HarnessSpec) -> String {
// Build the call expression based on payload slot.
let (pre_call, call_expr) = build_call(spec, entry_module, entry_fn);
let probe = probe_shim();
format!(
r#"#!/usr/bin/env python3
@ -81,6 +121,8 @@ import traceback
# Fires __NYX_SINK_HIT__ exactly once when the traced function is called at
# the expected file:line. Filtered to avoid false positives from library code.
{probe}
_NYX_SINK_FILE = {sink_file:?}
_NYX_SINK_LINE = {sink_line}
_NYX_SINK_HIT = False
@ -152,6 +194,7 @@ sys.settrace(None)
entry_module = entry_module,
pre_call = pre_call,
call_expr = call_expr,
probe = probe,
)
}
@ -277,6 +320,17 @@ mod tests {
assert!(hint.contains("phase 12"));
}
/// The generated Python harness must carry the Phase 06 probe shim: both
/// the `__nyx_probe` definition and its `NYX_PROBE_PATH` lookup.
#[test]
fn probe_shim_is_injected() {
    let generated = emit(&make_spec(PayloadSlot::Param(0))).unwrap();
    let text = &generated.source;
    assert!(
        text.contains("def __nyx_probe"),
        "Phase 06 shim must be present in generated harness",
    );
    assert!(text.contains("NYX_PROBE_PATH"));
}
#[test]
fn unsupported_lang_returns_err() {
let mut spec = make_spec(PayloadSlot::Param(0));

View file

@ -20,6 +20,37 @@ pub struct RubyEmitter;
/// `Inconclusive(EntryKindUnsupported)` rather than `Unsupported`.
const SUPPORTED: &[EntryKind] = &[EntryKind::Function];
/// Source of the `__nyx_probe` shim for the (future) Ruby harness
/// (Phase 06 — Track C.1). Defined here for the deliverable contract
/// even though `emit` returns `LangUnsupported` until Phase 15 lands.
///
/// The emitted `__nyx_probe(sink_callee, *args)` returns immediately when
/// `NYX_PROBE_PATH` is nil or empty.  Otherwise it appends one JSON line
/// (via `to_json`) with:
/// - `sink_callee` — `sink_callee.to_s`;
/// - `args` — `Integer` values tagged `Int`, `String` values tagged
///   `String`, anything else tagged `String` after `to_s`;
/// - `captured_at_ns` — `Process::CLOCK_REALTIME` in nanoseconds;
/// - `payload_id` — `NYX_PAYLOAD_ID`, or `''` when unset.
///
/// Any `StandardError` raised while opening or writing is rescued and
/// ignored.
///
/// NOTE(review): `to_json` runs inside the rescued block; if it raises for
/// a string that is not valid UTF-8 the record would be dropped silently —
/// confirm whether binary payloads need scrubbing before Phase 15.
pub fn probe_shim() -> &'static str {
r#"
# __nyx_probe shim (Phase 06 Track C.1)
def __nyx_probe(sink_callee, *args)
require 'json'
p = ENV['NYX_PROBE_PATH']
return if p.nil? || p.empty?
ser = args.map do |a|
case a
when Integer then { kind: 'Int', value: a }
when String then { kind: 'String', value: a }
else { kind: 'String', value: a.to_s }
end
end
rec = {
sink_callee: sink_callee.to_s,
args: ser,
captured_at_ns: (Process.clock_gettime(Process::CLOCK_REALTIME, :nanosecond)),
payload_id: (ENV['NYX_PAYLOAD_ID'] || ''),
}
begin
File.open(p, 'a') { |f| f.puts(rec.to_json) }
rescue StandardError
end
end
"#
}
impl LangEmitter for RubyEmitter {
fn emit(&self, _spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
Err(UnsupportedReason::LangUnsupported)

View file

@ -51,6 +51,71 @@ impl LangEmitter for RustEmitter {
}
}
/// Source of the `__nyx_probe` shim for the Rust harness (Phase 06 —
/// Track C.1).
///
/// Defined here so future sink-rewrite passes can splice
/// `__nyx_probe("os.system", &[payload])` into the entry source without
/// depending on serde at the harness boundary. Hand-rolled JSON keeps
/// the shim's only dep on `std`; matches the
/// [`crate::dynamic::probe::SinkProbe`] wire format.
///
/// Behaviour of the emitted function `__nyx_probe(sink_callee, args)`:
/// - returns without writing when `NYX_PROBE_PATH` cannot be read from the
///   environment;
/// - every element of `args` is recorded with `"kind":"String"`;
/// - `captured_at_ns` is the time since `UNIX_EPOCH` in nanoseconds, or `0`
///   if the system clock reports a time before the epoch;
/// - `payload_id` is `NYX_PAYLOAD_ID`, or empty when that cannot be read;
/// - the nested `esc` helper escapes `"`, `\`, `\n`, `\r`, `\t` and any
///   other character below `0x20` (as `\u00XX`);
/// - the file is opened with create + append, and open / write errors are
///   ignored.
///
/// The shim carries `#[allow(dead_code)]` so that a harness which never
/// calls the probe still compiles without warnings.
pub fn probe_shim() -> &'static str {
r#"
// ── __nyx_probe shim (Phase 06 — Track C.1) ──────────────────────────────────
#[allow(dead_code)]
fn __nyx_probe(sink_callee: &str, args: &[&str]) {
use std::io::Write;
let p = match std::env::var("NYX_PROBE_PATH") {
Ok(v) => v,
Err(_) => return,
};
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_nanos() as u64)
.unwrap_or(0);
let payload_id = std::env::var("NYX_PAYLOAD_ID").unwrap_or_default();
fn esc(s: &str, out: &mut String) {
for ch in s.chars() {
match ch {
'"' => out.push_str("\\\""),
'\\' => out.push_str("\\\\"),
'\n' => out.push_str("\\n"),
'\r' => out.push_str("\\r"),
'\t' => out.push_str("\\t"),
c if (c as u32) < 0x20 => out.push_str(&format!("\\u{:04x}", c as u32)),
c => out.push(c),
}
}
}
let mut line = String::with_capacity(128);
line.push_str("{\"sink_callee\":\"");
esc(sink_callee, &mut line);
line.push_str("\",\"args\":[");
for (i, a) in args.iter().enumerate() {
if i > 0 {
line.push(',');
}
line.push_str("{\"kind\":\"String\",\"value\":\"");
esc(a, &mut line);
line.push_str("\"}");
}
line.push_str(&format!(
"],\"captured_at_ns\":{},\"payload_id\":\"",
now
));
esc(&payload_id, &mut line);
line.push_str("\"}\n");
if let Ok(mut f) = std::fs::OpenOptions::new()
.create(true)
.append(true)
.open(&p)
{
let _ = f.write_all(line.as_bytes());
}
}
"#
}
/// Emit a Rust harness for `spec`.
pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
match &spec.payload_slot {

View file

@ -27,6 +27,15 @@ pub struct TypeScriptEmitter;
/// browser modules).
const SUPPORTED: &[EntryKind] = &[EntryKind::Function];
/// Source of the `__nyx_probe` shim for TypeScript harnesses.
///
/// Delegates to [`crate::dynamic::lang::javascript::probe_shim`] — the
/// runtime is Node.js in both cases, so the JSON-emit shim is identical
/// after type erasure.
///
/// Returns exactly the `&'static str` produced by the JavaScript emitter;
/// nothing is added or rewritten here, so any change to the Node.js shim
/// applies to TypeScript harnesses as well.
pub fn probe_shim() -> &'static str {
javascript::probe_shim()
}
impl LangEmitter for TypeScriptEmitter {
fn emit(&self, spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
javascript::emit(spec)