// Mirror of https://github.com/elicpeter/nyx.git (synced 2026-06-12 19:55:14 +02:00).
// 336 lines, 14 KiB, Rust.
//! Per-language harness emitters.
//!
//! Each submodule implements [`LangEmitter`] for one language. The top-level
//! [`emit`] function dispatches on `spec.lang` and validates `spec.entry_kind`
//! against the chosen emitter's [`LangEmitter::entry_kinds_supported`] list
//! before delegating, so unsupported entry kinds short-circuit with a typed
//! `UnsupportedReason::EntryKindUnsupported` rather than producing a
//! never-runnable harness.
//!
//! Two free helpers — [`entry_kinds_supported`] and [`entry_kind_hint`] — wrap
//! the trait dispatch so callers outside the harness build path (notably the
//! verifier, which surfaces an `Inconclusive` verdict with the supported list
//! and hint baked in) can advertise capability without instantiating a spec.
|
|
|
|
pub mod c;
|
|
pub mod cpp;
|
|
pub mod go;
|
|
pub mod java;
|
|
pub mod java_servlet_stubs;
|
|
pub mod javascript;
|
|
pub mod js_shared;
|
|
pub mod php;
|
|
pub mod python;
|
|
pub mod ruby;
|
|
pub mod rust;
|
|
pub mod typescript;
|
|
|
|
use crate::dynamic::environment::{Environment, RuntimeArtifacts};
|
|
use crate::dynamic::spec::{EntryKind, HarnessSpec};
|
|
use crate::evidence::UnsupportedReason;
|
|
use crate::symbol::Lang;
|
|
|
|
/// Generated harness source, ready to be written to disk and executed.
///
/// Bundles the main harness file with the launch command and any companion
/// files that have to be staged next to it in the workdir.
#[derive(Debug, Clone)]
pub struct HarnessSource {
    /// Harness source code as a UTF-8 string.
    pub source: String,
    /// Filename for the harness (e.g. `"harness.py"`, `"src/main.rs"`).
    pub filename: String,
    /// Shell command (argv) that invokes the harness, relative to the workdir.
    pub command: Vec<String>,
    /// Additional files to write to the workdir alongside the main source.
    ///
    /// Each entry is `(relative_path, content)`. Subdirectories are created
    /// automatically (e.g. `"Cargo.toml"` or `"src/entry.rs"`).
    pub extra_files: Vec<(String, String)>,
    /// Where to copy the entry source file, relative to the workdir.
    ///
    /// * `None` — workdir root (the Python default).
    /// * `Some("src/entry.rs")` — a Rust module path.
    pub entry_subpath: Option<String>,
}
|
|
|
|
/// Phase 26 — one step in a chain-composite harness.
///
/// The composite re-verifier walks every member of a chain and assembles a
/// sequence of per-step harnesses. Each step is invoked with the previous
/// step's stdout threaded into the [`ChainStepHarness::PREV_OUTPUT_ENV`] env
/// var so the harness can fold the chained input into its payload (e.g.
/// browser-fetch → websocket message → shell tool).
///
/// `extra_env` is additive on top of the sandbox's own
/// [`crate::dynamic::sandbox::SandboxOptions::extra_env`]; the runner is
/// responsible for splicing both in.
#[derive(Debug, Clone)]
pub struct ChainStepHarness {
    /// Source text of this step's harness program.
    pub source: String,
    /// File name the step's `source` is written under (the default stub uses
    /// `"step.sh"`).
    pub filename: String,
    /// Command (argv) that launches the step (the default stub uses
    /// `["sh", "step.sh"]`).
    pub command: Vec<String>,
    /// Extra `(name, value)` environment pairs for this step, applied in
    /// addition to the sandbox's own extra environment.
    pub extra_env: Vec<(String, String)>,
    /// Companion files staged alongside [`Self::source`] in the chain step's
    /// workdir. Each entry is `(relative_path, content)`; subdirectories in
    /// `relative_path` are created automatically.
    ///
    /// Mirrors [`HarnessSource::extra_files`] so an emitter whose chain step
    /// needs a build manifest (Rust's `Cargo.toml`, future `pom.xml`, etc.)
    /// can ship it without smuggling everything into `source`.
    pub extra_files: Vec<(String, String)>,
}
|
|
|
|
impl ChainStepHarness {
|
|
/// Env-var name the previous step's stdout is bound to in the next
|
|
/// step's environment. Stable surface — kept distinct from
|
|
/// `NYX_PAYLOAD` so a chain step can read both at once.
|
|
pub const PREV_OUTPUT_ENV: &'static str = "NYX_PREV_OUTPUT";
|
|
|
|
/// Sentinel printed to stdout by the terminal chain step so the
|
|
/// runner's [`crate::dynamic::sandbox::SandboxOutcome::sink_hit`]
|
|
/// fold can flip to `true` on a successful end-to-end compose.
|
|
/// Mirrors the per-language tracer sentinel used by the regular
|
|
/// harness emitters; the runner detects the byte sequence in
|
|
/// stdout/stderr.
|
|
pub const SINK_HIT_SENTINEL: &'static str = "__NYX_SINK_HIT__";
|
|
}
|
|
|
|
/// Phase 26 — terminal-step descriptor for [`LangEmitter::compose_chain_step`].
///
/// Carries the chain's terminal sink callee so the emitter can rewrite the
/// final step's source to invoke the probe shim with the threaded payload and
/// emit the [`ChainStepHarness::SINK_HIT_SENTINEL`]; the composite reverifier
/// then promotes its verdict from `Inconclusive` to `Confirmed` when the
/// runner observes the sentinel on the chain's last step.
///
/// Non-terminal steps pass `None` instead of a descriptor so they retain the
/// prev-output echo behaviour.
#[derive(Debug, Clone)]
pub struct ChainStepTerminal {
    /// Callee name for the chain's terminal sink (e.g. `"eval"`,
    /// `"os.system"`, `"setattr"`). Used as the first argument to
    /// `__nyx_probe(callee, prev)` so the per-language probe shim records the
    /// witness.
    ///
    /// Kept as `String` rather than `&str` so the reverifier can hand-roll a
    /// `ChainStepTerminal` from a [`crate::chain::finding::ChainSink`]
    /// without lifetime gymnastics.
    pub sink_callee: String,
    /// Capability bits associated with the sink.
    ///
    /// Today the emitters do not read this — it is recorded so a future
    /// per-cap sink-fire shape dispatcher can pick the right invocation idiom
    /// without re-walking the chain.
    pub sink_cap_bits: u32,
}
|
|
|
|
/// Per-language harness emitter contract.
|
|
///
|
|
/// Implementations are zero-sized unit structs (one per `src/dynamic/lang/*.rs`
|
|
/// module). The [`emit`](LangEmitter::emit) method is the legacy
|
|
/// per-language entry point retained for the build pipeline; the two
|
|
/// capability methods are consulted both at dispatch time (`lang::emit`
|
|
/// pre-flight check) and by the verifier when constructing
|
|
/// `Inconclusive(EntryKindUnsupported { … })`.
|
|
pub trait LangEmitter {
|
|
/// Build a harness source bundle for `spec`.
|
|
fn emit(&self, spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason>;
|
|
|
|
/// The set of [`EntryKind`] variants this emitter understands.
|
|
///
|
|
/// Must be non-empty: every emitter advertises at least one shape it can
|
|
/// (or will) drive — even stub modules whose `emit` returns
|
|
/// `LangUnsupported`. Empty would be indistinguishable from "language
|
|
/// not in the dispatch table" and would defeat the structured
|
|
/// advertisement that callers consume.
|
|
fn entry_kinds_supported(&self) -> &'static [EntryKind];
|
|
|
|
/// Human-actionable hint produced when `attempted` is not in
|
|
/// [`entry_kinds_supported`](LangEmitter::entry_kinds_supported).
|
|
///
|
|
/// The string is consumed by
|
|
/// [`crate::evidence::InconclusiveReason::EntryKindUnsupported::hint`] and
|
|
/// surfaces directly to operators triaging dynamic verification gaps;
|
|
/// keep it specific (name the supported kinds, name the phase that will
|
|
/// extend support).
|
|
fn entry_kind_hint(&self, attempted: EntryKind) -> String;
|
|
|
|
/// Synthesise the language-specific manifest / lockfile contents that
|
|
/// pin the [`Environment`]'s direct deps + toolchain into a file the
|
|
/// build sandbox can consume.
|
|
///
|
|
/// Default impl returns an empty bundle — every emitter that ships a
|
|
/// real build step overrides this (Python emits `requirements.txt`,
|
|
/// Rust emits a pinned `Cargo.toml`, etc.). The harness builder
|
|
/// writes every returned `(rel_path, content)` pair into the workdir
|
|
/// alongside the generated source.
|
|
///
|
|
/// Phase 09 - Track D.2 deliverable. The default keeps the surface
|
|
/// area additive: emitters that have not yet been wired through the
|
|
/// capture path simply produce no manifest and the build cache key
|
|
/// degrades to the existing lockfile-hash path.
|
|
fn materialize_runtime(&self, _env: &Environment) -> RuntimeArtifacts {
|
|
RuntimeArtifacts::default()
|
|
}
|
|
|
|
/// Phase 26 — Track G.3: build one step of a chain-composite harness.
|
|
///
|
|
/// `prev_output` carries the previous step's stdout (or `None` for
|
|
/// the chain's entry step). `terminal` is `Some` only on the
|
|
/// chain's last step and carries the sink callee so the emitter
|
|
/// can splice in a `__nyx_probe(callee, prev)` call plus the
|
|
/// [`ChainStepHarness::SINK_HIT_SENTINEL`] stdout banner that the
|
|
/// runner detects via [`crate::dynamic::sandbox::SandboxOutcome::sink_hit`].
|
|
///
|
|
/// Default impl produces a portable POSIX-shell stub that echoes
|
|
/// the previous step's output verbatim, and (when `terminal` is
|
|
/// set) appends a `printf '__NYX_SINK_HIT__\n'` line. Concrete
|
|
/// emitters override to splice in the language-native probe shim.
|
|
fn compose_chain_step(
|
|
&self,
|
|
prev_output: Option<&[u8]>,
|
|
terminal: Option<&ChainStepTerminal>,
|
|
) -> ChainStepHarness {
|
|
default_chain_step(prev_output, terminal)
|
|
}
|
|
}
|
|
|
|
/// Default chain-step harness. Emitted by [`LangEmitter::compose_chain_step`]
|
|
/// when an emitter does not override the trait method.
|
|
pub fn default_chain_step(
|
|
prev_output: Option<&[u8]>,
|
|
terminal: Option<&ChainStepTerminal>,
|
|
) -> ChainStepHarness {
|
|
let mut script = String::from("#!/bin/sh\nprintf '%s' \"${NYX_PREV_OUTPUT:-}\"\n");
|
|
if terminal.is_some() {
|
|
script.push_str("printf '\\n");
|
|
script.push_str(ChainStepHarness::SINK_HIT_SENTINEL);
|
|
script.push_str("\\n'\n");
|
|
}
|
|
ChainStepHarness {
|
|
source: script,
|
|
filename: "step.sh".to_owned(),
|
|
command: vec!["sh".to_owned(), "step.sh".to_owned()],
|
|
extra_env: prev_output
|
|
.map(|bytes| {
|
|
vec![(
|
|
ChainStepHarness::PREV_OUTPUT_ENV.to_owned(),
|
|
String::from_utf8_lossy(bytes).into_owned(),
|
|
)]
|
|
})
|
|
.unwrap_or_default(),
|
|
extra_files: Vec::new(),
|
|
}
|
|
}
|
|
|
|
/// Public free-fn dispatcher for [`LangEmitter::compose_chain_step`].
|
|
///
|
|
/// Returns the lang-agnostic shell stub when `lang` has no registered
|
|
/// emitter so callers do not need to special-case that path.
|
|
pub fn compose_chain_step(
|
|
lang: Lang,
|
|
prev_output: Option<&[u8]>,
|
|
terminal: Option<&ChainStepTerminal>,
|
|
) -> ChainStepHarness {
|
|
dispatch(lang, |e| e.compose_chain_step(prev_output, terminal))
|
|
.unwrap_or_else(|| default_chain_step(prev_output, terminal))
|
|
}
|
|
|
|
/// Public free-fn dispatcher for [`LangEmitter::materialize_runtime`].
|
|
///
|
|
/// Returns an empty [`RuntimeArtifacts`] when `env.lang` has no
|
|
/// registered emitter so callers do not need to special-case that path.
|
|
/// Used by the harness builder to fold runtime manifest artifacts into
|
|
/// the staged workdir (Phase 09 — Track D.2).
|
|
pub fn materialize_runtime(env: &Environment) -> RuntimeArtifacts {
|
|
dispatch(env.lang, |e| e.materialize_runtime(env)).unwrap_or_default()
|
|
}
|
|
|
|
/// Dispatch to the appropriate language emitter.
|
|
///
|
|
/// Validates `spec.entry_kind` against the chosen emitter's supported list
|
|
/// before delegating; an unsupported entry kind short-circuits with
|
|
/// [`UnsupportedReason::EntryKindUnsupported`] so the verifier can surface a
|
|
/// structured `Inconclusive` verdict with the supported list and hint baked
|
|
/// in (instead of producing a never-runnable harness).
|
|
pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
|
|
let supported = entry_kinds_supported(spec.lang);
|
|
if !supported.is_empty() && !supported.contains(&spec.entry_kind) {
|
|
return Err(UnsupportedReason::EntryKindUnsupported);
|
|
}
|
|
dispatch(spec.lang, |e| e.emit(spec))
|
|
.unwrap_or(Err(UnsupportedReason::LangUnsupported))
|
|
}
|
|
|
|
/// Public free-fn dispatcher for the supported entry kinds of `lang`.
|
|
///
|
|
/// Returns an empty slice when `lang` has no registered emitter — callers
|
|
/// distinguish that from "emitter exists but advertises none" by treating
|
|
/// empty as "language unsupported".
|
|
pub fn entry_kinds_supported(lang: Lang) -> &'static [EntryKind] {
|
|
dispatch(lang, |e| e.entry_kinds_supported()).unwrap_or(&[])
|
|
}
|
|
|
|
/// Public free-fn dispatcher for an emitter's hint about `attempted`.
|
|
///
|
|
/// Falls back to a generic message when `lang` has no registered emitter so
|
|
/// callers do not need to special-case that path.
|
|
pub fn entry_kind_hint(lang: Lang, attempted: EntryKind) -> String {
|
|
dispatch(lang, |e| e.entry_kind_hint(attempted)).unwrap_or_else(|| {
|
|
format!(
|
|
"no harness emitter is registered for {lang:?}; attempted {attempted}"
|
|
)
|
|
})
|
|
}
|
|
|
|
/// Internal helper: invoke `f` against the emitter registered for `lang`,
|
|
/// returning `None` when no emitter is registered for that language.
|
|
fn dispatch<R>(lang: Lang, f: impl FnOnce(&dyn LangEmitter) -> R) -> Option<R> {
|
|
let emitter: Option<&dyn LangEmitter> = match lang {
|
|
Lang::Python => Some(&python::PythonEmitter),
|
|
Lang::Rust => Some(&rust::RustEmitter),
|
|
Lang::JavaScript => Some(&javascript::JavaScriptEmitter),
|
|
Lang::TypeScript => Some(&typescript::TypeScriptEmitter),
|
|
Lang::Go => Some(&go::GoEmitter),
|
|
Lang::Java => Some(&java::JavaEmitter),
|
|
Lang::Php => Some(&php::PhpEmitter),
|
|
Lang::Ruby => Some(&ruby::RubyEmitter),
|
|
Lang::C => Some(&c::CEmitter),
|
|
Lang::Cpp => Some(&cpp::CppEmitter),
|
|
};
|
|
emitter.map(f)
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Every registered emitter must advertise at least one entry kind so the
    /// verifier never produces an empty `supported` list in
    /// `Inconclusive(EntryKindUnsupported { supported, .. })`.
    #[test]
    fn every_lang_advertises_at_least_one_entry_kind() {
        for lang in [
            Lang::Python,
            Lang::Rust,
            Lang::JavaScript,
            Lang::TypeScript,
            Lang::Go,
            Lang::Java,
            Lang::Php,
            Lang::Ruby,
            Lang::C,
            Lang::Cpp,
        ] {
            let kinds = entry_kinds_supported(lang);
            assert!(
                !kinds.is_empty(),
                "{lang:?} emitter must advertise at least one EntryKind"
            );
        }
    }

    /// The hint returned for an entry kind must name that kind so operators
    /// can see what was attempted.
    #[test]
    fn entry_kind_hint_mentions_attempted() {
        let hint = entry_kind_hint(Lang::Python, EntryKind::HttpRoute);
        assert!(
            hint.contains("HttpRoute"),
            "hint must mention the attempted entry kind, got: {hint:?}"
        );
    }
}
|