mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-15 20:05:13 +02:00
Dynamic (#77)
This commit is contained in:
parent
55247b7fcd
commit
991c84a1eb
1464 changed files with 225448 additions and 1985 deletions
125
tools/image-builder/images.toml
Normal file
125
tools/image-builder/images.toml
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
# Pinned-digest catalogue consumed by `nyx-image-builder` and the
|
||||
# `build.rs` codegen that populates `src/dynamic/toolchain.rs::IMAGE_DIGESTS`.
|
||||
#
|
||||
# Each `[[image]]` entry corresponds to one `(lang, toolchain)` cell of the
|
||||
# Docker backend. The `toolchain_id` matches the IDs surfaced by
|
||||
# `src/dynamic/toolchain.rs` (`python-3.11`, `node-20`, `java-21`, …) and is
|
||||
# the lookup key used by `IMAGE_DIGESTS`.
|
||||
#
|
||||
# Fields:
|
||||
# - toolchain_id string Lookup key (see toolchain.rs).
|
||||
# - base string Docker image reference (e.g. "python:3.11-slim").
|
||||
# The `nyx-image-builder verify` command refuses to
|
||||
# run if this is not pinnable to a digest.
|
||||
# - toolchain string Human-readable interpreter / compiler version.
|
||||
# - packages table Inline pinned package names → versions (apt /
|
||||
# apk pins applied during image build). Empty `{}`
|
||||
# when the upstream image already covers everything.
|
||||
# - digest string `sha256:…` content digest written back by
|
||||
# `nyx-image-builder build`. Empty until the
|
||||
# first successful build.
|
||||
#
|
||||
# The CI workflow runs `nyx-image-builder build --all` daily. When any digest
|
||||
# drifts, the workflow opens a PR updating this file; reviewers approve before
|
||||
# the new digest pin is merged.
|
||||
|
||||
[[image]]
|
||||
toolchain_id = "python-3.11"
|
||||
base = "python:3.11-slim"
|
||||
toolchain = "Python 3.11"
|
||||
packages = {}
|
||||
digest = "sha256:9a7765b36773a37061455b332f18e265e7f58f6fea9c419a550d2a8b0e9db834"
|
||||
|
||||
[[image]]
|
||||
toolchain_id = "python-3.12"
|
||||
base = "python:3.12-slim"
|
||||
toolchain = "Python 3.12"
|
||||
packages = {}
|
||||
digest = "sha256:401f6e1a67dad31a1bd78e9ad22d0ee0a3b52154e6bd30e90be696bb6a3d7461"
|
||||
|
||||
[[image]]
|
||||
toolchain_id = "python-3.13"
|
||||
base = "python:3.13-slim"
|
||||
toolchain = "Python 3.13"
|
||||
packages = {}
|
||||
digest = "sha256:dc1546eefcbe8caaa1f004f16ab76b204b5e1dbd58ff81b899f21cd40541232f"
|
||||
|
||||
[[image]]
|
||||
toolchain_id = "node-18"
|
||||
base = "node:18-slim"
|
||||
toolchain = "Node.js 18"
|
||||
packages = {}
|
||||
digest = "sha256:f9ab18e354e6855ae56ef2b290dd225c1e51a564f87584b9bd21dd651838830e"
|
||||
|
||||
[[image]]
|
||||
toolchain_id = "node-20"
|
||||
base = "node:20-slim"
|
||||
toolchain = "Node.js 20"
|
||||
packages = {}
|
||||
digest = "sha256:2cf067cfed83d5ea958367df9f966191a942351a2df77d6f0193e162b5febfc0"
|
||||
|
||||
[[image]]
|
||||
toolchain_id = "node-22"
|
||||
base = "node:22-slim"
|
||||
toolchain = "Node.js 22"
|
||||
packages = {}
|
||||
digest = "sha256:689c11043dad91472750cd824c97dd5e2318e9dd6f954e492fe7af0135d33ceb"
|
||||
|
||||
[[image]]
|
||||
toolchain_id = "java-17"
|
||||
base = "eclipse-temurin:17-jre-jammy"
|
||||
toolchain = "Eclipse Temurin 17 JRE"
|
||||
packages = {}
|
||||
digest = "sha256:47c73dc23524b031bed0a5030410c722af6a8b49d4b25898ea8f4615895065f0"
|
||||
|
||||
[[image]]
|
||||
toolchain_id = "java-21"
|
||||
base = "eclipse-temurin:21-jre-jammy"
|
||||
toolchain = "Eclipse Temurin 21 JRE"
|
||||
packages = {}
|
||||
digest = "sha256:199aebeb3adcde4910695cdebfe782ada38dadb6cc8013159b58d3724451befd"
|
||||
|
||||
[[image]]
|
||||
toolchain_id = "php-8.1"
|
||||
base = "php:8.1-cli"
|
||||
toolchain = "PHP 8.1 CLI"
|
||||
packages = {}
|
||||
digest = "sha256:76e563191d1ade120313a8736df24154d21da5155c0756f147c0b01bd19d9087"
|
||||
|
||||
[[image]]
|
||||
toolchain_id = "php-8.2"
|
||||
base = "php:8.2-cli"
|
||||
toolchain = "PHP 8.2 CLI"
|
||||
packages = {}
|
||||
digest = "sha256:506f27f6416650a7ef41561ebdb4f93ebdcacb48dabda2af029241c956bbd8ff"
|
||||
|
||||
[[image]]
|
||||
toolchain_id = "php-8.3"
|
||||
base = "php:8.3-cli"
|
||||
toolchain = "PHP 8.3 CLI"
|
||||
packages = {}
|
||||
digest = "sha256:7e091064b23740d5c154ebcfcf69631dd16770a791409f83e4416d0ae9f660b5"
|
||||
|
||||
[[image]]
|
||||
toolchain_id = "ruby-3.2"
|
||||
base = "ruby:3.2-slim"
|
||||
toolchain = "Ruby 3.2"
|
||||
packages = {}
|
||||
digest = "sha256:84184c9e2c368885a1d0c93ad1953c33d81081058d274b87b4aa6f3e209e5d16"
|
||||
|
||||
[[image]]
|
||||
toolchain_id = "ruby-3.3"
|
||||
base = "ruby:3.3-slim"
|
||||
toolchain = "Ruby 3.3"
|
||||
packages = {}
|
||||
digest = "sha256:a26bfb9409c02987e6b7f8649f0d4c71cc8a4a97475f3f1edfc2fc6a490021ae"
|
||||
|
||||
# Native runtime image: compiled Rust + Go binaries are copied into a
|
||||
# `debian:bookworm-slim` container. Kept here so the image-builder workflow
|
||||
# pins it alongside the per-lang interpreter images.
|
||||
[[image]]
|
||||
toolchain_id = "native-binary"
|
||||
base = "debian:bookworm-slim"
|
||||
toolchain = "Debian 12 slim (native binary runner)"
|
||||
packages = {}
|
||||
digest = "sha256:67b30a61dc87758f0caf819646104f29ecbda97d920aaf5edc834128ac8493d3"
|
||||
560
tools/image-builder/main.rs
Normal file
560
tools/image-builder/main.rs
Normal file
|
|
@ -0,0 +1,560 @@
|
|||
//! Phase 19 (Track E.3) — `nyx-image-builder`.
|
||||
//!
|
||||
//! Reads `tools/image-builder/images.toml`, drives `docker pull` / `docker
|
||||
//! inspect` for each entry, and writes the resolved `sha256:…` digest back
|
||||
//! into the same TOML file so the digest pin is reproducible from source.
|
||||
//!
|
||||
//! Subcommands:
|
||||
//!
|
||||
//! - `build [--all | <toolchain_id>…]` — pull each requested image, capture
|
||||
//! its `RepoDigests` digest, and rewrite `images.toml` in place when the
|
||||
//! digest differs from the recorded pin. The daily CI workflow runs
|
||||
//! `build --all` and opens a PR with the changes when any entry drifts.
|
||||
//! - `verify` — assert that every entry in `images.toml` has a non-empty
|
||||
//! `digest` field and that the digest matches the locally-pulled image.
|
||||
//! Exit code 0 on success, 1 on any mismatch.
|
||||
//! - `list` — print every entry with its current `(base, digest)` pair to
|
||||
//! stdout, one entry per line, for human inspection.
|
||||
//!
|
||||
//! Usage:
|
||||
//!
|
||||
//! ```text
|
||||
//! cargo run -F image-builder --bin nyx-image-builder -- list
|
||||
//! cargo run -F image-builder --bin nyx-image-builder -- build --all
|
||||
//! cargo run -F image-builder --bin nyx-image-builder -- build python-3.11 node-20
|
||||
//! cargo run -F image-builder --bin nyx-image-builder -- verify
|
||||
//! ```
|
||||
//!
|
||||
//! The tool is host-side only; nothing in the Nyx scanner build depends on
|
||||
//! it at runtime. The codegen in `build.rs` reads `images.toml` directly,
|
||||
//! so updating digests is a two-step "run nyx-image-builder build → cargo
|
||||
//! build" cycle.
|
||||
|
||||
use std::env;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Command, ExitCode, Stdio};
|
||||
|
||||
const IMAGES_TOML: &str = "tools/image-builder/images.toml";
|
||||
|
||||
fn main() -> ExitCode {
|
||||
let args: Vec<String> = env::args().skip(1).collect();
|
||||
if args.is_empty() {
|
||||
eprintln!("nyx-image-builder: missing subcommand");
|
||||
print_usage();
|
||||
return ExitCode::from(2);
|
||||
}
|
||||
|
||||
let toml_path = catalogue_path();
|
||||
|
||||
match args[0].as_str() {
|
||||
"list" => cmd_list(&toml_path),
|
||||
"build" => cmd_build(&toml_path, &args[1..]),
|
||||
"verify" => cmd_verify(&toml_path),
|
||||
"-h" | "--help" | "help" => {
|
||||
print_usage();
|
||||
ExitCode::SUCCESS
|
||||
}
|
||||
other => {
|
||||
eprintln!("nyx-image-builder: unknown subcommand `{other}`");
|
||||
print_usage();
|
||||
ExitCode::from(2)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn print_usage() {
|
||||
eprintln!(
|
||||
"usage: nyx-image-builder <list | build [--all|<id>…] | verify>\n\n\
|
||||
Reads `{IMAGES_TOML}` and pins per-toolchain Docker images by sha256\n\
|
||||
digest. Run `build --all` on a host that can reach docker daemon to\n\
|
||||
refresh the digests; commit the resulting diff."
|
||||
);
|
||||
}
|
||||
|
||||
/// Resolve the catalogue path relative to the workspace root.
|
||||
///
|
||||
/// Cargo runs binaries with CWD set to the workspace root by default, so the
|
||||
/// straight relative path works for the common case. We also walk upward
|
||||
/// from `current_dir` so the tool functions correctly when invoked from a
|
||||
/// nested directory (e.g. CI step that `cd tools/`).
|
||||
fn catalogue_path() -> PathBuf {
|
||||
if Path::new(IMAGES_TOML).exists() {
|
||||
return PathBuf::from(IMAGES_TOML);
|
||||
}
|
||||
if let Ok(cwd) = env::current_dir() {
|
||||
let mut probe = cwd.as_path();
|
||||
loop {
|
||||
let candidate = probe.join(IMAGES_TOML);
|
||||
if candidate.exists() {
|
||||
return candidate;
|
||||
}
|
||||
match probe.parent() {
|
||||
Some(p) => probe = p,
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
PathBuf::from(IMAGES_TOML)
|
||||
}
|
||||
|
||||
// ── Subcommands ──────────────────────────────────────────────────────────────
|
||||
|
||||
fn cmd_list(toml_path: &Path) -> ExitCode {
|
||||
let entries = match read_catalogue(toml_path) {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"nyx-image-builder: cannot read {}: {e}",
|
||||
toml_path.display()
|
||||
);
|
||||
return ExitCode::FAILURE;
|
||||
}
|
||||
};
|
||||
|
||||
for e in &entries {
|
||||
let digest = if e.digest.is_empty() {
|
||||
"<unpinned>"
|
||||
} else {
|
||||
&e.digest
|
||||
};
|
||||
println!("{:<20} {:<40} {}", e.toolchain_id, e.base, digest);
|
||||
}
|
||||
ExitCode::SUCCESS
|
||||
}
|
||||
|
||||
fn cmd_build(toml_path: &Path, args: &[String]) -> ExitCode {
|
||||
let entries = match read_catalogue(toml_path) {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"nyx-image-builder: cannot read {}: {e}",
|
||||
toml_path.display()
|
||||
);
|
||||
return ExitCode::FAILURE;
|
||||
}
|
||||
};
|
||||
|
||||
let targets: Vec<&ImageEntry> = if args.iter().any(|a| a == "--all") {
|
||||
entries.iter().collect()
|
||||
} else if args.is_empty() {
|
||||
eprintln!("nyx-image-builder build: expected --all or one or more toolchain IDs");
|
||||
return ExitCode::from(2);
|
||||
} else {
|
||||
let mut out = Vec::with_capacity(args.len());
|
||||
for id in args {
|
||||
if id == "--all" {
|
||||
continue;
|
||||
}
|
||||
match entries.iter().find(|e| &e.toolchain_id == id) {
|
||||
Some(e) => out.push(e),
|
||||
None => {
|
||||
eprintln!("nyx-image-builder build: unknown toolchain_id `{id}`");
|
||||
return ExitCode::FAILURE;
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
};
|
||||
|
||||
let mut updates: Vec<(String, String)> = Vec::new();
|
||||
let mut failures = 0usize;
|
||||
|
||||
for entry in &targets {
|
||||
eprintln!("==> pulling {} ({})", entry.toolchain_id, entry.base);
|
||||
if !docker_pull(&entry.base) {
|
||||
eprintln!(" pull failed for {}", entry.base);
|
||||
failures += 1;
|
||||
continue;
|
||||
}
|
||||
match resolve_image_digest(&entry.base) {
|
||||
Some(digest) => {
|
||||
eprintln!(" {} → {}", entry.base, digest);
|
||||
updates.push((entry.toolchain_id.clone(), digest));
|
||||
}
|
||||
None => {
|
||||
eprintln!(" docker inspect produced no digest for {}", entry.base);
|
||||
failures += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !updates.is_empty() {
|
||||
let original = match std::fs::read_to_string(toml_path) {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"nyx-image-builder build: cannot read {}: {e}",
|
||||
toml_path.display()
|
||||
);
|
||||
return ExitCode::FAILURE;
|
||||
}
|
||||
};
|
||||
let updated = rewrite_digests(&original, &updates);
|
||||
if updated != original {
|
||||
if let Err(e) = std::fs::write(toml_path, updated) {
|
||||
eprintln!(
|
||||
"nyx-image-builder build: cannot write {}: {e}",
|
||||
toml_path.display()
|
||||
);
|
||||
return ExitCode::FAILURE;
|
||||
}
|
||||
eprintln!(
|
||||
"==> updated {} ({} entries)",
|
||||
toml_path.display(),
|
||||
updates.len()
|
||||
);
|
||||
} else {
|
||||
eprintln!(
|
||||
"==> {} unchanged (digests already pinned)",
|
||||
toml_path.display()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if failures > 0 {
|
||||
ExitCode::FAILURE
|
||||
} else {
|
||||
ExitCode::SUCCESS
|
||||
}
|
||||
}
|
||||
|
||||
fn cmd_verify(toml_path: &Path) -> ExitCode {
|
||||
let entries = match read_catalogue(toml_path) {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"nyx-image-builder: cannot read {}: {e}",
|
||||
toml_path.display()
|
||||
);
|
||||
return ExitCode::FAILURE;
|
||||
}
|
||||
};
|
||||
|
||||
let mut failures = 0usize;
|
||||
let mut unpinned = 0usize;
|
||||
|
||||
for entry in &entries {
|
||||
if entry.digest.is_empty() {
|
||||
eprintln!(
|
||||
"MISS {}: digest unpinned in {}",
|
||||
entry.toolchain_id, IMAGES_TOML
|
||||
);
|
||||
unpinned += 1;
|
||||
continue;
|
||||
}
|
||||
match resolve_image_digest(&entry.base) {
|
||||
Some(local) if local == entry.digest => {
|
||||
eprintln!("OK {}: {}", entry.toolchain_id, entry.digest);
|
||||
}
|
||||
Some(local) => {
|
||||
eprintln!(
|
||||
"DIFF {}: pinned={} local={}",
|
||||
entry.toolchain_id, entry.digest, local,
|
||||
);
|
||||
failures += 1;
|
||||
}
|
||||
None => {
|
||||
eprintln!(
|
||||
"MISS {}: docker inspect returned no digest (image not pulled?)",
|
||||
entry.toolchain_id
|
||||
);
|
||||
failures += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if failures == 0 && unpinned == 0 {
|
||||
ExitCode::SUCCESS
|
||||
} else {
|
||||
eprintln!(
|
||||
"nyx-image-builder verify: {failures} mismatch(es), {unpinned} unpinned entry(ies)",
|
||||
);
|
||||
ExitCode::FAILURE
|
||||
}
|
||||
}
|
||||
|
||||
// ── Docker shellouts ─────────────────────────────────────────────────────────
|
||||
|
||||
/// Name of the Docker executable: the value of `NYX_DOCKER_BIN` when that
/// variable is set to valid Unicode, otherwise `"docker"`.
fn docker_bin() -> String {
    match env::var("NYX_DOCKER_BIN") {
        Ok(custom) => custom,
        Err(_) => String::from("docker"),
    }
}
|
||||
|
||||
fn docker_pull(image: &str) -> bool {
|
||||
Command::new(docker_bin())
|
||||
.args(["pull", image])
|
||||
.stdout(Stdio::inherit())
|
||||
.stderr(Stdio::inherit())
|
||||
.status()
|
||||
.map(|s| s.success())
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Resolve the immutable content digest of a locally-pulled image.
|
||||
///
|
||||
/// We prefer `RepoDigests` (`name@sha256:…`) because that is the form
|
||||
/// `docker pull <image>@sha256:…` accepts directly. When the local image
|
||||
/// has no remote digest yet (e.g. fresh build), we fall back to the `.Id`
|
||||
/// which carries the local sha256 of the manifest.
|
||||
fn resolve_image_digest(image: &str) -> Option<String> {
|
||||
// Try RepoDigests first.
|
||||
let repo = Command::new(docker_bin())
|
||||
.args(["inspect", "--format={{index .RepoDigests 0}}", image])
|
||||
.output()
|
||||
.ok()?;
|
||||
if repo.status.success() {
|
||||
let line = std::str::from_utf8(&repo.stdout).unwrap_or("").trim();
|
||||
if !line.is_empty() && line != "<no value>" {
|
||||
// RepoDigests is "name@sha256:…"; the caller stores the
|
||||
// sha256:… portion alongside `base` so we just keep the
|
||||
// digest tail.
|
||||
if let Some(idx) = line.rfind("@") {
|
||||
let digest = &line[idx + 1..];
|
||||
if !digest.is_empty() {
|
||||
return Some(digest.to_owned());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fall back to .Id (image manifest digest).
|
||||
let id = Command::new(docker_bin())
|
||||
.args(["inspect", "--format={{.Id}}", image])
|
||||
.output()
|
||||
.ok()?;
|
||||
if !id.status.success() {
|
||||
return None;
|
||||
}
|
||||
let line = std::str::from_utf8(&id.stdout).unwrap_or("").trim();
|
||||
if line.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(line.to_owned())
|
||||
}
|
||||
}
|
||||
|
||||
// ── images.toml parser + rewriter ────────────────────────────────────────────
|
||||
|
||||
/// The subset of one `[[image]]` catalogue block that this tool uses.
#[derive(Clone, Debug, Default)]
struct ImageEntry {
    /// Value of the `toolchain_id` key; used to select entries on the
    /// command line and to find the block to rewrite.
    toolchain_id: String,
    /// Value of the `base` key; the image reference handed to Docker.
    base: String,
    /// Value of the `digest` key; empty when the entry is not yet pinned.
    digest: String,
}
|
||||
|
||||
fn read_catalogue(path: &Path) -> std::io::Result<Vec<ImageEntry>> {
|
||||
let text = std::fs::read_to_string(path)?;
|
||||
Ok(parse_catalogue(&text))
|
||||
}
|
||||
|
||||
fn parse_catalogue(src: &str) -> Vec<ImageEntry> {
|
||||
let mut entries: Vec<ImageEntry> = Vec::new();
|
||||
let mut current: Option<ImageEntry> = None;
|
||||
|
||||
for raw in src.lines() {
|
||||
let line = strip_comment(raw).trim();
|
||||
if line.is_empty() {
|
||||
continue;
|
||||
}
|
||||
if line == "[[image]]" {
|
||||
if let Some(prev) = current.take()
|
||||
&& !prev.toolchain_id.is_empty()
|
||||
{
|
||||
entries.push(prev);
|
||||
}
|
||||
current = Some(ImageEntry::default());
|
||||
continue;
|
||||
}
|
||||
if line.starts_with("[[") || line.starts_with('[') {
|
||||
if let Some(prev) = current.take()
|
||||
&& !prev.toolchain_id.is_empty()
|
||||
{
|
||||
entries.push(prev);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
let Some(slot) = current.as_mut() else {
|
||||
continue;
|
||||
};
|
||||
let Some((key, value)) = line.split_once('=') else {
|
||||
continue;
|
||||
};
|
||||
let key = key.trim();
|
||||
let value = value.trim().trim_matches('"').trim_matches('\'');
|
||||
match key {
|
||||
"toolchain_id" => slot.toolchain_id = value.to_owned(),
|
||||
"base" => slot.base = value.to_owned(),
|
||||
"digest" => slot.digest = value.to_owned(),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
if let Some(prev) = current.take()
|
||||
&& !prev.toolchain_id.is_empty()
|
||||
{
|
||||
entries.push(prev);
|
||||
}
|
||||
entries
|
||||
}
|
||||
|
||||
/// Return `line` with a trailing `# comment` removed.
///
/// A `#` starts a comment only when it is outside a string. Both TOML
/// single-line string forms are tracked:
/// - basic strings `"…"`, where a backslash escapes the next byte (so `\"`
///   does not end the string);
/// - literal strings `'…'`, which have no escapes.
///
/// Whitespace before the `#` is kept; callers trim. Multi-line strings
/// (`"""` / `'''`) are not supported by this line-based helper.
fn strip_comment(line: &str) -> &str {
    // Delimiter of the string we are currently inside, if any.
    let mut quote: Option<u8> = None;
    // True right after a backslash inside a basic string.
    let mut escaped = false;

    for (i, b) in line.bytes().enumerate() {
        match (quote, b) {
            (Some(b'"'), _) if escaped => escaped = false,
            (Some(b'"'), b'\\') => escaped = true,
            (Some(open), byte) if byte == open => quote = None,
            (Some(_), _) => {}
            (None, b'"') | (None, b'\'') => quote = Some(b),
            // `#` is ASCII, so `i` is always a char boundary.
            (None, b'#') => return &line[..i],
            (None, _) => {}
        }
    }
    line
}
|
||||
|
||||
/// Rewrite the `digest = "…"` value for each `(toolchain_id, new_digest)` in
/// `updates`, leaving every other byte of the original TOML untouched.
///
/// Algorithm: walk the source line by line, keeping each line's original
/// terminator (`\n`, `\r\n`, or none on the last line). Track which
/// `[[image]]` block we are in by reading `toolchain_id`. On a
/// `digest = "…"` line inside a block whose `toolchain_id` is in `updates`,
/// replace only the text between the two quotes. Indentation, spacing around
/// `=`, and any trailing comment are therefore preserved.
///
/// A block's `toolchain_id` must appear before its `digest` line for the
/// digest to be rewritten. Blocks without a `digest` line are left as-is.
fn rewrite_digests(src: &str, updates: &[(String, String)]) -> String {
    let mut out = String::with_capacity(src.len());
    let mut current_tid: Option<String> = None;
    let mut in_image_block = false;

    for raw in src.split_inclusive('\n') {
        // Separate the line content from its terminator so the terminator
        // can be written back unchanged.
        let body = raw.strip_suffix('\n').unwrap_or(raw);
        let body = body.strip_suffix('\r').unwrap_or(body);
        let eol = &raw[body.len()..];
        let trimmed = body.trim();

        if trimmed.starts_with('[') {
            // Any header ends the current block; only a bare `[[image]]`
            // opens a new image block.
            in_image_block = trimmed == "[[image]]";
            current_tid = None;
        } else if in_image_block {
            if let Some(value) = parse_toml_string_value(trimmed, "toolchain_id") {
                current_tid = Some(value);
            }

            let new_digest = match (&current_tid, parse_toml_string_value(trimmed, "digest")) {
                (Some(tid), Some(_)) => updates
                    .iter()
                    .find(|(id, _)| id == tid)
                    .map(|(_, digest)| digest),
                _ => None,
            };

            if let Some(new_digest) = new_digest {
                // The key is bare, so the first `"` on the line opens the
                // value and the next one closes it.
                let open = body.find('"').map(|i| i + 1);
                let close = open.and_then(|o| body[o..].find('"').map(|i| o + i));
                if let (Some(open), Some(close)) = (open, close) {
                    out.push_str(&body[..open]);
                    out.push_str(new_digest);
                    out.push_str(&body[close..]);
                    out.push_str(eol);
                    continue;
                }
            }
        }

        out.push_str(raw);
    }

    out
}

/// Extract the value of a `key = "value"` line.
///
/// Returns `None` unless the trimmed line starts with `key`, followed by
/// optional whitespace, `=`, optional whitespace and a double-quoted string.
/// Anything after the closing quote (for example a trailing comment) is
/// ignored. Escaped quotes inside the value are not interpreted.
fn parse_toml_string_value(line: &str, key: &str) -> Option<String> {
    let after_key = line.trim().strip_prefix(key)?.trim_start();
    let quoted = after_key.strip_prefix('=')?.trim().strip_prefix('"')?;
    let (value, _rest) = quoted.split_once('"')?;
    Some(value.to_owned())
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    //! Unit tests for the `images.toml` reader and digest rewriter. None of
    //! them touch Docker or the filesystem.

    use super::*;

    #[test]
    fn parse_catalogue_extracts_three_fields() {
        let src = r#"
[[image]]
toolchain_id = "python-3.11"
base = "python:3.11-slim"
toolchain = "Python 3.11"
packages = {}
digest = ""

[[image]]
toolchain_id = "node-20"
base = "node:20-slim"
toolchain = "Node.js 20"
packages = {}
digest = "sha256:cafebabe"
"#;
        let entries = parse_catalogue(src);
        assert_eq!(entries.len(), 2);
        assert_eq!(entries[0].toolchain_id, "python-3.11");
        assert_eq!(entries[0].base, "python:3.11-slim");
        assert_eq!(entries[0].digest, "");
        assert_eq!(entries[1].toolchain_id, "node-20");
        assert_eq!(entries[1].digest, "sha256:cafebabe");
    }

    #[test]
    fn rewrite_digests_replaces_only_named_entries() {
        let src = r#"[[image]]
toolchain_id = "python-3.11"
base = "python:3.11-slim"
digest = ""

[[image]]
toolchain_id = "node-20"
base = "node:20-slim"
digest = ""
"#;
        let updates = vec![("node-20".to_owned(), "sha256:deadbeef".to_owned())];
        let out = rewrite_digests(src, &updates);
        assert!(out.contains("digest = \"sha256:deadbeef\""));
        // python-3.11 must remain unpinned.
        let python_block = out
            .split("[[image]]")
            .find(|b| b.contains("python-3.11"))
            .unwrap();
        assert!(python_block.contains("digest = \"\""));
    }

    #[test]
    fn rewrite_digests_preserves_indentation_and_comments() {
        let src = "# header\n[[image]]\n toolchain_id = \"go\"\n digest = \"\"\n";
        let updates = vec![("go".to_owned(), "sha256:1234".to_owned())];
        let out = rewrite_digests(src, &updates);
        assert!(out.contains(" digest = \"sha256:1234\""));
        assert!(out.starts_with("# header\n"));
    }

    #[test]
    fn rewrite_digests_no_op_when_no_targets() {
        let src = "[[image]]\ntoolchain_id = \"x\"\ndigest = \"sha256:keep\"\n";
        let out = rewrite_digests(src, &[]);
        assert_eq!(out, src);
    }

    #[test]
    fn parse_toml_string_value_handles_trailing_garbage() {
        assert_eq!(
            parse_toml_string_value("digest = \"sha256:abc\"", "digest"),
            Some("sha256:abc".to_owned())
        );
        // The case the test is named for: text after the closing quote
        // (here a trailing comment) must not leak into the value.
        assert_eq!(
            parse_toml_string_value("digest = \"sha256:abc\" # pinned", "digest"),
            Some("sha256:abc".to_owned())
        );
        assert_eq!(parse_toml_string_value("other = \"x\"", "digest"), None);
    }

    #[test]
    fn strip_comment_keeps_hash_inside_strings() {
        assert_eq!(strip_comment("foo = \"a#b\" # tail"), "foo = \"a#b\" ");
    }
}
|
||||
481
tools/sb-trace.sh
Executable file
481
tools/sb-trace.sh
Executable file
|
|
@ -0,0 +1,481 @@
|
|||
#!/usr/bin/env bash
|
||||
# tools/sb-trace.sh — iterative-permit seed generator for the macOS
|
||||
# sandbox-exec deny-default rollout (Phase 18 follow-up path (a)).
|
||||
#
|
||||
# How it works
|
||||
# ------------
|
||||
# Apple removed the `(trace "<file>")` directive's file-emission in a
|
||||
# recent macOS release while keeping the directive syntactically valid,
|
||||
# so the older "set a trace path, run probe, parse trace file" workflow
|
||||
# captures nothing on macOS 26+. This script substitutes an iterative
|
||||
# loop driven by `log show`:
|
||||
#
|
||||
# 1. Materialise the named `.sb` profile with `(allow default)`
|
||||
# rewritten to `(deny default)` plus all `(allow ...)` rules the
|
||||
# loop has accumulated so far.
|
||||
# 2. Run the per-language probe under `sandbox-exec -f` against that
|
||||
# profile. Capture the resulting PID.
|
||||
# 3. Query `log show --predicate 'eventMessage CONTAINS "(<pid>) deny"'`
|
||||
# for the deny records the kernel logged against our process.
|
||||
# 4. Convert each deny record into a corresponding `(allow ...)` rule
|
||||
# and append it to the accumulated rule set.
|
||||
# 5. Repeat until no new deny records appear (either the probe ran
|
||||
# cleanly under the accumulated allows or the kernel deduplicated
|
||||
# everything new). Emit the rule set as the seed.
|
||||
#
|
||||
# The PID-targeted log query sidesteps the kernel's per-tuple dedup
|
||||
# window: every iteration's probe runs as a new process with a fresh
|
||||
# PID, so the kernel emits fresh records each time even if the
|
||||
# operation tuples repeat.
|
||||
#
|
||||
# Usage
|
||||
# -----
|
||||
# tools/sb-trace.sh # walk every profile + every lang fixture
|
||||
# tools/sb-trace.sh cmdi # just the cmdi profile, every lang
|
||||
# tools/sb-trace.sh cmdi python # cmdi + python only
|
||||
# tools/sb-trace.sh --selftest # rule-parser unit tests
|
||||
#
|
||||
# Requirements
|
||||
# ------------
|
||||
# * macOS host with `/usr/bin/sandbox-exec` + `/usr/bin/log` available.
|
||||
# * `python3`, `node`, `ruby`, `php`, `java` resolvable via $PATH for
|
||||
# every language whose fixtures you want to walk. Missing
|
||||
# interpreters are skipped with a warning.
|
||||
#
|
||||
# Output
|
||||
# ------
|
||||
# tools/sb-trace/<cap>.allow — generated seed, hand-review.
|
||||
#
|
||||
# The seeds are intended to be committed. Hand-review each one to:
|
||||
# * regex-anonymise host-specific user paths (`/Users/<you>/...` →
|
||||
# `^/Users/[^/]+/...`)
|
||||
# * collapse related rules onto one `(allow op a b c ...)` directive
|
||||
# when several rules share an operation.
|
||||
|
||||
set -euo pipefail

# Repository root: the parent of the directory that holds this script.
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
SEED_DIR="${ROOT}/tools/sb-trace"
PROFILE_DIR="${ROOT}/src/dynamic/sandbox_profiles"

# Defaults that can be overridden from the environment.
MAX_ITERATIONS="${SB_TRACE_MAX_ITERATIONS:-200}"
LOG_WAIT="${SB_TRACE_LOG_WAIT_SECONDS:-1.5}"

# `--selftest` as the first argument selects self-test mode, which skips the
# macOS-host plumbing so the rule parser can be exercised on any platform.
case "${1:-}" in
    --selftest) selftest_mode=1 ;;
    *)          selftest_mode=0 ;;
esac
|
||||
|
||||
# ── deny → allow rule parser ─────────────────────────────────────────────────
|
||||
#
|
||||
# Format of a kernel sandbox deny record (as it appears in `log show`'s
|
||||
# `eventMessage` field):
|
||||
#
|
||||
# Sandbox: <name>(<pid>) deny(<level>) <op> <target...>
|
||||
#
|
||||
# `<target>` is positional — everything after the operation token, up to
|
||||
# the end of the message. It may contain spaces (file paths with
|
||||
# embedded whitespace). Operation classes map to different
|
||||
# sandbox-exec rule filters:
|
||||
#
|
||||
# file-read*, file-write*, file-ioctl, file-* (most) → (literal "<path>")
|
||||
# mach-lookup → (global-name "<name>")
|
||||
# sysctl-read, sysctl-write → (sysctl-name "<name>")
|
||||
# ipc-posix-shm-read*, ipc-posix-shm-write* → (ipc-posix-name "<name>")
|
||||
# iokit-open → (iokit-user-client-class "<class>")
|
||||
# network-outbound, network-inbound, network-bind → (literal "<path>") if path-like
|
||||
# process-fork, process-exec*, signal, pseudo-tty,
|
||||
# sysctl-*, system-* → bare (allow <op>)
|
||||
#
|
||||
# Unknown operations fall through to bare allow with a `;; TODO review`
|
||||
# comment so the operator notices on hand-review.
|
||||
|
||||
# deny_to_allow_rule <log-line>
#
# Convert one sandbox deny record of the form
#     ... Sandbox: <name>(<pid>) deny(<level>) <op> [<target...>]
# into the matching `(allow ...)` rule, printed on stdout.
#
# Lines that do not contain a `deny(<level>) ` marker (log headers, unrelated
# messages) produce no output, so they can never be turned into a bogus
# `(allow <junk>)` rule in a seed file. Always returns 0.
deny_to_allow_rule() {
    local line="$1"

    # Not a deny record: emit nothing.
    [[ "$line" == *"deny("*") "* ]] || return 0

    # Strip everything up to and including "deny(N) ".
    local rest="${line#*Sandbox: }"
    rest="${rest#*deny(}"
    rest="${rest#*) }"

    # First space-delimited token is the operation; everything after it is
    # the target, which may itself contain spaces.
    local op="${rest%% *}"
    local target=""
    if [[ "$rest" == *" "* ]]; then
        target="${rest#* }"
    fi

    # Drop a single trailing carriage return, if present.
    target="${target%$'\r'}"

    case "$op" in
        file-read*|file-write*|file-ioctl|file-issue-extension|file-map-executable|file-mount*|file-revoke|file-test-existence|file-chroot|file-clone)
            printf '(allow %s (literal "%s"))\n' "$op" "$(escape_quotes "$target")"
            ;;
        mach-lookup|mach-register|mach-priv-task-port|mach-task-name)
            printf '(allow %s (global-name "%s"))\n' "$op" "$(escape_quotes "$target")"
            ;;
        sysctl-read|sysctl-write)
            printf '(allow %s (sysctl-name "%s"))\n' "$op" "$(escape_quotes "$target")"
            ;;
        ipc-posix-shm-read*|ipc-posix-shm-write*|ipc-posix-shm)
            printf '(allow %s (ipc-posix-name "%s"))\n' "$op" "$(escape_quotes "$target")"
            ;;
        iokit-open|iokit-set-properties|iokit-get-properties)
            printf '(allow %s (iokit-user-client-class "%s"))\n' "$op" "$(escape_quotes "$target")"
            ;;
        network-outbound|network-inbound|network-bind)
            # Only path-like targets get a filter; host:port targets fall
            # back to a bare allow.
            if [[ "$target" == /* ]]; then
                printf '(allow %s (literal "%s"))\n' "$op" "$(escape_quotes "$target")"
            else
                printf '(allow %s)\n' "$op"
            fi
            ;;
        process-fork|process-exec*|process-info*|signal|pseudo-tty|system-*|sysctl-*)
            printf '(allow %s)\n' "$op"
            ;;
        "")
            # Deny marker with no operation after it — emit nothing.
            ;;
        *)
            printf ';; TODO review unfamiliar op: %s %s\n(allow %s)\n' \
                "$op" "$target" "$op"
            ;;
    esac
}
|
||||
|
||||
# escape_quotes <string>
#
# Print <string> with every backslash and every double quote prefixed by a
# backslash, so the result can be embedded in a sandbox-exec string literal.
# No trailing newline is added.
escape_quotes() {
    local raw="$1"
    local escaped=""
    local ch i

    for (( i = 0; i < ${#raw}; i++ )); do
        ch="${raw:i:1}"
        case "$ch" in
            '\'|'"') escaped+="\\$ch" ;;
            *)       escaped+="$ch" ;;
        esac
    done

    printf '%s' "$escaped"
}
|
||||
|
||||
# ── Self-test ────────────────────────────────────────────────────────────────
|
||||
|
||||
# assert_rule <label> <log-line> <expected-rule>
#
# Feed <log-line> to deny_to_allow_rule and compare its output with
# <expected-rule>. Prints `[PASS] <label>` on stdout on a match; otherwise
# prints a `[FAIL]` report on stderr and returns 1.
assert_rule() {
    local label="$1" input="$2" expected="$3"
    local actual

    # Assigned separately from `local` so the command's status is not masked.
    actual="$(deny_to_allow_rule "$input")"
    # Trim trailing newline from the captured output for comparison.
    actual="${actual%$'\n'}"

    if [[ "$actual" == "$expected" ]]; then
        printf '[PASS] %s\n' "$label"
    else
        printf '[FAIL] %s\n input: %s\n expected: %s\n got: %s\n' \
            "$label" "$input" "$expected" "$actual" >&2
        return 1
    fi
}
|
||||
|
||||
# run_selftest
#
# Run every rule-parser case through assert_rule, then print a summary.
# Returns 1 if any case failed, 0 otherwise.
#
# The failure counter is bumped with a plain assignment. `((fails++))`
# evaluates to the old value, so it returns status 1 when `fails` is 0; as
# the last command of an `||` list that would trip `set -e` and abort the
# script on the first failing case, before the remaining cases and the
# summary are reached.
run_selftest() {
    local fails=0

    assert_rule "file-read-data" \
        "kernel: (Sandbox) Sandbox: python3(54920) deny(1) file-read-data /etc/hosts" \
        '(allow file-read-data (literal "/etc/hosts"))' || fails=$((fails + 1))

    assert_rule "file-read-data-root" \
        "Sandbox: python3(54920) deny(1) file-read-data /" \
        '(allow file-read-data (literal "/"))' || fails=$((fails + 1))

    assert_rule "sysctl-read" \
        "Sandbox: python3(54920) deny(1) sysctl-read security.mac.lockdown_mode_state" \
        '(allow sysctl-read (sysctl-name "security.mac.lockdown_mode_state"))' || fails=$((fails + 1))

    assert_rule "mach-lookup" \
        "Sandbox: contactsd(54920) deny(1) mach-lookup com.apple.tccd.system" \
        '(allow mach-lookup (global-name "com.apple.tccd.system"))' || fails=$((fails + 1))

    assert_rule "ipc-posix-shm-read" \
        "Sandbox: python3(54920) deny(1) ipc-posix-shm-read-data apple.shm.notification_center" \
        '(allow ipc-posix-shm-read-data (ipc-posix-name "apple.shm.notification_center"))' || fails=$((fails + 1))

    assert_rule "network-outbound-path" \
        "Sandbox: python3(54920) deny(1) network-outbound /private/var/run/syslog" \
        '(allow network-outbound (literal "/private/var/run/syslog"))' || fails=$((fails + 1))

    assert_rule "network-outbound-host" \
        "Sandbox: python3(54920) deny(1) network-outbound 1.2.3.4:80" \
        '(allow network-outbound)' || fails=$((fails + 1))

    assert_rule "process-fork" \
        "Sandbox: python3(54920) deny(1) process-fork" \
        '(allow process-fork)' || fails=$((fails + 1))

    assert_rule "process-exec-star" \
        "Sandbox: python3(54920) deny(1) process-exec* /bin/ls" \
        '(allow process-exec*)' || fails=$((fails + 1))

    assert_rule "iokit-open" \
        "Sandbox: python3(54920) deny(1) iokit-open IOUserClientCrossEndpoint" \
        '(allow iokit-open (iokit-user-client-class "IOUserClientCrossEndpoint"))' || fails=$((fails + 1))

    assert_rule "path-with-space" \
        'Sandbox: python3(54920) deny(1) file-read-data /Users/me/has spaces/file' \
        '(allow file-read-data (literal "/Users/me/has spaces/file"))' || fails=$((fails + 1))

    assert_rule "path-with-quote" \
        'Sandbox: python3(54920) deny(1) file-read-data /a"b' \
        '(allow file-read-data (literal "/a\"b"))' || fails=$((fails + 1))

    if (( fails > 0 )); then
        printf '\nsb-trace selftest: %d failure(s)\n' "$fails" >&2
        return 1
    fi
    printf '\nsb-trace selftest: all OK\n'
}
|
||||
|
||||
# Selftest mode: run the parser checks and exit with their status; nothing
# after this point in the script is executed.
if (( selftest_mode != 0 )); then
    run_selftest
    exit "$?"
fi
|
||||
|
||||
# ── macOS-host guards ────────────────────────────────────────────────────────
|
||||
|
||||
# Exit with status 2 unless the host reports Darwin and both tools are
# executable at their fixed paths.
[[ "$(uname -s)" == "Darwin" ]] || {
    echo "sb-trace: must run on macOS (uname=$(uname -s))" >&2
    exit 2
}

for required_bin in /usr/bin/sandbox-exec /usr/bin/log; do
    [[ -x "$required_bin" ]] || {
        echo "sb-trace: $required_bin missing" >&2
        exit 2
    }
done
unset required_bin

# Make sure the seed output directory exists.
mkdir -p "$SEED_DIR"
|
||||
|
||||
# ── Probe selection ──────────────────────────────────────────────────────────
|
||||
|
||||
# Profiles and languages walked when no positional arguments are given.
ALL_PROFILES=(base cmdi path_traversal ssrf deserialize xxe)
ALL_LANGS=(python javascript ruby php java)

# Start from the full lists, then narrow them from the positional arguments:
# $1 selects a single profile, $2 selects a single language.
declare -a selected_profiles selected_langs
selected_profiles=("${ALL_PROFILES[@]}")
selected_langs=("${ALL_LANGS[@]}")
if (( $# >= 1 )); then
    selected_profiles=("$1")
fi
if (( $# >= 2 )); then
    selected_langs=("$2")
fi
|
||||
|
||||
# Per-language probe command. Each probe exercises the interpreter's
# cold-start path with the minimum import set the dynamic harness needs.
#   $1  language name: python | javascript | ruby | php | java
# On success the probe argv is stored, one token per element, in the global
# PROBE_ARGV array. When the language is not recognised or its interpreter
# is not found by `command -v`, the function returns 1 and PROBE_ARGV is
# left empty.
PROBE_ARGV=()
probe_command_for() {
    PROBE_ARGV=()
    local lang="$1" interpreter

    # Map the language onto the binary that has to be on PATH.
    case "$lang" in
        python)        interpreter=python3 ;;
        javascript)    interpreter=node ;;
        ruby|php|java) interpreter="$lang" ;;
        *)             return 1 ;;
    esac
    if ! command -v "$interpreter" >/dev/null 2>&1; then
        return 1
    fi

    # Interpreter is available: fill in the probe argv for it.
    case "$lang" in
        python)
            PROBE_ARGV=(python3 -c 'import os, sys, json, socket, subprocess')
            ;;
        javascript)
            PROBE_ARGV=(node -e "require('fs');require('os');require('http');require('child_process')")
            ;;
        ruby)
            PROBE_ARGV=(ruby -e "require 'json'; require 'socket'; require 'net/http'; require 'open3'")
            ;;
        php)
            PROBE_ARGV=(php -r 'echo phpversion();')
            ;;
        java)
            PROBE_ARGV=(java --version)
            ;;
    esac
}
|
||||
|
||||
# ── Iterative loop ───────────────────────────────────────────────────────────
|
||||
|
||||
# Run one probe under sandbox-exec and print the deny lines logged for it.
#   $1      path of the already materialised profile file
#   $2...   probe argv
# Output: one line per matching log record, trimmed so that it starts at
# "Sandbox:". The probe's own stdout, stderr and exit status are discarded.
run_probe_capture_denies() {
    local profile_path="$1"
    local -a probe_argv=("${@:2}")

    # Background the probe so that $! yields the PID to filter the log on.
    /usr/bin/sandbox-exec -f "$profile_path" -D WORKDIR=/tmp "${probe_argv[@]}" \
        >/dev/null 2>&1 &
    local probe_pid=$!

    # The exit status is deliberately ignored: under deny-default many
    # operations are silently degraded by the interpreter (a denied
    # sysctl-read just returns ENOENT and is handled gracefully).
    wait "$probe_pid" 2>/dev/null || true

    # Give the kernel's log queue time to drain. Empirically a few hundred
    # milliseconds suffice on macOS 26.
    sleep "$LOG_WAIT"

    # Filter on the "(<pid>) deny" token, which is more selective than the
    # bare pid. The default text output of `log show` is post-processed
    # with awk, so jq is not required on the host.
    local predicate="eventMessage CONTAINS \"(${probe_pid}) deny\""
    /usr/bin/log show --predicate "$predicate" --info --debug --last 30s 2>/dev/null \
        | awk '/Sandbox: .*\([0-9]+\) deny\(/ { sub(/^.*Sandbox:/, "Sandbox:"); print }'
}
|
||||
|
||||
# Iteratively build the allow-rule seed for one sandbox profile.
#   $1      profile name; the source is read from $PROFILE_DIR/<name>.sb
#   $2...   languages to probe, in order
#
# For each language the probe is run under a temporary copy of the profile
# in which `(allow default)` is rewritten to `(deny default)` and the rules
# accumulated so far are appended. Every deny line reported by
# run_probe_capture_denies is converted with deny_to_allow_rule, and rules
# not seen before are added to the set. A language is finished when a probe
# yields no denies, yields only already-known rules, or MAX_ITERATIONS
# probes have run; the last case prints a warning on stderr because the
# resulting seed may be incomplete.
#
# The seed is written to $SEED_DIR/<name>.allow and a summary line is
# printed on stdout. Returns 1, after a message on stderr, when the profile
# source file does not exist.
iterate_one_profile() {
    local profile_name="$1"
    shift
    local -a langs=("$@")

    local source_path="$PROFILE_DIR/$profile_name.sb"
    if [[ ! -f "$source_path" ]]; then
        echo "sb-trace: profile $profile_name missing at $source_path" >&2
        return 1
    fi

    local base
    base="$(sed 's/(allow default)/(deny default)/' "$source_path")"

    # Rules gathered across all languages, in discovery order. The same
    # array is used for the duplicate check.
    local -a accumulated_rules=()
    local total_iters=0

    # Declared up front so that none of the loop variables leak into the
    # global scope.
    local lang iteration converged tmp_profile denies
    local new_in_iter line rule seen known r

    for lang in "${langs[@]}"; do
        if ! probe_command_for "$lang"; then
            echo "sb-trace: skipping $lang (interpreter missing or unsupported)" >&2
            continue
        fi
        local -a argv=("${PROBE_ARGV[@]}")
        if (( ${#argv[@]} == 0 )); then
            echo "sb-trace: skipping $lang (empty argv)" >&2
            continue
        fi

        iteration=0
        converged=0
        while (( iteration < MAX_ITERATIONS )); do
            iteration=$((iteration + 1))
            total_iters=$((total_iters + 1))

            # Materialise tmp profile = base + accumulated rules.
            tmp_profile="$(mktemp -t "sb-trace-$profile_name.XXXXXX.sb")"
            {
                printf '%s\n' "$base"
                printf ';; sb-trace iterative seeds (lang=%s iter=%d)\n' \
                    "$lang" "$iteration"
                # The ${arr[@]+...} form expands to nothing for an empty array.
                for r in "${accumulated_rules[@]+"${accumulated_rules[@]}"}"; do
                    printf '%s\n' "$r"
                done
            } >"$tmp_profile"

            # Run probe, collect deny lines.
            denies="$(run_probe_capture_denies "$tmp_profile" "${argv[@]}" || true)"
            rm -f "$tmp_profile"

            if [[ -z "$denies" ]]; then
                # No denies for this lang under the current rules: done.
                converged=1
                break
            fi

            # Convert denies to allow rules, keeping only unseen ones.
            new_in_iter=0
            while IFS= read -r line; do
                [[ -z "$line" ]] && continue
                rule="$(deny_to_allow_rule "$line")"
                rule="${rule%$'\n'}"
                [[ -z "$rule" ]] && continue

                # Dedup by exact-rule-text match.
                seen=0
                for known in "${accumulated_rules[@]+"${accumulated_rules[@]}"}"; do
                    if [[ "$known" == "$rule" ]]; then
                        seen=1
                        break
                    fi
                done
                if (( ! seen )); then
                    accumulated_rules+=("$rule")
                    new_in_iter=$((new_in_iter + 1))
                fi
            done <<<"$denies"

            if (( new_in_iter == 0 )); then
                # Denies present but all already known (kernel dedup, or
                # repeats of rules already issued). Stop here to avoid
                # looping forever.
                converged=1
                break
            fi
        done

        if (( ! converged )); then
            # The cap was hit while probes were still surfacing new rules,
            # so the last batch of rules was never re-verified.
            echo "sb-trace: $profile_name/$lang did not converge within MAX_ITERATIONS=$MAX_ITERATIONS probe run(s); seed may be incomplete" >&2
        fi
    done

    local seed_path="$SEED_DIR/$profile_name.allow"
    {
        printf ';; tools/sb-trace/%s.allow\n' "$profile_name"
        printf ';; Generated %s by tools/sb-trace.sh (iterative-permit loop)\n' \
            "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
        printf ';; Languages walked: %s\n' "${langs[*]}"
        printf ';; Total probe iterations: %d\n' "$total_iters"
        printf ';;\n'
        printf ';; Hand-review before commit:\n'
        printf ';; * regex-anonymise host-specific paths under /Users/<you>/...\n'
        printf ';; into ^/Users/[^/]+/... so the seed survives a different\n'
        printf ';; operator host\n'
        printf ';; * collapse same-op rules onto one (allow op a b c ...)\n'
        printf ';; directive when the targets share semantics\n'
        printf '\n'
        if (( ${#accumulated_rules[@]} == 0 )); then
            printf ';; (no deny records captured; profile already runs cleanly\n'
            printf ';; for the probed languages under (deny default))\n'
        else
            for r in "${accumulated_rules[@]}"; do
                printf '%s\n' "$r"
            done
        fi
    } >"$seed_path"

    printf 'sb-trace: wrote %s (%d rule(s) across %d iteration(s))\n' \
        "$seed_path" "${#accumulated_rules[@]}" "$total_iters"
}
|
||||
|
||||
# ── Main loop ────────────────────────────────────────────────────────────────
|
||||
|
||||
# Generate one seed per selected profile, walking every selected language.
for profile in "${selected_profiles[@]}"; do
    iterate_one_profile "$profile" "${selected_langs[@]}"
done

# Closing summary: one argument per output line.
printf '%s\n' \
    '' \
    'sb-trace: done.' \
    'Next steps:' \
    ' 1. Hand-review each tools/sb-trace/*.allow seed.' \
    ' 2. Replace host-specific literal paths with regex matches.' \
    ' 3. Commit the .allow files.' \
    ' 4. Run nyx with NYX_SB_DENY_DEFAULT=1 + NYX_SB_SEED_DIR pointing at' \
    ' tools/sb-trace/ to exercise the splice.'
|
||||
91
tools/sb-trace/README.md
Normal file
91
tools/sb-trace/README.md
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
# sb-trace seeds
|
||||
|
||||
This directory holds per-capability allowlist seeds for the macOS
|
||||
sandbox-exec deny-default rollout.
|
||||
|
||||
## What the seeds are
|
||||
|
||||
Each `.allow` file is a fragment of sandbox-exec profile syntax (one
|
||||
or more `(allow ...)` directives, plus comments). At runtime,
|
||||
`src/dynamic/sandbox/process_macos.rs::profile_path` consults the
|
||||
`NYX_SB_DENY_DEFAULT` environment variable; when set, it locates the
|
||||
seed for the active capability, rewrites the baked profile's
|
||||
`(allow default)` directive to `(deny default)`, and appends the seed
|
||||
body verbatim. Sandbox-exec resolves later directives over earlier
|
||||
ones, so the appended allow rules stack on top of the deny baseline.
|
||||
|
||||
The splice path lives in `process_macos.rs::splice_deny_default`; it
|
||||
is pure, unit-tested, and a no-op when the seed for a capability is
|
||||
missing. Misconfiguration cannot brick the sandbox-exec backend.
|
||||
|
||||
## How the seeds get generated
|
||||
|
||||
Run `tools/sb-trace.sh` from a macOS host that has the interpreters
|
||||
on `$PATH`. The script materialises each `.sb` profile with
|
||||
`(allow default)` rewritten to `(deny default)`, runs each
|
||||
per-language probe under `sandbox-exec`, queries
|
||||
`log show --predicate 'eventMessage CONTAINS "(<pid>) deny"'` for the
|
||||
kernel deny records the probe triggered, converts each deny line
|
||||
into the matching `(allow ...)` rule, appends it to the profile, and
|
||||
re-runs the probe. The loop stops when an iteration produces no new
|
||||
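denies (the probe ran cleanly under the accumulated allows), when every
deny it reports maps to a rule that is already accumulated (kernel dedup,
or repeats of rules already issued), or when the script's per-language
`MAX_ITERATIONS` cap is reached.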
|
||||
|
||||
The PID-targeted log query sidesteps the dedup window: each iteration's
|
||||
probe runs as a new process with a fresh PID, so the kernel emits a
|
||||
fresh deny record even when the operation tuple repeats. The older
|
||||
`(trace "<file>")` mechanism is silently ignored on macOS 26+ and is
|
||||
no longer used.
|
||||
|
||||
Output:
|
||||
|
||||
tools/sb-trace/<cap>.allow (committed after hand-review)
|
||||
|
||||
After a run, hand-review each `.allow` seed before committing. The
|
||||
emitted seeds usually need two passes:
|
||||
|
||||
1. Replace host-specific literal paths with regex matches. For
|
||||
instance `/Users/eli/.pyenv/versions/3.11/lib/python3.11/...`
|
||||
should become a regex anchored on `^/Users/[^/]+/\\.pyenv/`.
|
||||
2. Group related rules onto one `(allow <op> a b c ...)` directive
|
||||
when the targets share semantics.
|
||||
|
||||
The parser logic that turns one deny line into one allow rule is
|
||||
exercised in CI via `tests/sb_trace_script.rs`, which invokes
|
||||
`tools/sb-trace.sh --selftest` — a mode that runs the parser against
|
||||
canned input and exits non-zero on any mismatch.
|
||||
|
||||
## Activating a seed at runtime
|
||||
|
||||
Set `NYX_SB_DENY_DEFAULT`, and `NYX_SB_SEED_DIR` if you are running outside the workspace, before invoking `nyx`:
|
||||
|
||||
export NYX_SB_DENY_DEFAULT=1
|
||||
export NYX_SB_SEED_DIR="$(pwd)/tools/sb-trace"
|
||||
|
||||
The seed dir defaults to `tools/sb-trace/` relative to the workspace
|
||||
root, so the second env var is only needed when running outside the
|
||||
workspace.
|
||||
|
||||
The runtime splice is opt-in. Production builds leave the baked
|
||||
`(allow default)` body intact unless the operator flips the env var.
|
||||
|
||||
## Verifying a seed end-to-end
|
||||
|
||||
The smoke test `deny_default_seed_loads_under_strict` in
|
||||
`tests/sandbox_hardening_macos.rs` exercises the splice through the
|
||||
production call site. It writes a synthetic seed to a tempdir,
|
||||
points `NYX_SB_SEED_DIR` at it, calls `profile_path`, and asserts the
|
||||
materialised file contains both `(deny default)` and the synthetic
|
||||
seed body.
|
||||
|
||||
For a real-host smoke test against a generated seed, run:
|
||||
|
||||
NYX_SB_DENY_DEFAULT=1 \
|
||||
NYX_SB_SEED_DIR="$(pwd)/tools/sb-trace" \
|
||||
cargo nextest run --features dynamic --test sandbox_hardening_macos
|
||||
|
||||
When every cap profile has a seed that lets the python3 / node
|
||||
cold-start clear, the macOS strict-mode acceptance row in
|
||||
`.github/workflows/dynamic.yml` flips from "ships (allow default)" to
|
||||
"ships deny-default by default" — that's the closing condition for
|
||||
the Phase 18 follow-up.
|
||||
Loading…
Add table
Add a link
Reference in a new issue