diff --git a/.github/workflows/image-builder.yml b/.github/workflows/image-builder.yml
new file mode 100644
index 00000000..57ea5bab
--- /dev/null
+++ b/.github/workflows/image-builder.yml
@@ -0,0 +1,68 @@
+name: image-builder
+
+# Phase 19 (Track E.3): daily drift PR.
+#
+# Runs `nyx-image-builder build --all` on a Linux runner that has docker
+# available, captures the rewritten `tools/image-builder/images.toml`, and
+# opens a PR when any pinned digest changed. The PR is reviewed manually
+# before merge so a hostile upstream image cannot silently land in
+# `IMAGE_DIGESTS`.
+
+permissions:
+  contents: write
+  pull-requests: write
+
+on:
+  schedule:
+    # 04:23 UTC daily — off-peak for the major upstream registries so
+    # transient pull errors are rare.
+    - cron: "23 4 * * *"
+  workflow_dispatch:
+
+concurrency:
+  group: image-builder
+  cancel-in-progress: false
+
+jobs:
+  refresh-digests:
+    name: refresh image digests
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+
+      - uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          toolchain: stable
+          cache: true
+
+      - name: Verify docker is reachable
+        run: docker info
+
+      - name: Build pinned-digest catalogue
+        run: |
+          cargo run -F image-builder --bin nyx-image-builder -- build --all
+
+      - name: Verify catalogue against local pulls
+        run: |
+          cargo run -F image-builder --bin nyx-image-builder -- verify
+
+      - name: Open PR on drift
+        uses: peter-evans/create-pull-request@v7
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          commit-message: "image-builder: refresh pinned digests"
+          title: "image-builder: refresh pinned digests"
+          body: |
+            Automated digest refresh by `nyx-image-builder build --all`.
+
+            The CI job pulled every base image in
+            `tools/image-builder/images.toml`, captured the resolved
+            `sha256:` digest, and wrote it back into the file. Review
+            the diff before merging — a hostile upstream image would
+            show up here as an unexpected digest change.
+          branch: image-builder/refresh-digests
+          base: master
+          delete-branch: true
+          labels: |
+            image-builder
+            automation
diff --git a/Cargo.toml b/Cargo.toml
index f6e0a54c..3907bbcf 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -50,6 +50,10 @@ docgen = []
# sandbox, reports back whether the sink fires. Off by default until the
# static side is honest on real corpora (see ROADMAP.md).
dynamic = ["dep:tempfile"]
+# Phase 19 (Track E.3): the `nyx-image-builder` helper binary that builds
+# and pins per-toolchain Docker images. Gated so the default `nyx` build is
+# not bloated with TOML-write logic that only CI operators need.
+image-builder = []
[lib]
name = "nyx_scanner"
@@ -64,6 +68,11 @@ name = "nyx-docgen"
path = "tools/docgen/main.rs"
required-features = ["docgen"]
+[[bin]]
+name = "nyx-image-builder"
+path = "tools/image-builder/main.rs"
+required-features = ["image-builder"]
+
[[bench]]
name = "scan_bench"
harness = false
diff --git a/build.rs b/build.rs
index 66f99fad..50e9a5fd 100644
--- a/build.rs
+++ b/build.rs
@@ -9,6 +9,12 @@ fn main() {
// the file (the include never actually compiles on non-Linux).
emit_seccomp_policy();
+ // Phase 19 (Track E.3): emit the IMAGE_DIGESTS table from
+ // tools/image-builder/images.toml. The runtime side (src/dynamic/
+ // toolchain.rs) `include!`s the generated file unconditionally so
+ // every host build has the same pinned-digest catalogue.
+ emit_image_digests();
+
// Only relevant when the serve feature is active.
if std::env::var("CARGO_FEATURE_SERVE").is_err() {
return;
@@ -283,3 +289,138 @@ fn store_allow(policy: &mut SeccompPolicy, section: Option<&str>, key: &str, val
fn escape(s: &str) -> String {
s.replace('\\', "\\\\").replace('"', "\\\"")
}
+
+// ── Phase 19 (Track E.3) — image digest codegen ──────────────────────────────
+
+const IMAGE_CATALOGUE_PATH: &str = "tools/image-builder/images.toml";
+
+/// Parse `tools/image-builder/images.toml` and emit two tables to
+/// `$OUT_DIR/image_digests.rs`:
+///
+///     pub static IMAGE_DIGESTS: phf::Map<&'static str, &'static str> = …;
+///     pub static IMAGE_BASES: phf::Map<&'static str, &'static str> = …;
+///
+/// `IMAGE_DIGESTS` keys are toolchain IDs (`python-3.11`, …) and values are
+/// `<base>@sha256:…` strings ready to hand to `docker pull`. An empty digest
+/// in `images.toml` is treated as "not yet pinned" and the entry is omitted
+/// from `IMAGE_DIGESTS`; `IMAGE_BASES` always carries the unpinned reference
+/// so `docker.rs` can fall back to a tag pull when no digest is recorded.
+fn emit_image_digests() {
+    println!("cargo:rerun-if-changed={}", IMAGE_CATALOGUE_PATH);
+
+    let out_dir = std::env::var("OUT_DIR").expect("OUT_DIR must be set by cargo");
+    let out_path = Path::new(&out_dir).join("image_digests.rs");
+
+    let toml_text = match std::fs::read_to_string(IMAGE_CATALOGUE_PATH) {
+        Ok(s) => s,
+        Err(err) => {
+            // Missing catalogue (fresh checkout without the file) — emit
+            // empty maps so the runtime include still compiles. Any other
+            // read error takes the same fallback but is surfaced as a warning.
+            if err.kind() != std::io::ErrorKind::NotFound {
+                println!("cargo:warning=cannot read {}: {}", IMAGE_CATALOGUE_PATH, err);
+            }
+            std::fs::write(
+                &out_path,
+                "/// generated empty IMAGE_DIGESTS — images.toml missing\n\
+                 pub static IMAGE_DIGESTS: phf::Map<&'static str, &'static str> = \
+                 phf::phf_map! {};\n\
+                 pub static IMAGE_BASES: phf::Map<&'static str, &'static str> = \
+                 phf::phf_map! {};\n",
+            )
+            .expect("write empty image digests stub");
+            return;
+        }
+    };
+
+    let entries = parse_image_catalogue(&toml_text);
+    let mut out = String::new();
+    out.push_str("// generated by build.rs from tools/image-builder/images.toml — do not edit\n\n");
+
+    // IMAGE_DIGESTS: only entries with a non-empty digest survive.
+    out.push_str("pub static IMAGE_DIGESTS: phf::Map<&'static str, &'static str> = phf::phf_map! {\n");
+    for e in entries.iter().filter(|e| !e.digest.is_empty()) {
+        let pinned = format!("{}@{}", e.base, e.digest);
+        out.push_str(&format!(
+            " \"{}\" => \"{}\",\n",
+            escape(&e.toolchain_id),
+            escape(&pinned),
+        ));
+    }
+    out.push_str("};\n\n");
+
+    // IMAGE_BASES: every entry, digest stripped. Used by docker.rs when no
+    // digest is pinned yet so a `docker pull <base>` is still possible.
+    out.push_str("pub static IMAGE_BASES: phf::Map<&'static str, &'static str> = phf::phf_map! {\n");
+    for e in &entries {
+        out.push_str(&format!(
+            " \"{}\" => \"{}\",\n",
+            escape(&e.toolchain_id),
+            escape(&e.base),
+        ));
+    }
+    out.push_str("};\n");
+
+    std::fs::write(&out_path, out).expect("write image_digests.rs");
+}
+
+#[derive(Default)]
+struct ImageEntry { // one `[[image]]` table from images.toml (fields consumed by codegen)
+ toolchain_id: String, // lookup key, e.g. `python-3.11`; entries without one are dropped
+ base: String, // unpinned image reference, e.g. `python:3.11-slim`
+ digest: String, // `sha256:…` content digest; empty means "not yet pinned"
+}
+
+/// Tiny TOML parser scoped to the `[[image]] toolchain_id = …` shape used
+/// by `images.toml`. Only the three fields we consume here are extracted;
+/// the rest of each entry (`toolchain`, `packages`) is ignored.
+///
+/// Returns the entries in file order. An entry is kept only when it carries
+/// a non-empty `toolchain_id`; `base` and `digest` default to the empty
+/// string when absent.
+///
+/// This is a line-oriented scan, not a general TOML parser: each
+/// `key = value` pair must sit on one line, and values are taken verbatim
+/// after trimming whitespace and surrounding quotes (escape sequences are
+/// not decoded).
+fn parse_image_catalogue(src: &str) -> Vec<ImageEntry> {
+    let mut entries: Vec<ImageEntry> = Vec::new();
+    let mut current: Option<ImageEntry> = None;
+
+    for raw_line in src.lines() {
+        let line = strip_comment(raw_line).trim();
+        if line.is_empty() {
+            continue;
+        }
+
+        // Any table header closes the entry being accumulated; entries that
+        // never received a `toolchain_id` have no lookup key and are dropped.
+        if line.starts_with('[') {
+            entries.extend(current.take().filter(|e| !e.toolchain_id.is_empty()));
+            // Only `[[image]]` opens a new entry; keys under any other table
+            // are skipped because `current` stays `None`.
+            if line == "[[image]]" {
+                current = Some(ImageEntry::default());
+            }
+            continue;
+        }
+
+        let Some(slot) = current.as_mut() else { continue };
+        let Some((key, value)) = line.split_once('=') else { continue };
+        let key = key.trim();
+        // Values are plain quoted strings; strip the surrounding quotes.
+        let value = value.trim().trim_matches('"').trim_matches('\'');
+        // Unknown keys are ignored.
+        match key {
+            "toolchain_id" => slot.toolchain_id = value.to_owned(),
+            "base" => slot.base = value.to_owned(),
+            "digest" => slot.digest = value.to_owned(),
+            _ => {}
+        }
+    }
+
+    // Flush the final entry — no header follows it.
+    entries.extend(current.take().filter(|e| !e.toolchain_id.is_empty()));
+
+    entries
+}
diff --git a/src/dynamic/sandbox/docker.rs b/src/dynamic/sandbox/docker.rs
new file mode 100644
index 00000000..3665710c
--- /dev/null
+++ b/src/dynamic/sandbox/docker.rs
@@ -0,0 +1,261 @@
+//! Phase 19 (Track E.3) — Docker backend helpers.
+//!
+//! This module is the thin layer between the pinned-digest catalogue
+//! (`tools/image-builder/images.toml` → `src/dynamic/toolchain.rs::IMAGE_DIGESTS`)
+//! and the existing docker invocations in [`super::run_docker`] /
+//! [`super::run_native_binary_docker`].
+//!
+//! Responsibilities:
+//!
+//! 1. Resolve a `toolchain_id` → pinned image reference (`<base>@sha256:…`),
+//!    falling back to the unpinned base tag when no digest is recorded yet.
+//! 2. Pull the resolved reference if it is not already present locally so
+//! every backend hop runs against the exact bytes the catalogue pinned.
+//! 3. Render the docker CLI arg slice that:
+//! - mounts the harness workdir read-write at the fixed `/work` path,
+//!    - mounts each `StubHarness` filesystem root at a fixed `/nyx/stubs/<n>`
+//!      path so harness-side shims can find them without hard-coding host
+//!      tempdir layouts,
+//! - honours the [`super::NetworkPolicy`] (none / OOB / stubs-only / open)
+//! using the same flag set as the legacy `start_container`.
+//!
+//! All helpers are infallible w.r.t. docker availability — they return arg
+//! slices and `Option<&'static str>` references that the caller (the parent
+//! `super` module) ships to the docker CLI. That keeps the module easy to
+//! unit-test on macOS / CI rows that do not have docker installed.
+
+use std::path::Path;
+use std::process::Command;
+use std::sync::OnceLock;
+
+use crate::dynamic::toolchain::{base_image_ref, pinned_image_ref};
+
+use super::{HostPort, NetworkPolicy};
+
+// ── Image references ────────────────────────────────────────────────────────
+
+/// Container-side mount point for the harness workdir. Stable so per-language
+/// emitters can reference `/work/...` without threading the host tempdir path
+/// through every layer.
+pub const WORK_MOUNT_PATH: &str = "/work";
+
+/// Container-side mount point root for `StubHarness` filesystem stubs.
+/// Each stub is mounted at `STUB_MOUNT_ROOT/<n>` where `<n>` is its index in
+/// the stub-root slice handed to [`stub_mount_args`].
+pub const STUB_MOUNT_ROOT: &str = "/nyx/stubs";
+
+/// Pick the docker image reference the backend should pull for a
+/// `toolchain_id`, trying the catalogue tables in order:
+///
+/// 1. `IMAGE_DIGESTS` — the pinned `<base>@sha256:…` reference.
+/// 2. `IMAGE_BASES` — the bare base tag (e.g. `python:3.11-slim`), used while
+///    the entry has no digest recorded. The daily CI workflow runs
+///    `nyx-image-builder build --all` and PRs the digest.
+/// 3. `None` — the toolchain is not in the catalogue at all; the caller
+///    decides what to fall back to.
+///
+/// The pinned reference always wins when both tables hold the ID.
+///
+/// Both lookups go through the build-time generated tables; nothing here
+/// talks to docker.
+pub fn image_reference_for_toolchain(toolchain_id: &str) -> Option<&'static str> {
+    pinned_image_ref(toolchain_id).or_else(|| base_image_ref(toolchain_id))
+}
+
+/// `true` when the catalogue holds a pinned digest for `toolchain_id`, i.e.
+/// when `image_reference_for_toolchain` returns a digest ref, not a bare tag.
+pub fn toolchain_is_pinned(toolchain_id: &str) -> bool {
+ pinned_image_ref(toolchain_id).is_some()
+}
+
+// ── Pull-by-digest ──────────────────────────────────────────────────────────
+
+/// Run `docker pull <image>` at most once per image per process; the outcome
+/// is cached so repeated harness runs against the same image do not re-hit
+/// the registry.
+///
+/// Returns `true` when the pull succeeded and `false` when it failed (network
+/// outage, untagged digest, registry auth, …). Failures are cached as well,
+/// so a transient error is not retried for the lifetime of the process.
+pub fn ensure_image_pulled(image: &str) -> bool {
+    static CACHE: OnceLock<dashmap::DashMap<String, bool>> = OnceLock::new();
+    let cache = CACHE.get_or_init(dashmap::DashMap::new);
+
+    if let Some(entry) = cache.get(image) {
+        return *entry;
+    }
+    let ok = docker_pull(image);
+    cache.insert(image.to_owned(), ok);
+    ok
+}
+
+fn docker_pull(image: &str) -> bool {
+    // Pull output is discarded; only the exit status decides the result.
+    let outcome = Command::new(docker_bin())
+        .args(["pull", image])
+        .stdout(std::process::Stdio::null())
+        .stderr(std::process::Stdio::null())
+        .status();
+    matches!(outcome, Ok(exit) if exit.success())
+}
+
+fn docker_bin() -> String {
+    std::env::var("NYX_DOCKER_BIN").unwrap_or_else(|_| String::from("docker"))
+}
+
+// ── Argument assembly ───────────────────────────────────────────────────────
+
+/// Render the `docker run` flag pair that bind-mounts the harness workdir at
+/// [`WORK_MOUNT_PATH`] read-write, i.e. `-v <workdir>:/work:rw`. The pair is
+/// produced unconditionally so harness code can stage outputs under
+/// `/work/...` whatever the directory holds.
+///
+/// Returns owned strings so the caller can `extend` them into its already-
+/// built `Vec<String>` arg list. Non-UTF-8 path bytes are replaced lossily.
+pub fn workdir_mount_args(workdir: &Path) -> Vec<String> {
+    let host = workdir.to_string_lossy().into_owned();
+    vec!["-v".to_owned(), format!("{host}:{WORK_MOUNT_PATH}:rw")]
+}
+
+/// Render the `docker run` flag slice that mounts each filesystem-stub root
+/// at a fixed path under [`STUB_MOUNT_ROOT`]. Only filesystem roots are
+/// rendered here; network stubs (SQL TCP loop, HTTP, Redis) are expected to
+/// reach the harness through the `--add-host` aliases and env vars set up
+/// elsewhere in the sandbox module.
+///
+/// Each entry maps to `-v <root>:<STUB_MOUNT_ROOT>/<idx>:rw`. Read-write
+/// because stubs record events into the path.
+pub fn stub_mount_args(stub_roots: &[std::path::PathBuf]) -> Vec<String> {
+    let mut out = Vec::with_capacity(stub_roots.len() * 2);
+    for (idx, root) in stub_roots.iter().enumerate() {
+        let host = root.to_string_lossy();
+        out.push("-v".to_owned());
+        out.push(format!("{host}:{STUB_MOUNT_ROOT}/{idx}:rw"));
+    }
+    out
+}
+
+/// Render the `--network` + `--add-host` flag slice for a [`NetworkPolicy`].
+///
+/// - `None`: `--network none`.
+/// - `Open`: `--network bridge`.
+/// - `OobOutbound`: `--network bridge` plus the `host-gateway` alias.
+/// - `StubsOnly`: as `OobOutbound`, plus one `--add-host=<host>:host-gateway`
+///   alias per allowed [`HostPort`] (the port itself is not rendered).
+///
+/// Intended to mirror the legacy block in [`super::start_container`] so both
+/// entry points produce the same container network layout.
+///
+/// Returns owned strings so the caller can `extend` them into its
+/// `docker run` argument list.
+pub fn network_args(policy: &NetworkPolicy) -> Vec<String> {
+    // Every policy except `None` runs on the default bridge network.
+    let mode = if matches!(policy, NetworkPolicy::None) { "none" } else { "bridge" };
+    let mut args = vec!["--network".to_owned(), mode.to_owned()];
+    match policy {
+        NetworkPolicy::None | NetworkPolicy::Open => {}
+        NetworkPolicy::OobOutbound { .. } => {
+            args.push("--add-host=host-gateway:host-gateway".to_owned());
+        }
+        NetworkPolicy::StubsOnly { allow } => {
+            args.push("--add-host=host-gateway:host-gateway".to_owned());
+            args.extend(allow.iter().map(add_host_arg));
+        }
+    }
+    args
+}
+
+fn add_host_arg(hp: &HostPort) -> String {
+    format!("--add-host={host}:host-gateway", host = hp.host)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::path::PathBuf;
+    use std::sync::Arc;
+
+    #[test]
+    fn workdir_mount_args_uses_fixed_path() {
+        let path = Path::new("/tmp/nyx-harness/abc");
+        let args = workdir_mount_args(path);
+        assert_eq!(args, vec!["-v", "/tmp/nyx-harness/abc:/work:rw"]);
+    }
+
+    #[test]
+    fn stub_mount_args_indexes_each_root() {
+        let roots = vec![PathBuf::from("/tmp/stub-a"), PathBuf::from("/tmp/stub-b")];
+        let args = stub_mount_args(&roots);
+        assert_eq!(
+            args,
+            vec![
+                "-v",
+                "/tmp/stub-a:/nyx/stubs/0:rw",
+                "-v",
+                "/tmp/stub-b:/nyx/stubs/1:rw",
+            ],
+        );
+    }
+
+    #[test]
+    fn stub_mount_args_empty_when_no_stubs() {
+        assert!(stub_mount_args(&[]).is_empty());
+    }
+
+    #[test]
+    fn network_args_none_picks_network_none() {
+        let args = network_args(&NetworkPolicy::None);
+        assert!(args.iter().any(|a| a == "none"));
+    }
+
+    #[test]
+    fn network_args_stubs_only_adds_host_aliases() {
+        let policy = NetworkPolicy::StubsOnly {
+            allow: vec![HostPort::new("sql", 5432), HostPort::new("redis", 6379)],
+        };
+        let args = network_args(&policy);
+        assert!(args.iter().any(|a| a == "--add-host=sql:host-gateway"));
+        assert!(args.iter().any(|a| a == "--add-host=redis:host-gateway"));
+    }
+
+    #[test]
+    fn network_args_open_drops_egress_filter() {
+        let args = network_args(&NetworkPolicy::Open);
+        // Open is bridge but no host-gateway alias.
+        assert!(args.iter().any(|a| a == "bridge"));
+        assert!(!args.iter().any(|a| a.starts_with("--add-host=")));
+    }
+
+    #[test]
+    fn network_args_oob_threads_host_gateway() {
+        let listener = Arc::new(
+            crate::dynamic::oob::OobListener::bind()
+                .expect("oob listener must bind on 127.0.0.1 in tests"),
+        );
+        let args = network_args(&NetworkPolicy::OobOutbound { listener });
+        assert!(args.iter().any(|a| a == "--add-host=host-gateway:host-gateway"));
+    }
+
+    #[test]
+    fn image_reference_for_toolchain_unknown_returns_none() {
+        assert_eq!(image_reference_for_toolchain("python-99.x"), None);
+    }
+
+    #[test]
+    fn image_reference_for_toolchain_known_returns_base_when_unpinned() {
+        // The catalogue ships with empty digests, so known IDs resolve to the
+        // bare base tag today. Once the daily CI run pins a real digest the
+        // reference becomes `<base>@sha256:…`; the assertion below holds for
+        // both shapes because either one contains the image name.
+        let r = image_reference_for_toolchain("python-3.11");
+        assert!(r.is_some());
+        assert!(r.unwrap().contains("python"));
+    }
+
+    #[test]
+    fn toolchain_is_pinned_matches_digest_presence() {
+        // Valid for a fresh catalogue and after the daily CI run pins digests.
+        let r = image_reference_for_toolchain("python-3.11").expect("in catalogue");
+        assert_eq!(toolchain_is_pinned("python-3.11"), r.contains("@sha256:"));
+    }
+}
diff --git a/src/dynamic/sandbox/mod.rs b/src/dynamic/sandbox/mod.rs
index fa82da0a..a8a9e90f 100644
--- a/src/dynamic/sandbox/mod.rs
+++ b/src/dynamic/sandbox/mod.rs
@@ -40,6 +40,17 @@ pub use process_linux::{HardeningLevel, HardeningOutcome};
#[cfg(target_os = "macos")]
pub mod process_macos;
+/// Phase 19 (Track E.3) — pinned-digest docker backend helpers.
+///
+/// The functions in this module resolve [`crate::dynamic::toolchain::
+/// IMAGE_DIGESTS`] entries to docker image refs, render `docker run`
+/// flag slices that honour [`NetworkPolicy`], and mount the harness
+/// workdir at the fixed `/work` path. The legacy entry points in this
+/// file ([`run_docker`] / [`run_native_binary_docker`]) call into
+/// `docker::ensure_image_pulled` so every harness run uses the catalogue
+/// pin when one is available.
+pub mod docker;
+
// ── Harness interpretation probe ──────────────────────────────────────────────
/// Returns true when the harness is driven by an interpreter (Python, Node, …)
@@ -725,6 +736,19 @@ fn start_container(
image: &str,
policy: &NetworkPolicy,
) -> Result<(), SandboxError> {
+ // Phase 19 (Track E.3): when `image` is a pinned reference produced by
+ // `docker::image_reference_for_toolchain`, make sure it is present on
+ // this host before `docker run` tries to start a container from it.
+ // `ensure_image_pulled` is a per-process cache, so the second harness
+ // against the same toolchain is free.
+ docker::ensure_image_pulled(image);
+
+ let workdir_mount = format!(
+ "{}:{}:rw",
+ workdir.to_string_lossy(),
+ docker::WORK_MOUNT_PATH,
+ );
+
let mut run_args: Vec = vec![
"run".into(),
"-d".into(),
@@ -733,6 +757,13 @@ fn start_container(
"--cap-drop=ALL".into(),
"--security-opt".into(), "no-new-privileges:true".into(),
"--tmpfs".into(), "/tmp:size=128m,exec".into(),
+ // Phase 19 (Track E.3): bind-mount the host workdir at the fixed
+ // `/work` path read-write. Harness code emitted in Phase 12+ can
+ // reference `/work/...` without threading the host tempdir
+ // through every layer. The `docker cp` path below is retained so
+ // older harness command lines (which still look at `/workdir`)
+ // keep working until they are migrated.
+ "-v".into(), workdir_mount,
];
match policy {
NetworkPolicy::None => {
@@ -978,6 +1009,12 @@ fn exec_in_container(
/// Dispatches by the basename of `command[0]` (e.g. `python3`, `node`, `java`,
/// `php`). Falls back to `python:3-slim` for unrecognised interpreters.
/// `NYX_TOOLCHAIN_ID` env var overrides the version portion of the image tag.
+///
+/// Phase 19 (Track E.3): when `NYX_TOOLCHAIN_ID` matches an entry in the
+/// image catalogue we return its reference directly — `<base>@sha256:…` when
+/// a digest is pinned in `IMAGE_DIGESTS`, otherwise the bare base tag from
+/// `IMAGE_BASES`. Only IDs absent from the catalogue fall through to the
+/// legacy tag mapping below.
fn detect_image_for_harness(harness: &BuiltHarness) -> String {
let cmd0 = harness.command.first().map(|s| s.as_str()).unwrap_or("python3");
let base = std::path::Path::new(cmd0)
@@ -986,6 +1023,12 @@ fn detect_image_for_harness(harness: &BuiltHarness) -> String {
.unwrap_or(cmd0);
if let Ok(tid) = std::env::var("NYX_TOOLCHAIN_ID") {
+ if let Some(pinned) = docker::image_reference_for_toolchain(&tid) {
+ // Catalogue entry takes priority over the legacy hard-coded tag
+ // map — pinned or unpinned, the value here came from
+ // tools/image-builder/images.toml.
+ return pinned.to_owned();
+ }
return match base {
"node" | "nodejs" => node_image_for_toolchain(&tid),
"java" => java_image_for_toolchain(&tid),
diff --git a/src/dynamic/toolchain.rs b/src/dynamic/toolchain.rs
index 83d5704d..f9d98e2a 100644
--- a/src/dynamic/toolchain.rs
+++ b/src/dynamic/toolchain.rs
@@ -7,6 +7,37 @@
use std::path::Path;
+// Phase 19 (Track E.3): generated lookup tables for pinned Docker image
+// digests. Populated by `build.rs` from `tools/image-builder/images.toml`.
+//
+// - `IMAGE_DIGESTS`: `toolchain_id → "<base>@sha256:…"`. Used by the docker
+//   backend (`src/dynamic/sandbox/docker.rs`) to pull a pinned digest so the
+//   sandboxed runtime is byte-identical between hosts.
+// - `IMAGE_BASES`: `toolchain_id → "<base>"`. Fallback for the docker
+//   backend when no digest is pinned yet (e.g. fresh `images.toml` entry).
+include!(concat!(env!("OUT_DIR"), "/image_digests.rs"));
+
+/// Look up the pinned image reference (`<base>@sha256:…`) for `toolchain_id`.
+/// Returns `None` when the catalogue holds no digest for that ID.
+///
+/// The pin is purely static: `nyx-image-builder build` writes the digest into
+/// `images.toml`, and the next Rust rebuild regenerates `IMAGE_DIGESTS` via
+/// `build.rs`. This function performs no runtime digest fetch.
+pub fn pinned_image_ref(toolchain_id: &str) -> Option<&'static str> {
+    let pinned = IMAGE_DIGESTS.get(toolchain_id)?;
+    Some(*pinned)
+}
+
+/// Look up the base image tag (no digest) for `toolchain_id`. Returns `None`
+/// when the toolchain is not present in the catalogue.
+///
+/// The docker backend falls back to this tag when [`pinned_image_ref`] has
+/// nothing recorded for the ID.
+pub fn base_image_ref(toolchain_id: &str) -> Option<&'static str> {
+    let base = IMAGE_BASES.get(toolchain_id)?;
+    Some(*base)
+}
+
/// Resolved toolchain information for a target directory.
#[derive(Debug, Clone)]
pub struct ToolchainResolution {
diff --git a/tests/sandbox_docker.rs b/tests/sandbox_docker.rs
new file mode 100644
index 00000000..18dfe1a9
--- /dev/null
+++ b/tests/sandbox_docker.rs
@@ -0,0 +1,196 @@
+//! Phase 19 (Track E.3) — Docker backend pinned-digest + mount tests.
+//!
+//! Exercises the `src/dynamic/sandbox/docker.rs` helpers end-to-end on the
+//! `linux-with-docker` CI matrix row. Tests skip automatically when docker
+//! is not reachable so the `linux-without-docker` and `macos` rows pass
+//! without burning a docker pull.
+//!
+//! The acceptance literal for this phase is "`tests/sandbox_docker.rs` runs
+//! only on the `linux-with-docker` matrix row". We honour that by checking
+//! `docker info` at the top of every test and short-circuiting when the
+//! daemon is unreachable.
+//!
+//! Run with: `cargo nextest run --features dynamic --test sandbox_docker`
+
+#![cfg(feature = "dynamic")]
+
+use nyx_scanner::dynamic::harness::BuiltHarness;
+use nyx_scanner::dynamic::sandbox::docker::{
+ ensure_image_pulled, image_reference_for_toolchain, network_args, stub_mount_args,
+ toolchain_is_pinned, workdir_mount_args, STUB_MOUNT_ROOT, WORK_MOUNT_PATH,
+};
+use nyx_scanner::dynamic::sandbox::{
+ self, HostPort, NetworkPolicy, SandboxBackend, SandboxOptions,
+};
+use std::path::{Path, PathBuf};
+use std::time::Duration;
+
+// ── Helpers ──────────────────────────────────────────────────────────────────
+
+fn docker_available() -> bool {
+    // Probe the same binary the backend shells out to (`NYX_DOCKER_BIN`).
+    let bin = std::env::var("NYX_DOCKER_BIN").unwrap_or_else(|_| "docker".to_owned());
+    std::process::Command::new(bin)
+        .arg("info")
+        .stdout(std::process::Stdio::null())
+        .stderr(std::process::Stdio::null())
+        .status().map_or(false, |s| s.success())
+}
+
+fn write_harness_script(workdir: &Path, body: &str) -> PathBuf {
+    let script = workdir.join("harness.py");
+    std::fs::write(&script, body).expect("write harness script");
+    script
+}
+
+fn harness(workdir: &Path) -> BuiltHarness {
+    BuiltHarness {
+        command: vec!["python3".into(), "harness.py".into()],
+        workdir: workdir.to_owned(),
+        env: Vec::new(),
+        source: String::new(),
+        entry_source: String::new(),
+    }
+}
+
+fn docker_opts() -> SandboxOptions {
+    SandboxOptions {
+        backend: SandboxBackend::Docker,
+        network_policy: NetworkPolicy::None,
+        timeout: Duration::from_secs(15),
+        ..Default::default()
+    }
+}
+
+// ── Pure helper coverage (always runs) ───────────────────────────────────────
+
+#[test]
+fn workdir_mount_args_uses_fixed_work_path() {
+    // The host path is passed through verbatim; the container side is always
+    // `WORK_MOUNT_PATH`, mounted read-write.
+    let host = "/tmp/nyx-harness/run-abc";
+    let expected = vec![
+        "-v".to_owned(),
+        format!("{host}:{WORK_MOUNT_PATH}:rw"),
+    ];
+    assert_eq!(workdir_mount_args(Path::new(host)), expected);
+}
+
+#[test]
+fn stub_mount_args_uses_indexed_fixed_paths() {
+    let args = stub_mount_args(&[PathBuf::from("/tmp/a"), PathBuf::from("/tmp/b")]);
+    assert_eq!(args.len(), 4);
+    for (idx, host) in ["/tmp/a", "/tmp/b"].iter().enumerate() {
+        assert!(args.contains(&format!("{host}:{STUB_MOUNT_ROOT}/{idx}:rw")));
+    }
+}
+
+#[test]
+fn network_args_translate_every_policy() {
+    let none = network_args(&NetworkPolicy::None);
+    assert!(none.contains(&"none".to_owned()));
+    let allow = vec![HostPort::new("sql", 5432)];
+    let stubs = network_args(&NetworkPolicy::StubsOnly { allow });
+    assert!(stubs.contains(&"--add-host=sql:host-gateway".to_owned()));
+    // `Open` uses the bridge network without any host alias.
+    let open = network_args(&NetworkPolicy::Open);
+    assert!(open.contains(&"bridge".to_owned()));
+    assert!(open.iter().all(|a| !a.starts_with("--add-host=")));
+}
+
+#[test]
+fn image_reference_resolves_known_toolchains() {
+    // Catalogue entries always resolve — to a pinned or an unpinned ref.
+    for known in ["python-3.11", "node-20", "java-21"] {
+        assert!(image_reference_for_toolchain(known).is_some());
+    }
+    // An ID outside the catalogue yields `None`, leaving the legacy path.
+    assert!(image_reference_for_toolchain("python-99.9").is_none());
+}
+
+#[test]
+fn toolchain_pinning_state_is_observable() {
+    // Holds in both catalogue states: before the daily job lands digests the
+    // entry is unpinned and the ref must be a bare tag; afterwards it is
+    // pinned and the ref must carry the digest. Either way this catches a
+    // mismatch between the pinning flag and the resolved reference.
+    let r = image_reference_for_toolchain("python-3.11").unwrap();
+    let has_digest = r.contains("@sha256:");
+    if toolchain_is_pinned("python-3.11") {
+        assert!(has_digest, "pinned ref must carry digest, got {r}");
+    } else {
+        assert!(!has_digest, "unpinned ref must not carry digest, got {r}");
+    }
+}
+
+// ── Live-docker coverage (skips when docker is absent) ───────────────────────
+
+#[test]
+fn ensure_image_pulled_returns_true_for_python_slim() {
+    if !docker_available() {
+        eprintln!("docker unavailable — skipping");
+        return;
+    }
+    // Resolve through the catalogue so the pull targets whatever reference
+    // (pinned or bare tag) the backend itself would use.
+    let r = image_reference_for_toolchain("python-3.11")
+        .expect("python-3.11 must be in the catalogue");
+    let pulled = ensure_image_pulled(r);
+    assert!(pulled, "ensure_image_pulled must succeed for `{r}` when docker is available");
+}
+
+#[test]
+fn harness_runs_under_docker_with_network_none() {
+    if !docker_available() {
+        eprintln!("docker unavailable — skipping");
+        return;
+    }
+
+    // The script only prints a marker: seeing it on stdout proves the
+    // backend round-trips through `docker run` + `docker exec` cleanly.
+    let script = "import sys; sys.stdout.write('NYX_DOCKER_OK\\n')\n";
+    let tmp = tempfile::TempDir::new().expect("tempdir");
+    write_harness_script(tmp.path(), script);
+
+    let outcome = sandbox::run(&harness(tmp.path()), b"", &docker_opts())
+        .expect("docker backend must run");
+    assert_eq!(outcome.exit_code, Some(0), "harness must exit cleanly");
+
+    let stdout = String::from_utf8_lossy(&outcome.stdout);
+    assert!(
+        stdout.contains("NYX_DOCKER_OK"),
+        "expected marker in stdout, got: {stdout}",
+    );
+}
+
+#[test]
+fn harness_workdir_is_mounted_at_fixed_work_path() {
+    if !docker_available() {
+        eprintln!("docker unavailable — skipping");
+        return;
+    }
+
+    // The script reads a fixture through the fixed /work mount path, which
+    // only succeeds when the workdir is bind-mounted there — a docker-cp
+    // copy under /workdir would not satisfy it.
+    let script = "open('/work/token.txt').read()\n\
+        import sys; sys.stdout.write('NYX_WORK_MOUNT_OK\\n')\n";
+    let tmp = tempfile::TempDir::new().expect("tempdir");
+    let fixture = tmp.path().join("token.txt");
+    std::fs::write(fixture, "phase-19-mount-token\n").expect("write fixture");
+    write_harness_script(tmp.path(), script);
+
+    let outcome = sandbox::run(&harness(tmp.path()), b"", &docker_opts())
+        .expect("docker backend must run");
+    let stdout = String::from_utf8_lossy(&outcome.stdout);
+    let stderr = String::from_utf8_lossy(&outcome.stderr);
+    assert_eq!(
+        outcome.exit_code,
+        Some(0),
+        "/work mount must be readable inside the container; stdout={stdout} stderr={stderr}",
+    );
+    assert!(
+        stdout.contains("NYX_WORK_MOUNT_OK"),
+        "expected /work mount marker; stdout={stdout}",
+    );
+}
diff --git a/tools/image-builder/images.toml b/tools/image-builder/images.toml
new file mode 100644
index 00000000..ef59414b
--- /dev/null
+++ b/tools/image-builder/images.toml
@@ -0,0 +1,125 @@
+# Pinned-digest catalogue consumed by `nyx-image-builder` and the
+# `build.rs` codegen that populates `src/dynamic/toolchain.rs::IMAGE_DIGESTS`.
+#
+# Each `[[image]]` entry corresponds to one `(lang, toolchain)` cell of the
+# Docker backend. The `toolchain_id` matches the IDs surfaced by
+# `src/dynamic/toolchain.rs` (`python-3.11`, `node-20`, `java-21`, …) and is
+# the lookup key used by `IMAGE_DIGESTS`.
+#
+# Fields:
+# - toolchain_id string Lookup key (see toolchain.rs).
+# - base string Docker image reference (e.g. "python:3.11-slim").
+# The `nyx-image-builder verify` command refuses to
+# run if this is not pinnable to a digest.
+# - toolchain string Human-readable interpreter / compiler version.
+# - packages table Inline pinned package names → versions (apt /
+# apk pins applied during image build). Empty `{}`
+# when the upstream image already covers everything.
+# - digest string `sha256:…` content digest written back by
+# `nyx-image-builder build`. Empty until the
+# first successful build.
+#
+# The CI workflow runs `nyx-image-builder build --all` daily. When any digest
+# drifts, the workflow opens a PR updating this file; reviewers approve before
+# the new digest pin is merged.
+
+[[image]]
+toolchain_id = "python-3.11"
+base = "python:3.11-slim"
+toolchain = "Python 3.11"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "python-3.12"
+base = "python:3.12-slim"
+toolchain = "Python 3.12"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "python-3.13"
+base = "python:3.13-slim"
+toolchain = "Python 3.13"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "node-18"
+base = "node:18-slim"
+toolchain = "Node.js 18"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "node-20"
+base = "node:20-slim"
+toolchain = "Node.js 20"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "node-22"
+base = "node:22-slim"
+toolchain = "Node.js 22"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "java-17"
+base = "eclipse-temurin:17-jre-jammy"
+toolchain = "Eclipse Temurin 17 JRE"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "java-21"
+base = "eclipse-temurin:21-jre-jammy"
+toolchain = "Eclipse Temurin 21 JRE"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "php-8.1"
+base = "php:8.1-cli"
+toolchain = "PHP 8.1 CLI"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "php-8.2"
+base = "php:8.2-cli"
+toolchain = "PHP 8.2 CLI"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "php-8.3"
+base = "php:8.3-cli"
+toolchain = "PHP 8.3 CLI"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "ruby-3.2"
+base = "ruby:3.2-slim"
+toolchain = "Ruby 3.2"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "ruby-3.3"
+base = "ruby:3.3-slim"
+toolchain = "Ruby 3.3"
+packages = {}
+digest = ""
+
+# Native runtime image: compiled Rust + Go binaries are copied into a
+# `debian:bookworm-slim` container. Kept here so the image-builder workflow
+# pins it alongside the per-lang interpreter images.
+[[image]]
+toolchain_id = "native-binary"
+base = "debian:bookworm-slim"
+toolchain = "Debian 12 slim (native binary runner)"
+packages = {}
+digest = ""
diff --git a/tools/image-builder/main.rs b/tools/image-builder/main.rs
new file mode 100644
index 00000000..0da5c198
--- /dev/null
+++ b/tools/image-builder/main.rs
@@ -0,0 +1,538 @@
+//! Phase 19 (Track E.3) — `nyx-image-builder`.
+//!
+//! Reads `tools/image-builder/images.toml`, drives `docker pull` / `docker
+//! inspect` for each entry, and writes the resolved `sha256:…` digest back
+//! into the same TOML file so the digest pin is reproducible from source.
+//!
+//! Subcommands:
+//!
+//! - `build [--all | <toolchain_id>…]` — pull each requested image, capture
+//! its `RepoDigests` digest, and rewrite `images.toml` in place when the
+//! digest differs from the recorded pin. The daily CI workflow runs
+//! `build --all` and opens a PR with the changes when any entry drifts.
+//! - `verify` — assert that every entry in `images.toml` has a non-empty
+//! `digest` field and that the digest matches the locally-pulled image.
+//! Exit code 0 on success, 1 on any mismatch.
+//! - `list` — print every entry with its current `(base, digest)` pair to
+//! stdout, one entry per line, for human inspection.
+//!
+//! Usage:
+//!
+//! ```text
+//! cargo run -F image-builder --bin nyx-image-builder -- list
+//! cargo run -F image-builder --bin nyx-image-builder -- build --all
+//! cargo run -F image-builder --bin nyx-image-builder -- build python-3.11 node-20
+//! cargo run -F image-builder --bin nyx-image-builder -- verify
+//! ```
+//!
+//! The tool is host-side only; nothing in the Nyx scanner build depends on
+//! it at runtime. The codegen in `build.rs` reads `images.toml` directly,
+//! so updating digests is a two-step "run nyx-image-builder build → cargo
+//! build" cycle.
+
+use std::env;
+use std::path::{Path, PathBuf};
+use std::process::{Command, ExitCode, Stdio};
+
+/// Location of the image catalogue, written relative to the workspace root.
+/// `catalogue_path` resolves it against the current directory and its ancestors.
+const IMAGES_TOML: &str = "tools/image-builder/images.toml";
+
+/// Entry point: dispatch the first command-line argument to its subcommand.
+///
+/// Exit codes: `2` when the subcommand is missing or unknown, success for
+/// the help forms, otherwise whatever the selected subcommand returns.
+fn main() -> ExitCode {
+    // Skip argv[0]; the first remaining argument names the subcommand.
+    let args: Vec<String> = env::args().skip(1).collect();
+    let Some(subcommand) = args.first() else {
+        eprintln!("nyx-image-builder: missing subcommand");
+        print_usage();
+        return ExitCode::from(2);
+    };
+
+    let toml_path = catalogue_path();
+
+    match subcommand.as_str() {
+        "list" => cmd_list(&toml_path),
+        // Everything after the subcommand belongs to `build`.
+        "build" => cmd_build(&toml_path, &args[1..]),
+        "verify" => cmd_verify(&toml_path),
+        "-h" | "--help" | "help" => {
+            print_usage();
+            ExitCode::SUCCESS
+        }
+        other => {
+            eprintln!("nyx-image-builder: unknown subcommand `{other}`");
+            print_usage();
+            ExitCode::from(2)
+        }
+    }
+}
+
+/// Print the command-line synopsis and a short description to stderr.
+fn print_usage() {
+    // The trailing `\` joins the literal's lines; only the explicit `\n`
+    // escapes produce line breaks in the output.
+    eprintln!(
+        "usage: nyx-image-builder <list | build [--all | <toolchain_id>…] | verify>\n\n\
+         Reads `{IMAGES_TOML}` and pins per-toolchain Docker images by sha256\n\
+         digest. Run `build --all` on a host that can reach docker daemon to\n\
+         refresh the digests; commit the resulting diff."
+    );
+}
+
+/// Locate the image catalogue, starting from the current working directory.
+///
+/// `IMAGES_TOML` is tried as written first, i.e. relative to the current
+/// directory. If no file exists there, the current directory and then each
+/// of its ancestors is probed for `<dir>/IMAGES_TOML`, and the first
+/// candidate that exists wins. This lets the tool run from a nested
+/// directory (e.g. a CI step that does `cd tools/`).
+///
+/// When nothing is found, or the current directory cannot be determined,
+/// the plain relative path is returned so the caller's read reports the
+/// missing file.
+fn catalogue_path() -> PathBuf {
+    let relative = PathBuf::from(IMAGES_TOML);
+    if relative.exists() {
+        return relative;
+    }
+
+    env::current_dir()
+        .ok()
+        .and_then(|cwd| {
+            // `ancestors` yields the directory itself first, then each parent.
+            cwd.ancestors()
+                .map(|dir| dir.join(IMAGES_TOML))
+                .find(|candidate| candidate.exists())
+        })
+        .unwrap_or(relative)
+}
+
+// ── Subcommands ──────────────────────────────────────────────────────────────
+
+/// `list` subcommand: print one line per catalogue entry to stdout.
+///
+/// Columns are `toolchain_id` (left-aligned, width 20), `base` (left-aligned,
+/// width 40) and the pinned digest. Entries without a digest show the
+/// `<unpinned>` placeholder instead of an empty third column.
+///
+/// Returns failure only when the catalogue cannot be read.
+fn cmd_list(toml_path: &Path) -> ExitCode {
+    let entries = match read_catalogue(toml_path) {
+        Ok(entries) => entries,
+        Err(e) => {
+            eprintln!("nyx-image-builder: cannot read {}: {e}", toml_path.display());
+            return ExitCode::FAILURE;
+        }
+    };
+
+    for entry in &entries {
+        // NOTE(review): the placeholder text was lost in extraction (the
+        // branch produced ""); `<unpinned>` mirrors the wording `verify` uses.
+        let digest = if entry.digest.is_empty() {
+            "<unpinned>"
+        } else {
+            entry.digest.as_str()
+        };
+        println!("{:<20} {:<40} {}", entry.toolchain_id, entry.base, digest);
+    }
+    ExitCode::SUCCESS
+}
+
+/// `build` subcommand: pull the requested images and pin their digests.
+///
+/// `args` holds everything after `build`: either `--all` (anywhere in the
+/// list, selecting every catalogue entry) or one or more toolchain IDs.
+///
+/// For each selected entry the base image is pulled and its digest resolved.
+/// All resolved digests are then written back into `toml_path` in a single
+/// rewrite; the file is only touched when its text actually changes.
+///
+/// Exit codes: `2` when no target was given; failure when the catalogue
+/// cannot be read or written, a toolchain ID is unknown, or any pull or
+/// digest lookup failed (digests that did resolve are still written);
+/// success otherwise.
+fn cmd_build(toml_path: &Path, args: &[String]) -> ExitCode {
+    let entries = match read_catalogue(toml_path) {
+        Ok(entries) => entries,
+        Err(e) => {
+            eprintln!("nyx-image-builder: cannot read {}: {e}", toml_path.display());
+            return ExitCode::FAILURE;
+        }
+    };
+
+    let targets: Vec<&ImageEntry> = if args.iter().any(|a| a == "--all") {
+        entries.iter().collect()
+    } else if args.is_empty() {
+        eprintln!("nyx-image-builder build: expected --all or one or more toolchain IDs");
+        return ExitCode::from(2);
+    } else {
+        // `--all` cannot occur here (handled above), so every argument is
+        // treated as a toolchain ID.
+        let mut selected = Vec::with_capacity(args.len());
+        for id in args {
+            let Some(entry) = entries.iter().find(|e| &e.toolchain_id == id) else {
+                eprintln!("nyx-image-builder build: unknown toolchain_id `{id}`");
+                return ExitCode::FAILURE;
+            };
+            selected.push(entry);
+        }
+        selected
+    };
+
+    let mut updates: Vec<(String, String)> = Vec::new();
+    // Entries whose freshly resolved digest differs from the recorded pin.
+    let mut drifted = 0usize;
+    let mut failures = 0usize;
+
+    for entry in &targets {
+        eprintln!("==> pulling {} ({})", entry.toolchain_id, entry.base);
+        if !docker_pull(&entry.base) {
+            eprintln!(" pull failed for {}", entry.base);
+            failures += 1;
+            continue;
+        }
+        match resolve_image_digest(&entry.base) {
+            Some(digest) => {
+                eprintln!(" {} → {}", entry.base, digest);
+                if digest != entry.digest {
+                    drifted += 1;
+                }
+                updates.push((entry.toolchain_id.clone(), digest));
+            }
+            None => {
+                eprintln!(" docker inspect produced no digest for {}", entry.base);
+                failures += 1;
+            }
+        }
+    }
+
+    if !updates.is_empty() {
+        // Re-read the raw text: the rewrite works on the original bytes so
+        // comments and layout survive, which the parsed entries cannot give.
+        let original = match std::fs::read_to_string(toml_path) {
+            Ok(text) => text,
+            Err(e) => {
+                eprintln!("nyx-image-builder build: cannot read {}: {e}", toml_path.display());
+                return ExitCode::FAILURE;
+            }
+        };
+        let updated = rewrite_digests(&original, &updates);
+        if updated != original {
+            if let Err(e) = std::fs::write(toml_path, updated) {
+                eprintln!(
+                    "nyx-image-builder build: cannot write {}: {e}",
+                    toml_path.display()
+                );
+                return ExitCode::FAILURE;
+            }
+            // Report how many pins changed, not how many images resolved.
+            eprintln!("==> updated {} ({} entries)", toml_path.display(), drifted);
+        } else {
+            eprintln!("==> {} unchanged (digests already pinned)", toml_path.display());
+        }
+    }
+
+    if failures > 0 {
+        ExitCode::FAILURE
+    } else {
+        ExitCode::SUCCESS
+    }
+}
+
+/// `verify` subcommand: check every catalogue entry against the local image.
+///
+/// Each entry produces one status line on stderr:
+/// - `MISS` — the entry has no digest recorded, or `docker inspect` returned
+///   no digest for its base image;
+/// - `DIFF` — the recorded digest differs from the locally resolved one;
+/// - `OK` — recorded and local digests agree.
+///
+/// Returns success only when every entry is `OK`; otherwise a summary line
+/// is printed and failure is returned. Failure is also returned when the
+/// catalogue cannot be read.
+fn cmd_verify(toml_path: &Path) -> ExitCode {
+    let entries = match read_catalogue(toml_path) {
+        Ok(list) => list,
+        Err(e) => {
+            eprintln!("nyx-image-builder: cannot read {}: {e}", toml_path.display());
+            return ExitCode::FAILURE;
+        }
+    };
+
+    let mut failures = 0usize;
+    let mut unpinned = 0usize;
+
+    for item in &entries {
+        // Unpinned entries are counted separately and never inspected.
+        if item.digest.is_empty() {
+            eprintln!("MISS {}: digest unpinned in {}", item.toolchain_id, IMAGES_TOML);
+            unpinned += 1;
+            continue;
+        }
+
+        let Some(local) = resolve_image_digest(&item.base) else {
+            eprintln!(
+                "MISS {}: docker inspect returned no digest (image not pulled?)",
+                item.toolchain_id
+            );
+            failures += 1;
+            continue;
+        };
+
+        if local == item.digest {
+            eprintln!("OK {}: {}", item.toolchain_id, item.digest);
+        } else {
+            eprintln!(
+                "DIFF {}: pinned={} local={}",
+                item.toolchain_id, item.digest, local,
+            );
+            failures += 1;
+        }
+    }
+
+    if failures > 0 || unpinned > 0 {
+        eprintln!(
+            "nyx-image-builder verify: {failures} mismatch(es), {unpinned} unpinned entry(ies)",
+        );
+        return ExitCode::FAILURE;
+    }
+    ExitCode::SUCCESS
+}
+
+// ── Docker shellouts ─────────────────────────────────────────────────────────
+
+/// Name or path of the docker executable to run.
+///
+/// Taken from the `NYX_DOCKER_BIN` environment variable when it is set and
+/// readable as Unicode; otherwise plain `docker`.
+fn docker_bin() -> String {
+    match env::var("NYX_DOCKER_BIN") {
+        Ok(custom) => custom,
+        Err(_) => String::from("docker"),
+    }
+}
+
+/// Run `docker pull <image>`, streaming docker's own stdout and stderr.
+///
+/// Returns `true` only when the process was spawned and exited successfully.
+fn docker_pull(image: &str) -> bool {
+    let mut pull = Command::new(docker_bin());
+    pull.arg("pull")
+        .arg(image)
+        .stdout(Stdio::inherit())
+        .stderr(Stdio::inherit());
+
+    match pull.status() {
+        Ok(status) => status.success(),
+        // The executable could not be started at all.
+        Err(_) => false,
+    }
+}
+
+/// Resolve the content digest of a locally-pulled image.
+///
+/// The first `RepoDigests` entry (`name@sha256:…`) is preferred because its
+/// digest tail is the form `docker pull <name>@sha256:…` accepts; only the
+/// part after the last `@` is returned, since the caller stores it
+/// alongside `base`.
+///
+/// When no usable repo digest is available (e.g. a locally built image that
+/// was never pushed or pulled), the image's `.Id` is returned instead.
+/// NOTE(review): `.Id` is the local image ID and is not guaranteed to be a
+/// digest a registry will serve — confirm callers are happy to pin it.
+///
+/// Returns `None` when docker cannot be run or neither lookup yields output.
+fn resolve_image_digest(image: &str) -> Option<String> {
+    docker_inspect(image, "{{index .RepoDigests 0}}")
+        // Go templates print `<no value>` for absent data; never pin that.
+        .filter(|line| line != "<no value>")
+        .and_then(|line| line.rsplit_once('@').map(|(_, digest)| digest.to_owned()))
+        .filter(|digest| !digest.is_empty())
+        .or_else(|| docker_inspect(image, "{{.Id}}"))
+}
+
+/// Run `docker inspect --format=<template> <image>` and return its trimmed
+/// stdout, or `None` when the command cannot be spawned, exits non-zero, or
+/// prints nothing (or non-UTF-8 output).
+fn docker_inspect(image: &str, template: &str) -> Option<String> {
+    let output = Command::new(docker_bin())
+        .arg("inspect")
+        .arg(format!("--format={template}"))
+        .arg(image)
+        .output()
+        .ok()?;
+    if !output.status.success() {
+        return None;
+    }
+    let text = std::str::from_utf8(&output.stdout).ok()?.trim();
+    (!text.is_empty()).then(|| text.to_owned())
+}
+
+// ── images.toml parser + rewriter ────────────────────────────────────────────
+
+/// One `[[image]]` table from `images.toml`, reduced to the three keys this
+/// tool reads. Other keys in the table are ignored by the parser.
+#[derive(Debug, Default, Clone)]
+struct ImageEntry {
+ /// Value of the `toolchain_id` key; tables without one are dropped.
+ toolchain_id: String,
+ /// Value of the `base` key: the image reference handed to docker.
+ base: String,
+ /// Value of the `digest` key; empty while the entry is unpinned.
+ digest: String,
+}
+
+/// Read the catalogue file at `path` and parse its `[[image]]` entries.
+///
+/// # Errors
+///
+/// Returns the I/O error when the file cannot be read as UTF-8 text.
+/// Parsing itself never fails; malformed lines are skipped.
+fn read_catalogue(path: &Path) -> std::io::Result<Vec<ImageEntry>> {
+    std::fs::read_to_string(path).map(|text| parse_catalogue(&text))
+}
+
+/// Extract every `[[image]]` table from catalogue text.
+///
+/// This is a deliberately small line-oriented reader, not a TOML parser:
+/// - comments are stripped and blank lines skipped;
+/// - a `[[image]]` header opens a new entry, any other table header closes
+///   the current one;
+/// - inside an entry only `toolchain_id`, `base` and `digest` are read, with
+///   surrounding quotes removed; every other key is ignored;
+/// - entries that never set `toolchain_id` are dropped.
+fn parse_catalogue(src: &str) -> Vec<ImageEntry> {
+    let mut entries: Vec<ImageEntry> = Vec::new();
+    let mut current: Option<ImageEntry> = None;
+
+    for raw in src.lines() {
+        let line = strip_comment(raw).trim();
+        if line.is_empty() {
+            continue;
+        }
+
+        if line.starts_with('[') {
+            // Any table header ends the open entry; keep it only if it
+            // named a toolchain.
+            entries.extend(current.take().filter(|e| !e.toolchain_id.is_empty()));
+            if line == "[[image]]" {
+                current = Some(ImageEntry::default());
+            }
+            continue;
+        }
+
+        // Key/value lines outside an `[[image]]` table are not ours.
+        let Some(slot) = current.as_mut() else { continue };
+        let Some((key, value)) = line.split_once('=') else { continue };
+        let value = value.trim().trim_matches('"').trim_matches('\'');
+        match key.trim() {
+            "toolchain_id" => slot.toolchain_id = value.to_owned(),
+            "base" => slot.base = value.to_owned(),
+            "digest" => slot.digest = value.to_owned(),
+            _ => {}
+        }
+    }
+
+    // The last table has no following header to close it.
+    entries.extend(current.take().filter(|e| !e.toolchain_id.is_empty()));
+    entries
+}
+
+/// Return `line` without its trailing `#` comment.
+///
+/// A `#` only starts a comment when it sits outside double quotes; quote
+/// state simply flips on every `"` byte. Text before the comment is
+/// returned untouched, including any trailing whitespace.
+fn strip_comment(line: &str) -> &str {
+    let mut quoted = false;
+    let cut = line.bytes().position(|byte| {
+        if byte == b'"' {
+            quoted = !quoted;
+        }
+        byte == b'#' && !quoted
+    });
+
+    match cut {
+        Some(end) => &line[..end],
+        None => line,
+    }
+}
+
+/// Rewrite the `digest = "…"` value for each `(toolchain_id, new_digest)` in
+/// `updates`, leaving every other byte of the original TOML untouched.
+///
+/// Algorithm: stream the original line by line, track which `[[image]]`
+/// block we are in by reading `toolchain_id`, and when we hit `digest = "…"`
+/// inside a block whose `toolchain_id` is in `updates`, splice the new
+/// digest between the existing quotes. Indentation, spacing around `=`,
+/// trailing comments and the line terminator (`\n` or `\r\n`) are kept
+/// exactly as written.
+///
+/// The `toolchain_id` line must precede the `digest` line within a block;
+/// a digest seen before the block's ID is left alone.
+fn rewrite_digests(src: &str, updates: &[(String, String)]) -> String {
+    let mut out = String::with_capacity(src.len());
+    let mut current_tid: Option<String> = None;
+    let mut in_image_block = false;
+
+    // `split_inclusive` hands back each line with its own terminator, so
+    // copying `raw` through reproduces CRLF files and a missing final
+    // newline without any fix-up afterwards.
+    for raw in src.split_inclusive('\n') {
+        let trimmed = raw.trim();
+
+        if trimmed.starts_with('[') {
+            // Table header: ignore a trailing comment when deciding whether
+            // it opens an image block, as the catalogue parser does.
+            let header = trimmed.split('#').next().unwrap_or(trimmed).trim_end();
+            in_image_block = header == "[[image]]";
+            current_tid = None;
+            out.push_str(raw);
+            continue;
+        }
+
+        if in_image_block {
+            if let Some(tid) = parse_toml_string_value(trimmed, "toolchain_id") {
+                current_tid = Some(tid);
+            } else if let Some(span) = toml_string_value_span(raw, "digest") {
+                let replacement = current_tid
+                    .as_ref()
+                    .and_then(|tid| updates.iter().find(|(id, _)| id == tid));
+                if let Some((_, new_digest)) = replacement {
+                    // Replace only the text between the quotes.
+                    out.push_str(&raw[..span.start]);
+                    out.push_str(new_digest);
+                    out.push_str(&raw[span.end..]);
+                    continue;
+                }
+            }
+        }
+
+        out.push_str(raw);
+    }
+
+    out
+}
+
+/// Byte range, within `line`, of the double-quoted string assigned to `key`.
+///
+/// Matches `key`, optional whitespace, `=`, optional whitespace, then an
+/// opening `"`; the range covers everything up to (not including) the next
+/// `"`. Leading whitespace before `key` is allowed. Returns `None` when the
+/// line has any other shape, including keys that merely start with `key`.
+fn toml_string_value_span(line: &str, key: &str) -> Option<std::ops::Range<usize>> {
+    let rest = line.trim_start().strip_prefix(key)?;
+    let rest = rest.trim_start().strip_prefix('=')?;
+    let value = rest.trim_start().strip_prefix('"')?;
+    // Each step above only removed a prefix, so `value` is a suffix of
+    // `line` and its offset follows from the two lengths.
+    let start = line.len() - value.len();
+    let len = value.find('"')?;
+    Some(start..start + len)
+}
+
+/// Value of the double-quoted string assigned to `key` on `line`, if the
+/// line has the form `key = "value"`. Anything after the closing quote
+/// (whitespace, a comment) is ignored.
+fn parse_toml_string_value(line: &str, key: &str) -> Option<String> {
+    toml_string_value_span(line, key).map(|span| line[span].to_owned())
+}
+
+/// Unit tests for the catalogue parser and the in-place digest rewriter.
+/// None of them touch docker or the filesystem.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Two tables in, two entries out, with only the three tracked keys kept.
+    #[test]
+    fn parse_catalogue_extracts_three_fields() {
+        let src = r#"
+[[image]]
+toolchain_id = "python-3.11"
+base = "python:3.11-slim"
+toolchain = "Python 3.11"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "node-20"
+base = "node:20-slim"
+toolchain = "Node.js 20"
+packages = {}
+digest = "sha256:cafebabe"
+"#;
+        let entries = parse_catalogue(src);
+        assert_eq!(entries.len(), 2);
+        assert_eq!(entries[0].toolchain_id, "python-3.11");
+        assert_eq!(entries[0].base, "python:3.11-slim");
+        assert_eq!(entries[0].digest, "");
+        assert_eq!(entries[1].toolchain_id, "node-20");
+        assert_eq!(entries[1].digest, "sha256:cafebabe");
+    }
+
+    /// Only the block named in `updates` receives the new digest.
+    #[test]
+    fn rewrite_digests_replaces_only_named_entries() {
+        let src = r#"[[image]]
+toolchain_id = "python-3.11"
+base = "python:3.11-slim"
+digest = ""
+
+[[image]]
+toolchain_id = "node-20"
+base = "node:20-slim"
+digest = ""
+"#;
+        let updates = vec![("node-20".to_owned(), "sha256:deadbeef".to_owned())];
+        let out = rewrite_digests(src, &updates);
+        assert!(out.contains("digest = \"sha256:deadbeef\""));
+        // python-3.11 must remain unpinned.
+        let python_block = out
+            .split("[[image]]")
+            .find(|b| b.contains("python-3.11"))
+            .unwrap();
+        assert!(python_block.contains("digest = \"\""));
+    }
+
+    /// Leading whitespace on the digest line and unrelated comment lines
+    /// survive the rewrite.
+    #[test]
+    fn rewrite_digests_preserves_indentation_and_comments() {
+        let src = "# header\n[[image]]\n toolchain_id = \"go\"\n digest = \"\"\n";
+        let updates = vec![("go".to_owned(), "sha256:1234".to_owned())];
+        let out = rewrite_digests(src, &updates);
+        assert!(out.contains(" digest = \"sha256:1234\""));
+        assert!(out.starts_with("# header\n"));
+    }
+
+    /// With nothing to update the text comes back byte-for-byte.
+    #[test]
+    fn rewrite_digests_no_op_when_no_targets() {
+        let src = "[[image]]\ntoolchain_id = \"x\"\ndigest = \"sha256:keep\"\n";
+        let out = rewrite_digests(src, &[]);
+        assert_eq!(out, src);
+    }
+
+    /// The value ends at the closing quote; whatever follows is ignored.
+    #[test]
+    fn parse_toml_string_value_handles_trailing_garbage() {
+        assert_eq!(
+            parse_toml_string_value("digest = \"sha256:abc\"", "digest"),
+            Some("sha256:abc".to_owned())
+        );
+        // Text after the closing quote must not leak into the value.
+        assert_eq!(
+            parse_toml_string_value("digest = \"sha256:abc\"  # pinned by CI", "digest"),
+            Some("sha256:abc".to_owned())
+        );
+        assert_eq!(parse_toml_string_value("other = \"x\"", "digest"), None);
+    }
+
+    /// A `#` inside double quotes is data, not the start of a comment.
+    #[test]
+    fn strip_comment_keeps_hash_inside_strings() {
+        assert_eq!(strip_comment("foo = \"a#b\" # tail"), "foo = \"a#b\" ");
+    }
+}