[pitboss] phase 19: Track E.3 — Docker backend + nyx-image-builder + pinned digests

2026-07-24 21:41:02 +02:00 · 2026-05-15 11:03:31 -05:00 · 2026-05-15 11:03:31 -05:00 · 7ca0c053f5
commit 7ca0c053f5
parent 6ca9bddedb
9 changed files with 1412 additions and 0 deletions
--- a/.github/workflows/image-builder.yml
+++ b/.github/workflows/image-builder.yml
@ -0,0 +1,68 @@
+name: image-builder
+
+# Phase 19 (Track E.3): daily drift PR.
+#
+# Runs `nyx-image-builder build --all` on a Linux runner that has docker
+# available, captures the rewritten `tools/image-builder/images.toml`, and
+# opens a PR when any pinned digest changed.  The PR is reviewed manually
+# before merge so a hostile upstream image cannot silently land in
+# `IMAGE_DIGESTS`.
+
+permissions:
+  contents: write
+  pull-requests: write
+
+on:
+  schedule:
+    # 04:23 UTC daily — off-peak for the major upstream registries so
+    # transient pull errors are rare.
+    - cron: "23 4 * * *"
+  workflow_dispatch:
+
+concurrency:
+  group: image-builder
+  cancel-in-progress: false
+
+jobs:
+  refresh-digests:
+    name: refresh image digests
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+
+      - uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          toolchain: stable
+          cache: true
+
+      - name: Verify docker is reachable
+        run: docker info
+
+      - name: Build pinned-digest catalogue
+        run: |
+          cargo run -F image-builder --bin nyx-image-builder -- build --all
+
+      - name: Verify catalogue against local pulls
+        run: |
+          cargo run -F image-builder --bin nyx-image-builder -- verify
+
+      # Publishes whatever the build step changed in the working tree to the
+      # `image-builder/refresh-digests` branch and raises a PR against
+      # `master` for manual review.
+      # NOTE(review): GitHub documents that events created with the default
+      # `GITHUB_TOKEN` do not start new workflow runs, so CI checks may not
+      # run on this PR unless a PAT / GitHub App token is used — confirm.
+      - name: Open PR on drift
+        uses: peter-evans/create-pull-request@v7
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          commit-message: "image-builder: refresh pinned digests"
+          title: "image-builder: refresh pinned digests"
+          body: |
+            Automated digest refresh by `nyx-image-builder build --all`.
+
+            The CI job pulled every base image in
+            `tools/image-builder/images.toml`, captured the resolved
+            `sha256:` digest, and wrote it back into the file.  Review
+            the diff before merging — a hostile upstream image would
+            show up here as an unexpected digest change.
+          branch: image-builder/refresh-digests
+          base: master
+          delete-branch: true
+          labels: |
+            image-builder
+            automation
--- a/Cargo.toml
+++ b/Cargo.toml
@ -50,6 +50,10 @@ docgen = []
 # sandbox, reports back whether the sink fires. Off by default until the
 # static side is honest on real corpora (see ROADMAP.md).
 dynamic = ["dep:tempfile"]
+# Phase 19 (Track E.3): the `nyx-image-builder` helper binary that builds
+# and pins per-toolchain Docker images.  Feature-gated so the default `nyx`
+# build does not carry TOML-rewriting logic that only CI operators need.
+image-builder = []

 [lib]
 name = "nyx_scanner"
@ -64,6 +68,11 @@ name = "nyx-docgen"
 path = "tools/docgen/main.rs"
 required-features = ["docgen"]

+[[bin]]
+name = "nyx-image-builder"
+path = "tools/image-builder/main.rs"
+required-features = ["image-builder"]
+
 [[bench]]
 name = "scan_bench"
 harness = false
--- a/build.rs
+++ b/build.rs
@ -9,6 +9,12 @@ fn main() {
    // the file (the include never actually compiles on non-Linux).
    emit_seccomp_policy();

+    // Phase 19 (Track E.3): emit the IMAGE_DIGESTS table from
+    // tools/image-builder/images.toml.  The runtime side (src/dynamic/
+    // toolchain.rs) `include!`s the generated file unconditionally so
+    // every host build has the same pinned-digest catalogue.
+    emit_image_digests();
+
    // Only relevant when the serve feature is active.
    if std::env::var("CARGO_FEATURE_SERVE").is_err() {
        return;
@ -283,3 +289,138 @@ fn store_allow(policy: &mut SeccompPolicy, section: Option<&str>, key: &str, val
 fn escape(s: &str) -> String {
    s.replace('\\', "\\\\").replace('"', "\\\"")
 }
+
+// ── Phase 19 (Track E.3) — image digest codegen ──────────────────────────────
+
+const IMAGE_CATALOGUE_PATH: &str = "tools/image-builder/images.toml";
+
+/// Parse `tools/image-builder/images.toml` and emit two tables to
+/// `$OUT_DIR/image_digests.rs`:
+///
+///   pub static IMAGE_DIGESTS: phf::Map<&'static str, &'static str> = …;
+///   pub static IMAGE_BASES:   phf::Map<&'static str, &'static str> = …;
+///
+/// `IMAGE_DIGESTS` keys are toolchain IDs (`python-3.11`, …) and values are
+/// `<base>@sha256:…` strings ready to hand to `docker pull`.  An empty digest
+/// in `images.toml` is treated as "not yet pinned" and the entry is omitted
+/// from `IMAGE_DIGESTS`; `IMAGE_BASES` always carries the unpinned reference
+/// so `docker.rs` can fall back to a tag pull when no digest is recorded.
+///
+/// When the catalogue cannot be read, both maps are emitted empty so the
+/// runtime `include!` still compiles.  A missing file is tolerated silently
+/// (fresh checkout); any other read failure additionally prints a
+/// `cargo:warning` so an unreadable catalogue does not quietly ship a build
+/// with no pinned digests.
+///
+/// # Panics
+///
+/// Panics if `OUT_DIR` is unset or the generated file cannot be written.
+fn emit_image_digests() {
+    println!("cargo:rerun-if-changed={}", IMAGE_CATALOGUE_PATH);
+
+    let out_dir = std::env::var("OUT_DIR").expect("OUT_DIR must be set by cargo");
+    let out_path = Path::new(&out_dir).join("image_digests.rs");
+
+    let toml_text = match std::fs::read_to_string(IMAGE_CATALOGUE_PATH) {
+        Ok(text) => text,
+        Err(err) => {
+            // Only "file not found" is an expected condition; surface
+            // everything else (permissions, invalid UTF-8, …) to the operator.
+            if err.kind() != std::io::ErrorKind::NotFound {
+                println!(
+                    "cargo:warning=could not read {}: {}; emitting empty image digest tables",
+                    IMAGE_CATALOGUE_PATH, err,
+                );
+            }
+            // Emit empty maps so the runtime include still compiles.
+            std::fs::write(
+                &out_path,
+                "/// generated empty IMAGE_DIGESTS — images.toml missing\n\
+                 pub static IMAGE_DIGESTS: phf::Map<&'static str, &'static str> = \
+                 phf::phf_map! {};\n\
+                 pub static IMAGE_BASES: phf::Map<&'static str, &'static str> = \
+                 phf::phf_map! {};\n",
+            )
+            .expect("write empty image digests stub");
+            return;
+        }
+    };
+
+    let entries = parse_image_catalogue(&toml_text);
+
+    let mut out = String::new();
+    out.push_str("// generated by build.rs from tools/image-builder/images.toml — do not edit\n\n");
+
+    // IMAGE_DIGESTS: only entries with a non-empty digest survive.
+    push_phf_map(
+        &mut out,
+        "IMAGE_DIGESTS",
+        entries
+            .iter()
+            .filter(|e| !e.digest.is_empty())
+            .map(|e| (e.toolchain_id.as_str(), format!("{}@{}", e.base, e.digest))),
+    );
+    out.push('\n');
+
+    // IMAGE_BASES: every entry, digest stripped.  Used by docker.rs when no
+    // digest is pinned yet so a `docker pull <base>` is still possible.
+    push_phf_map(
+        &mut out,
+        "IMAGE_BASES",
+        entries
+            .iter()
+            .map(|e| (e.toolchain_id.as_str(), e.base.clone())),
+    );
+
+    std::fs::write(&out_path, out).expect("write image_digests.rs");
+}
+
+/// Append one `pub static <name>: phf::Map<…> = phf::phf_map! { … };` item
+/// to `out`, escaping every key and value for use inside a Rust string
+/// literal.
+fn push_phf_map<'a>(
+    out: &mut String,
+    name: &str,
+    pairs: impl Iterator<Item = (&'a str, String)>,
+) {
+    out.push_str(&format!(
+        "pub static {}: phf::Map<&'static str, &'static str> = phf::phf_map! {{\n",
+        name,
+    ));
+    for (key, value) in pairs {
+        out.push_str(&format!(
+            "    \"{}\" => \"{}\",\n",
+            escape(key),
+            escape(&value),
+        ));
+    }
+    out.push_str("};\n");
+}
+
+/// One `[[image]]` table from `images.toml`, reduced to the fields the
+/// codegen consumes.  All fields default to the empty string.
+#[derive(Default)]
+struct ImageEntry {
+    /// Lookup key emitted into both generated maps.
+    toolchain_id: String,
+    /// Image reference without a digest (the `base` key).
+    base: String,
+    /// Content digest (the `digest` key); empty means "not yet pinned".
+    digest: String,
+}
+
+/// Minimal line-oriented reader for the `[[image]]` tables in `images.toml`.
+///
+/// Collects `toolchain_id`, `base` and `digest` from each `[[image]]` table
+/// and ignores every other key.  Any other section header closes the table
+/// being accumulated, and tables without a `toolchain_id` are dropped.
+fn parse_image_catalogue(src: &str) -> Vec<ImageEntry> {
+    // Move a finished table into `done`, discarding it when it has no id.
+    fn commit(pending: Option<ImageEntry>, done: &mut Vec<ImageEntry>) {
+        if let Some(entry) = pending {
+            if !entry.toolchain_id.is_empty() {
+                done.push(entry);
+            }
+        }
+    }
+
+    let mut done: Vec<ImageEntry> = Vec::new();
+    let mut pending: Option<ImageEntry> = None;
+
+    for raw in src.lines() {
+        let text = strip_comment(raw).trim();
+        if text.is_empty() {
+            continue;
+        }
+
+        // Every header (`[…]` or `[[…]]`) ends the current table; only
+        // `[[image]]` opens a new one.
+        if text.starts_with('[') {
+            commit(pending.take(), &mut done);
+            if text == "[[image]]" {
+                pending = Some(ImageEntry::default());
+            }
+            continue;
+        }
+
+        // Key/value lines outside an `[[image]]` table, or lines without
+        // `=`, carry nothing we need.
+        let (Some(entry), Some((key, value))) = (pending.as_mut(), text.split_once('=')) else {
+            continue;
+        };
+        let value = value.trim().trim_matches('"').trim_matches('\'');
+        match key.trim() {
+            "toolchain_id" => entry.toolchain_id = value.to_owned(),
+            "base" => entry.base = value.to_owned(),
+            "digest" => entry.digest = value.to_owned(),
+            _ => {}
+        }
+    }
+
+    // The last table has no following header to close it.
+    commit(pending.take(), &mut done);
+    done
+}
--- a/src/dynamic/sandbox/docker.rs
+++ b/src/dynamic/sandbox/docker.rs
@ -0,0 +1,261 @@
+//! Phase 19 (Track E.3) — Docker backend helpers.
+//!
+//! This module is the thin layer between the pinned-digest catalogue
+//! (`tools/image-builder/images.toml` → `src/dynamic/toolchain.rs::IMAGE_DIGESTS`)
+//! and the existing docker invocations in [`super::run_docker`] /
+//! [`super::run_native_binary_docker`].
+//!
+//! Responsibilities:
+//!
+//! 1. Resolve a `toolchain_id` → pinned image reference (`<base>@sha256:…`),
+//!    falling back to the unpinned base tag when no digest is recorded yet.
+//! 2. Run `docker pull` on the resolved reference (at most once per process)
+//!    so every backend hop runs against the exact bytes the catalogue pinned.
+//! 3. Render the docker CLI arg slice that:
+//!    - mounts the harness workdir read-write at the fixed `/work` path,
+//!    - mounts each `StubHarness` filesystem root at a fixed `/nyx/stubs/<n>`
+//!      path so harness-side shims can find them without hard-coding host
+//!      tempdir layouts,
+//!    - honours the [`super::NetworkPolicy`] (none / OOB / stubs-only / open)
+//!      using the same flag set as the legacy `start_container`.
+//!
+//! The reference-resolution and argument-rendering helpers never talk to
+//! docker — they return `Option<&'static str>` references and `Vec<String>`
+//! arg lists that the caller (`super::`) ships to the docker CLI.  That keeps
+//! them easy to unit-test on macOS / CI rows that do not have docker
+//! installed.  Only `ensure_image_pulled` shells out to the docker binary.
+
+use std::path::Path;
+use std::process::Command;
+use std::sync::OnceLock;
+
+use crate::dynamic::toolchain::{base_image_ref, pinned_image_ref};
+
+use super::{HostPort, NetworkPolicy};
+
+// ── Image references ────────────────────────────────────────────────────────
+
+/// Container-side mount point for the harness workdir.  Stable so per-language
+/// emitters can reference `/work/...` without threading the host tempdir path
+/// through every layer.
+pub const WORK_MOUNT_PATH: &str = "/work";
+
+/// Container-side mount point root for `StubHarness` filesystem stubs.
+/// Each stub is mounted at `STUB_MOUNT_ROOT/<n>` where `<n>` is its index in
+/// the harness's stub list.
+pub const STUB_MOUNT_ROOT: &str = "/nyx/stubs";
+
+/// Pick the docker image reference the backend should use for
+/// `toolchain_id`.
+///
+/// Lookup order:
+///
+/// 1. The pinned `<base>@sha256:…` reference from `IMAGE_DIGESTS`.
+/// 2. The bare base tag from `IMAGE_BASES` (e.g. `python:3.11-slim`) when
+///    the catalogue entry carries no digest yet.
+/// 3. `None` when the toolchain is absent from the catalogue, leaving the
+///    caller to apply its own fallback.
+pub fn image_reference_for_toolchain(toolchain_id: &str) -> Option<&'static str> {
+    pinned_image_ref(toolchain_id).or_else(|| base_image_ref(toolchain_id))
+}
+
+/// Reports whether the catalogue holds a pinned digest for `toolchain_id`,
+/// i.e. whether `image_reference_for_toolchain` would hand back a
+/// `<base>@sha256:…` reference instead of a bare tag.
+pub fn toolchain_is_pinned(toolchain_id: &str) -> bool {
+    let pinned = pinned_image_ref(toolchain_id);
+    pinned.is_some()
+}
+
+// ── Pull-by-digest ──────────────────────────────────────────────────────────
+
+/// Run `docker pull <image>` at most once per process and remember the
+/// outcome.
+///
+/// The first call for a given `image` shells out to docker; later calls are
+/// answered from an in-process cache.  Both successes and failures are
+/// cached, so a failed pull is not retried within the same process.
+///
+/// Returns `true` when the pull succeeded and `false` when it failed or the
+/// docker binary could not be started.
+pub fn ensure_image_pulled(image: &str) -> bool {
+    static CACHE: OnceLock<dashmap::DashMap<String, bool>> = OnceLock::new();
+    let cache = CACHE.get_or_init(dashmap::DashMap::new);
+
+    // Copy the flag out so no map guard is held while pulling / inserting.
+    let remembered = cache.get(image).map(|hit| *hit);
+    match remembered {
+        Some(outcome) => outcome,
+        None => {
+            let pulled = docker_pull(image);
+            cache.insert(image.to_owned(), pulled);
+            pulled
+        }
+    }
+}
+
+/// Invoke `<docker> pull <image>` with stdout and stderr discarded.
+///
+/// Returns `true` only when the process started and exited successfully.
+fn docker_pull(image: &str) -> bool {
+    let exit = Command::new(docker_bin())
+        .arg("pull")
+        .arg(image)
+        .stdout(std::process::Stdio::null())
+        .stderr(std::process::Stdio::null())
+        .status();
+    matches!(exit, Ok(code) if code.success())
+}
+
+/// Name of the docker executable: the `NYX_DOCKER_BIN` environment variable
+/// when it is set to valid Unicode, otherwise `docker`.
+fn docker_bin() -> String {
+    match std::env::var("NYX_DOCKER_BIN") {
+        Ok(custom) => custom,
+        Err(_) => String::from("docker"),
+    }
+}
+
+// ── Argument assembly ───────────────────────────────────────────────────────
+
+/// Build the `docker run` flags that bind-mount `workdir` read-write at
+/// [`WORK_MOUNT_PATH`].
+///
+/// The result is always the two-element list `["-v", "<host>:/work:rw"]`.
+/// The host path is rendered with `to_string_lossy`, and owned strings are
+/// returned so the caller can `extend` its own `Vec<String>` with them.
+pub fn workdir_mount_args(workdir: &Path) -> Vec<String> {
+    let bind = format!("{}:{}:rw", workdir.to_string_lossy(), WORK_MOUNT_PATH);
+    vec![String::from("-v"), bind]
+}
+
+/// Build the `docker run` flags that bind-mount every filesystem-stub root
+/// under [`STUB_MOUNT_ROOT`].
+///
+/// The root at position `i` of `stub_roots` becomes
+/// `-v <host>:<STUB_MOUNT_ROOT>/<i>:rw`, so the output holds two strings per
+/// root and is empty when there are no roots.  Host paths are rendered with
+/// `to_string_lossy`.
+pub fn stub_mount_args(stub_roots: &[std::path::PathBuf]) -> Vec<String> {
+    stub_roots
+        .iter()
+        .enumerate()
+        .flat_map(|(slot, root)| {
+            let bind = format!("{}:{}/{}:rw", root.to_string_lossy(), STUB_MOUNT_ROOT, slot);
+            [String::from("-v"), bind]
+        })
+        .collect()
+}
+
+/// Translate a [`NetworkPolicy`] into `docker run` networking flags.
+///
+/// - `None` → `--network none`.
+/// - `OobOutbound` → `--network bridge` plus the `host-gateway` alias.
+/// - `StubsOnly` → `--network bridge`, the `host-gateway` alias, then one
+///   `--add-host=<host>:host-gateway` per allowed entry, in order.
+/// - `Open` → `--network bridge` with no host aliases.
+pub fn network_args(policy: &NetworkPolicy) -> Vec<String> {
+    const GATEWAY_ALIAS: &str = "--add-host=host-gateway:host-gateway";
+
+    // Every policy except `None` runs on the bridge network.
+    let mode = match policy {
+        NetworkPolicy::None => "none",
+        NetworkPolicy::OobOutbound { .. }
+        | NetworkPolicy::StubsOnly { .. }
+        | NetworkPolicy::Open => "bridge",
+    };
+    let mut flags = vec![String::from("--network"), String::from(mode)];
+
+    match policy {
+        NetworkPolicy::None | NetworkPolicy::Open => {}
+        NetworkPolicy::OobOutbound { .. } => flags.push(String::from(GATEWAY_ALIAS)),
+        NetworkPolicy::StubsOnly { allow } => {
+            flags.push(String::from(GATEWAY_ALIAS));
+            flags.extend(allow.iter().map(add_host_arg));
+        }
+    }
+    flags
+}
+
+/// Render the `--add-host` flag that aliases `hp.host` to `host-gateway`.
+/// Only the host name is used; the port does not appear in the flag.
+fn add_host_arg(hp: &HostPort) -> String {
+    let alias = &hp.host;
+    format!("--add-host={alias}:host-gateway")
+}
+
+/// Unit tests for the pure helpers.  None of them needs a docker daemon, and
+/// the catalogue assertions hold whether or not digests have been pinned.
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::path::PathBuf;
+    use std::sync::Arc;
+
+    #[test]
+    fn workdir_mount_args_uses_fixed_path() {
+        let path = Path::new("/tmp/nyx-harness/abc");
+        let args = workdir_mount_args(path);
+        assert_eq!(args, vec!["-v", "/tmp/nyx-harness/abc:/work:rw"]);
+    }
+
+    #[test]
+    fn stub_mount_args_indexes_each_root() {
+        let roots = vec![PathBuf::from("/tmp/stub-a"), PathBuf::from("/tmp/stub-b")];
+        let args = stub_mount_args(&roots);
+        assert_eq!(
+            args,
+            vec![
+                "-v",
+                "/tmp/stub-a:/nyx/stubs/0:rw",
+                "-v",
+                "/tmp/stub-b:/nyx/stubs/1:rw",
+            ],
+        );
+    }
+
+    #[test]
+    fn stub_mount_args_empty_when_no_stubs() {
+        assert!(stub_mount_args(&[]).is_empty());
+    }
+
+    #[test]
+    fn network_args_none_picks_network_none() {
+        let args = network_args(&NetworkPolicy::None);
+        assert!(args.iter().any(|a| a == "none"));
+    }
+
+    #[test]
+    fn network_args_stubs_only_adds_host_aliases() {
+        let policy = NetworkPolicy::StubsOnly {
+            allow: vec![HostPort::new("sql", 5432), HostPort::new("redis", 6379)],
+        };
+        let args = network_args(&policy);
+        assert!(args.iter().any(|a| a == "--add-host=sql:host-gateway"));
+        assert!(args.iter().any(|a| a == "--add-host=redis:host-gateway"));
+    }
+
+    #[test]
+    fn network_args_open_drops_egress_filter() {
+        let args = network_args(&NetworkPolicy::Open);
+        // Open is bridge but no host-gateway alias.
+        assert!(args.iter().any(|a| a == "bridge"));
+        assert!(!args.iter().any(|a| a.starts_with("--add-host=")));
+    }
+
+    #[test]
+    fn network_args_oob_threads_host_gateway() {
+        let listener = Arc::new(
+            crate::dynamic::oob::OobListener::bind()
+                .expect("oob listener must bind on 127.0.0.1 in tests"),
+        );
+        let args = network_args(&NetworkPolicy::OobOutbound { listener });
+        assert!(args.iter().any(|a| a == "--add-host=host-gateway:host-gateway"));
+    }
+
+    #[test]
+    fn image_reference_for_toolchain_unknown_returns_none() {
+        assert_eq!(image_reference_for_toolchain("python-99.x"), None);
+    }
+
+    #[test]
+    fn image_reference_for_toolchain_known_returns_base_when_unpinned() {
+        // A known ID resolves to either the bare base tag (unpinned) or
+        // `<base>@sha256:…` (pinned); both contain the base image name.
+        let r = image_reference_for_toolchain("python-3.11");
+        assert!(r.is_some());
+        assert!(r.unwrap().contains("python"));
+    }
+
+    #[test]
+    fn toolchain_is_pinned_agrees_with_resolved_reference() {
+        // Deliberately state-agnostic: the daily digest-refresh PR flips
+        // entries from unpinned to pinned, and that PR must not break this
+        // test.  We only require the two helpers to agree with each other.
+        let r = image_reference_for_toolchain("python-3.11")
+            .expect("python-3.11 must be in the catalogue");
+        assert_eq!(
+            toolchain_is_pinned("python-3.11"),
+            r.contains("@sha256:"),
+            "pinned flag must match the shape of the resolved reference, got {r}",
+        );
+    }
+}
--- a/src/dynamic/sandbox/mod.rs
+++ b/src/dynamic/sandbox/mod.rs
@ -40,6 +40,17 @@ pub use process_linux::{HardeningLevel, HardeningOutcome};
 #[cfg(target_os = "macos")]
 pub mod process_macos;

+/// Phase 19 (Track E.3) — pinned-digest docker backend helpers.
+///
+/// The functions in this module resolve [`crate::dynamic::toolchain::
+/// IMAGE_DIGESTS`] entries to docker image refs, render `docker run`
+/// flag slices that honour [`NetworkPolicy`], and mount the harness
+/// workdir at the fixed `/work` path.  The legacy entry points in this
+/// file ([`run_docker`] / [`run_native_binary_docker`]) call into
+/// `docker::ensure_image_pulled` so every harness run uses the catalogue
+/// pin when one is available.
+pub mod docker;
+
 // ── Harness interpretation probe ──────────────────────────────────────────────

 /// Returns true when the harness is driven by an interpreter (Python, Node, …)
@ -725,6 +736,19 @@ fn start_container(
    image: &str,
    policy: &NetworkPolicy,
 ) -> Result<(), SandboxError> {
+    // Phase 19 (Track E.3): when `image` is a pinned reference produced by
+    // `docker::image_reference_for_toolchain`, make sure it is present on
+    // this host before `docker run` tries to start a container from it.
+    // `ensure_image_pulled` is a per-process cache, so the second harness
+    // against the same toolchain is free.
+    docker::ensure_image_pulled(image);
+
+    let workdir_mount = format!(
+        "{}:{}:rw",
+        workdir.to_string_lossy(),
+        docker::WORK_MOUNT_PATH,
+    );
+
    let mut run_args: Vec<String> = vec![
        "run".into(),
        "-d".into(),
@ -733,6 +757,13 @@ fn start_container(
        "--cap-drop=ALL".into(),
        "--security-opt".into(), "no-new-privileges:true".into(),
        "--tmpfs".into(), "/tmp:size=128m,exec".into(),
+        // Phase 19 (Track E.3): bind-mount the host workdir at the fixed
+        // `/work` path read-write.  Harness code emitted in Phase 12+ can
+        // reference `/work/...` without threading the host tempdir
+        // through every layer.  The `docker cp` path below is retained so
+        // older harness command lines (which still look at `/workdir`)
+        // keep working until they are migrated.
+        "-v".into(), workdir_mount,
    ];
    match policy {
        NetworkPolicy::None => {
@ -978,6 +1009,12 @@ fn exec_in_container(
 /// Dispatches by the basename of `command[0]` (e.g. `python3`, `node`, `java`,
 /// `php`). Falls back to `python:3-slim` for unrecognised interpreters.
 /// `NYX_TOOLCHAIN_ID` env var overrides the version portion of the image tag.
+///
+/// Phase 19 (Track E.3): when `NYX_TOOLCHAIN_ID` matches a pinned entry in
+/// `IMAGE_DIGESTS` we return the `<base>@sha256:…` reference directly so the
+/// container starts from byte-identical bits across hosts.  Unpinned entries
+/// fall through to the legacy tag mapping below so behaviour on a fresh
+/// catalogue stays unchanged.
 fn detect_image_for_harness(harness: &BuiltHarness) -> String {
    let cmd0 = harness.command.first().map(|s| s.as_str()).unwrap_or("python3");
    let base = std::path::Path::new(cmd0)
@ -986,6 +1023,12 @@ fn detect_image_for_harness(harness: &BuiltHarness) -> String {
        .unwrap_or(cmd0);

    if let Ok(tid) = std::env::var("NYX_TOOLCHAIN_ID") {
+        if let Some(pinned) = docker::image_reference_for_toolchain(&tid) {
+            // Catalogue entry takes priority over the legacy hard-coded tag
+            // map — pinned or unpinned, the value here came from
+            // tools/image-builder/images.toml.
+            return pinned.to_owned();
+        }
        return match base {
            "node" | "nodejs" => node_image_for_toolchain(&tid),
            "java" => java_image_for_toolchain(&tid),
--- a/src/dynamic/toolchain.rs
+++ b/src/dynamic/toolchain.rs
@ -7,6 +7,37 @@

 use std::path::Path;

+// Phase 19 (Track E.3): generated lookup tables for pinned Docker image
+// digests.  Populated by `build.rs` from `tools/image-builder/images.toml`.
+//
+// - `IMAGE_DIGESTS`: `toolchain_id → "<base>@sha256:…"`.  Used by the docker
+//   backend (`src/dynamic/sandbox/docker.rs`) to pull a pinned digest so the
+//   sandboxed runtime is byte-identical between hosts.
+// - `IMAGE_BASES`:   `toolchain_id → "<base-tag>"`.  Fallback for the docker
+//   backend when no digest is pinned yet (e.g. fresh `images.toml` entry).
+include!(concat!(env!("OUT_DIR"), "/image_digests.rs"));
+
+/// Look up the pinned reference (`<base>@sha256:…`) for `toolchain_id` in
+/// the generated `IMAGE_DIGESTS` table.
+///
+/// Returns `None` when the table has no entry for that ID.  The lookup is
+/// purely static: the table is produced at build time from `images.toml`,
+/// so nothing is fetched at runtime.
+pub fn pinned_image_ref(toolchain_id: &str) -> Option<&'static str> {
+    let hit = IMAGE_DIGESTS.get(toolchain_id);
+    hit.copied()
+}
+
+/// Look up the base image tag (no digest) for `toolchain_id` in the
+/// generated `IMAGE_BASES` table.
+///
+/// Returns `None` when the toolchain is not present in the catalogue.  The
+/// docker backend falls back to this value when [`pinned_image_ref`] yields
+/// `None`.
+pub fn base_image_ref(toolchain_id: &str) -> Option<&'static str> {
+    let hit = IMAGE_BASES.get(toolchain_id);
+    hit.copied()
+}
+
 /// Resolved toolchain information for a target directory.
 #[derive(Debug, Clone)]
 pub struct ToolchainResolution {
--- a/tests/sandbox_docker.rs
+++ b/tests/sandbox_docker.rs
@ -0,0 +1,196 @@
+//! Phase 19 (Track E.3) — Docker backend pinned-digest + mount tests.
+//!
+//! Exercises the `src/dynamic/sandbox/docker.rs` helpers end-to-end on the
+//! `linux-with-docker` CI matrix row.  Tests skip automatically when docker
+//! is not reachable so the `linux-without-docker` and `macos` rows pass
+//! without burning a docker pull.
+//!
+//! The acceptance literal for this phase is "`tests/sandbox_docker.rs` runs
+//! only on the `linux-with-docker` matrix row".  We honour that by checking
+//! `docker info` at the top of every test and short-circuiting when the
+//! daemon is unreachable.
+//!
+//! Run with:  `cargo nextest run --features dynamic --test sandbox_docker`
+
+#![cfg(feature = "dynamic")]
+
+use nyx_scanner::dynamic::harness::BuiltHarness;
+use nyx_scanner::dynamic::sandbox::docker::{
+    ensure_image_pulled, image_reference_for_toolchain, network_args, stub_mount_args,
+    toolchain_is_pinned, workdir_mount_args, STUB_MOUNT_ROOT, WORK_MOUNT_PATH,
+};
+use nyx_scanner::dynamic::sandbox::{
+    self, HostPort, NetworkPolicy, SandboxBackend, SandboxOptions,
+};
+use std::path::{Path, PathBuf};
+use std::time::Duration;
+
+// ── Helpers ──────────────────────────────────────────────────────────────────
+
+/// `true` when `docker info` can be started and exits successfully; its
+/// output is discarded.
+fn docker_available() -> bool {
+    let probe = std::process::Command::new("docker")
+        .arg("info")
+        .stdout(std::process::Stdio::null())
+        .stderr(std::process::Stdio::null())
+        .status();
+    matches!(probe, Ok(exit) if exit.success())
+}
+
+/// Write `body` to `<workdir>/harness.py` and return that path.
+/// Panics when the file cannot be written.
+fn write_harness_script(workdir: &Path, body: &str) -> PathBuf {
+    let script = workdir.join("harness.py");
+    std::fs::write(&script, body).expect("write harness script");
+    script
+}
+
+/// Build a `BuiltHarness` rooted at `workdir` that runs
+/// `python3 harness.py` with no extra environment and empty sources.
+fn harness(workdir: &Path) -> BuiltHarness {
+    let command = vec![String::from("python3").into(), String::from("harness.py").into()];
+    BuiltHarness {
+        workdir: workdir.to_path_buf(),
+        command,
+        env: vec![],
+        source: String::new(),
+        entry_source: String::new(),
+    }
+}
+
+/// Default sandbox options overridden for these tests: docker backend,
+/// networking disabled, 15-second timeout.
+fn docker_opts() -> SandboxOptions {
+    let timeout = Duration::from_secs(15);
+    SandboxOptions {
+        backend: SandboxBackend::Docker,
+        network_policy: NetworkPolicy::None,
+        timeout,
+        ..SandboxOptions::default()
+    }
+}
+
+// ── Pure helper coverage (always runs) ───────────────────────────────────────
+
+// The workdir flag pair is `-v <host>:<WORK_MOUNT_PATH>:rw`.
+#[test]
+fn workdir_mount_args_uses_fixed_work_path() {
+    let expected = vec![
+        String::from("-v"),
+        format!("/tmp/nyx-harness/run-abc:{WORK_MOUNT_PATH}:rw"),
+    ];
+    let rendered = workdir_mount_args(Path::new("/tmp/nyx-harness/run-abc"));
+    assert_eq!(rendered, expected);
+}
+
+// Each stub root is mounted at `<STUB_MOUNT_ROOT>/<index>`.
+#[test]
+fn stub_mount_args_uses_indexed_fixed_paths() {
+    let rendered = stub_mount_args(&[PathBuf::from("/tmp/a"), PathBuf::from("/tmp/b")]);
+    assert_eq!(rendered.len(), 4);
+    let first = format!("/tmp/a:{STUB_MOUNT_ROOT}/0:rw");
+    let second = format!("/tmp/b:{STUB_MOUNT_ROOT}/1:rw");
+    assert!(rendered.contains(&first));
+    assert!(rendered.contains(&second));
+}
+
+// Spot-check the flag rendering for the `None`, `StubsOnly` and `Open`
+// policies.
+#[test]
+fn network_args_translate_every_policy() {
+    let has = |flags: &[String], needle: &str| flags.iter().any(|f| f == needle);
+
+    let isolated = network_args(&NetworkPolicy::None);
+    assert!(has(&isolated, "none"));
+
+    let stubs_only = network_args(&NetworkPolicy::StubsOnly {
+        allow: vec![HostPort::new("sql", 5432)],
+    });
+    assert!(has(&stubs_only, "--add-host=sql:host-gateway"));
+
+    let unrestricted = network_args(&NetworkPolicy::Open);
+    assert!(has(&unrestricted, "bridge"));
+    assert!(!unrestricted.iter().any(|f| f.starts_with("--add-host=")));
+}
+
+// Catalogue IDs resolve to some reference; unknown IDs resolve to nothing.
+#[test]
+fn image_reference_resolves_known_toolchains() {
+    let resolve = image_reference_for_toolchain;
+    // Pinned or unpinned, a catalogue entry always yields a reference.
+    assert!(resolve("python-3.11").is_some());
+    assert!(resolve("node-20").is_some());
+    assert!(resolve("java-21").is_some());
+    // IDs outside the catalogue yield `None` so the legacy path still applies.
+    assert!(resolve("python-99.9").is_none());
+}
+
+// `toolchain_is_pinned` must agree with the shape of the resolved
+// reference, whichever state the catalogue is currently in.
+#[test]
+fn toolchain_pinning_state_is_observable() {
+    let pinned = toolchain_is_pinned("python-3.11");
+    let r = image_reference_for_toolchain("python-3.11").unwrap();
+    let carries_digest = r.contains("@sha256:");
+    match pinned {
+        true => assert!(carries_digest, "pinned ref must carry digest, got {r}"),
+        false => assert!(!carries_digest, "unpinned ref must not carry digest, got {r}"),
+    }
+}
+
+// ── Live-docker coverage (skips when docker is absent) ───────────────────────
+
+// With docker reachable, pulling the python-3.11 catalogue reference must
+// succeed.
+#[test]
+fn ensure_image_pulled_returns_true_for_python_slim() {
+    if !docker_available() {
+        eprintln!("docker unavailable — skipping");
+        return;
+    }
+    let Some(r) = image_reference_for_toolchain("python-3.11") else {
+        panic!("python-3.11 must be in the catalogue");
+    };
+    let pulled = ensure_image_pulled(r);
+    assert!(
+        pulled,
+        "ensure_image_pulled must succeed for `{r}` when docker is available",
+    );
+}
+
+// Round-trip a trivial harness through the docker backend with networking
+// disabled and check both its exit code and its stdout marker.
+#[test]
+fn harness_runs_under_docker_with_network_none() {
+    if !docker_available() {
+        eprintln!("docker unavailable — skipping");
+        return;
+    }
+    let tmp = tempfile::TempDir::new().expect("tempdir");
+    // The script only prints a marker, which is enough to prove the run
+    // reached the interpreter and its stdout was captured.
+    let script = "import sys; sys.stdout.write('NYX_DOCKER_OK\\n')\n";
+    write_harness_script(tmp.path(), script);
+
+    let outcome = sandbox::run(&harness(tmp.path()), b"", &docker_opts())
+        .expect("docker backend must run");
+    assert_eq!(outcome.exit_code, Some(0), "harness must exit cleanly");
+
+    let stdout = String::from_utf8_lossy(&outcome.stdout);
+    assert!(
+        stdout.contains("NYX_DOCKER_OK"),
+        "expected marker in stdout, got: {stdout}",
+    );
+}
+
+// The harness reads a fixture through `/work`, which only works when the
+// host workdir is bind-mounted at that fixed path inside the container.
+#[test]
+fn harness_workdir_is_mounted_at_fixed_work_path() {
+    if !docker_available() {
+        eprintln!("docker unavailable — skipping");
+        return;
+    }
+    let tmp = tempfile::TempDir::new().expect("tempdir");
+    let fixture = tmp.path().join("token.txt");
+    std::fs::write(fixture, "phase-19-mount-token\n").expect("write fixture");
+
+    // Opening `/work/token.txt` raises unless the bind mount exists; a copy
+    // under `/workdir` alone would not satisfy it.
+    let script = "open('/work/token.txt').read()\n\
+         import sys; sys.stdout.write('NYX_WORK_MOUNT_OK\\n')\n";
+    write_harness_script(tmp.path(), script);
+
+    let outcome = sandbox::run(&harness(tmp.path()), b"", &docker_opts())
+        .expect("docker backend must run");
+    let stdout = String::from_utf8_lossy(&outcome.stdout);
+    let stderr = String::from_utf8_lossy(&outcome.stderr);
+    assert_eq!(
+        outcome.exit_code,
+        Some(0),
+        "/work mount must be readable inside the container; stdout={stdout} stderr={stderr}",
+    );
+    assert!(
+        stdout.contains("NYX_WORK_MOUNT_OK"),
+        "expected /work mount marker; stdout={stdout}",
+    );
+}
--- a/tools/image-builder/images.toml
+++ b/tools/image-builder/images.toml
@ -0,0 +1,125 @@
+# Pinned-digest catalogue consumed by `nyx-image-builder` and the
+# `build.rs` codegen that populates `src/dynamic/toolchain.rs::IMAGE_DIGESTS`.
+#
+# Each `[[image]]` entry corresponds to one `(lang, toolchain)` cell of the
+# Docker backend.  The `toolchain_id` matches the IDs surfaced by
+# `src/dynamic/toolchain.rs` (`python-3.11`, `node-20`, `java-21`, …) and is
+# the lookup key used by `IMAGE_DIGESTS`.
+#
+# Fields:
+#   - toolchain_id   string  Lookup key (see toolchain.rs).
+#   - base           string  Docker image reference (e.g. "python:3.11-slim").
+#                            `nyx-image-builder verify` fails for an entry whose
+#                            `digest` is empty or differs from the digest of
+#                            the locally pulled image for this reference.
+#   - toolchain      string  Human-readable interpreter / compiler version.
+#   - packages       table   Inline pinned package names → versions (apt /
+#                            apk pins applied during image build).  Empty `{}`
+#                            when the upstream image already covers everything.
+#   - digest         string  `sha256:…` content digest written back by
+#                            `nyx-image-builder build`.  Empty until the
+#                            first successful build.
+#
+# The CI workflow runs `nyx-image-builder build --all` daily.  When any digest
+# drifts, the workflow opens a PR updating this file; reviewers approve before
+# the new digest pin is merged.
+
+[[image]]
+toolchain_id = "python-3.11"
+base = "python:3.11-slim"
+toolchain = "Python 3.11"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "python-3.12"
+base = "python:3.12-slim"
+toolchain = "Python 3.12"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "python-3.13"
+base = "python:3.13-slim"
+toolchain = "Python 3.13"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "node-18"
+base = "node:18-slim"
+toolchain = "Node.js 18"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "node-20"
+base = "node:20-slim"
+toolchain = "Node.js 20"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "node-22"
+base = "node:22-slim"
+toolchain = "Node.js 22"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "java-17"
+base = "eclipse-temurin:17-jre-jammy"
+toolchain = "Eclipse Temurin 17 JRE"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "java-21"
+base = "eclipse-temurin:21-jre-jammy"
+toolchain = "Eclipse Temurin 21 JRE"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "php-8.1"
+base = "php:8.1-cli"
+toolchain = "PHP 8.1 CLI"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "php-8.2"
+base = "php:8.2-cli"
+toolchain = "PHP 8.2 CLI"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "php-8.3"
+base = "php:8.3-cli"
+toolchain = "PHP 8.3 CLI"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "ruby-3.2"
+base = "ruby:3.2-slim"
+toolchain = "Ruby 3.2"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "ruby-3.3"
+base = "ruby:3.3-slim"
+toolchain = "Ruby 3.3"
+packages = {}
+digest = ""
+
+# Native runtime image: compiled Rust + Go binaries are copied into a
+# `debian:bookworm-slim` container.  Kept here so the image-builder workflow
+# pins it alongside the per-lang interpreter images.
+[[image]]
+toolchain_id = "native-binary"
+base = "debian:bookworm-slim"
+toolchain = "Debian 12 slim (native binary runner)"
+packages = {}
+digest = ""
--- a/tools/image-builder/main.rs
+++ b/tools/image-builder/main.rs
@ -0,0 +1,538 @@
+//! Phase 19 (Track E.3) — `nyx-image-builder`.
+//!
+//! Reads `tools/image-builder/images.toml`, drives `docker pull` / `docker
+//! inspect` for each entry, and writes the resolved `sha256:…` digest back
+//! into the same TOML file so the digest pin is reproducible from source.
+//!
+//! Subcommands:
+//!
+//! - `build [--all | <toolchain_id>…]` — pull each requested image, capture
+//!   its `RepoDigests` digest, and rewrite `images.toml` in place when the
+//!   digest differs from the recorded pin.  The daily CI workflow runs
+//!   `build --all` and opens a PR with the changes when any entry drifts.
+//! - `verify` — assert that every entry in `images.toml` has a non-empty
+//!   `digest` field and that the digest matches the locally-pulled image.
+//!   Exit code 0 on success, 1 on any mismatch.
+//! - `list` — print every entry with its current `(base, digest)` pair to
+//!   stdout, one entry per line, for human inspection.
+//!
+//! Usage:
+//!
+//! ```text
+//! cargo run -F image-builder --bin nyx-image-builder -- list
+//! cargo run -F image-builder --bin nyx-image-builder -- build --all
+//! cargo run -F image-builder --bin nyx-image-builder -- build python-3.11 node-20
+//! cargo run -F image-builder --bin nyx-image-builder -- verify
+//! ```
+//!
+//! The tool is host-side only; nothing in the Nyx scanner build depends on
+//! it at runtime.  The codegen in `build.rs` reads `images.toml` directly,
+//! so updating digests is a two-step "run nyx-image-builder build → cargo
+//! build" cycle.
+
+use std::env;
+use std::path::{Path, PathBuf};
+use std::process::{Command, ExitCode, Stdio};
+
+const IMAGES_TOML: &str = "tools/image-builder/images.toml";
+
+/// Entry point: dispatch the first CLI argument to its subcommand.
+///
+/// Returns exit code 2 for a missing or unknown subcommand; otherwise the
+/// exit code of the selected subcommand (`help` always succeeds).
+fn main() -> ExitCode {
+    let args: Vec<String> = env::args().skip(1).collect();
+    let Some((subcommand, rest)) = args.split_first() else {
+        eprintln!("nyx-image-builder: missing subcommand");
+        print_usage();
+        return ExitCode::from(2);
+    };
+
+    // Resolved once up front; every catalogue-reading subcommand shares it.
+    let toml_path = catalogue_path();
+
+    match subcommand.as_str() {
+        "list" => cmd_list(&toml_path),
+        "build" => cmd_build(&toml_path, rest),
+        "verify" => cmd_verify(&toml_path),
+        "-h" | "--help" | "help" => {
+            print_usage();
+            ExitCode::SUCCESS
+        }
+        other => {
+            eprintln!("nyx-image-builder: unknown subcommand `{other}`");
+            print_usage();
+            ExitCode::from(2)
+        }
+    }
+}
+
+/// Print the usage banner to stderr.
+fn print_usage() {
+    let banner = format!(
+        "usage: nyx-image-builder <list | build [--all|<id>…] | verify>\n\n\
+         Reads `{IMAGES_TOML}` and pins per-toolchain Docker images by sha256\n\
+         digest.  Run `build --all` on a host that can reach docker daemon to\n\
+         refresh the digests; commit the resulting diff."
+    );
+    eprintln!("{banner}");
+}
+
+/// Locate `images.toml`.
+///
+/// Lookup order:
+/// 1. `IMAGES_TOML` relative to the process CWD (returned as a relative
+///    path when it exists).
+/// 2. `IMAGES_TOML` joined onto the current directory and then onto each of
+///    its ancestors in turn, nearest first, so the tool also works when
+///    started from a nested directory.
+///
+/// When nothing is found the plain relative path is returned anyway, so the
+/// caller's read fails with an ordinary I/O error naming that path.
+fn catalogue_path() -> PathBuf {
+    let relative = PathBuf::from(IMAGES_TOML);
+    if relative.exists() {
+        return relative;
+    }
+
+    env::current_dir()
+        .ok()
+        .and_then(|cwd| {
+            cwd.ancestors()
+                .map(|dir| dir.join(IMAGES_TOML))
+                .find(|candidate| candidate.exists())
+        })
+        .unwrap_or(relative)
+}
+
+// ── Subcommands ──────────────────────────────────────────────────────────────
+
+/// `list` subcommand: print one `toolchain_id  base  digest` row per
+/// catalogue entry to stdout; entries without a digest show `<unpinned>`.
+///
+/// Fails only when the catalogue cannot be read.
+fn cmd_list(toml_path: &Path) -> ExitCode {
+    match read_catalogue(toml_path) {
+        Err(e) => {
+            eprintln!("nyx-image-builder: cannot read {}: {e}", toml_path.display());
+            ExitCode::FAILURE
+        }
+        Ok(entries) => {
+            for entry in entries.iter() {
+                let shown = match entry.digest.as_str() {
+                    "" => "<unpinned>",
+                    pinned => pinned,
+                };
+                println!("{:<20} {:<40} {}", entry.toolchain_id, entry.base, shown);
+            }
+            ExitCode::SUCCESS
+        }
+    }
+}
+
+/// `build` subcommand: pull the requested images, resolve their digests and
+/// write any new pins back into the catalogue file.
+///
+/// `args` is everything after `build`: `--all` (anywhere in the list)
+/// selects every catalogue entry, otherwise each argument must be a known
+/// `toolchain_id`.  An ID given more than once is processed once.
+///
+/// Exit codes: 2 when no target was given; failure when the catalogue
+/// cannot be read or written, when an ID is unknown, or when any pull /
+/// digest lookup failed (digests that did resolve are still written);
+/// success otherwise.
+fn cmd_build(toml_path: &Path, args: &[String]) -> ExitCode {
+    let entries = match read_catalogue(toml_path) {
+        Ok(v) => v,
+        Err(e) => {
+            eprintln!("nyx-image-builder: cannot read {}: {e}", toml_path.display());
+            return ExitCode::FAILURE;
+        }
+    };
+
+    let targets: Vec<&ImageEntry> = if args.iter().any(|a| a == "--all") {
+        entries.iter().collect()
+    } else if args.is_empty() {
+        eprintln!("nyx-image-builder build: expected --all or one or more toolchain IDs");
+        return ExitCode::from(2);
+    } else {
+        let mut out: Vec<&ImageEntry> = Vec::with_capacity(args.len());
+        for id in args {
+            // A repeated ID would only pull and inspect the same image again.
+            if out.iter().any(|e| &e.toolchain_id == id) {
+                continue;
+            }
+            match entries.iter().find(|e| &e.toolchain_id == id) {
+                Some(e) => out.push(e),
+                None => {
+                    eprintln!("nyx-image-builder build: unknown toolchain_id `{id}`");
+                    return ExitCode::FAILURE;
+                }
+            }
+        }
+        out
+    };
+
+    let mut updates: Vec<(String, String)> = Vec::new();
+    // Number of resolved digests that differ from the recorded pin; this is
+    // what the summary line reports, not the number of images inspected.
+    let mut changed = 0usize;
+    let mut failures = 0usize;
+
+    for entry in &targets {
+        eprintln!("==> pulling {} ({})", entry.toolchain_id, entry.base);
+        if !docker_pull(&entry.base) {
+            eprintln!("    pull failed for {}", entry.base);
+            failures += 1;
+            continue;
+        }
+        match resolve_image_digest(&entry.base) {
+            Some(digest) => {
+                eprintln!("    {} → {}", entry.base, digest);
+                if digest != entry.digest {
+                    changed += 1;
+                }
+                updates.push((entry.toolchain_id.clone(), digest));
+            }
+            None => {
+                eprintln!("    docker inspect produced no digest for {}", entry.base);
+                failures += 1;
+            }
+        }
+    }
+
+    if !updates.is_empty() {
+        // Re-read the raw text so the rewrite preserves comments and layout
+        // that the parsed entries do not carry.
+        let original = match std::fs::read_to_string(toml_path) {
+            Ok(s) => s,
+            Err(e) => {
+                eprintln!("nyx-image-builder build: cannot read {}: {e}", toml_path.display());
+                return ExitCode::FAILURE;
+            }
+        };
+        let updated = rewrite_digests(&original, &updates);
+        if updated != original {
+            if let Err(e) = std::fs::write(toml_path, updated) {
+                eprintln!(
+                    "nyx-image-builder build: cannot write {}: {e}",
+                    toml_path.display()
+                );
+                return ExitCode::FAILURE;
+            }
+            eprintln!("==> updated {} ({} entries)", toml_path.display(), changed);
+        } else {
+            eprintln!("==> {} unchanged (digests already pinned)", toml_path.display());
+        }
+    }
+
+    if failures > 0 {
+        ExitCode::FAILURE
+    } else {
+        ExitCode::SUCCESS
+    }
+}
+
+/// `verify` subcommand: check every catalogue entry against the local
+/// docker image store.
+///
+/// Each entry produces one stderr line:
+/// - `OK`   — the pinned digest equals the locally resolved digest;
+/// - `DIFF` — both exist but differ;
+/// - `MISS` — the entry has no pinned digest, or docker returned none.
+///
+/// This command does not pull; it only inspects what is already present
+/// locally.  Returns success only when every entry is `OK`.
+fn cmd_verify(toml_path: &Path) -> ExitCode {
+    let entries = match read_catalogue(toml_path) {
+        Ok(v) => v,
+        Err(e) => {
+            eprintln!("nyx-image-builder: cannot read {}: {e}", toml_path.display());
+            return ExitCode::FAILURE;
+        }
+    };
+
+    let mut failures = 0usize;
+    let mut unpinned = 0usize;
+
+    for entry in &entries {
+        if entry.digest.is_empty() {
+            // Name the file that was actually read: `catalogue_path` may
+            // have resolved it somewhere other than the relative default.
+            eprintln!(
+                "MISS {}: digest unpinned in {}",
+                entry.toolchain_id,
+                toml_path.display()
+            );
+            unpinned += 1;
+            continue;
+        }
+        match resolve_image_digest(&entry.base) {
+            Some(local) if local == entry.digest => {
+                eprintln!("OK   {}: {}", entry.toolchain_id, entry.digest);
+            }
+            Some(local) => {
+                eprintln!(
+                    "DIFF {}: pinned={} local={}",
+                    entry.toolchain_id, entry.digest, local,
+                );
+                failures += 1;
+            }
+            None => {
+                eprintln!(
+                    "MISS {}: docker inspect returned no digest (image not pulled?)",
+                    entry.toolchain_id
+                );
+                failures += 1;
+            }
+        }
+    }
+
+    if failures == 0 && unpinned == 0 {
+        ExitCode::SUCCESS
+    } else {
+        eprintln!(
+            "nyx-image-builder verify: {failures} mismatch(es), {unpinned} unpinned entry(ies)",
+        );
+        ExitCode::FAILURE
+    }
+}
+
+// ── Docker shellouts ─────────────────────────────────────────────────────────
+
+/// Name of the docker executable: the value of `NYX_DOCKER_BIN` when that
+/// variable can be read, `docker` otherwise.
+fn docker_bin() -> String {
+    match env::var("NYX_DOCKER_BIN") {
+        Ok(custom) => custom,
+        Err(_) => String::from("docker"),
+    }
+}
+
+/// Run `<docker> pull <image>` with stdout/stderr passed through to the
+/// terminal.  Returns `true` only when the process could be started and
+/// exited successfully.
+fn docker_pull(image: &str) -> bool {
+    let mut pull = Command::new(docker_bin());
+    pull.arg("pull")
+        .arg(image)
+        .stdout(Stdio::inherit())
+        .stderr(Stdio::inherit());
+    matches!(pull.status(), Ok(status) if status.success())
+}
+
+/// Look up a digest for an image that is already present locally.
+///
+/// Two `docker inspect` queries are tried in order:
+///
+/// 1. The first `RepoDigests` entry (`name@sha256:…`); only the part after
+///    the last `@` is returned.
+/// 2. The image's `.Id`, used when the first query fails or yields nothing
+///    usable.
+///
+/// Returns `None` when docker cannot be started, when the `.Id` query
+/// fails, or when it prints an empty line.
+fn resolve_image_digest(image: &str) -> Option<String> {
+    // Both queries differ only in the `--format` template.
+    let inspect = |format: &str| {
+        Command::new(docker_bin())
+            .args(["inspect", format, image])
+            .output()
+            .ok()
+    };
+
+    let by_repo = inspect("--format={{index .RepoDigests 0}}")?;
+    if by_repo.status.success() {
+        let text = std::str::from_utf8(&by_repo.stdout).unwrap_or("").trim();
+        if !text.is_empty() && text != "<no value>" {
+            // `name@sha256:…` → keep just the digest after the last `@`.
+            if let Some((_, digest)) = text.rsplit_once('@') {
+                if !digest.is_empty() {
+                    return Some(digest.to_owned());
+                }
+            }
+        }
+    }
+
+    // No usable repo digest: fall back to the image's `.Id`.
+    let by_id = inspect("--format={{.Id}}")?;
+    if !by_id.status.success() {
+        return None;
+    }
+    let text = std::str::from_utf8(&by_id.stdout).unwrap_or("").trim();
+    (!text.is_empty()).then(|| text.to_owned())
+}
+
+// ── images.toml parser + rewriter ────────────────────────────────────────────
+
+/// One `[[image]]` table of the catalogue, reduced to the three keys this
+/// tool reads; every other key in the table is ignored by the parser.
+#[derive(Debug, Default, Clone)]
+struct ImageEntry {
+    /// Value of the `toolchain_id` key.  `parse_catalogue` drops tables
+    /// where this is empty.
+    toolchain_id: String,
+    /// Value of the `base` key; passed to `docker pull` / `docker inspect`.
+    base: String,
+    /// Value of the `digest` key; an empty string means "not pinned yet".
+    digest: String,
+}
+
+/// Read the catalogue file at `path` and parse it into entries.
+///
+/// # Errors
+///
+/// Propagates the I/O error from reading the file; parsing itself never
+/// fails.
+fn read_catalogue(path: &Path) -> std::io::Result<Vec<ImageEntry>> {
+    std::fs::read_to_string(path).map(|text| parse_catalogue(&text))
+}
+
+/// Minimal line-oriented reader for the catalogue.
+///
+/// Recognises `[[image]]` headers and, inside such a table, the keys
+/// `toolchain_id`, `base` and `digest`; all other keys are skipped.  Any
+/// other table header closes the current entry.  Values are taken after the
+/// first `=` with surrounding whitespace and `"` / `'` quotes trimmed.
+/// Entries that end up without a `toolchain_id` are discarded.
+fn parse_catalogue(src: &str) -> Vec<ImageEntry> {
+    let mut parsed: Vec<ImageEntry> = Vec::new();
+    let mut open: Option<ImageEntry> = None;
+
+    for raw in src.lines() {
+        let text = strip_comment(raw).trim();
+        if text.is_empty() {
+            continue;
+        }
+
+        if text.starts_with('[') {
+            // Every header finishes the entry being built; only
+            // `[[image]]` starts a new one.
+            parsed.extend(open.take().filter(|done| !done.toolchain_id.is_empty()));
+            if text == "[[image]]" {
+                open = Some(ImageEntry::default());
+            }
+            continue;
+        }
+
+        // Key/value lines only matter while an `[[image]]` table is open.
+        let (Some(entry), Some((key, value))) = (open.as_mut(), text.split_once('=')) else {
+            continue;
+        };
+        let value = value.trim().trim_matches('"').trim_matches('\'');
+        match key.trim() {
+            "toolchain_id" => entry.toolchain_id = value.to_owned(),
+            "base" => entry.base = value.to_owned(),
+            "digest" => entry.digest = value.to_owned(),
+            _ => {}
+        }
+    }
+
+    // The last table has no following header to flush it.
+    parsed.extend(open.filter(|done| !done.toolchain_id.is_empty()));
+    parsed
+}
+
+/// Return `line` with a trailing `# comment` removed.
+///
+/// A `#` only starts a comment outside of strings.  Two string forms are
+/// tracked:
+/// - basic strings (`"…"`), where a backslash escapes the next byte so
+///   `\"` does not close the string;
+/// - literal strings (`'…'`), which have no escapes.
+///
+/// The returned slice keeps any whitespace that preceded the `#`.  A line
+/// with an unterminated string is returned unchanged from the opening quote
+/// onward.
+fn strip_comment(line: &str) -> &str {
+    // `Some(q)` while inside a string opened by quote byte `q`.
+    let mut quote: Option<u8> = None;
+    // Set for exactly one byte after a backslash inside a basic string.
+    let mut escaped = false;
+
+    for (i, b) in line.bytes().enumerate() {
+        match quote {
+            Some(b'"') if escaped => escaped = false,
+            Some(b'"') if b == b'\\' => escaped = true,
+            Some(q) if b == q => quote = None,
+            Some(_) => {}
+            None => match b {
+                b'"' | b'\'' => quote = Some(b),
+                // `#` is ASCII, so `i` is always a char boundary.
+                b'#' => return &line[..i],
+                _ => {}
+            },
+        }
+    }
+    line
+}
+
+/// Rewrite the `digest = "…"` line for each `(toolchain_id, new_digest)` in
+/// `updates`, leaving the rest of the original TOML untouched.
+///
+/// Algorithm: stream the original line by line, track which `[[image]]`
+/// block we are in by reading `toolchain_id`, and when we hit `digest = "…"`
+/// inside a block whose `toolchain_id` is in `updates`, replace the value.
+///
+/// What is preserved on a rewritten line: its indentation, anything after
+/// the closing quote (e.g. an inline `# comment`) and its line terminator
+/// (`\n` or `\r\n`).  The `key = "value"` part itself is re-emitted in the
+/// canonical `digest = "…"` spelling.  All other lines are copied verbatim,
+/// including a missing final newline.
+///
+/// Limitation: `toolchain_id` must appear before `digest` within a block,
+/// otherwise that block's digest is left as is.
+fn rewrite_digests(src: &str, updates: &[(String, String)]) -> String {
+    let mut out = String::with_capacity(src.len());
+    let mut current_tid: Option<String> = None;
+    let mut in_image_block = false;
+
+    // `split_inclusive` keeps each terminator attached to its line, so the
+    // original line endings survive the round trip.
+    for raw in src.split_inclusive('\n') {
+        let body = raw.strip_suffix('\n').unwrap_or(raw);
+        let body = body.strip_suffix('\r').unwrap_or(body);
+        let eol = &raw[body.len()..];
+        let trimmed = body.trim();
+
+        if trimmed.starts_with('[') {
+            // Any table header ends the previous block.  Ignore a trailing
+            // comment so `[[image]] # note` is recognised like
+            // `parse_catalogue` recognises it.
+            let header = trimmed.split('#').next().unwrap_or("").trim_end();
+            in_image_block = header == "[[image]]";
+            current_tid = None;
+        } else if in_image_block {
+            if let Some((value, _)) = split_toml_string_value(trimmed, "toolchain_id") {
+                current_tid = Some(value.to_owned());
+            } else if let Some((_, tail)) = split_toml_string_value(trimmed, "digest") {
+                let pinned = current_tid
+                    .as_deref()
+                    .and_then(|tid| updates.iter().find(|(id, _)| id.as_str() == tid));
+                if let Some((_, new_digest)) = pinned {
+                    let indent_len = body.len() - body.trim_start().len();
+                    out.push_str(&body[..indent_len]);
+                    out.push_str("digest = \"");
+                    out.push_str(new_digest);
+                    out.push('"');
+                    out.push_str(tail);
+                    out.push_str(eol);
+                    continue;
+                }
+            }
+        }
+
+        out.push_str(raw);
+    }
+    out
+}
+
+/// Split a `key = "value"<tail>` line into `(value, tail)`.
+///
+/// `tail` is whatever follows the closing quote.  Returns `None` when the
+/// line does not start with `key`, when `key` is not followed by `=`, or
+/// when the value is not a double-quoted string with a closing quote.  The
+/// value ends at the first `"`; escape sequences are not interpreted.
+fn split_toml_string_value<'a>(line: &'a str, key: &str) -> Option<(&'a str, &'a str)> {
+    let rest = line.trim().strip_prefix(key)?.trim_start();
+    let rest = rest.strip_prefix('=')?.trim_start();
+    let rest = rest.strip_prefix('"')?;
+    let end = rest.find('"')?;
+    Some((&rest[..end], &rest[end + 1..]))
+}
+
+/// Extract the double-quoted string value of `key` from a single
+/// `key = "value"` line, ignoring anything after the closing quote.
+///
+/// Returns `None` when the line is not an assignment to exactly `key` or the
+/// value is not a terminated double-quoted string.
+fn parse_toml_string_value(line: &str, key: &str) -> Option<String> {
+    split_toml_string_value(line, key).map(|(value, _)| value.to_owned())
+}
+
+/// Unit tests for the catalogue parser and the digest rewriter; none of
+/// them needs docker.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// The parser keeps `toolchain_id`, `base` and `digest` and ignores the
+    /// remaining keys of each table.
+    #[test]
+    fn parse_catalogue_extracts_three_fields() {
+        let src = r#"
+[[image]]
+toolchain_id = "python-3.11"
+base = "python:3.11-slim"
+toolchain = "Python 3.11"
+packages = {}
+digest = ""
+
+[[image]]
+toolchain_id = "node-20"
+base = "node:20-slim"
+toolchain = "Node.js 20"
+packages = {}
+digest = "sha256:cafebabe"
+"#;
+        let entries = parse_catalogue(src);
+        assert_eq!(entries.len(), 2);
+        assert_eq!(entries[0].toolchain_id, "python-3.11");
+        assert_eq!(entries[0].base, "python:3.11-slim");
+        assert_eq!(entries[0].digest, "");
+        assert_eq!(entries[1].toolchain_id, "node-20");
+        assert_eq!(entries[1].digest, "sha256:cafebabe");
+    }
+
+    /// Only the block whose `toolchain_id` is listed in `updates` changes.
+    #[test]
+    fn rewrite_digests_replaces_only_named_entries() {
+        let src = r#"[[image]]
+toolchain_id = "python-3.11"
+base = "python:3.11-slim"
+digest = ""
+
+[[image]]
+toolchain_id = "node-20"
+base = "node:20-slim"
+digest = ""
+"#;
+        let updates = vec![("node-20".to_owned(), "sha256:deadbeef".to_owned())];
+        let out = rewrite_digests(src, &updates);
+        assert!(out.contains("digest = \"sha256:deadbeef\""));
+        // python-3.11 must remain unpinned.
+        let python_block = out
+            .split("[[image]]")
+            .find(|b| b.contains("python-3.11"))
+            .unwrap();
+        assert!(python_block.contains("digest = \"\""));
+    }
+
+    /// Leading whitespace of the digest line and unrelated comment lines
+    /// survive a rewrite.
+    #[test]
+    fn rewrite_digests_preserves_indentation_and_comments() {
+        let src = "# header\n[[image]]\n    toolchain_id = \"go\"\n    digest = \"\"\n";
+        let updates = vec![("go".to_owned(), "sha256:1234".to_owned())];
+        let out = rewrite_digests(src, &updates);
+        assert!(out.contains("    digest = \"sha256:1234\""));
+        assert!(out.starts_with("# header\n"));
+    }
+
+    /// With no updates the output equals the input.
+    #[test]
+    fn rewrite_digests_no_op_when_no_targets() {
+        let src = "[[image]]\ntoolchain_id = \"x\"\ndigest = \"sha256:keep\"\n";
+        let out = rewrite_digests(src, &[]);
+        assert_eq!(out, src);
+    }
+
+    /// The value ends at the closing quote; text after it (here an inline
+    /// comment) is not part of the value, and other keys never match.
+    #[test]
+    fn parse_toml_string_value_handles_trailing_garbage() {
+        assert_eq!(
+            parse_toml_string_value("digest = \"sha256:abc\"", "digest"),
+            Some("sha256:abc".to_owned())
+        );
+        assert_eq!(
+            parse_toml_string_value("digest = \"sha256:abc\"   # pinned by CI", "digest"),
+            Some("sha256:abc".to_owned())
+        );
+        assert_eq!(parse_toml_string_value("other = \"x\"", "digest"), None);
+    }
+
+    /// A `#` inside a double-quoted string is not a comment start.
+    #[test]
+    fn strip_comment_keeps_hash_inside_strings() {
+        assert_eq!(strip_comment("foo = \"a#b\" # tail"), "foo = \"a#b\" ");
+    }
+}