nyx/build.rs

use std::collections::BTreeMap;
use std::path::Path;
use std::process::Command;

/// Build-script entry point.
///
/// Two generated tables are always written to `OUT_DIR`; the frontend
/// handling that follows only runs when the `serve` feature is enabled.
fn main() {
    // Seccomp policy table: generated on every host, even though the
    // consumer is only compiled on Linux.
    emit_seccomp_policy();

    // Pinned image digests from tools/image-builder/images.toml: the runtime
    // includes the generated file unconditionally, so it must always exist.
    emit_image_digests();

    // Everything below concerns the embedded web UI.
    if std::env::var("CARGO_FEATURE_SERVE").is_err() {
        return;
    }

    let dist_dir = Path::new("src/server/assets/dist");

    // Ask cargo to re-run this script when the built index page changes.
    println!("cargo:rerun-if-changed=src/server/assets/dist/index.html");

    // A dist that is already present is used as-is.
    if dist_dir.join("index.html").exists() {
        return;
    }

    // Without a frontend project there is nothing to build from.
    let frontend_dir = Path::new("frontend");
    if !frontend_dir.join("package.json").exists() {
        emit_placeholder_and_warn(dist_dir);
        return;
    }

    println!("cargo:warning=Frontend dist not found, running npm install && npm run build...");

    // Runs `npm <args>` inside the frontend directory; a spawn failure and a
    // non-zero exit status are both reported as `false`.
    let npm_ok = |args: &[&str]| {
        Command::new("npm")
            .args(args)
            .current_dir(frontend_dir)
            .status()
            .map(|status| status.success())
            .unwrap_or(false)
    };

    // `&&` short-circuits, so the build step is skipped when install fails.
    if npm_ok(&["install"]) && npm_ok(&["run", "build"]) {
        println!("cargo:warning=Frontend built successfully.");
    } else {
        emit_placeholder_and_warn(dist_dir);
    }
}

/// Writes minimal stand-in frontend assets into `dist_dir` so the crate
/// still compiles when the real frontend could not be built, then emits a
/// cargo warning explaining how to build the real UI.
///
/// The function never panics: the placeholders are best-effort.  A failure
/// to create the directory or write a file is reported as its own
/// `cargo:warning` line instead of being dropped, so a later missing-asset
/// compile error can be traced back to its cause.
fn emit_placeholder_and_warn(dist_dir: &Path) {
    // (file name, contents) of every placeholder asset.
    const PLACEHOLDERS: &[(&str, &str)] = &[
        (
            "index.html",
            "<!DOCTYPE html><html><body><h1>Frontend not built</h1><p>Run: cd frontend &amp;&amp; npm install &amp;&amp; npm run build</p></body></html>",
        ),
        ("app.js", "// frontend not built\n"),
        ("style.css", "/* frontend not built */\n"),
    ];

    if let Err(err) = std::fs::create_dir_all(dist_dir) {
        println!(
            "cargo:warning=could not create placeholder directory {}: {}",
            dist_dir.display(),
            err
        );
    }

    for &(name, contents) in PLACEHOLDERS {
        let target = dist_dir.join(name);
        if let Err(err) = std::fs::write(&target, contents) {
            println!(
                "cargo:warning=could not write placeholder asset {}: {}",
                target.display(),
                err
            );
        }
    }

    println!(
        "cargo:warning=Node.js/npm not available — wrote placeholder frontend assets. Run 'cd frontend && npm install && npm run build' for the real UI."
    );
}

// ── Phase 17 (Track E.1) — seccomp policy codegen ────────────────────────────

/// Relative path of the seccomp policy source.  `emit_seccomp_policy` reads
/// this file and registers it with `cargo:rerun-if-changed`.
const SECCOMP_POLICY_PATH: &str = "src/dynamic/sandbox/seccomp/seccomp_policy.toml";

/// Cap-name → Cap bit value table.  Mirrors the `bitflags!` block in
/// `src/labels/mod.rs`.  Keep in sync when adding/removing `Cap`
/// constants.
///
/// `emit_seccomp_policy` resolves every `[cap.NAME]` section of the policy
/// file through this table and panics on a name that is not listed here.
/// Each entry occupies its own bit, numbered consecutively from bit 0.
const CAP_BIT_FOR_NAME: &[(&str, u32)] = &[
    ("ENV_VAR", 1 << 0),
    ("HTML_ESCAPE", 1 << 1),
    ("SHELL_ESCAPE", 1 << 2),
    ("URL_ENCODE", 1 << 3),
    ("JSON_PARSE", 1 << 4),
    ("FILE_IO", 1 << 5),
    ("FMT_STRING", 1 << 6),
    ("SQL_QUERY", 1 << 7),
    ("DESERIALIZE", 1 << 8),
    ("SSRF", 1 << 9),
    ("CODE_EXEC", 1 << 10),
    ("CRYPTO", 1 << 11),
    ("UNAUTHORIZED_ID", 1 << 12),
    ("DATA_EXFIL", 1 << 13),
    ("LDAP_INJECTION", 1 << 14),
    ("XPATH_INJECTION", 1 << 15),
    ("HEADER_INJECTION", 1 << 16),
    ("OPEN_REDIRECT", 1 << 17),
    ("SSTI", 1 << 18),
    ("XXE", 1 << 19),
    ("PROTOTYPE_POLLUTION", 1 << 20),
];

/// Generates `$OUT_DIR/seccomp_policy.rs` from the seccomp policy TOML.
///
/// The generated file declares two statics: `BASE`, the syscall names from
/// the `[base]` section, and `CAP`, one `(bit, names)` pair per
/// `[cap.NAME]` section with `NAME` resolved through `CAP_BIT_FOR_NAME`.
/// If the policy file cannot be read, both statics are emitted empty so the
/// including module still compiles.
///
/// # Panics
///
/// Panics when `OUT_DIR` is unset, when the output file cannot be written,
/// or when the policy names a capability missing from `CAP_BIT_FOR_NAME`.
fn emit_seccomp_policy() {
    println!("cargo:rerun-if-changed={}", SECCOMP_POLICY_PATH);

    let out_dir = std::env::var("OUT_DIR").expect("OUT_DIR must be set by cargo");
    let out_path = Path::new(&out_dir).join("seccomp_policy.rs");

    // Unreadable policy (e.g. file absent): fall back to empty tables.
    let Ok(toml_text) = std::fs::read_to_string(SECCOMP_POLICY_PATH) else {
        std::fs::write(
            &out_path,
            "pub static BASE: &[&str] = &[];\npub static CAP: &[(u32, &[&str])] = &[];\n",
        )
        .expect("write empty seccomp policy stub");
        return;
    };

    let parsed = parse_seccomp_toml(&toml_text);

    // Renders each name as one quoted, comma-terminated row at `indent`.
    let quoted_rows = |names: &[String], indent: &str| -> String {
        names
            .iter()
            .map(|name| format!("{}\"{}\",\n", indent, escape(name)))
            .collect()
    };

    let mut out =
        String::from("// generated by build.rs from seccomp_policy.toml — do not edit\n\n");

    // Base allowlist.
    out.push_str("pub static BASE: &[&str] = &[\n");
    out.push_str(&quoted_rows(&parsed.base, "    "));
    out.push_str("];\n\n");

    // Per-capability allowlists, in the map's (sorted) key order.
    out.push_str("pub static CAP: &[(u32, &[&str])] = &[\n");
    for (cap_name, allow) in &parsed.caps {
        let Some(&(_, bit)) = CAP_BIT_FOR_NAME
            .iter()
            .find(|(known, _)| *known == cap_name.as_str())
        else {
            panic!(
                "seccomp_policy.toml references unknown Cap '{cap_name}' — \
                 add it to CAP_BIT_FOR_NAME in build.rs first"
            )
        };
        out.push_str(&format!("    (0x{bit:08x}_u32, &[\n"));
        out.push_str(&quoted_rows(allow, "        "));
        out.push_str("    ]),\n");
    }
    out.push_str("];\n");

    std::fs::write(&out_path, out).expect("write seccomp policy table");
}

/// In-memory form of `seccomp_policy.toml` as produced by
/// `parse_seccomp_toml`.
#[derive(Default)]
struct SeccompPolicy {
    /// Entries of the `allow` array in the `[base]` section.
    base: Vec<String>,
    /// Entries of the `allow` array of each `[cap.NAME]` section, keyed by
    /// `NAME`.  A `BTreeMap` iterates in key order, so the generated table
    /// comes out in a stable order.
    caps: BTreeMap<String, Vec<String>>,
}

/// Minimal line-based reader for the subset of TOML that
/// `seccomp_policy.toml` uses:
///
///   [base]
///   allow = ["read", "write", ...]
///
///   [cap.SQL_QUERY]
///   allow = [
///       "fdatasync",
///       ...
///   ]
///
/// `#` comments and blank lines are ignored.  An array whose opening line
/// has no `]` is buffered line by line until a line containing `]` is seen.
/// Values that are not arrays, and lines that are neither a section header
/// nor a `key = value` pair, are ignored.
fn parse_seccomp_toml(src: &str) -> SeccompPolicy {
    let mut policy = SeccompPolicy::default();
    let mut section: Option<String> = None;
    // Key and buffered text of a multi-line array that is still open.
    let mut pending: Option<(String, String)> = None;

    for raw in src.lines() {
        let line = strip_comment(raw).trim();
        if line.is_empty() {
            continue;
        }

        // Inside a multi-line array: keep buffering until the closing `]`.
        if let Some((key, mut buf)) = pending.take() {
            buf.push_str(line);
            buf.push('\n');
            if line.contains(']') {
                let values = parse_string_array(&buf);
                store_allow(&mut policy, section.as_deref(), &key, values);
            } else {
                pending = Some((key, buf));
            }
            continue;
        }

        // Section header: `[name]`.
        if let Some(name) = line.strip_prefix('[').and_then(|s| s.strip_suffix(']')) {
            section = Some(name.to_string());
            continue;
        }

        // `key = [ ... ]`; anything that is not an array value is skipped.
        let Some((key, rest)) = line.split_once('=') else {
            continue;
        };
        let key = key.trim();
        let rest = rest.trim();
        if !rest.starts_with('[') {
            continue;
        }
        if rest.contains(']') {
            let values = parse_string_array(rest);
            store_allow(&mut policy, section.as_deref(), key, values);
        } else {
            pending = Some((key.to_string(), format!("{}\n", rest)));
        }
    }

    policy
}

/// Returns `line` with a trailing `#` comment removed.
///
/// A `#` only starts a comment outside of string literals.  Both TOML
/// string flavours are tracked: basic strings (`"..."`), in which a
/// backslash escapes the following character so `\"` does not end the
/// string, and literal strings (`'...'`), which have no escapes.  When no
/// comment is present the whole line is returned unchanged; whitespace
/// before the `#` is kept.
fn strip_comment(line: &str) -> &str {
    // Delimiter of the string literal currently open, if any.
    let mut open_quote: Option<u8> = None;
    // True when the previous byte was an unescaped backslash inside `"..."`.
    let mut escaped = false;

    // Scanning bytes is safe: every byte matched below is ASCII, and no byte
    // of a multi-byte UTF-8 sequence is ASCII, so `i` is a char boundary.
    for (i, byte) in line.bytes().enumerate() {
        match open_quote {
            None => match byte {
                b'"' | b'\'' => open_quote = Some(byte),
                b'#' => return &line[..i],
                _ => {}
            },
            Some(b'"') => {
                if escaped {
                    escaped = false;
                } else if byte == b'\\' {
                    escaped = true;
                } else if byte == b'"' {
                    open_quote = None;
                }
            }
            Some(quote) => {
                if byte == quote {
                    open_quote = None;
                }
            }
        }
    }
    line
}

/// Extracts every double-quoted run from the array text in `src`.
///
/// Only the text between the first `[` and the last `]` that follows it is
/// scanned; a missing `[` means "from the start" and a missing `]` means
/// "to the end".  Escape sequences are not interpreted, and a string left
/// unterminated at the end of the body is returned as-is.
///
/// The closing bracket is searched for only after the opening one, so
/// malformed input such as `] ["a"` cannot produce an inverted slice range
/// (which would panic).
fn parse_string_array(src: &str) -> Vec<String> {
    let start = src.find('[').map_or(0, |i| i + 1);
    let end = src[start..].rfind(']').map_or(src.len(), |i| start + i);

    // Splitting on `"` puts quoted contents at the odd positions: the text
    // before the first quote is segment 0, the first string is segment 1,
    // the separator after it is segment 2, and so on.
    src[start..end]
        .split('"')
        .skip(1)
        .step_by(2)
        .map(str::to_owned)
        .collect()
}

/// Records one parsed `allow` array in `policy`.
///
/// Keys other than `allow` are ignored.  Values found under `[base]`
/// replace `policy.base`; values under `[cap.NAME]` are stored in
/// `policy.caps` under `NAME`, replacing any earlier entry for that name.
/// Values outside a section, or in any other section, are dropped.
fn store_allow(policy: &mut SeccompPolicy, section: Option<&str>, key: &str, values: Vec<String>) {
    if key != "allow" {
        return;
    }
    let Some(section) = section else {
        return;
    };

    if section == "base" {
        policy.base = values;
    } else if let Some(cap_name) = section.strip_prefix("cap.") {
        policy.caps.insert(cap_name.to_string(), values);
    }
}

/// Escapes `s` for embedding between double quotes in generated Rust
/// source: every backslash and every double quote gets a backslash in
/// front of it; all other characters are copied unchanged.
fn escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if matches!(ch, '\\' | '"') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}

// ── Phase 19 (Track E.3) — image digest codegen ──────────────────────────────

/// Relative path of the image catalogue.  `emit_image_digests` reads this
/// file and registers it with `cargo:rerun-if-changed`.
const IMAGE_CATALOGUE_PATH: &str = "tools/image-builder/images.toml";

/// Reads `tools/image-builder/images.toml` and writes
/// `$OUT_DIR/image_digests.rs`, which declares two maps:
///
///   pub static IMAGE_DIGESTS: phf::Map<&'static str, &'static str> = …;
///   pub static IMAGE_BASES:   phf::Map<&'static str, &'static str> = …;
///
/// Both are keyed by toolchain ID.  `IMAGE_DIGESTS` maps to
/// `<base>@<digest>` and leaves out entries whose digest is empty ("not yet
/// pinned"); `IMAGE_BASES` maps every entry to its bare `base` reference so
/// an unpinned pull remains possible.  When the catalogue cannot be read,
/// both maps are emitted empty.
///
/// # Panics
///
/// Panics when `OUT_DIR` is unset or the output file cannot be written.
fn emit_image_digests() {
    /// Renders one `phf_map!` static: `header`, one row per pair, then the
    /// closing `};` line.
    fn render_map<'a>(header: &str, rows: impl Iterator<Item = (&'a str, String)>) -> String {
        let mut text = String::from(header);
        for (key, value) in rows {
            text.push_str(&format!(
                "    \"{}\" => \"{}\",\n",
                escape(key),
                escape(&value),
            ));
        }
        text.push_str("};\n");
        text
    }

    println!("cargo:rerun-if-changed={}", IMAGE_CATALOGUE_PATH);

    let out_dir = std::env::var("OUT_DIR").expect("OUT_DIR must be set by cargo");
    let out_path = Path::new(&out_dir).join("image_digests.rs");

    // Unreadable catalogue (e.g. file absent): emit empty maps so the
    // runtime include still compiles.
    let Ok(toml_text) = std::fs::read_to_string(IMAGE_CATALOGUE_PATH) else {
        std::fs::write(
            &out_path,
            concat!(
                "/// generated empty IMAGE_DIGESTS — images.toml missing\n",
                "pub static IMAGE_DIGESTS: phf::Map<&'static str, &'static str> = phf::phf_map! {};\n",
                "pub static IMAGE_BASES: phf::Map<&'static str, &'static str> = phf::phf_map! {};\n",
            ),
        )
        .expect("write empty image digests stub");
        return;
    };

    let entries = parse_image_catalogue(&toml_text);

    // Pinned references: only entries that carry a digest.
    let digests = render_map(
        "pub static IMAGE_DIGESTS: phf::Map<&'static str, &'static str> = phf::phf_map! {\n",
        entries
            .iter()
            .filter(|e| !e.digest.is_empty())
            .map(|e| (e.toolchain_id.as_str(), format!("{}@{}", e.base, e.digest))),
    );

    // Unpinned references: every entry, without its digest.
    let bases = render_map(
        "pub static IMAGE_BASES: phf::Map<&'static str, &'static str> = phf::phf_map! {\n",
        entries
            .iter()
            .map(|e| (e.toolchain_id.as_str(), e.base.clone())),
    );

    let mut out = String::from(
        "// generated by build.rs from tools/image-builder/images.toml — do not edit\n\n",
    );
    out.push_str(&digests);
    out.push('\n');
    out.push_str(&bases);

    std::fs::write(&out_path, out).expect("write image_digests.rs");
}

/// One `[[image]]` table from `images.toml`, reduced to the fields this
/// build script consumes.  A key that is absent from the table leaves the
/// corresponding field at its `Default` (the empty string).
#[derive(Default)]
struct ImageEntry {
    /// Value of the `toolchain_id` key.  `parse_image_catalogue` discards
    /// entries where this is empty.
    toolchain_id: String,
    /// Value of the `base` key: the image reference without a digest.
    base: String,
    /// Value of the `digest` key.  `emit_image_digests` leaves entries with
    /// an empty digest out of `IMAGE_DIGESTS`.
    digest: String,
}

/// Minimal reader for the `[[image]]` tables of `images.toml`.
///
/// Only `toolchain_id`, `base` and `digest` are extracted; every other key
/// in a table is skipped.  Values have surrounding `"` and `'` characters
/// trimmed.  A table ends at the next line that starts with `[`, and tables
/// that never set a non-empty `toolchain_id` are discarded.  Entries are
/// returned in file order.
fn parse_image_catalogue(src: &str) -> Vec<ImageEntry> {
    /// Moves a finished entry into `done` unless it lacks a toolchain ID.
    fn commit(done: &mut Vec<ImageEntry>, entry: Option<ImageEntry>) {
        done.extend(entry.filter(|e| !e.toolchain_id.is_empty()));
    }

    let mut done: Vec<ImageEntry> = Vec::new();
    let mut open: Option<ImageEntry> = None;

    let meaningful_lines = src
        .lines()
        .map(|raw| strip_comment(raw).trim())
        .filter(|line| !line.is_empty());

    for line in meaningful_lines {
        // Any header closes the table being read; only `[[image]]` opens a
        // new one.
        if line.starts_with('[') {
            commit(&mut done, open.take());
            if line == "[[image]]" {
                open = Some(ImageEntry::default());
            }
            continue;
        }

        // Key/value pairs only matter while an `[[image]]` table is open.
        let (Some(entry), Some((key, value))) = (open.as_mut(), line.split_once('=')) else {
            continue;
        };
        let value = value.trim().trim_matches('"').trim_matches('\'');
        match key.trim() {
            "toolchain_id" => entry.toolchain_id = value.to_owned(),
            "base" => entry.base = value.to_owned(),
            "digest" => entry.digest = value.to_owned(),
            _ => {}
        }
    }

    // The last table has no following header to close it.
    commit(&mut done, open.take());
    done
}