From 8974b91bfc98878410f7c4e0bc4ac44f0dfab7b8 Mon Sep 17 00:00:00 2001 From: elipeter Date: Wed, 3 Jun 2026 22:27:24 -0500 Subject: [PATCH] fix linux java --- src/dynamic/build_pool/java.rs | 6 ++++++ src/dynamic/build_sandbox.rs | 18 +++++++++++++++++- src/dynamic/lang/java.rs | 7 ++++++- 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/dynamic/build_pool/java.rs b/src/dynamic/build_pool/java.rs index c6144642..dfd75181 100644 --- a/src/dynamic/build_pool/java.rs +++ b/src/dynamic/build_pool/java.rs @@ -279,6 +279,12 @@ fn ensure_worker_compiled(dir: &Path) -> Result<(), String> { std::fs::create_dir_all(&staging).map_err(|e| format!("javac-pool: mkdir staging: {e}"))?; let javac = std::env::var("NYX_JAVAC_BIN").unwrap_or_else(|_| "javac".to_owned()); let compiled = Command::new(&javac) + // Pin the source charset so the bootstrap compile is independent of + // the host locale (a `C`/`POSIX` CI runner defaults `javac` to + // `US-ASCII` and would reject any non-ASCII byte in the worker + // source). Mirrors the harness-compile pin in `build_sandbox`. + .arg("-encoding") + .arg("UTF-8") .arg("-d") .arg(&staging) .arg(&src_path) diff --git a/src/dynamic/build_sandbox.rs b/src/dynamic/build_sandbox.rs index f0e08ac6..f79242f3 100644 --- a/src/dynamic/build_sandbox.rs +++ b/src/dynamic/build_sandbox.rs @@ -1334,7 +1334,23 @@ fn try_compile_java_with_toolchain( } // Compile sources — class files are written to workdir by default. - let mut args = vec!["-d".to_owned(), workdir.to_string_lossy().into_owned()]; + // + // `-encoding UTF-8` is mandatory, not cosmetic: the emitted harness + // (`NyxHarness.java`) and many corpus fixtures carry non-ASCII bytes + // in comments (em-dashes, box-drawing rules). `javac` reads source + // in the platform default charset, which is `US-ASCII` on a CI host + // running the `C` / `POSIX` locale (the common Linux-runner default). + // Without the pin, every such source aborts with + // `unmappable character (0xE2) for encoding US-ASCII` and the build + // fails deterministically on Linux while passing on a UTF-8 macOS dev + // box. Pinning the source charset makes the compile host-locale + // independent. + let mut args = vec![ + "-encoding".to_owned(), + "UTF-8".to_owned(), + "-d".to_owned(), + workdir.to_string_lossy().into_owned(), + ]; if let Some(rel) = target_release { args.push("--release".to_owned()); args.push(rel.to_string()); diff --git a/src/dynamic/lang/java.rs b/src/dynamic/lang/java.rs index 5016da9d..df3ecc30 100644 --- a/src/dynamic/lang/java.rs +++ b/src/dynamic/lang/java.rs @@ -130,7 +130,12 @@ fn chain_step( command: vec![ "sh".to_owned(), "-c".to_owned(), - "javac Step.java && java Step".to_owned(), + // Pin the source charset so the step build does not depend on + // the container locale (a `C`/`POSIX` base image defaults + // `javac` to `US-ASCII` and rejects any non-ASCII byte in the + // generated source). Mirrors the harness-compile pin in + // `build_sandbox`. + "javac -encoding UTF-8 Step.java && java Step".to_owned(), ], extra_env: prev_output .map(|bytes| {