From 92e90f05cc138f90e7416ea84282083df76804e6 Mon Sep 17 00:00:00 2001 From: pitboss Date: Sat, 16 May 2026 04:13:55 -0500 Subject: [PATCH] [pitboss/grind] deferred session-0006 (20260516T052512Z-20f8) --- src/callgraph.rs | 109 ++++++++++++++++++++++++++++++++++--- src/commands/scan.rs | 8 ++- src/dynamic/sandbox/mod.rs | 93 +++++++++++-------------------- 3 files changed, 139 insertions(+), 71 deletions(-) diff --git a/src/callgraph.rs b/src/callgraph.rs index a179dfd3..4393a0e6 100644 --- a/src/callgraph.rs +++ b/src/callgraph.rs @@ -918,9 +918,21 @@ pub fn callers_transitive(cg: &CallGraph, callee: &FuncKey) -> std::collections: /// Map shape: `callee_namespace → { caller_namespace, … }`. A file /// always appears in its own caller set so intra-file recursion stays /// reachable. +/// +/// `scan_root` is optional path-normalisation context. Callers that +/// build the map without a scan root must pass project-relative POSIX +/// paths to [`FileReachMap::reaches`] directly. When a root is set +/// (typical in production scans), [`FileReachMap::reaches`] applies +/// [`crate::symbol::normalize_namespace`] to its arguments before +/// lookup so absolute host paths (the convention on +/// [`crate::commands::scan::Diag::path`]) and project-relative paths +/// (the convention on call-graph [`FuncKey::namespace`] and +/// [`crate::surface::SourceLocation::file`]) both resolve to the +/// stored keys. #[derive(Debug, Default, Clone)] pub struct FileReachMap { by_callee_ns: HashMap<String, HashSet<String>>, + scan_root: Option<String>, } impl FileReachMap { @@ -928,6 +940,10 @@ impl FileReachMap { /// /// O(V × (V + E)) worst case, but the per-function BFS is sparse on /// real call graphs (median in-degree < 4 on the eval corpus). + /// + /// The returned map has no scan root configured; pair with + /// [`FileReachMap::with_scan_root`] when callers may pass absolute + /// paths.
pub fn build(cg: &CallGraph) -> Self { let mut by_callee_ns: HashMap<String, HashSet<String>> = HashMap::new(); for callee in cg.index.keys() { @@ -937,17 +953,33 @@ impl FileReachMap { entry.insert(caller.namespace); } } - FileReachMap { by_callee_ns } + FileReachMap { + by_callee_ns, + scan_root: None, + } } - /// True when `caller_ns` transitively reaches at least one function - /// defined in `callee_ns`. False when either namespace is unknown - /// to the graph (conservative: chain composer falls back to the - /// file-local heuristic). - pub fn reaches(&self, caller_ns: &str, callee_ns: &str) -> bool { + /// Attach a scan root so [`FileReachMap::reaches`] can normalise + /// absolute host paths back to the project-relative POSIX form the + /// map keys use. Pass `None` to clear an existing root. + pub fn with_scan_root<P: AsRef<std::path::Path>>(mut self, root: Option<P>) -> Self { + self.scan_root = root.map(|p| p.as_ref().to_string_lossy().into_owned()); + self + } + + /// True when `caller` transitively reaches at least one function + /// defined in `callee`. Inputs may be either project-relative + /// POSIX paths (matching the call-graph namespace convention) or + /// absolute host paths when a scan root was set via + /// [`FileReachMap::with_scan_root`]. False when either path is + /// unknown to the graph (conservative: chain composer falls back + /// to the file-local heuristic). + pub fn reaches(&self, caller: &str, callee: &str) -> bool { + let lookup_callee = self.normalize(callee); + let lookup_caller = self.normalize(caller); self.by_callee_ns - .get(callee_ns) - .is_some_and(|set| set.contains(caller_ns)) + .get(lookup_callee.as_ref()) + .is_some_and(|set| set.contains(lookup_caller.as_ref())) } /// Number of distinct callee namespaces tracked. Exposed for @@ -955,6 +987,16 @@ impl FileReachMap { pub fn callee_ns_len(&self) -> usize { self.by_callee_ns.len() } + + fn normalize<'a>(&self, path: &'a str) -> std::borrow::Cow<'a, str> { + match self.scan_root.as_deref() { + Some(root) => std::borrow::Cow::Owned(crate::symbol::normalize_namespace( + path, + Some(root), + )), + None => std::borrow::Cow::Borrowed(path), + } + } } /// Compute the set of file namespaces that must be re-analysed when a @@ -2962,4 +3004,55 @@ mod tests { assert!(!reach.reaches("b.py", "a.py")); assert_eq!(reach.callee_ns_len(), 2); } + + /// `with_scan_root` normalises absolute host paths to the + /// project-relative POSIX form the map keys carry, so + /// `reaches("/abs/scan/routes.py", "/abs/scan/helper.py")` finds + /// the same entry as the project-relative + /// `reaches("routes.py", "helper.py")` call. Mirrors the + /// production wire-up in `src/commands/scan.rs`: the call-graph + /// uses project-relative namespaces while `Diag.path` (from + /// `src/ast.rs`) is the absolute walker path.
+ #[test] + fn file_reach_map_with_scan_root_normalises_absolute_paths() { + let handle = make_summary("handle", "routes.py", "python", 0, vec!["sink"]); + let sink = make_summary("sink", "helper.py", "python", 0, vec![]); + let gs = merge_summaries(vec![handle, sink], None); + let cg = build_call_graph(&gs, &[]); + let scan_root = std::path::Path::new("/abs/scan"); + let reach = FileReachMap::build(&cg).with_scan_root(Some(scan_root)); + + // Mixed conventions: surface (project-relative) caller, + // Diag (absolute) callee. Pre-fix this returned false. + assert!(reach.reaches("routes.py", "/abs/scan/helper.py")); + // Both absolute: also resolves. + assert!(reach.reaches("/abs/scan/routes.py", "/abs/scan/helper.py")); + // Trailing-slash root works. + let reach_trail = + FileReachMap::build(&cg).with_scan_root(Some(std::path::Path::new("/abs/scan/"))); + assert!(reach_trail.reaches("/abs/scan/routes.py", "/abs/scan/helper.py")); + // Both project-relative: still resolves (legacy behaviour). + assert!(reach.reaches("routes.py", "helper.py")); + // Path outside the root falls through normalize_namespace + // unchanged and does not collide with a project-relative key. + assert!(!reach.reaches("/other/root/routes.py", "/other/root/helper.py")); + } + + /// `with_scan_root(None)` clears a previously set root and + /// restores strict project-relative lookup semantics. + #[test] + fn file_reach_map_with_scan_root_none_clears_root() { + let handle = make_summary("handle", "routes.py", "python", 0, vec!["sink"]); + let sink = make_summary("sink", "helper.py", "python", 0, vec![]); + let gs = merge_summaries(vec![handle, sink], None); + let cg = build_call_graph(&gs, &[]); + let reach: FileReachMap = FileReachMap::build(&cg) + .with_scan_root(Some(std::path::Path::new("/abs/scan"))) + .with_scan_root::<&std::path::Path>(None); + + // Absolute lookup no longer resolves once root is cleared. 
+ assert!(!reach.reaches("/abs/scan/routes.py", "/abs/scan/helper.py")); + // Project-relative still works. + assert!(reach.reaches("routes.py", "helper.py")); + } } diff --git a/src/commands/scan.rs b/src/commands/scan.rs index 108c9738..ce29c5d1 100644 --- a/src/commands/scan.rs +++ b/src/commands/scan.rs @@ -2201,7 +2201,9 @@ pub(crate) fn scan_filesystem_with_observer( } if let Some(out) = chain_reach_out { - let _ = out.set(crate::callgraph::FileReachMap::build(&call_graph)); + let _ = out.set( + crate::callgraph::FileReachMap::build(&call_graph).with_scan_root(Some(root)), + ); } // ── Pass 2: re-run with cross-file global summaries ────────────────── @@ -2996,7 +2998,9 @@ pub fn scan_with_index_parallel_observer( } if let Some(out) = chain_reach_out { - let _ = out.set(crate::callgraph::FileReachMap::build(&call_graph)); + let _ = out.set( + crate::callgraph::FileReachMap::build(&call_graph).with_scan_root(Some(scan_root)), + ); } let (batches, orphans) = crate::callgraph::scc_file_batches_with_metadata( diff --git a/src/dynamic/sandbox/mod.rs b/src/dynamic/sandbox/mod.rs index ca48234c..adf3ddec 100644 --- a/src/dynamic/sandbox/mod.rs +++ b/src/dynamic/sandbox/mod.rs @@ -837,12 +837,11 @@ fn start_container( "--cap-drop=ALL".into(), "--security-opt".into(), "no-new-privileges:true".into(), "--tmpfs".into(), "/tmp:size=128m,exec".into(), - // Phase 19 (Track E.3): bind-mount the host workdir at the fixed - // `/work` path read-write. Harness code emitted in Phase 12+ can - // reference `/work/...` without threading the host tempdir - // through every layer. The `docker cp` path below is retained so - // older harness command lines (which still look at `/workdir`) - // keep working until they are migrated. + // Bind-mount the host workdir at the fixed `/work` path + // read-write so harness code can reference `/work/...` without + // threading the host tempdir through every layer. 
The mount + // alone is sufficient to deliver harness files into the + // container — no follow-up `docker cp` is needed. "-v".into(), workdir_mount, ]; match policy { @@ -868,7 +867,6 @@ fn start_container( } run_args.extend([image.into(), "sleep".into(), "300".into()]); - // Start container (no volume mount). let status = std::process::Command::new(docker_bin()) .args(&run_args) .stdout(std::process::Stdio::null()) @@ -880,55 +878,24 @@ fn start_container( return Err(SandboxError::BackendUnavailable(SandboxBackend::Docker)); } - // Copy harness files into /workdir inside the container. - let workdir_str = workdir.to_string_lossy(); - let status = std::process::Command::new(docker_bin()) - .args([ - "exec", - name, - "mkdir", "-p", "/workdir", - ]) - .stdout(std::process::Stdio::null()) - .stderr(std::process::Stdio::null()) - .status() - .map_err(SandboxError::Io)?; - - if !status.success() { - return Err(SandboxError::BackendUnavailable(SandboxBackend::Docker)); - } - - // Copy workdir contents (harness.py + entry module) into the container. - let cp_src = format!("{workdir_str}/."); // trailing /. copies dir contents - let cp_dst = format!("{name}:/workdir"); - let status = std::process::Command::new(docker_bin()) - .args(["cp", &cp_src, &cp_dst]) - .stdout(std::process::Stdio::null()) - .stderr(std::process::Stdio::null()) - .status() - .map_err(SandboxError::Io)?; - - if status.success() { - // Apply OOB egress filter on Linux when the OOB listener is active. - // This restricts the bridge-networked container to only reach the host - // on the OOB port; all other egress is dropped (§17.2). 
- #[cfg(target_os = "linux")] - if let NetworkPolicy::OobOutbound { listener } = policy { - apply_oob_egress_filter(name, listener.port()); - } - #[cfg(not(target_os = "linux"))] - let _ = policy; // policy already consumed structurally above - Ok(()) - } else { - Err(SandboxError::BackendUnavailable(SandboxBackend::Docker)) + // Apply OOB egress filter on Linux when the OOB listener is active. + // This restricts the bridge-networked container to only reach the + // host on the OOB port; all other egress is dropped (§17.2). + #[cfg(target_os = "linux")] + if let NetworkPolicy::OobOutbound { listener } = policy { + apply_oob_egress_filter(name, listener.port()); } + #[cfg(not(target_os = "linux"))] + let _ = policy; // policy already consumed structurally above + Ok(()) } /// Build the inner-container command args for `docker exec`. /// /// For 2-arg interpreted commands (`python3 harness.py`, `node harness.js`, -/// `php harness.php`) the file arg is prefixed with `/workdir/`. +/// `php harness.php`) the file arg is prefixed with `/work/`. /// For Java (`java -cp /host/abs/path NyxHarness`) the classpath argument is -/// replaced with `/workdir` (the container-side mount path, not the host path +/// replaced with `/work` (the container-side mount path, not the host path /// that runner.rs wrote after `javac`). 
fn build_container_exec_args(command: &[String]) -> Vec<String> { let mut args = Vec::new(); @@ -948,7 +915,7 @@ fn build_container_exec_args(command: &[String]) -> Vec<String> { if command[i] == "-cp" || command[i] == "-classpath" { args.push(command[i].clone()); i += 1; - args.push("/workdir".to_owned()); + args.push(docker::WORK_MOUNT_PATH.to_owned()); i += 1; } else { args.push(command[i].clone()); @@ -961,7 +928,7 @@ fn build_container_exec_args(command: &[String]) -> Vec<String> { if harness_file.starts_with('/') { args.push(harness_file.clone()); } else { - args.push(format!("/workdir/{harness_file}")); + args.push(format!("{}/{harness_file}", docker::WORK_MOUNT_PATH)); } } } @@ -1173,8 +1140,11 @@ fn run_native_binary_docker( &opts.network_policy, )?; - // Copy the compiled binary into the container as /workdir/nyx_harness. - let cp_dst = format!("{container_name}:/workdir/nyx_harness"); + // Copy the compiled binary into the container as + // `/work/nyx_harness`. The destination resolves through the + // workdir bind mount, so the file also appears on the host + // workdir and survives container restarts. + let cp_dst = format!("{container_name}:{}/nyx_harness", docker::WORK_MOUNT_PATH); let cp_status = std::process::Command::new(docker_bin()) .args(["cp", &binary_path, &cp_dst]) .stdout(std::process::Stdio::null()) @@ -1186,8 +1156,9 @@ fn run_native_binary_docker( } // Ensure execute bit is set (docker cp preserves it on Linux, but be explicit).
+ let chmod_path = format!("{}/nyx_harness", docker::WORK_MOUNT_PATH); let chmod_status = std::process::Command::new(docker_bin()) - .args(["exec", &container_name, "chmod", "+x", "/workdir/nyx_harness"]) + .args(["exec", &container_name, "chmod", "+x", &chmod_path]) .stdout(std::process::Stdio::null()) .stderr(std::process::Stdio::null()) .status() @@ -1202,7 +1173,7 @@ fn run_native_binary_docker( exec_native_binary_in_container(&container_name, harness, payload_bytes, opts) } -/// Execute a native binary already in the container at `/workdir/nyx_harness`. +/// Execute a native binary already in the container at `/work/nyx_harness`. fn exec_native_binary_in_container( container_name: &str, harness: &BuiltHarness, @@ -1224,7 +1195,7 @@ fn exec_native_binary_in_container( cmd_args.push(format!("{k}={v}")); } cmd_args.push(container_name.into()); - cmd_args.push("/workdir/nyx_harness".into()); + cmd_args.push(format!("{}/nyx_harness", docker::WORK_MOUNT_PATH)); let mut cmd = Command::new(docker_bin()); cmd.args(&cmd_args); @@ -1745,7 +1716,7 @@ mod tests { let cmd = vec!["python3".to_owned(), "harness.py".to_owned()]; assert_eq!( build_container_exec_args(&cmd), - vec!["python3", "/workdir/harness.py"] + vec!["python3", "/work/harness.py"] ); } @@ -1754,7 +1725,7 @@ mod tests { let cmd = vec!["node".to_owned(), "harness.js".to_owned()]; assert_eq!( build_container_exec_args(&cmd), - vec!["node", "/workdir/harness.js"] + vec!["node", "/work/harness.js"] ); } @@ -1763,7 +1734,7 @@ mod tests { let cmd = vec!["php".to_owned(), "harness.php".to_owned()]; assert_eq!( build_container_exec_args(&cmd), - vec!["php", "/workdir/harness.php"] + vec!["php", "/work/harness.php"] ); } @@ -1772,7 +1743,7 @@ mod tests { let cmd = vec!["ruby".to_owned(), "harness.rb".to_owned()]; assert_eq!( build_container_exec_args(&cmd), - vec!["ruby", "/workdir/harness.rb"] + vec!["ruby", "/work/harness.rb"] ); } @@ -1786,7 +1757,7 @@ mod tests { ]; assert_eq!( build_container_exec_args(&cmd), - 
vec!["java", "-cp", "/workdir", "NyxHarness"] + vec!["java", "-cp", "/work", "NyxHarness"] ); }