[pitboss/grind] deferred session-0006 (20260516T052512Z-20f8)

This commit is contained in:
pitboss 2026-05-16 04:13:55 -05:00
parent bf8e61ffdb
commit 92e90f05cc
3 changed files with 139 additions and 71 deletions

View file

@ -918,9 +918,21 @@ pub fn callers_transitive(cg: &CallGraph, callee: &FuncKey) -> std::collections:
/// Map shape: `callee_namespace → { caller_namespace, … }`. A file
/// always appears in its own caller set so intra-file recursion stays
/// reachable.
///
/// `scan_root` is optional path-normalisation context. Callers that
/// build the map without a scan root must pass project-relative POSIX
/// paths to [`FileReachMap::reaches`] directly. When a root is set
/// (typical in production scans), [`FileReachMap::reaches`] applies
/// [`crate::symbol::normalize_namespace`] to its arguments before
/// lookup so absolute host paths (the convention on
/// [`crate::commands::scan::Diag::path`]) and project-relative paths
/// (the convention on call-graph [`FuncKey::namespace`] and
/// [`crate::surface::SourceLocation::file`]) both resolve to the
/// stored keys.
#[derive(Debug, Default, Clone)]
pub struct FileReachMap {
/// Reachability index keyed by callee namespace; each value is the set
/// of caller namespaces recorded for that callee
/// (`callee_namespace → { caller_namespace, … }`).
by_callee_ns: HashMap<String, std::collections::HashSet<String>>,
/// Optional scan root. When `Some`, lookup arguments are passed through
/// `crate::symbol::normalize_namespace` with this root before being
/// matched against the keys of `by_callee_ns`; when `None`, arguments
/// are looked up verbatim.
scan_root: Option<String>,
}
impl FileReachMap {
@ -928,6 +940,10 @@ impl FileReachMap {
///
/// O(V × (V + E)) worst case, but the per-function BFS is sparse on
/// real call graphs (median in-degree < 4 on the eval corpus).
///
/// The returned map has no scan root configured; pair with
/// [`FileReachMap::with_scan_root`] when callers may pass absolute
/// paths.
pub fn build(cg: &CallGraph) -> Self {
let mut by_callee_ns: HashMap<String, std::collections::HashSet<String>> = HashMap::new();
for callee in cg.index.keys() {
@ -937,17 +953,33 @@ impl FileReachMap {
entry.insert(caller.namespace);
}
}
FileReachMap { by_callee_ns }
FileReachMap {
by_callee_ns,
scan_root: None,
}
}
/// True when `caller_ns` transitively reaches at least one function
/// defined in `callee_ns`. False when either namespace is unknown
/// to the graph (conservative: chain composer falls back to the
/// file-local heuristic).
pub fn reaches(&self, caller_ns: &str, callee_ns: &str) -> bool {
/// Attach a scan root so [`FileReachMap::reaches`] can normalise
/// absolute host paths back to the project-relative POSIX form the
/// map keys use. Pass `None` to clear an existing root.
///
/// The root is stored as a string obtained with
/// [`std::path::Path::to_string_lossy`], so any non-UTF-8 sequences in
/// `root` are replaced with U+FFFD.
///
/// This is a consuming, builder-style method: the map is taken by
/// value and handed back with the new root. Ignoring the return value
/// drops the map, hence the `#[must_use]`.
#[must_use = "with_scan_root returns the updated map; it does not mutate in place"]
pub fn with_scan_root<P: AsRef<std::path::Path>>(mut self, root: Option<P>) -> Self {
    self.scan_root = root.map(|p| p.as_ref().to_string_lossy().into_owned());
    self
}
/// True when `caller` transitively reaches at least one function
/// defined in `callee`. Inputs may be either project-relative
/// POSIX paths (matching the call-graph namespace convention) or
/// absolute host paths when a scan root was set via
/// [`FileReachMap::with_scan_root`]. False when either path is
/// unknown to the graph (conservative: chain composer falls back
/// to the file-local heuristic).
pub fn reaches(&self, caller: &str, callee: &str) -> bool {
let lookup_callee = self.normalize(callee);
let lookup_caller = self.normalize(caller);
self.by_callee_ns
.get(callee_ns)
.is_some_and(|set| set.contains(caller_ns))
.get(lookup_callee.as_ref())
.is_some_and(|set| set.contains(lookup_caller.as_ref()))
}
/// Number of distinct callee namespaces tracked. Exposed for
@ -955,6 +987,16 @@ impl FileReachMap {
pub fn callee_ns_len(&self) -> usize {
self.by_callee_ns.len()
}
/// Map a lookup argument onto the key form stored in the map.
///
/// Without a configured scan root the input is handed back borrowed
/// and untouched. With a root, the path is run through
/// [`crate::symbol::normalize_namespace`] and the owned result is
/// returned.
fn normalize<'a>(&self, path: &'a str) -> std::borrow::Cow<'a, str> {
    use std::borrow::Cow;

    let Some(root) = self.scan_root.as_deref() else {
        return Cow::Borrowed(path);
    };
    Cow::Owned(crate::symbol::normalize_namespace(path, Some(root)))
}
}
/// Compute the set of file namespaces that must be re-analysed when a
@ -2962,4 +3004,55 @@ mod tests {
assert!(!reach.reaches("b.py", "a.py"));
assert_eq!(reach.callee_ns_len(), 2);
}
/// Once a scan root is attached, absolute host paths under that root
/// resolve to the same entries as their project-relative POSIX
/// spellings: `reaches("/abs/scan/routes.py", "/abs/scan/helper.py")`
/// agrees with `reaches("routes.py", "helper.py")`. Mirrors the
/// production wire-up in `src/commands/scan.rs`, where the call graph
/// carries project-relative namespaces while `Diag.path` (from
/// `src/ast.rs`) is the absolute walker path.
#[test]
fn file_reach_map_with_scan_root_normalises_absolute_paths() {
    // Two-file graph: `handle` in routes.py calls `sink` in helper.py.
    let summaries = vec![
        make_summary("handle", "routes.py", "python", 0, vec!["sink"]),
        make_summary("sink", "helper.py", "python", 0, vec![]),
    ];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);

    let rooted =
        FileReachMap::build(&cg).with_scan_root(Some(std::path::Path::new("/abs/scan")));
    let rooted_trailing =
        FileReachMap::build(&cg).with_scan_root(Some(std::path::Path::new("/abs/scan/")));

    // Mixed conventions: surface (project-relative) caller,
    // Diag (absolute) callee. Pre-fix this returned false.
    assert!(rooted.reaches("routes.py", "/abs/scan/helper.py"));

    // Both absolute: also resolves.
    assert!(rooted.reaches("/abs/scan/routes.py", "/abs/scan/helper.py"));

    // Trailing-slash root works.
    assert!(rooted_trailing.reaches("/abs/scan/routes.py", "/abs/scan/helper.py"));

    // Both project-relative: still resolves (legacy behaviour).
    assert!(rooted.reaches("routes.py", "helper.py"));

    // Path outside the root falls through normalize_namespace
    // unchanged and does not collide with a project-relative key.
    assert!(!rooted.reaches("/other/root/routes.py", "/other/root/helper.py"));
}
/// Passing `None` to `with_scan_root` removes a root that was set
/// earlier, returning the map to strict project-relative lookups.
#[test]
fn file_reach_map_with_scan_root_none_clears_root() {
    // Two-file graph: `handle` in routes.py calls `sink` in helper.py.
    let gs = merge_summaries(
        vec![
            make_summary("handle", "routes.py", "python", 0, vec!["sink"]),
            make_summary("sink", "helper.py", "python", 0, vec![]),
        ],
        None,
    );
    let cg = build_call_graph(&gs, &[]);

    // Set a root first, then clear it again.
    let rooted =
        FileReachMap::build(&cg).with_scan_root(Some(std::path::Path::new("/abs/scan")));
    let cleared: FileReachMap = rooted.with_scan_root::<&std::path::Path>(None);

    // Absolute lookup no longer resolves once root is cleared.
    assert!(!cleared.reaches("/abs/scan/routes.py", "/abs/scan/helper.py"));

    // Project-relative still works.
    assert!(cleared.reaches("routes.py", "helper.py"));
}
}

View file

@ -2201,7 +2201,9 @@ pub(crate) fn scan_filesystem_with_observer(
}
if let Some(out) = chain_reach_out {
let _ = out.set(crate::callgraph::FileReachMap::build(&call_graph));
let _ = out.set(
crate::callgraph::FileReachMap::build(&call_graph).with_scan_root(Some(root)),
);
}
// ── Pass 2: re-run with cross-file global summaries ──────────────────
@ -2996,7 +2998,9 @@ pub fn scan_with_index_parallel_observer(
}
if let Some(out) = chain_reach_out {
let _ = out.set(crate::callgraph::FileReachMap::build(&call_graph));
let _ = out.set(
crate::callgraph::FileReachMap::build(&call_graph).with_scan_root(Some(scan_root)),
);
}
let (batches, orphans) = crate::callgraph::scc_file_batches_with_metadata(

View file

@ -837,12 +837,11 @@ fn start_container(
"--cap-drop=ALL".into(),
"--security-opt".into(), "no-new-privileges:true".into(),
"--tmpfs".into(), "/tmp:size=128m,exec".into(),
// Phase 19 (Track E.3): bind-mount the host workdir at the fixed
// `/work` path read-write. Harness code emitted in Phase 12+ can
// reference `/work/...` without threading the host tempdir
// through every layer. The `docker cp` path below is retained so
// older harness command lines (which still look at `/workdir`)
// keep working until they are migrated.
// Bind-mount the host workdir at the fixed `/work` path
// read-write so harness code can reference `/work/...` without
// threading the host tempdir through every layer. The mount
// alone is sufficient to deliver harness files into the
// container — no follow-up `docker cp` is needed.
"-v".into(), workdir_mount,
];
match policy {
@ -868,7 +867,6 @@ fn start_container(
}
run_args.extend([image.into(), "sleep".into(), "300".into()]);
// Start container (no volume mount).
let status = std::process::Command::new(docker_bin())
.args(&run_args)
.stdout(std::process::Stdio::null())
@ -880,55 +878,24 @@ fn start_container(
return Err(SandboxError::BackendUnavailable(SandboxBackend::Docker));
}
// Copy harness files into /workdir inside the container.
let workdir_str = workdir.to_string_lossy();
let status = std::process::Command::new(docker_bin())
.args([
"exec",
name,
"mkdir", "-p", "/workdir",
])
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.status()
.map_err(SandboxError::Io)?;
if !status.success() {
return Err(SandboxError::BackendUnavailable(SandboxBackend::Docker));
}
// Copy workdir contents (harness.py + entry module) into the container.
let cp_src = format!("{workdir_str}/."); // trailing /. copies dir contents
let cp_dst = format!("{name}:/workdir");
let status = std::process::Command::new(docker_bin())
.args(["cp", &cp_src, &cp_dst])
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.status()
.map_err(SandboxError::Io)?;
if status.success() {
// Apply OOB egress filter on Linux when the OOB listener is active.
// This restricts the bridge-networked container to only reach the host
// on the OOB port; all other egress is dropped (§17.2).
#[cfg(target_os = "linux")]
if let NetworkPolicy::OobOutbound { listener } = policy {
apply_oob_egress_filter(name, listener.port());
}
#[cfg(not(target_os = "linux"))]
let _ = policy; // policy already consumed structurally above
Ok(())
} else {
Err(SandboxError::BackendUnavailable(SandboxBackend::Docker))
// Apply OOB egress filter on Linux when the OOB listener is active.
// This restricts the bridge-networked container to only reach the
// host on the OOB port; all other egress is dropped (§17.2).
#[cfg(target_os = "linux")]
if let NetworkPolicy::OobOutbound { listener } = policy {
apply_oob_egress_filter(name, listener.port());
}
#[cfg(not(target_os = "linux"))]
let _ = policy; // policy already consumed structurally above
Ok(())
}
/// Build the inner-container command args for `docker exec`.
///
/// For 2-arg interpreted commands (`python3 harness.py`, `node harness.js`,
/// `php harness.php`) the file arg is prefixed with `/workdir/`.
/// `php harness.php`) the file arg is prefixed with `/work/`.
/// For Java (`java -cp /host/abs/path NyxHarness`) the classpath argument is
/// replaced with `/workdir` (the container-side mount path, not the host path
/// replaced with `/work` (the container-side mount path, not the host path
/// that runner.rs wrote after `javac`).
fn build_container_exec_args(command: &[String]) -> Vec<String> {
let mut args = Vec::new();
@ -948,7 +915,7 @@ fn build_container_exec_args(command: &[String]) -> Vec<String> {
if command[i] == "-cp" || command[i] == "-classpath" {
args.push(command[i].clone());
i += 1;
args.push("/workdir".to_owned());
args.push(docker::WORK_MOUNT_PATH.to_owned());
i += 1;
} else {
args.push(command[i].clone());
@ -961,7 +928,7 @@ fn build_container_exec_args(command: &[String]) -> Vec<String> {
if harness_file.starts_with('/') {
args.push(harness_file.clone());
} else {
args.push(format!("/workdir/{harness_file}"));
args.push(format!("{}/{harness_file}", docker::WORK_MOUNT_PATH));
}
}
}
@ -1173,8 +1140,11 @@ fn run_native_binary_docker(
&opts.network_policy,
)?;
// Copy the compiled binary into the container as /workdir/nyx_harness.
let cp_dst = format!("{container_name}:/workdir/nyx_harness");
// Copy the compiled binary into the container as
// `/work/nyx_harness`. The destination resolves through the
// workdir bind mount, so the file also appears on the host
// workdir and survives container restarts.
let cp_dst = format!("{container_name}:{}/nyx_harness", docker::WORK_MOUNT_PATH);
let cp_status = std::process::Command::new(docker_bin())
.args(["cp", &binary_path, &cp_dst])
.stdout(std::process::Stdio::null())
@ -1186,8 +1156,9 @@ fn run_native_binary_docker(
}
// Ensure execute bit is set (docker cp preserves it on Linux, but be explicit).
let chmod_path = format!("{}/nyx_harness", docker::WORK_MOUNT_PATH);
let chmod_status = std::process::Command::new(docker_bin())
.args(["exec", &container_name, "chmod", "+x", "/workdir/nyx_harness"])
.args(["exec", &container_name, "chmod", "+x", &chmod_path])
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.status()
@ -1202,7 +1173,7 @@ fn run_native_binary_docker(
exec_native_binary_in_container(&container_name, harness, payload_bytes, opts)
}
/// Execute a native binary already in the container at `/workdir/nyx_harness`.
/// Execute a native binary already in the container at `/work/nyx_harness`.
fn exec_native_binary_in_container(
container_name: &str,
harness: &BuiltHarness,
@ -1224,7 +1195,7 @@ fn exec_native_binary_in_container(
cmd_args.push(format!("{k}={v}"));
}
cmd_args.push(container_name.into());
cmd_args.push("/workdir/nyx_harness".into());
cmd_args.push(format!("{}/nyx_harness", docker::WORK_MOUNT_PATH));
let mut cmd = Command::new(docker_bin());
cmd.args(&cmd_args);
@ -1745,7 +1716,7 @@ mod tests {
let cmd = vec!["python3".to_owned(), "harness.py".to_owned()];
assert_eq!(
build_container_exec_args(&cmd),
vec!["python3", "/workdir/harness.py"]
vec!["python3", "/work/harness.py"]
);
}
@ -1754,7 +1725,7 @@ mod tests {
let cmd = vec!["node".to_owned(), "harness.js".to_owned()];
assert_eq!(
build_container_exec_args(&cmd),
vec!["node", "/workdir/harness.js"]
vec!["node", "/work/harness.js"]
);
}
@ -1763,7 +1734,7 @@ mod tests {
let cmd = vec!["php".to_owned(), "harness.php".to_owned()];
assert_eq!(
build_container_exec_args(&cmd),
vec!["php", "/workdir/harness.php"]
vec!["php", "/work/harness.php"]
);
}
@ -1772,7 +1743,7 @@ mod tests {
let cmd = vec!["ruby".to_owned(), "harness.rb".to_owned()];
assert_eq!(
build_container_exec_args(&cmd),
vec!["ruby", "/workdir/harness.rb"]
vec!["ruby", "/work/harness.rb"]
);
}
@ -1786,7 +1757,7 @@ mod tests {
];
assert_eq!(
build_container_exec_args(&cmd),
vec!["java", "-cp", "/workdir", "NyxHarness"]
vec!["java", "-cp", "/work", "NyxHarness"]
);
}