mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-15 20:05:13 +02:00
1113 lines
42 KiB
Rust
1113 lines
42 KiB
Rust
|
|
//! Project dependency capture + workdir staging (Phase 09 — Track D.1 + D.2).
//!
//! [`capture_project_dependencies`] reads the user's project root and
//! produces a [`CapturedDeps`] record describing every artifact the
//! harness will need at runtime — toolchain pin, direct imports of the
//! entry file, web framework signal, and local config files reachable
//! from the entry point. [`stage_workdir`] then materialises a minimal
//! copy of those artifacts into the per-spec workdir so the sandboxed
//! harness can `import flask` (or its per-language equivalent) inside an
//! offline sandbox without leaking the whole project tree across the
//! filesystem boundary.
//!
//! The lang-specific manifest (`requirements.txt`, `package.json`,
//! `Cargo.toml`, …) is then synthesised by the per-language emitter via
//! [`crate::dynamic::lang::LangEmitter::materialize_runtime`] from the
//! [`Environment`] handed back by `stage_workdir`.
//!
//! ## Scope
//!
//! - Direct imports of the spec's entry file (line-based scan of
//!   `import` / `require` / `use` statements only — transitive imports
//!   are deferred to a future phase).
//! - Framework deps inferred from [`crate::utils::project::detect_frameworks`].
//! - Local config files found in the project root or in the entry
//!   point's directory (`config.yaml`, `config.yml`, `.env`,
//!   `appsettings.json`, `settings.json`, `config.toml`, `config.json`,
//!   plus the toolchain-resolver-recognised manifest itself).
//! - Source files reached via reverse callgraph closure from the sink's
//!   enclosing function. Bounded by [`MAX_WORKDIR_BYTES`] so a
//!   pathological closure does not copy the entire repository.
//!
//! The staged workdir is intentionally minimalist: every file copied has
//! to either be the entry, a dep manifest, a config file, or an in-closure
//! source file. The 10 MiB ceiling protects against runaway full-tree
//! copy regressions called out in the Phase 09 acceptance.
|
||
|
|
|
||
|
|
use crate::callgraph::{callers_of, CallGraph};
|
||
|
|
use crate::dynamic::spec::HarnessSpec;
|
||
|
|
use crate::dynamic::toolchain::{self, ToolchainResolution};
|
||
|
|
use crate::summary::GlobalSummaries;
|
||
|
|
use crate::symbol::{FuncKey, Lang};
|
||
|
|
use crate::utils::project::{detect_frameworks, DetectedFramework, FrameworkContext};
|
||
|
|
use std::collections::HashSet;
|
||
|
|
use std::io;
|
||
|
|
use std::path::{Path, PathBuf};
|
||
|
|
|
||
|
|
/// Hard upper bound, in bytes, on what a staged workdir may consume
/// after `stage_workdir` returns (10 MiB).
///
/// Phase 09 acceptance pins this ceiling so a pathological full-tree
/// copy regression is caught at the test boundary rather than ballooning
/// the sandbox into the user's whole repo.
pub const MAX_WORKDIR_BYTES: u64 = 10 * 1024 * 1024;
|
||
|
|
|
||
|
|
/// Number of bytes scanned for `import` / `require` / `use` statements
/// when the per-language extractor enumerates the entry file's direct
/// dependencies (64 KiB).
///
/// 64 KiB covers every reasonable header / preamble; the whole file is
/// intentionally not walked because the import shape almost always lives
/// at the top.
const IMPORT_SCAN_LIMIT: usize = 64 * 1024;
|
||
|
|
|
||
|
|
/// File names of common config files looked up next to the entry point.
///
/// Each name is probed with a plain `dir.join(name).is_file()` test in
/// the project root and in the entry file's own directory (see
/// `collect_config_files`). Subdirectories are never recursed into —
/// that is intentional: the harness boots from `workdir/`, so a config
/// file is only reachable through a relative path when it sits at one of
/// those levels.
const CONFIG_FILE_CANDIDATES: &[&str] = &[
    "config.yaml",
    "config.yml",
    ".env",
    "appsettings.json",
    "settings.json",
    "config.toml",
    "config.json",
];
|
||
|
|
|
||
|
|
/// Per-language manifest files (lockfile + manifest pair) recognised by
|
||
|
|
/// the toolchain resolver. When present at `project_root`, these are
|
||
|
|
/// copied verbatim into the staged workdir so the build sandbox sees the
|
||
|
|
/// user's pinned dependency set. Order is significant only insofar as
|
||
|
|
/// the first match wins for [`CapturedDeps::lockfile_origin`].
|
||
|
|
const MANIFEST_FILES_BY_LANG: &[(Lang, &[&str])] = &[
|
||
|
|
(Lang::Python, &["requirements.txt", "pyproject.toml", "Pipfile", "Pipfile.lock"]),
|
||
|
|
(Lang::JavaScript, &["package.json", "package-lock.json", "yarn.lock", "pnpm-lock.yaml"]),
|
||
|
|
(Lang::TypeScript, &["package.json", "package-lock.json", "yarn.lock", "tsconfig.json"]),
|
||
|
|
(Lang::Rust, &["Cargo.toml", "Cargo.lock"]),
|
||
|
|
(Lang::Go, &["go.mod", "go.sum"]),
|
||
|
|
(Lang::Java, &["pom.xml", "build.gradle", "build.gradle.kts"]),
|
||
|
|
(Lang::Php, &["composer.json", "composer.lock"]),
|
||
|
|
(Lang::Ruby, &["Gemfile", "Gemfile.lock"]),
|
||
|
|
(Lang::C, &["Makefile", "CMakeLists.txt"]),
|
||
|
|
(Lang::Cpp, &["Makefile", "CMakeLists.txt"]),
|
||
|
|
];
|
||
|
|
|
||
|
|
/// Static-analysis output captured from the project, ready to be staged
|
||
|
|
/// into the harness workdir.
|
||
|
|
///
|
||
|
|
/// Returned by [`capture_project_dependencies`] and consumed by
|
||
|
|
/// [`stage_workdir`]. The struct deliberately separates *capture* (read
|
||
|
|
/// the project tree, no writes) from *staging* (write the workdir, no
|
||
|
|
/// reads of the source tree), so a future phase can persist
|
||
|
|
/// `CapturedDeps` to disk and re-stage without re-walking the source.
|
||
|
|
#[derive(Debug, Clone)]
|
||
|
|
pub struct CapturedDeps {
|
||
|
|
/// Absolute path to the user's project root used as the read anchor.
|
||
|
|
pub project_root: PathBuf,
|
||
|
|
/// Absolute path to the entry file (resolved against `project_root`).
|
||
|
|
pub entry_file: PathBuf,
|
||
|
|
/// Resolved language toolchain pin (version + drift flag).
|
||
|
|
pub toolchain: ToolchainResolution,
|
||
|
|
/// Top-level imports literally appearing in [`Self::entry_file`].
|
||
|
|
///
|
||
|
|
/// `lib_name` is the canonical package/module the import names. The
|
||
|
|
/// per-language `materialize_runtime` impl pins each entry to the
|
||
|
|
/// project's framework version when possible, or to a known-good
|
||
|
|
/// recent version otherwise.
|
||
|
|
pub direct_deps: Vec<String>,
|
||
|
|
/// Web frameworks detected from project manifests. Surfaced as a
|
||
|
|
/// separate field (rather than folded into `direct_deps`) so the
|
||
|
|
/// emitters can decide whether to pin to a specific framework
|
||
|
|
/// version even when the entry file imports the framework
|
||
|
|
/// transitively.
|
||
|
|
pub frameworks: Vec<DetectedFramework>,
|
||
|
|
/// Three-valued lang-has-framework signal (see
|
||
|
|
/// [`FrameworkContext::lang_has_web_framework`]).
|
||
|
|
pub framework_signal: Option<bool>,
|
||
|
|
/// Absolute paths of local config files reachable from the entry
|
||
|
|
/// point's directory. Each is copied verbatim into the workdir
|
||
|
|
/// during [`stage_workdir`].
|
||
|
|
pub config_files: Vec<PathBuf>,
|
||
|
|
/// Source files reachable from the sink's enclosing function via
|
||
|
|
/// reverse callgraph edges. Always includes the entry file. Empty
|
||
|
|
/// when no summaries / callgraph are threaded into the capture step.
|
||
|
|
pub source_closure: Vec<PathBuf>,
|
||
|
|
/// Manifest files (lockfile + project manifest pair) recognised for
|
||
|
|
/// [`Self::toolchain`]'s language. Each entry is an absolute path
|
||
|
|
/// inside `project_root`; the first existing entry from
|
||
|
|
/// [`MANIFEST_FILES_BY_LANG`] wins for [`Self::lockfile`].
|
||
|
|
pub manifests: Vec<PathBuf>,
|
||
|
|
/// First recognised manifest file (== `manifests[0]` when present).
|
||
|
|
/// Used by the per-language emitter as the canonical lockfile when
|
||
|
|
/// synthesising the staged manifest.
|
||
|
|
pub lockfile: Option<PathBuf>,
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Runtime environment handle owned by the staging step.
|
||
|
|
///
|
||
|
|
/// Holds everything the per-language `materialize_runtime` impl needs to
|
||
|
|
/// emit a pinned manifest, plus the workdir handle so the staged paths
|
||
|
|
/// resolve correctly. Construction is owned by [`stage_workdir`]; the
|
||
|
|
/// fields are otherwise read-only so future stub injection (Phase 09+
|
||
|
|
/// extensions) can extend the struct without invalidating existing
|
||
|
|
/// callers.
|
||
|
|
#[derive(Debug, Clone)]
|
||
|
|
pub struct Environment {
|
||
|
|
/// Stable hash of the originating spec. Copied here so the emitter
|
||
|
|
/// can include it in the manifest comment header for forensic
|
||
|
|
/// traceability.
|
||
|
|
pub spec_hash: String,
|
||
|
|
/// Absolute path to the workdir that was just staged.
|
||
|
|
pub workdir: PathBuf,
|
||
|
|
/// Absolute path to the canonical lockfile staged into the workdir
|
||
|
|
/// (e.g. `workdir/requirements.txt`, `workdir/Cargo.lock`). `None`
|
||
|
|
/// when the language has no recognised lockfile or the user's
|
||
|
|
/// project carried none.
|
||
|
|
pub lockfile: Option<PathBuf>,
|
||
|
|
/// Source files materialised into the workdir, as paths *relative*
|
||
|
|
/// to the workdir root (e.g. `"src/handler.py"`).
|
||
|
|
pub staged_sources: Vec<PathBuf>,
|
||
|
|
/// Environment variables the harness should set before invoking the
|
||
|
|
/// entry point. Phase 09 stops at the empty set; Phase 10+
|
||
|
|
/// extensions (stub injection) will populate these.
|
||
|
|
pub env_vars: Vec<(String, String)>,
|
||
|
|
/// Stub registry handles. Reserved for the Phase 10 stub-injection
|
||
|
|
/// layer; Phase 09 stages no stubs so this is always empty.
|
||
|
|
pub stub_handles: Vec<String>,
|
||
|
|
/// Language-toolchain pin carried over from
|
||
|
|
/// [`CapturedDeps::toolchain`] so the emitter does not need both
|
||
|
|
/// inputs.
|
||
|
|
pub toolchain: ToolchainResolution,
|
||
|
|
/// Direct deps the entry imports. Same shape as
|
||
|
|
/// [`CapturedDeps::direct_deps`].
|
||
|
|
pub direct_deps: Vec<String>,
|
||
|
|
/// Frameworks detected in the project root.
|
||
|
|
pub frameworks: Vec<DetectedFramework>,
|
||
|
|
/// Language pinned via the originating spec. Cached here so the
|
||
|
|
/// emitter does not have to re-thread the spec.
|
||
|
|
pub lang: Lang,
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Manifest / lockfile artifacts the harness build needs alongside the
/// generated source. Returned by
/// [`crate::dynamic::lang::LangEmitter::materialize_runtime`].
///
/// Mirrors [`crate::dynamic::lang::HarnessSource::extra_files`] so the
/// harness staging path can write the manifest directly via the existing
/// extra-files loop.
#[derive(Debug, Clone, Default)]
pub struct RuntimeArtifacts {
    /// `(relative_path, contents)` pairs written under `Environment::workdir`.
    pub files: Vec<(String, String)>,
}

impl RuntimeArtifacts {
    /// Creates an empty artifact set (equivalent to `Default::default()`).
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Appends one `(rel_path, content)` artifact, preserving insertion
    /// order. No de-duplication is performed on `rel_path`.
    pub fn push(&mut self, rel_path: impl Into<String>, content: impl Into<String>) {
        self.files.push((rel_path.into(), content.into()));
    }
}
|
||
|
|
|
||
|
|
/// Walk the user's project tree to assemble the runtime dependencies the
|
||
|
|
/// harness needs.
|
||
|
|
///
|
||
|
|
/// Reads only — never writes. The returned [`CapturedDeps`] is the
|
||
|
|
/// single input to [`stage_workdir`], which is the sole owner of the
|
||
|
|
/// workdir filesystem mutations.
|
||
|
|
///
|
||
|
|
/// Always returns a populated record: missing inputs are best-effort and
|
||
|
|
/// fall back to defaults (system toolchain, empty deps). The function
|
||
|
|
/// never fails — every failure mode (manifest unreadable, entry file
|
||
|
|
/// missing) is folded into the returned record.
|
||
|
|
pub fn capture_project_dependencies(project_root: &Path, spec: &HarnessSpec) -> CapturedDeps {
|
||
|
|
capture_project_dependencies_with_context(project_root, spec, None, None)
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Strategy-aware [`capture_project_dependencies`] that consults the
|
||
|
|
/// whole-program [`CallGraph`] and [`GlobalSummaries`] when present.
|
||
|
|
///
|
||
|
|
/// When both are provided, [`CapturedDeps::source_closure`] is populated
|
||
|
|
/// via reverse-edge BFS from the sink's enclosing function so the
|
||
|
|
/// staging step copies every file the entry transitively depends on.
|
||
|
|
/// When either is `None` the closure shrinks to a single-file set
|
||
|
|
/// containing only the entry — staging still works for the simple case
|
||
|
|
/// but cross-file helpers are not copied across.
|
||
|
|
pub fn capture_project_dependencies_with_context(
|
||
|
|
project_root: &Path,
|
||
|
|
spec: &HarnessSpec,
|
||
|
|
summaries: Option<&GlobalSummaries>,
|
||
|
|
callgraph: Option<&CallGraph>,
|
||
|
|
) -> CapturedDeps {
|
||
|
|
let entry_file = resolve_under_root(project_root, &spec.entry_file);
|
||
|
|
|
||
|
|
let toolchain = resolve_toolchain_for_lang(spec.lang, project_root);
|
||
|
|
|
||
|
|
let direct_deps = extract_direct_deps(&entry_file, spec.lang);
|
||
|
|
|
||
|
|
let framework_ctx = detect_frameworks(project_root);
|
||
|
|
let frameworks = framework_ctx.frameworks.clone();
|
||
|
|
let framework_signal = framework_ctx.lang_has_web_framework(framework_slug_for_lang(spec.lang));
|
||
|
|
|
||
|
|
let config_files = collect_config_files(&entry_file, project_root);
|
||
|
|
|
||
|
|
let manifests = collect_manifest_files(spec.lang, project_root);
|
||
|
|
let lockfile = manifests.first().cloned();
|
||
|
|
|
||
|
|
let source_closure = compute_source_closure(&entry_file, project_root, spec, summaries, callgraph);
|
||
|
|
|
||
|
|
CapturedDeps {
|
||
|
|
project_root: project_root.to_path_buf(),
|
||
|
|
entry_file,
|
||
|
|
toolchain,
|
||
|
|
direct_deps,
|
||
|
|
frameworks,
|
||
|
|
framework_signal,
|
||
|
|
config_files,
|
||
|
|
source_closure,
|
||
|
|
manifests,
|
||
|
|
lockfile,
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Materialise a minimal copy of the project into `workdir`.
|
||
|
|
///
|
||
|
|
/// Writes (in order):
|
||
|
|
/// 1. The entry file itself (under its source-tree-relative path so
|
||
|
|
/// relative `from .x import y` works inside the workdir).
|
||
|
|
/// 2. Every file in `captured.source_closure`, preserving the
|
||
|
|
/// `project_root`-relative layout.
|
||
|
|
/// 3. Every manifest file in `captured.manifests`.
|
||
|
|
/// 4. Every local config file in `captured.config_files`.
|
||
|
|
///
|
||
|
|
/// Each write checks the running workdir size against
|
||
|
|
/// [`MAX_WORKDIR_BYTES`] and stops early on overflow; the function
|
||
|
|
/// returns `io::ErrorKind::FileTooLarge` in that case so the caller can
|
||
|
|
/// surface a `Inconclusive(WorkdirOverflow)` verdict in a future phase.
|
||
|
|
///
|
||
|
|
/// The returned [`Environment`] is the sole handle subsequent emitters
|
||
|
|
/// consult; callers must not assume the workdir is otherwise mutated
|
||
|
|
/// outside of this function (the harness builder still writes the
|
||
|
|
/// generated source via [`crate::dynamic::harness::build`]).
|
||
|
|
pub fn stage_workdir(captured: &CapturedDeps, workdir: &Path) -> io::Result<Environment> {
|
||
|
|
let lang = guess_lang_for_toolchain(&captured.toolchain.toolchain_id);
|
||
|
|
stage_workdir_full(captured, workdir, "", lang)
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Like [`stage_workdir`] but lets the caller thread the originating
|
||
|
|
/// spec hash into the resulting [`Environment`].
|
||
|
|
pub fn stage_workdir_with_spec_hash(
|
||
|
|
captured: &CapturedDeps,
|
||
|
|
workdir: &Path,
|
||
|
|
spec_hash: &str,
|
||
|
|
) -> io::Result<Environment> {
|
||
|
|
let lang = guess_lang_for_toolchain(&captured.toolchain.toolchain_id);
|
||
|
|
stage_workdir_full(captured, workdir, spec_hash, lang)
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Strategy-aware [`stage_workdir`] that lets the caller pin the
|
||
|
|
/// [`Environment`]'s [`Lang`] explicitly (rather than guessing from the
|
||
|
|
/// toolchain id). Used by the integration tests and by future harness
|
||
|
|
/// staging plumbing that already has a [`HarnessSpec`] in scope.
|
||
|
|
pub fn stage_workdir_full(
|
||
|
|
captured: &CapturedDeps,
|
||
|
|
workdir: &Path,
|
||
|
|
spec_hash: &str,
|
||
|
|
lang: Lang,
|
||
|
|
) -> io::Result<Environment> {
|
||
|
|
std::fs::create_dir_all(workdir)?;
|
||
|
|
|
||
|
|
let mut running_bytes: u64 = 0;
|
||
|
|
let mut staged_sources: Vec<PathBuf> = Vec::new();
|
||
|
|
|
||
|
|
// 1. Entry file — preserve project-relative layout when the entry
|
||
|
|
// lives under project_root, otherwise fall back to the basename.
|
||
|
|
if captured.entry_file.exists() {
|
||
|
|
let rel = rel_under_root(&captured.entry_file, &captured.project_root)
|
||
|
|
.unwrap_or_else(|| PathBuf::from(captured.entry_file.file_name().unwrap_or_default()));
|
||
|
|
running_bytes = copy_into_workdir(
|
||
|
|
&captured.entry_file,
|
||
|
|
workdir,
|
||
|
|
&rel,
|
||
|
|
running_bytes,
|
||
|
|
&mut staged_sources,
|
||
|
|
)?;
|
||
|
|
}
|
||
|
|
|
||
|
|
// 2. Source closure — every reachable in-closure file.
|
||
|
|
for src in &captured.source_closure {
|
||
|
|
if src == &captured.entry_file {
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
if !src.exists() {
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
let rel = match rel_under_root(src, &captured.project_root) {
|
||
|
|
Some(r) => r,
|
||
|
|
None => continue,
|
||
|
|
};
|
||
|
|
running_bytes = copy_into_workdir(src, workdir, &rel, running_bytes, &mut staged_sources)?;
|
||
|
|
}
|
||
|
|
|
||
|
|
// 3. Manifests (project-relative).
|
||
|
|
let mut lockfile_in_workdir: Option<PathBuf> = None;
|
||
|
|
for manifest in &captured.manifests {
|
||
|
|
if !manifest.exists() {
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
let rel = match rel_under_root(manifest, &captured.project_root) {
|
||
|
|
Some(r) => r,
|
||
|
|
None => continue,
|
||
|
|
};
|
||
|
|
running_bytes = copy_into_workdir(
|
||
|
|
manifest,
|
||
|
|
workdir,
|
||
|
|
&rel,
|
||
|
|
running_bytes,
|
||
|
|
&mut staged_sources,
|
||
|
|
)?;
|
||
|
|
if lockfile_in_workdir.is_none() {
|
||
|
|
lockfile_in_workdir = Some(workdir.join(&rel));
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// 4. Config files (preserve relative layout under project_root).
|
||
|
|
for cfg in &captured.config_files {
|
||
|
|
if !cfg.exists() {
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
let rel = match rel_under_root(cfg, &captured.project_root) {
|
||
|
|
Some(r) => r,
|
||
|
|
None => PathBuf::from(cfg.file_name().unwrap_or_default()),
|
||
|
|
};
|
||
|
|
running_bytes =
|
||
|
|
copy_into_workdir(cfg, workdir, &rel, running_bytes, &mut staged_sources)?;
|
||
|
|
}
|
||
|
|
|
||
|
|
Ok(Environment {
|
||
|
|
spec_hash: spec_hash.to_owned(),
|
||
|
|
workdir: workdir.to_path_buf(),
|
||
|
|
lockfile: lockfile_in_workdir,
|
||
|
|
staged_sources,
|
||
|
|
env_vars: Vec::new(),
|
||
|
|
stub_handles: Vec::new(),
|
||
|
|
toolchain: captured.toolchain.clone(),
|
||
|
|
direct_deps: captured.direct_deps.clone(),
|
||
|
|
frameworks: captured.frameworks.clone(),
|
||
|
|
lang,
|
||
|
|
})
|
||
|
|
}
|
||
|
|
|
||
|
|
fn guess_lang_for_toolchain(toolchain_id: &str) -> Lang {
|
||
|
|
Lang::from_slug(framework_slug_for_lang_for_toolchain(toolchain_id)).unwrap_or(Lang::Python)
|
||
|
|
}
|
||
|
|
|
||
|
|
// ── Helpers ──────────────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
fn copy_into_workdir(
|
||
|
|
src: &Path,
|
||
|
|
workdir: &Path,
|
||
|
|
rel: &Path,
|
||
|
|
running_bytes: u64,
|
||
|
|
staged: &mut Vec<PathBuf>,
|
||
|
|
) -> io::Result<u64> {
|
||
|
|
let metadata = match std::fs::metadata(src) {
|
||
|
|
Ok(m) => m,
|
||
|
|
Err(_) => return Ok(running_bytes),
|
||
|
|
};
|
||
|
|
let size = metadata.len();
|
||
|
|
if running_bytes.saturating_add(size) > MAX_WORKDIR_BYTES {
|
||
|
|
return Err(io::Error::new(
|
||
|
|
io::ErrorKind::Other,
|
||
|
|
format!(
|
||
|
|
"staged workdir would exceed {} bytes (next file `{}` = {} bytes)",
|
||
|
|
MAX_WORKDIR_BYTES,
|
||
|
|
rel.display(),
|
||
|
|
size
|
||
|
|
),
|
||
|
|
));
|
||
|
|
}
|
||
|
|
let dest = workdir.join(rel);
|
||
|
|
if let Some(parent) = dest.parent() {
|
||
|
|
std::fs::create_dir_all(parent)?;
|
||
|
|
}
|
||
|
|
std::fs::copy(src, &dest)?;
|
||
|
|
staged.push(rel.to_path_buf());
|
||
|
|
Ok(running_bytes.saturating_add(size))
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Resolves the spec's `entry_file` against `project_root`.
///
/// Relative paths are joined onto the root. Absolute paths are returned
/// unchanged, because `Path::join` replaces the base when its argument
/// is absolute. No filesystem access or normalisation is performed.
fn resolve_under_root(project_root: &Path, entry_file: &str) -> PathBuf {
    project_root.join(entry_file)
}
|
||
|
|
|
||
|
|
/// Returns `path` relative to `root`, or `None` when `path` does not
/// live under `root`.
///
/// Both sides are canonicalised first so symlinks and `..` segments do
/// not defeat the prefix test; a side that cannot be canonicalised
/// (e.g. it does not exist) is compared as given.
fn rel_under_root(path: &Path, root: &Path) -> Option<PathBuf> {
    let abs_path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
    let abs_root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
    abs_path
        .strip_prefix(&abs_root)
        .ok()
        .map(Path::to_path_buf)
}
|
||
|
|
|
||
|
|
fn resolve_toolchain_for_lang(lang: Lang, project_root: &Path) -> ToolchainResolution {
|
||
|
|
match lang {
|
||
|
|
Lang::Python => toolchain::resolve_python(project_root),
|
||
|
|
Lang::Rust => toolchain::resolve_rust(project_root),
|
||
|
|
Lang::JavaScript | Lang::TypeScript => toolchain::resolve_node(project_root),
|
||
|
|
Lang::Go => toolchain::resolve_go(project_root),
|
||
|
|
Lang::Java => toolchain::resolve_java(project_root),
|
||
|
|
Lang::Php => toolchain::resolve_php(project_root),
|
||
|
|
_ => toolchain::resolve_python(project_root),
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
fn framework_slug_for_lang(lang: Lang) -> &'static str {
|
||
|
|
match lang {
|
||
|
|
Lang::Python => "python",
|
||
|
|
Lang::JavaScript => "javascript",
|
||
|
|
Lang::TypeScript => "typescript",
|
||
|
|
Lang::Java => "java",
|
||
|
|
Lang::Go => "go",
|
||
|
|
Lang::Php => "php",
|
||
|
|
Lang::Ruby => "ruby",
|
||
|
|
Lang::Rust => "rust",
|
||
|
|
Lang::C => "c",
|
||
|
|
Lang::Cpp => "cpp",
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Maps a toolchain id to a language slug by prefix.
///
/// Prefixes are tried in table order and the first match wins. Ids that
/// match no prefix (including the empty string) fall back to `"python"`.
/// Note that every `node*` id maps to `"javascript"`, so TypeScript
/// cannot be recovered from the toolchain id alone.
fn framework_slug_for_lang_for_toolchain(toolchain_id: &str) -> &'static str {
    const PREFIX_TO_SLUG: &[(&str, &str)] = &[
        ("python", "python"),
        ("node", "javascript"),
        ("rust", "rust"),
        ("go", "go"),
        ("java", "java"),
        ("php", "php"),
    ];
    PREFIX_TO_SLUG
        .iter()
        .find(|&&(prefix, _)| toolchain_id.starts_with(prefix))
        .map_or("python", |&(_, slug)| slug)
}
|
||
|
|
|
||
|
|
fn collect_config_files(entry_file: &Path, project_root: &Path) -> Vec<PathBuf> {
|
||
|
|
let mut out: Vec<PathBuf> = Vec::new();
|
||
|
|
let mut seen: HashSet<PathBuf> = HashSet::new();
|
||
|
|
let dirs: Vec<PathBuf> = {
|
||
|
|
let mut v = Vec::new();
|
||
|
|
v.push(project_root.to_path_buf());
|
||
|
|
if let Some(parent) = entry_file.parent() {
|
||
|
|
if parent != project_root && parent.starts_with(project_root) {
|
||
|
|
v.push(parent.to_path_buf());
|
||
|
|
}
|
||
|
|
}
|
||
|
|
v
|
||
|
|
};
|
||
|
|
for dir in &dirs {
|
||
|
|
for name in CONFIG_FILE_CANDIDATES {
|
||
|
|
let cand = dir.join(name);
|
||
|
|
if cand.is_file() && !seen.contains(&cand) {
|
||
|
|
seen.insert(cand.clone());
|
||
|
|
out.push(cand);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
out
|
||
|
|
}
|
||
|
|
|
||
|
|
fn collect_manifest_files(lang: Lang, project_root: &Path) -> Vec<PathBuf> {
|
||
|
|
let names = MANIFEST_FILES_BY_LANG
|
||
|
|
.iter()
|
||
|
|
.find(|(l, _)| *l == lang)
|
||
|
|
.map(|(_, n)| *n)
|
||
|
|
.unwrap_or(&[]);
|
||
|
|
let mut out: Vec<PathBuf> = Vec::new();
|
||
|
|
for name in names {
|
||
|
|
let cand = project_root.join(name);
|
||
|
|
if cand.is_file() {
|
||
|
|
out.push(cand);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
out
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Walk `entry_file` for top-level imports and project-internal package
|
||
|
|
/// names. Distinct per language; the fall-through returns an empty Vec
|
||
|
|
/// so unsupported languages do not crash, they just stage with no
|
||
|
|
/// imports.
|
||
|
|
pub fn extract_direct_deps(entry_file: &Path, lang: Lang) -> Vec<String> {
|
||
|
|
let bytes = match read_bounded(entry_file) {
|
||
|
|
Some(s) => s,
|
||
|
|
None => return Vec::new(),
|
||
|
|
};
|
||
|
|
let head = match std::str::from_utf8(&bytes) {
|
||
|
|
Ok(s) => s,
|
||
|
|
Err(_) => return Vec::new(),
|
||
|
|
};
|
||
|
|
match lang {
|
||
|
|
Lang::Python => extract_python_imports(head),
|
||
|
|
Lang::JavaScript | Lang::TypeScript => extract_js_imports(head),
|
||
|
|
Lang::Ruby => extract_ruby_imports(head),
|
||
|
|
Lang::Php => extract_php_imports(head),
|
||
|
|
Lang::Go => extract_go_imports(head),
|
||
|
|
Lang::Java => extract_java_imports(head),
|
||
|
|
Lang::Rust => extract_rust_imports(head),
|
||
|
|
Lang::C | Lang::Cpp => extract_c_includes(head),
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Extracts the top-level package of every absolute Python import in
/// `source`, in first-seen order and without duplicates.
///
/// Handled shapes (line-based, best-effort):
/// * `from X.Y import Z` → `X`; relative forms (`from . import x`,
///   `from .pkg import x`) are skipped.
/// * `import X.Y`, `import X.Y as Z` → `X`.
/// * `import X, Y as Z` → `X` and `Y`.
///
/// Trailing `# comments` on `import` lines and a trailing `;` are
/// ignored.
fn extract_python_imports(source: &str) -> Vec<String> {
    let mut out: Vec<String> = Vec::new();
    let mut seen: HashSet<String> = HashSet::new();
    let mut record = |module: &str| {
        let top = module
            .split('.')
            .next()
            .unwrap_or("")
            .trim_end_matches(';');
        if !top.is_empty() && seen.insert(top.to_owned()) {
            out.push(top.to_owned());
        }
    };

    for line in source.lines() {
        let line = line.trim_start();
        if let Some(rest) = line.strip_prefix("from ") {
            if let Some(module) = rest.split_whitespace().next() {
                // A leading dot marks a package-relative import.
                if !module.starts_with('.') {
                    record(module);
                }
            }
        } else if let Some(rest) = line.strip_prefix("import ") {
            let rest = rest.split('#').next().unwrap_or("");
            for clause in rest.split(',') {
                // The first token of `X.Y as Z` is the module path.
                if let Some(module) = clause.split_whitespace().next() {
                    record(module);
                }
            }
        }
    }
    out
}
|
||
|
|
|
||
|
|
/// Extracts the npm package names referenced by `source`, in first-seen
/// order and without duplicates.
///
/// Recognised per line (best-effort, no real parsing):
/// * `… from 'pkg'` — ES imports / re-exports, including the closing
///   line of a multi-line import.
/// * `require('pkg')` — CommonJS.
/// * `import 'pkg'` — side-effect import; only taken when `import` is
///   directly followed by a quoted specifier, so `import {` openers and a
///   trailing `;` do not leak into the result.
///
/// Relative (`./x`, `../x`) and absolute (`/x`) specifiers are ignored.
/// Scoped packages keep `@scope/name`; everything else is reduced to its
/// first path segment (`lodash/fp` → `lodash`).
fn extract_js_imports(source: &str) -> Vec<String> {
    /// Contents of the first `'…'`, `"…"` or `` `…` `` literal in `text`.
    fn first_quoted(text: &str) -> Option<&str> {
        let open = text.find(['\'', '"', '`'])?;
        let quote = text[open..].chars().next()?;
        let body = &text[open + 1..];
        body.find(quote).map(|close| &body[..close])
    }

    /// Canonical package name for a module specifier, or `None` for
    /// empty and path-like specifiers.
    fn canonical_package(specifier: &str) -> Option<String> {
        if specifier.is_empty() || specifier.starts_with('.') || specifier.starts_with('/') {
            return None;
        }
        if specifier.starts_with('@') {
            let mut parts = specifier.splitn(3, '/');
            return match (parts.next(), parts.next()) {
                (Some(scope), Some(name)) => Some(format!("{}/{}", scope, name)),
                _ => Some(specifier.to_owned()),
            };
        }
        Some(specifier.split('/').next().unwrap_or(specifier).to_owned())
    }

    let mut out: Vec<String> = Vec::new();
    let mut seen: HashSet<String> = HashSet::new();
    let mut record = |specifier: &str| {
        if let Some(pkg) = canonical_package(specifier) {
            if seen.insert(pkg.clone()) {
                out.push(pkg);
            }
        }
    };

    for line in source.lines() {
        let line = line.trim_start();
        // `import x from 'pkg'`, `export * from 'pkg'`, `} from 'pkg'`
        if let Some(idx) = line.find("from ") {
            if let Some(specifier) = first_quoted(&line[idx + "from ".len()..]) {
                record(specifier);
            }
        }
        // `const x = require('pkg')`
        if let Some(idx) = line.find("require(") {
            if let Some(specifier) = first_quoted(&line[idx + "require(".len()..]) {
                record(specifier);
            }
        }
        // Side-effect import: `import 'pkg';`
        if !line.contains("from ") {
            if let Some(rest) = line.strip_prefix("import ") {
                let rest = rest.trim_start();
                if rest.starts_with(['\'', '"', '`']) {
                    if let Some(specifier) = first_quoted(rest) {
                        record(specifier);
                    }
                }
            }
        }
    }
    out
}
|
||
|
|
|
||
|
|
/// Extracts the library names named by `require` / `require_relative`
/// statements in `source`, in first-seen order and without duplicates.
///
/// Only string-literal arguments are considered, so dynamic forms such
/// as `require File.expand_path(…)` are skipped and trailing comments or
/// modifiers after the literal are ignored. Each target is reduced to
/// its first path segment (`sinatra/base` → `sinatra`); targets whose
/// first segment is empty or starts with `.` (e.g. `../shared/util`) are
/// dropped because they do not name a library.
fn extract_ruby_imports(source: &str) -> Vec<String> {
    let mut out: Vec<String> = Vec::new();
    let mut seen: HashSet<String> = HashSet::new();
    for line in source.lines() {
        let line = line.trim_start();
        let rest = match line
            .strip_prefix("require_relative ")
            .or_else(|| line.strip_prefix("require "))
        {
            Some(r) => r.trim_start(),
            None => continue,
        };
        let quote = match rest.chars().next() {
            Some(q) if q == '\'' || q == '"' => q,
            _ => continue,
        };
        let body = &rest[1..];
        // Tolerate a missing closing quote by taking the rest of the line.
        let target = body.find(quote).map_or(body, |end| &body[..end]).trim();
        let pkg = target.split('/').next().unwrap_or(target);
        if pkg.is_empty() || pkg.starts_with('.') {
            continue;
        }
        if seen.insert(pkg.to_owned()) {
            out.push(pkg.to_owned());
        }
    }
    out
}
|
||
|
|
|
||
|
|
/// Extracts the names referenced by PHP `use`, `require_once`, `require`
/// and `include` statements in `source`, in first-seen order and without
/// duplicates.
///
/// * `use` statements yield the top-level namespace segment. A leading
///   `\`, the `function` / `const` keywords and an `as Alias` suffix are
///   stripped first (`use function GuzzleHttp\json_decode;` →
///   `GuzzleHttp`).
/// * `require_once` / `require` / `include` yield their argument with
///   the surrounding quotes and trailing `;` removed (forward slashes
///   are kept, so paths are returned whole).
fn extract_php_imports(source: &str) -> Vec<String> {
    let mut out: Vec<String> = Vec::new();
    let mut seen: HashSet<String> = HashSet::new();
    for line in source.lines() {
        let line = line.trim_start();
        let (rest, is_use) = if let Some(r) = line.strip_prefix("use ") {
            (r, true)
        } else if let Some(r) = line.strip_prefix("require_once ") {
            (r, false)
        } else if let Some(r) = line.strip_prefix("require ") {
            (r, false)
        } else if let Some(r) = line.strip_prefix("include ") {
            (r, false)
        } else {
            continue;
        };

        let target = rest
            .trim()
            .trim_end_matches(';')
            .trim_matches(|c: char| c == '\'' || c == '"');

        let pkg = if is_use {
            let name = target
                .strip_prefix("function ")
                .or_else(|| target.strip_prefix("const "))
                .unwrap_or(target)
                .trim_start()
                // Fully-qualified names start with the root namespace.
                .trim_start_matches('\\');
            let first_segment = name.split('\\').next().unwrap_or(name);
            // Drop an `as Alias` suffix on single-segment names.
            first_segment.split_whitespace().next().unwrap_or("")
        } else {
            target.split('\\').next().unwrap_or(target)
        };

        if pkg.is_empty() {
            continue;
        }
        if seen.insert(pkg.to_owned()) {
            out.push(pkg.to_owned());
        }
    }
    out
}
|
||
|
|
|
||
|
|
/// Extracts the import paths declared in Go `source`, in first-seen
/// order and without duplicates.
///
/// Both the single-line form (`import "fmt"`, `import alias "pkg"`) and
/// the parenthesised block form are handled. Only the quoted import path
/// is taken, so aliases (`log "…"`, `_ "…"`, `. "…"`), trailing
/// comments and comment-only lines inside a block never end up in the
/// result.
fn extract_go_imports(source: &str) -> Vec<String> {
    /// The import path quoted in `spec`, ignoring anything that only
    /// appears after a `//` comment marker.
    fn quoted_import_path(spec: &str) -> Option<&str> {
        let open = spec.find(['"', '`'])?;
        if spec[..open].contains("//") {
            return None;
        }
        let quote = spec[open..].chars().next()?;
        let body = &spec[open + 1..];
        let close = body.find(quote)?;
        Some(&body[..close]).filter(|path| !path.is_empty())
    }

    let mut out: Vec<String> = Vec::new();
    let mut seen: HashSet<String> = HashSet::new();
    let mut record = |path: &str| {
        if seen.insert(path.to_owned()) {
            out.push(path.to_owned());
        }
    };

    let mut in_block = false;
    for line in source.lines() {
        let line = line.trim();
        if line.starts_with("import (") {
            in_block = true;
            continue;
        }
        if in_block {
            if line.starts_with(')') {
                in_block = false;
                continue;
            }
            if let Some(path) = quoted_import_path(line) {
                record(path);
            }
        } else if let Some(rest) = line.strip_prefix("import ") {
            if let Some(path) = quoted_import_path(rest) {
                record(path);
            }
        }
    }
    out
}
|
||
|
|
|
||
|
|
/// Collect the top-level packages imported by a Java source file, in
/// first-seen order.
///
/// Every line starting with `import ` contributes the first dotted segment
/// of the imported name (`import org.junit.Test;` -> `org`). The `static`
/// modifier of static imports is skipped, and the segment ends at the
/// first `.`, `;` or whitespace so a trailing comment or a missing package
/// qualifier cannot leak punctuation into the result. Duplicates are dropped.
fn extract_java_imports(source: &str) -> Vec<String> {
    let mut out: Vec<String> = Vec::new();
    let mut seen: HashSet<String> = HashSet::new();
    for line in source.lines() {
        let Some(rest) = line.trim_start().strip_prefix("import ") else {
            continue;
        };
        let rest = rest.trim_start();
        // `import static pkg.Type.member;` — `static` is a modifier, not a
        // package. Require whitespace after it so a name that merely starts
        // with "static" is left intact.
        let name = match rest.strip_prefix("static") {
            Some(after) if after.starts_with(char::is_whitespace) => after.trim_start(),
            _ => rest,
        };
        // Top-level Java package = first dotted segment.
        let pkg = name
            .split(|c: char| c == '.' || c == ';' || c.is_whitespace())
            .next()
            .unwrap_or(name);
        if pkg.is_empty() {
            continue;
        }
        if !seen.contains(pkg) {
            seen.insert(pkg.to_owned());
            out.push(pkg.to_owned());
        }
    }
    out
}
|
||
|
|
|
||
|
|
/// Collect the crate names referenced by a Rust source file, in first-seen
/// order.
///
/// Lines starting with `use ` or `extern crate ` contribute the leading
/// identifier of the imported path (`use tokio::net::TcpListener;` ->
/// `tokio`). A leading `::` (absolute path) is skipped. The crate name is
/// the longest leading run of identifier characters, so `;`, `as` aliases
/// and trailing comments never end up in the result. Paths that do not
/// start with an identifier (for example `use {a, b};`) are skipped.
///
/// The path keywords `self`, `super` and `crate` refer to the current
/// project and are not reported. Duplicates are dropped.
fn extract_rust_imports(source: &str) -> Vec<String> {
    let mut out: Vec<String> = Vec::new();
    let mut seen: HashSet<String> = HashSet::new();
    for line in source.lines() {
        let line = line.trim_start();
        let Some(rest) = line
            .strip_prefix("use ")
            .or_else(|| line.strip_prefix("extern crate "))
        else {
            continue;
        };
        // `use ::serde::Deserialize;` — drop the absolute-path marker so the
        // first segment is the crate rather than an empty string.
        let path = rest.trim_start().trim_start_matches("::");
        let crate_name = path
            .split(|c: char| !(c.is_alphanumeric() || c == '_'))
            .next()
            .unwrap_or("");
        if crate_name.is_empty() || matches!(crate_name, "self" | "super" | "crate") {
            continue;
        }
        if !seen.contains(crate_name) {
            seen.insert(crate_name.to_owned());
            out.push(crate_name.to_owned());
        }
    }
    out
}
|
||
|
|
|
||
|
|
/// Collect the headers named by `#include` directives in a C/C++ source
/// file, in first-seen order.
///
/// Both `#include <header>` and `#include "header"` are recognised,
/// including the `# include` spelling with whitespace after the `#`. Only
/// the text between the delimiters is recorded, so anything after the
/// closing delimiter (typically a comment) is ignored. If the closing
/// delimiter is missing, the rest of the line is used.
///
/// Directives whose operand is not delimited by `<…>` or `"…"` (computed
/// includes such as `#include CONFIG_H`, or other directives that merely
/// start with `include`) are skipped because they do not name a header.
/// Duplicates are dropped.
fn extract_c_includes(source: &str) -> Vec<String> {
    let mut out: Vec<String> = Vec::new();
    let mut seen: HashSet<String> = HashSet::new();
    for line in source.lines() {
        let Some(directive) = line.trim_start().strip_prefix('#') else {
            continue;
        };
        let Some(operand) = directive.trim_start().strip_prefix("include") else {
            continue;
        };
        let mut chars = operand.trim_start().chars();
        let closer = match chars.next() {
            Some('<') => '>',
            Some('"') => '"',
            _ => continue,
        };
        // `chars` now points just past the opening delimiter.
        let body = chars.as_str();
        let header = body.split(closer).next().unwrap_or(body).trim();
        if header.is_empty() {
            continue;
        }
        if !seen.contains(header) {
            seen.insert(header.to_owned());
            out.push(header.to_owned());
        }
    }
    out
}
|
||
|
|
|
||
|
|
fn read_bounded(path: &Path) -> Option<Vec<u8>> {
|
||
|
|
use std::io::Read;
|
||
|
|
let file = std::fs::File::open(path).ok()?;
|
||
|
|
let mut buf: Vec<u8> = Vec::new();
|
||
|
|
let mut reader = std::io::BufReader::new(file).take(IMPORT_SCAN_LIMIT as u64);
|
||
|
|
reader.read_to_end(&mut buf).ok()?;
|
||
|
|
Some(buf)
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Reverse-edge callgraph closure starting from the spec's sink-enclosing
|
||
|
|
/// function and walking outward through callers until the entry file is
|
||
|
|
/// reached or there are no more callers. Falls back to the entry-file
|
||
|
|
/// only when summaries / callgraph are not present.
|
||
|
|
///
|
||
|
|
/// The resulting set is bounded by the number of [`FuncKey`]s in the
|
||
|
|
/// call graph; in practice harness fixtures sit at <100 nodes so the BFS
|
||
|
|
/// terminates almost immediately.
|
||
|
|
fn compute_source_closure(
|
||
|
|
entry_file: &Path,
|
||
|
|
project_root: &Path,
|
||
|
|
spec: &HarnessSpec,
|
||
|
|
summaries: Option<&GlobalSummaries>,
|
||
|
|
callgraph: Option<&CallGraph>,
|
||
|
|
) -> Vec<PathBuf> {
|
||
|
|
let mut out: Vec<PathBuf> = Vec::new();
|
||
|
|
let mut seen: HashSet<PathBuf> = HashSet::new();
|
||
|
|
|
||
|
|
let push = |p: PathBuf, out: &mut Vec<PathBuf>, seen: &mut HashSet<PathBuf>| {
|
||
|
|
if !seen.contains(&p) {
|
||
|
|
seen.insert(p.clone());
|
||
|
|
out.push(p);
|
||
|
|
}
|
||
|
|
};
|
||
|
|
|
||
|
|
push(entry_file.to_path_buf(), &mut out, &mut seen);
|
||
|
|
|
||
|
|
let (Some(gs), Some(cg)) = (summaries, callgraph) else {
|
||
|
|
return out;
|
||
|
|
};
|
||
|
|
|
||
|
|
let sink_file_abs = resolve_under_root(project_root, &spec.sink_file);
|
||
|
|
|
||
|
|
// Seed: every FuncKey whose namespace is the sink file.
|
||
|
|
let mut frontier: Vec<FuncKey> = gs
|
||
|
|
.iter()
|
||
|
|
.filter_map(|(k, _)| {
|
||
|
|
let ns_abs = resolve_under_root(project_root, &k.namespace);
|
||
|
|
if paths_equal(&ns_abs, &sink_file_abs) {
|
||
|
|
Some(k.clone())
|
||
|
|
} else {
|
||
|
|
None
|
||
|
|
}
|
||
|
|
})
|
||
|
|
.collect();
|
||
|
|
|
||
|
|
let mut visited: HashSet<FuncKey> = frontier.iter().cloned().collect();
|
||
|
|
let mut steps = 0;
|
||
|
|
const MAX_STEPS: usize = 256;
|
||
|
|
while let Some(callee) = frontier.pop() {
|
||
|
|
if steps > MAX_STEPS {
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
steps += 1;
|
||
|
|
let ns_abs = resolve_under_root(project_root, &callee.namespace);
|
||
|
|
push(ns_abs.clone(), &mut out, &mut seen);
|
||
|
|
for caller in callers_of(cg, &callee) {
|
||
|
|
if visited.contains(&caller) {
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
visited.insert(caller.clone());
|
||
|
|
frontier.push(caller);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
out
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Decide whether `a` and `b` name the same file.
///
/// When both paths can be canonicalised, the canonical forms are compared.
/// If either canonicalisation fails, the paths are compared as given.
fn paths_equal(a: &Path, b: &Path) -> bool {
    if let (Ok(left), Ok(right)) = (a.canonicalize(), b.canonicalize()) {
        return left == right;
    }
    a == b
}
|
||
|
|
|
||
|
|
/// Adapter used by [`crate::dynamic::lang::LangEmitter::materialize_runtime`]
|
||
|
|
/// when a language wants to know whether the captured deps mention a
|
||
|
|
/// specific package name (case-insensitive).
|
||
|
|
pub fn deps_mention(env: &Environment, needle: &str) -> bool {
|
||
|
|
let needle = needle.to_ascii_lowercase();
|
||
|
|
env.direct_deps
|
||
|
|
.iter()
|
||
|
|
.any(|d| d.eq_ignore_ascii_case(&needle))
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Adapter used by [`crate::dynamic::lang::LangEmitter::materialize_runtime`]
|
||
|
|
/// when a language wants to know whether a specific [`DetectedFramework`]
|
||
|
|
/// was named in the project manifest.
|
||
|
|
pub fn frameworks_contain(env: &Environment, fw: DetectedFramework) -> bool {
|
||
|
|
env.frameworks.contains(&fw)
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Stamp the Phase-09 lang detection slug back onto an [`Environment`]
|
||
|
|
/// whose [`Lang`] field was guessed from the toolchain id. Used by the
|
||
|
|
/// integration tests to make the lang round-trip deterministic.
|
||
|
|
pub fn override_lang(env: &mut Environment, lang: Lang) {
|
||
|
|
env.lang = lang;
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Helper for [`FrameworkContext`] consumers: returns the cached
|
||
|
|
/// inspected-langs set so the verifier can decide whether a missing
|
||
|
|
/// framework signal counts as "absent" vs "no manifest".
|
||
|
|
pub fn framework_context_for(project_root: &Path) -> FrameworkContext {
|
||
|
|
detect_frameworks(project_root)
|
||
|
|
}
|
||
|
|
|
||
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot, SpecDerivationStrategy};
    use crate::labels::Cap;
    use std::fs;
    use tempfile::TempDir;

    /// Build a minimal spec whose entry file and sink file are both
    /// `entry_file`; every other field is a fixed placeholder.
    fn fake_spec(entry_file: &str, lang: Lang) -> HarnessSpec {
        HarnessSpec {
            finding_id: "0000000000000001".into(),
            spec_hash: "test0000abcd1234".into(),
            toolchain_id: "python-3.11".into(),
            lang,
            entry_file: entry_file.into(),
            entry_name: "handler".into(),
            entry_kind: EntryKind::Function,
            sink_file: entry_file.into(),
            sink_line: 10,
            payload_slot: PayloadSlot::Param(0),
            expected_cap: Cap::CODE_EXEC,
            constraint_hints: vec![],
            derivation: SpecDerivationStrategy::FromFlowSteps,
        }
    }

    /// Create a temporary project directory holding the given
    /// `(file name, contents)` pairs, written in order.
    fn scratch_project(files: &[(&str, &str)]) -> TempDir {
        let dir = TempDir::new().unwrap();
        for &(name, contents) in files {
            fs::write(dir.path().join(name), contents).unwrap();
        }
        dir
    }

    /// True when `name` is one of `deps`.
    fn mentions<'a>(deps: impl IntoIterator<Item = &'a String>, name: &str) -> bool {
        deps.into_iter().any(|dep| dep.as_str() == name)
    }

    #[test]
    fn extract_python_imports_picks_top_level_pkg() {
        let src = concat!(
            "\n",
            "from flask import Flask, request\n",
            "import os\n",
            "import sqlalchemy\n",
            "import pandas as pd\n",
            "from sqlalchemy.orm import sessionmaker\n",
        );
        let deps = extract_python_imports(src);
        for expected in ["flask", "os", "sqlalchemy", "pandas"] {
            assert!(mentions(&deps, expected));
        }
        // sqlalchemy.orm is deduped to "sqlalchemy".
        let sqlalchemy_hits = deps.iter().filter(|d| *d == "sqlalchemy").count();
        assert_eq!(sqlalchemy_hits, 1);
    }

    #[test]
    fn extract_js_imports_handles_scoped_pkg() {
        let src = concat!(
            "\n",
            "import express from 'express';\n",
            "const helmet = require(\"helmet\");\n",
            "import { Router } from '@koa/router';\n",
            "import './local-thing';\n",
        );
        let deps = extract_js_imports(src);
        for expected in ["express", "helmet", "@koa/router"] {
            assert!(mentions(&deps, expected));
        }
        // Relative imports are skipped.
        assert!(deps.iter().all(|d| !d.starts_with('.')));
    }

    #[test]
    fn extract_rust_imports_collects_crates() {
        let src = concat!(
            "use serde::Deserialize;\n",
            "use tokio::net::TcpListener;\n",
            "extern crate libc;\n",
            "use crate::foo::bar;\n",
        );
        let deps = extract_rust_imports(src);
        for expected in ["serde", "tokio", "libc"] {
            assert!(mentions(&deps, expected));
        }
        // Project-internal references skipped.
        assert!(!mentions(&deps, "crate"));
    }

    #[test]
    fn extract_go_imports_handles_block_and_single() {
        let src = concat!(
            "package main\n",
            "import \"fmt\"\n",
            "import (\n",
            "\t\"net/http\"\n",
            "\t alias \"github.com/gin-gonic/gin\"\n",
            ")\n",
        );
        let deps = extract_go_imports(src);
        for expected in ["fmt", "net/http", "github.com/gin-gonic/gin"] {
            assert!(mentions(&deps, expected));
        }
    }

    #[test]
    fn capture_returns_default_when_root_empty() {
        let project = scratch_project(&[]);
        let spec = fake_spec("app.py", Lang::Python);
        let captured = capture_project_dependencies(project.path(), &spec);
        assert!(captured.direct_deps.is_empty());
        assert!(captured.frameworks.is_empty());
        assert!(captured.lockfile.is_none());
        assert_eq!(captured.toolchain.toolchain_id, "python-3");
    }

    #[test]
    fn capture_picks_up_python_imports_and_frameworks() {
        let project = scratch_project(&[
            (
                "app.py",
                "from flask import Flask, request\nimport os\nimport requests\n",
            ),
            ("requirements.txt", "Flask==2.3.0\nrequests>=2.28\n"),
        ]);
        let spec = fake_spec("app.py", Lang::Python);
        let captured = capture_project_dependencies(project.path(), &spec);
        assert!(captured.direct_deps.contains(&"flask".to_owned()));
        assert!(captured.direct_deps.contains(&"requests".to_owned()));
        assert!(captured.frameworks.contains(&DetectedFramework::Flask));
        assert!(captured.lockfile.is_some());
    }

    #[test]
    fn stage_workdir_copies_entry_and_manifest() {
        let project = scratch_project(&[
            ("app.py", "from flask import Flask\n"),
            ("requirements.txt", "Flask\n"),
        ]);
        let spec = fake_spec("app.py", Lang::Python);
        let captured = capture_project_dependencies(project.path(), &spec);
        let stage = TempDir::new().unwrap();
        let env = stage_workdir_with_spec_hash(&captured, stage.path(), "deadbeef").unwrap();
        for staged in ["app.py", "requirements.txt"] {
            assert!(env.workdir.join(staged).is_file());
        }
        assert_eq!(env.spec_hash, "deadbeef");
        assert!(env.lockfile.is_some());
    }

    #[test]
    fn stage_workdir_respects_max_size() {
        let project = scratch_project(&[]);
        // Write a single source over the budget. The copy must error.
        let oversized = vec![b'x'; (MAX_WORKDIR_BYTES + 1) as usize];
        fs::write(project.path().join("app.py"), &oversized).unwrap();
        let spec = fake_spec("app.py", Lang::Python);
        let captured = capture_project_dependencies(project.path(), &spec);
        let stage = TempDir::new().unwrap();
        let err = stage_workdir(&captured, stage.path()).unwrap_err();
        assert!(err.to_string().contains("exceed"));
    }

    #[test]
    fn config_files_picked_up_when_present() {
        let project = scratch_project(&[
            ("app.py", "from flask import Flask\n"),
            ("config.yaml", "debug: true\n"),
            (".env", "FLASK_DEBUG=1\n"),
        ]);
        let spec = fake_spec("app.py", Lang::Python);
        let captured = capture_project_dependencies(project.path(), &spec);
        assert_eq!(captured.config_files.len(), 2);
    }
}
|