[pitboss/grind] deferred session-0010 (20260517T044708Z-e058)

This commit is contained in:
pitboss 2026-05-17 03:44:24 -05:00
parent e0b1dfbb2a
commit 2deb74c18c
4 changed files with 409 additions and 6 deletions

View file

@ -625,9 +625,13 @@ pub enum Commands {
/// Loads the SurfaceMap persisted by the most recent indexed scan
/// when available, otherwise builds an entry-point-only map by
/// running the per-language framework probes against the on-disk
/// source. Use `--format dot` and pipe through `dot -Tsvg` to
/// produce a renderable graph; `--format svg` does the same in one
/// step when graphviz is installed locally.
/// source. Pass `--build` to force a full inline build (pass-1
/// summary extraction + call-graph construction) when no indexed
/// scan exists; that populates DataStore / ExternalService /
/// DangerousLocal nodes the entry-points-only fallback omits.
/// Use `--format dot` and pipe through `dot -Tsvg` to produce a
/// renderable graph; `--format svg` does the same in one step when
/// graphviz is installed locally.
Surface {
/// Path to inspect (defaults to current directory)
#[arg(default_value = ".")]
@ -636,6 +640,15 @@ pub enum Commands {
/// Output format: text (default), json, dot, svg
#[arg(long, value_enum, default_value_t = SurfaceFormat::Text)]
format: SurfaceFormat,
/// Build the full SurfaceMap from source even when no indexed
/// scan exists. Runs pass-1 summary extraction + call-graph
/// build inline (same cost as `nyx index build`), then renders
/// data-store / external-service / dangerous-local nodes plus
/// reach edges. Without this flag, an unscanned project
/// produces an entry-points-only map.
#[arg(long)]
build: bool,
},
/// Start the local web UI for browsing scan results

View file

@ -427,9 +427,13 @@ pub fn handle_command(
Commands::Rules { action } => {
self::rules::handle(action, config)?;
}
Commands::Surface { path, format } => {
Commands::Surface {
path,
format,
build,
} => {
install_from_config(config);
surface::handle(&path, format, database_dir, config)?;
surface::handle(&path, format, build, database_dir, config)?;
}
Commands::Serve {
path,

View file

@ -16,7 +16,15 @@
//! map first; if none exists (no `nyx scan` ever ran, or the index was
//! cleaned) it falls back to building a fresh entry-point-only map by
//! running the framework probes against the on-disk source.
//!
//! Pass `--build` to force a full inline build that runs pass-1
//! summary extraction + call-graph construction. That populates the
//! same DataStore / ExternalService / DangerousLocal nodes and Reaches
//! edges that an indexed scan would have persisted, at the cost of
//! parsing the project tree once (same wall-clock as `nyx index
//! build`).
use crate::ast::extract_all_summaries_from_bytes;
use crate::callgraph;
use crate::cli::SurfaceFormat;
use crate::database::index::Indexer;
@ -30,6 +38,7 @@ use crate::utils::Config;
use crate::utils::project::get_project_info;
use crate::walk::spawn_file_walker;
use crossbeam_channel::TryRecvError;
use rayon::prelude::*;
use std::collections::BTreeMap;
use std::io::Write;
use std::path::{Path, PathBuf};
@ -37,14 +46,25 @@ use std::process::{Command, Stdio};
/// Top-level CLI handler. Resolves the scan root, loads or builds a
/// [`SurfaceMap`], renders it in `format`, and writes to stdout.
///
/// When `build_inline` is `true`, the persisted SurfaceMap (if any) is
/// ignored and the full map is built by running pass-1 summary
/// extraction + call-graph construction against the on-disk source.
/// This populates DataStore / ExternalService / DangerousLocal nodes
/// and Reaches edges that the entry-points-only fallback omits.
pub fn handle(
path: &str,
format: SurfaceFormat,
build_inline: bool,
database_dir: &Path,
config: &Config,
) -> NyxResult<()> {
let scan_root = Path::new(path).canonicalize()?;
let map = load_or_build(&scan_root, database_dir, config)?;
let map = if build_inline {
build_full_from_filesystem(&scan_root, config)?
} else {
load_or_build(&scan_root, database_dir, config)?
};
let stdout = std::io::stdout();
let mut out = stdout.lock();
match format {
@ -108,6 +128,76 @@ fn build_from_filesystem(scan_root: &Path, config: &Config) -> NyxResult<Surface
Ok(build_surface_map(&inputs))
}
/// Build a full SurfaceMap from source by running pass-1 summary
/// extraction inline + call-graph construction, then handing the
/// resulting [`GlobalSummaries`] + [`CallGraph`] to
/// [`build_surface_map`]. Same cost as `nyx index build` pass 1 but
/// holds nothing in SQLite.
fn build_full_from_filesystem(scan_root: &Path, config: &Config) -> NyxResult<SurfaceMap> {
let files = collect_files(scan_root, config)?;
let mut summaries = build_summaries_inline(&files, scan_root, config);
summaries.install_hierarchy();
let call_graph = callgraph::build_call_graph(&summaries, &[]);
let inputs = SurfaceBuildInputs {
files: &files,
scan_root: Some(scan_root),
global_summaries: &summaries,
call_graph: &call_graph,
config,
};
Ok(build_surface_map(&inputs))
}
/// Run pass-1 summary extraction across `files` in parallel and merge
/// the per-thread results into a single [`GlobalSummaries`]. Mirrors
/// the `scan_filesystem_with_observer` pass-1 fold/reduce shape but
/// strips out the progress / metrics / logs threading the surface
/// command does not need.
///
/// Per-file errors are swallowed so a single bad file does not kill
/// the whole map.
fn build_summaries_inline(
files: &[PathBuf],
scan_root: &Path,
config: &Config,
) -> GlobalSummaries {
let root_str = scan_root.to_string_lossy().into_owned();
let mg = config.module_graph.as_deref();
files
.par_iter()
.fold(GlobalSummaries::new, |mut local_gs, path| {
let Ok(bytes) = std::fs::read(path) else {
return local_gs;
};
let Ok((func_summaries, ssa_summaries, ssa_bodies, auth_summaries, cross_pkg)) =
extract_all_summaries_from_bytes(&bytes, path, config, Some(scan_root))
else {
return local_gs;
};
for s in func_summaries {
let key = s.func_key_with_resolver(Some(&root_str), mg);
local_gs.insert(key, s);
}
for (key, ssa_sum) in ssa_summaries {
local_gs.insert_ssa(key, ssa_sum);
}
for (key, body) in ssa_bodies {
local_gs.insert_body(key, body);
}
for (key, auth_sum) in auth_summaries {
local_gs.insert_auth(key, auth_sum);
}
if let Some((ns, map)) = cross_pkg {
local_gs.insert_cross_package_imports(ns, map);
}
local_gs
})
.reduce(GlobalSummaries::new, |mut a, b| {
a.merge(b);
a
})
}
fn collect_files(root: &Path, config: &Config) -> NyxResult<Vec<PathBuf>> {
let (rx, handle) = spawn_file_walker(root, config);
let mut out = Vec::new();
@ -541,4 +631,127 @@ mod tests {
assert!(text.contains("reaches:"));
assert!(text.contains("dangerous: eval"));
}
#[test]
fn build_summaries_inline_extracts_function_summaries() {
// Establishes that the inline pass-1 path produces the same
// `GlobalSummaries` shape that an indexed scan would have
// persisted — at minimum, one FuncSummary per top-level
// function in the fixture. Without this guarantee the surface
// build downstream falls back to entry-points-only because
// `detect_data_stores` / `detect_external_services` /
// `detect_dangerous_locals` walk the summaries map.
let td = tempfile::tempdir().unwrap();
let project_dir = td.path();
std::fs::write(
project_dir.join("app.py"),
"from flask import Flask, request\n\
app = Flask(__name__)\n\
\n\
@app.route('/run')\n\
def run():\n\
cmd = request.args.get('cmd')\n\
return str(eval(cmd))\n\
\n\
def helper(x):\n\
return eval(x)\n",
)
.unwrap();
let cfg = Config::default();
let canon = project_dir.canonicalize().unwrap();
let files = collect_files(&canon, &cfg).unwrap();
let summaries = build_summaries_inline(&files, &canon, &cfg);
let names: Vec<String> = summaries
.iter()
.map(|(k, _)| k.qualified_name())
.collect();
assert!(
names.iter().any(|n| n.ends_with("run")),
"summaries should contain `run`, got {names:?}"
);
assert!(
names.iter().any(|n| n.ends_with("helper")),
"summaries should contain `helper`, got {names:?}"
);
}
#[test]
fn build_full_from_filesystem_walks_pass1_pipeline() {
// End-to-end smoke for `surface::handle(..., build=true)`: the
// inline-build path must produce a non-empty SurfaceMap on a
// project with a recognisable framework route. Equivalent to
// running `nyx surface --build .` on a single-file Flask app.
let td = tempfile::tempdir().unwrap();
let project_dir = td.path();
std::fs::write(
project_dir.join("app.py"),
"from flask import Flask, request\n\
app = Flask(__name__)\n\
\n\
@app.route('/run')\n\
def run():\n\
cmd = request.args.get('cmd')\n\
return str(eval(cmd))\n",
)
.unwrap();
let cfg = Config::default();
let canon = project_dir.canonicalize().unwrap();
let map = build_full_from_filesystem(&canon, &cfg).expect("inline build succeeds");
let has_entry = map
.nodes
.iter()
.any(|n| matches!(n, SurfaceNode::EntryPoint(_)));
assert!(has_entry, "Flask /run route should be detected");
}
#[test]
fn build_from_filesystem_entry_points_only_runs_with_empty_summaries() {
// Locks in the fallback contract: `build_from_filesystem` runs
// framework probes against an empty `GlobalSummaries` and
// produces only entry-point nodes. Any future change that
// accidentally widens the fallback to populate sinks should
// either ship through `--build` or update this test.
let td = tempfile::tempdir().unwrap();
let project_dir = td.path();
std::fs::write(
project_dir.join("app.py"),
"from flask import Flask\n\
app = Flask(__name__)\n\
\n\
@app.route('/run')\n\
def run():\n\
return 'ok'\n",
)
.unwrap();
let cfg = Config::default();
let canon = project_dir.canonicalize().unwrap();
let map = build_from_filesystem(&canon, &cfg).expect("fallback build succeeds");
// Entry point should still appear (framework probes run in the
// fallback path too).
assert!(
map.nodes
.iter()
.any(|n| matches!(n, SurfaceNode::EntryPoint(_))),
"Flask route should land via framework probe"
);
// No DataStore / ExternalService / DangerousLocal because the
// fallback path feeds an empty GlobalSummaries to the detectors.
let non_entry = map.nodes.iter().any(|n| {
matches!(
n,
SurfaceNode::DataStore(_)
| SurfaceNode::ExternalService(_)
| SurfaceNode::DangerousLocal(_)
)
});
assert!(
!non_entry,
"entry-points-only fallback should not produce non-entry nodes"
);
}
}