mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
[pitboss/grind] deferred session-0010 (20260517T044708Z-e058)
This commit is contained in:
parent
e0b1dfbb2a
commit
2deb74c18c
4 changed files with 409 additions and 6 deletions
19
src/cli.rs
19
src/cli.rs
|
|
@ -625,9 +625,13 @@ pub enum Commands {
|
|||
/// Loads the SurfaceMap persisted by the most recent indexed scan
|
||||
/// when available, otherwise builds an entry-point-only map by
|
||||
/// running the per-language framework probes against the on-disk
|
||||
/// source. Use `--format dot` and pipe through `dot -Tsvg` to
|
||||
/// produce a renderable graph; `--format svg` does the same in one
|
||||
/// step when graphviz is installed locally.
|
||||
/// source. Pass `--build` to force a full inline build (pass-1
|
||||
/// summary extraction + call-graph construction) when no indexed
|
||||
/// scan exists; that populates DataStore / ExternalService /
|
||||
/// DangerousLocal nodes the entry-points-only fallback omits.
|
||||
/// Use `--format dot` and pipe through `dot -Tsvg` to produce a
|
||||
/// renderable graph; `--format svg` does the same in one step when
|
||||
/// graphviz is installed locally.
|
||||
Surface {
|
||||
/// Path to inspect (defaults to current directory)
|
||||
#[arg(default_value = ".")]
|
||||
|
|
@ -636,6 +640,15 @@ pub enum Commands {
|
|||
/// Output format: text (default), json, dot, svg
|
||||
#[arg(long, value_enum, default_value_t = SurfaceFormat::Text)]
|
||||
format: SurfaceFormat,
|
||||
|
||||
/// Build the full SurfaceMap from source even when no indexed
|
||||
/// scan exists. Runs pass-1 summary extraction + call-graph
|
||||
/// build inline (same cost as `nyx index build`), then renders
|
||||
/// data-store / external-service / dangerous-local nodes plus
|
||||
/// reach edges. Without this flag, an unscanned project
|
||||
/// produces an entry-points-only map.
|
||||
#[arg(long)]
|
||||
build: bool,
|
||||
},
|
||||
|
||||
/// Start the local web UI for browsing scan results
|
||||
|
|
|
|||
|
|
@ -427,9 +427,13 @@ pub fn handle_command(
|
|||
Commands::Rules { action } => {
|
||||
self::rules::handle(action, config)?;
|
||||
}
|
||||
Commands::Surface { path, format } => {
|
||||
Commands::Surface {
|
||||
path,
|
||||
format,
|
||||
build,
|
||||
} => {
|
||||
install_from_config(config);
|
||||
surface::handle(&path, format, database_dir, config)?;
|
||||
surface::handle(&path, format, build, database_dir, config)?;
|
||||
}
|
||||
Commands::Serve {
|
||||
path,
|
||||
|
|
|
|||
|
|
@ -16,7 +16,15 @@
|
|||
//! map first; if none exists (no `nyx scan` ever ran, or the index was
|
||||
//! cleaned) it falls back to building a fresh entry-point-only map by
|
||||
//! running the framework probes against the on-disk source.
|
||||
//!
|
||||
//! Pass `--build` to force a full inline build that runs pass-1
|
||||
//! summary extraction + call-graph construction. That populates the
|
||||
//! same DataStore / ExternalService / DangerousLocal nodes and Reaches
|
||||
//! edges that an indexed scan would have persisted, at the cost of
|
||||
//! parsing the project tree once (same wall-clock as `nyx index
|
||||
//! build`).
|
||||
|
||||
use crate::ast::extract_all_summaries_from_bytes;
|
||||
use crate::callgraph;
|
||||
use crate::cli::SurfaceFormat;
|
||||
use crate::database::index::Indexer;
|
||||
|
|
@ -30,6 +38,7 @@ use crate::utils::Config;
|
|||
use crate::utils::project::get_project_info;
|
||||
use crate::walk::spawn_file_walker;
|
||||
use crossbeam_channel::TryRecvError;
|
||||
use rayon::prelude::*;
|
||||
use std::collections::BTreeMap;
|
||||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
|
@ -37,14 +46,25 @@ use std::process::{Command, Stdio};
|
|||
|
||||
/// Top-level CLI handler. Resolves the scan root, loads or builds a
|
||||
/// [`SurfaceMap`], renders it in `format`, and writes to stdout.
|
||||
///
|
||||
/// When `build_inline` is `true`, the persisted SurfaceMap (if any) is
|
||||
/// ignored and the full map is built by running pass-1 summary
|
||||
/// extraction + call-graph construction against the on-disk source.
|
||||
/// This populates DataStore / ExternalService / DangerousLocal nodes
|
||||
/// and Reaches edges that the entry-points-only fallback omits.
|
||||
pub fn handle(
|
||||
path: &str,
|
||||
format: SurfaceFormat,
|
||||
build_inline: bool,
|
||||
database_dir: &Path,
|
||||
config: &Config,
|
||||
) -> NyxResult<()> {
|
||||
let scan_root = Path::new(path).canonicalize()?;
|
||||
let map = load_or_build(&scan_root, database_dir, config)?;
|
||||
let map = if build_inline {
|
||||
build_full_from_filesystem(&scan_root, config)?
|
||||
} else {
|
||||
load_or_build(&scan_root, database_dir, config)?
|
||||
};
|
||||
let stdout = std::io::stdout();
|
||||
let mut out = stdout.lock();
|
||||
match format {
|
||||
|
|
@ -108,6 +128,76 @@ fn build_from_filesystem(scan_root: &Path, config: &Config) -> NyxResult<Surface
|
|||
Ok(build_surface_map(&inputs))
|
||||
}
|
||||
|
||||
/// Build a full SurfaceMap from source by running pass-1 summary
|
||||
/// extraction inline + call-graph construction, then handing the
|
||||
/// resulting [`GlobalSummaries`] + [`CallGraph`] to
|
||||
/// [`build_surface_map`]. Same cost as `nyx index build` pass 1 but
|
||||
/// holds nothing in SQLite.
|
||||
fn build_full_from_filesystem(scan_root: &Path, config: &Config) -> NyxResult<SurfaceMap> {
|
||||
let files = collect_files(scan_root, config)?;
|
||||
let mut summaries = build_summaries_inline(&files, scan_root, config);
|
||||
summaries.install_hierarchy();
|
||||
let call_graph = callgraph::build_call_graph(&summaries, &[]);
|
||||
let inputs = SurfaceBuildInputs {
|
||||
files: &files,
|
||||
scan_root: Some(scan_root),
|
||||
global_summaries: &summaries,
|
||||
call_graph: &call_graph,
|
||||
config,
|
||||
};
|
||||
Ok(build_surface_map(&inputs))
|
||||
}
|
||||
|
||||
/// Run pass-1 summary extraction across `files` in parallel and merge
|
||||
/// the per-thread results into a single [`GlobalSummaries`]. Mirrors
|
||||
/// the `scan_filesystem_with_observer` pass-1 fold/reduce shape but
|
||||
/// strips out the progress / metrics / logs threading the surface
|
||||
/// command does not need.
|
||||
///
|
||||
/// Per-file errors are swallowed so a single bad file does not kill
|
||||
/// the whole map.
|
||||
fn build_summaries_inline(
|
||||
files: &[PathBuf],
|
||||
scan_root: &Path,
|
||||
config: &Config,
|
||||
) -> GlobalSummaries {
|
||||
let root_str = scan_root.to_string_lossy().into_owned();
|
||||
let mg = config.module_graph.as_deref();
|
||||
files
|
||||
.par_iter()
|
||||
.fold(GlobalSummaries::new, |mut local_gs, path| {
|
||||
let Ok(bytes) = std::fs::read(path) else {
|
||||
return local_gs;
|
||||
};
|
||||
let Ok((func_summaries, ssa_summaries, ssa_bodies, auth_summaries, cross_pkg)) =
|
||||
extract_all_summaries_from_bytes(&bytes, path, config, Some(scan_root))
|
||||
else {
|
||||
return local_gs;
|
||||
};
|
||||
for s in func_summaries {
|
||||
let key = s.func_key_with_resolver(Some(&root_str), mg);
|
||||
local_gs.insert(key, s);
|
||||
}
|
||||
for (key, ssa_sum) in ssa_summaries {
|
||||
local_gs.insert_ssa(key, ssa_sum);
|
||||
}
|
||||
for (key, body) in ssa_bodies {
|
||||
local_gs.insert_body(key, body);
|
||||
}
|
||||
for (key, auth_sum) in auth_summaries {
|
||||
local_gs.insert_auth(key, auth_sum);
|
||||
}
|
||||
if let Some((ns, map)) = cross_pkg {
|
||||
local_gs.insert_cross_package_imports(ns, map);
|
||||
}
|
||||
local_gs
|
||||
})
|
||||
.reduce(GlobalSummaries::new, |mut a, b| {
|
||||
a.merge(b);
|
||||
a
|
||||
})
|
||||
}
|
||||
|
||||
fn collect_files(root: &Path, config: &Config) -> NyxResult<Vec<PathBuf>> {
|
||||
let (rx, handle) = spawn_file_walker(root, config);
|
||||
let mut out = Vec::new();
|
||||
|
|
@ -541,4 +631,127 @@ mod tests {
|
|||
assert!(text.contains("reaches:"));
|
||||
assert!(text.contains("dangerous: eval"));
|
||||
}
|
||||
|
||||
#[test]
fn build_summaries_inline_extracts_function_summaries() {
    // Establishes that the inline pass-1 path produces the same
    // `GlobalSummaries` shape that an indexed scan would have
    // persisted: at minimum, one FuncSummary per top-level function in
    // the fixture. Without this guarantee the surface build downstream
    // falls back to entry-points-only because `detect_data_stores` /
    // `detect_external_services` / `detect_dangerous_locals` walk the
    // summaries map.
    let td = tempfile::tempdir().unwrap();
    let project_dir = td.path();
    // One literal per line via `concat!`: a `\`-newline continuation
    // inside a single string literal discards the next line's leading
    // whitespace, which would strip the Python indentation and leave
    // the function bodies at column 0 (not valid Python).
    std::fs::write(
        project_dir.join("app.py"),
        concat!(
            "from flask import Flask, request\n",
            "app = Flask(__name__)\n",
            "\n",
            "@app.route('/run')\n",
            "def run():\n",
            "    cmd = request.args.get('cmd')\n",
            "    return str(eval(cmd))\n",
            "\n",
            "def helper(x):\n",
            "    return eval(x)\n",
        ),
    )
    .unwrap();

    let cfg = Config::default();
    let canon = project_dir.canonicalize().unwrap();
    let files = collect_files(&canon, &cfg).unwrap();
    let summaries = build_summaries_inline(&files, &canon, &cfg);
    let names: Vec<String> = summaries
        .iter()
        .map(|(k, _)| k.qualified_name())
        .collect();
    assert!(
        names.iter().any(|n| n.ends_with("run")),
        "summaries should contain `run`, got {names:?}"
    );
    assert!(
        names.iter().any(|n| n.ends_with("helper")),
        "summaries should contain `helper`, got {names:?}"
    );
}
|
||||
|
||||
#[test]
fn build_full_from_filesystem_walks_pass1_pipeline() {
    // End-to-end smoke for `surface::handle(..., build=true)`: the
    // inline-build path must produce a non-empty SurfaceMap on a
    // project with a recognisable framework route. Equivalent to
    // running `nyx surface --build .` on a single-file Flask app.
    let td = tempfile::tempdir().unwrap();
    let project_dir = td.path();
    // One literal per line via `concat!`: a `\`-newline continuation
    // inside a single string literal discards the next line's leading
    // whitespace, which would strip the Python indentation and leave
    // the body of `run` at column 0 (not valid Python).
    std::fs::write(
        project_dir.join("app.py"),
        concat!(
            "from flask import Flask, request\n",
            "app = Flask(__name__)\n",
            "\n",
            "@app.route('/run')\n",
            "def run():\n",
            "    cmd = request.args.get('cmd')\n",
            "    return str(eval(cmd))\n",
        ),
    )
    .unwrap();

    let cfg = Config::default();
    let canon = project_dir.canonicalize().unwrap();
    let map = build_full_from_filesystem(&canon, &cfg).expect("inline build succeeds");

    let has_entry = map
        .nodes
        .iter()
        .any(|n| matches!(n, SurfaceNode::EntryPoint(_)));
    assert!(has_entry, "Flask /run route should be detected");
}
|
||||
|
||||
#[test]
fn build_from_filesystem_entry_points_only_runs_with_empty_summaries() {
    // Locks in the fallback contract: `build_from_filesystem` runs
    // framework probes against an empty `GlobalSummaries` and
    // produces only entry-point nodes. Any future change that
    // accidentally widens the fallback to populate sinks should
    // either ship through `--build` or update this test.
    let td = tempfile::tempdir().unwrap();
    let project_dir = td.path();
    // One literal per line via `concat!`: a `\`-newline continuation
    // inside a single string literal discards the next line's leading
    // whitespace, which would strip the Python indentation and leave
    // the body of `run` at column 0 (not valid Python).
    std::fs::write(
        project_dir.join("app.py"),
        concat!(
            "from flask import Flask\n",
            "app = Flask(__name__)\n",
            "\n",
            "@app.route('/run')\n",
            "def run():\n",
            "    return 'ok'\n",
        ),
    )
    .unwrap();

    let cfg = Config::default();
    let canon = project_dir.canonicalize().unwrap();
    let map = build_from_filesystem(&canon, &cfg).expect("fallback build succeeds");

    // Entry point should still appear (framework probes run in the
    // fallback path too).
    assert!(
        map.nodes
            .iter()
            .any(|n| matches!(n, SurfaceNode::EntryPoint(_))),
        "Flask route should land via framework probe"
    );
    // No DataStore / ExternalService / DangerousLocal because the
    // fallback path feeds an empty GlobalSummaries to the detectors.
    let non_entry = map.nodes.iter().any(|n| {
        matches!(
            n,
            SurfaceNode::DataStore(_)
                | SurfaceNode::ExternalService(_)
                | SurfaceNode::DangerousLocal(_)
        )
    });
    assert!(
        !non_entry,
        "entry-points-only fallback should not produce non-entry nodes"
    );
}
|
||||
}
|
||||
|
|
|
|||
173
tests/eval_corpus/check_surface.sh
Executable file
173
tests/eval_corpus/check_surface.sh
Executable file
|
|
@ -0,0 +1,173 @@
|
|||
#!/usr/bin/env bash
|
||||
# Phase 31 acceptance walker: assert `nyx surface` produces a usable
|
||||
# map on every downloaded eval-corpus fixture root.
|
||||
#
|
||||
# Walks the project trees under $NYX_EVAL_CORPUS_DIR and, when
# `--also-inhouse` is passed, the in-house `tests/benchmark/corpus` and
# `tests/dynamic_fixtures` trees as well. Runs
# `nyx surface --build --format json <root>` against each root and
# asserts the resulting JSON contains at least one EntryPoint plus at
# least one DataStore / ExternalService / DangerousLocal node.
|
||||
#
|
||||
# `--build` forces the inline pass-1 + call-graph path so the walker
|
||||
# does not depend on a prior `nyx index build` or `nyx scan`.
|
||||
#
|
||||
# Usage:
|
||||
# tests/eval_corpus/check_surface.sh [--nyx BIN] [--corpus-dir DIR]
|
||||
# [--also-inhouse]
|
||||
# [--report FILE]
|
||||
#
|
||||
# Environment:
|
||||
#   NYX_BIN             — nyx binary to run
#                          (default: <repo>/target/release/nyx).
#   NYX_EVAL_CORPUS_DIR — path to pre-downloaded corpus roots
#                          (default: ~/.cache/nyx/eval_corpus). When
#                          missing or empty the walker still walks the
#                          in-house trees if `--also-inhouse` was
#                          passed; with nothing to walk it exits 0 so
#                          CI without a corpus mirror does not block
#                          on Phase 31.
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 every walked project produced a usable SurfaceMap (or no
|
||||
# projects were available — see corpus-missing note above).
|
||||
# 1 setup / I/O / missing-binary error.
|
||||
# 2 one or more projects produced an empty or unusable SurfaceMap.
|
||||
|
||||
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Defaults; NYX_BIN and NYX_EVAL_CORPUS_DIR may be preset via the
# environment, and the matching flags below override them.
NYX_BIN="${NYX_BIN:-${REPO_ROOT}/target/release/nyx}"
CORPUS_CACHE="${NYX_EVAL_CORPUS_DIR:-${HOME}/.cache/nyx/eval_corpus}"
ALSO_INHOUSE="false"
REPORT_FILE=""

# require_value FLAG ARGC: abort with a readable message when a
# value-taking flag is the last argument. Without this guard, `$2`
# trips `set -u` and the script dies with a bare "unbound variable".
require_value() {
    if [[ "$2" -lt 2 ]]; then
        echo "error: $1 requires a value" >&2
        exit 1
    fi
}

while [[ $# -gt 0 ]]; do
    case "$1" in
        --nyx)          require_value "$1" "$#"; NYX_BIN="$2";      shift 2 ;;
        --corpus-dir)   require_value "$1" "$#"; CORPUS_CACHE="$2"; shift 2 ;;
        --also-inhouse) ALSO_INHOUSE="true"; shift ;;
        --report)       require_value "$1" "$#"; REPORT_FILE="$2";  shift 2 ;;
        -h|--help)
            # Print the leading comment header only: skip the shebang
            # and stop at the first non-comment line, so the help text
            # tracks the header's length instead of a fixed line count.
            awk 'NR == 1 { next } !/^#/ { exit } { print }' "$0"
            exit 0
            ;;
        *)
            echo "unknown flag: $1" >&2
            exit 1
            ;;
    esac
done
|
||||
|
||||
# Print an error message to stderr and terminate with exit code 1.
die() {
    printf 'error: %s\n' "$*" >&2
    exit 1
}
|
||||
# Print a progress message to stdout, tagged with the script prefix.
info() {
    printf '[surface-check] %s\n' "$*"
}
|
||||
# Print a warning to stderr, tagged with the script prefix.
warn() {
    printf '[surface-check] WARN: %s\n' "$*" >&2
}
|
||||
|
||||
# Prerequisites: an executable nyx binary and jq on PATH.
if [[ ! -x "$NYX_BIN" ]]; then
    die "nyx binary not found or not executable: $NYX_BIN"
fi
if ! command -v jq >/dev/null 2>&1; then
    die "required command not found: jq"
fi

# Gather the project roots to walk. Every immediate subdirectory of the
# corpus cache counts as one project; each in-house tree is added as a
# single project root when --also-inhouse was requested.
PROJECTS=()
if [[ ! -d "$CORPUS_CACHE" ]]; then
    warn "corpus directory missing: $CORPUS_CACHE (run tests/eval_corpus/run.sh to bootstrap)"
else
    for candidate in "$CORPUS_CACHE"/*; do
        # An empty cache leaves the glob unexpanded; the -d test drops it.
        if [[ -d "$candidate" ]]; then
            PROJECTS+=("$candidate")
        fi
    done
fi

if [[ "$ALSO_INHOUSE" == "true" ]]; then
    for candidate in \
        "${REPO_ROOT}/tests/benchmark/corpus" \
        "${REPO_ROOT}/tests/dynamic_fixtures"
    do
        if [[ -d "$candidate" ]]; then
            PROJECTS+=("$candidate")
        fi
    done
fi

# Nothing to walk is not an error: exit 0 so CI without a corpus mirror
# is not blocked.
if [[ ${#PROJECTS[@]} -eq 0 ]]; then
    info "no project roots to walk (eval corpus not downloaded, in-house trees absent)"
    exit 0
fi
|
||||
|
||||
PASS_COUNT=0
FAIL_COUNT=0
FAIL_PROJECTS=()
declare -a REPORT_ROWS=()

# record_failure LABEL JSON_ROW: bump the failure counter, remember the
# human-readable label for the final summary, and queue the report row.
record_failure() {
    FAIL_COUNT=$((FAIL_COUNT + 1))
    FAIL_PROJECTS+=("$1")
    REPORT_ROWS+=("$2")
}

for project in "${PROJECTS[@]}"; do
    info "walking: $project"
    # JSON-encode the path once; every report row for this project reuses it.
    project_json="$(jq -Rn --arg p "$project" '$p')"

    # `|| rc=$?` captures the exit status without tripping `set -e`.
    rc=0
    out="$("$NYX_BIN" surface --build --format json "$project" 2>/dev/null)" || rc=$?
    if [[ $rc -ne 0 ]]; then
        warn "nyx surface --build exited $rc on $project"
        record_failure "$project (nyx exit=$rc)" \
            "$(printf '{"project":%s,"status":"nyx-error","exit":%d}' "$project_json" "$rc")"
        continue
    fi
    if [[ -z "$out" ]]; then
        warn "empty output on $project"
        record_failure "$project (empty output)" \
            "$(printf '{"project":%s,"status":"empty-output"}' "$project_json")"
        continue
    fi

    # Count nodes by kind in a single jq pass. SurfaceMap serialises
    # each node as a flat object with a `node` discriminator:
    # `entry_point`, `data_store`, `external_service`, `dangerous_local`.
    # The call is guarded so that output which is not valid JSON, or has
    # no iterable `.nodes`, is recorded as a failure for this project
    # instead of aborting the whole walk through `set -e` / `pipefail`
    # with jq's own exit code.
    if ! counts="$(jq -r '
            .nodes as $nodes
            | ["entry_point", "data_store", "external_service", "dangerous_local"]
            | map(. as $kind | [$nodes[] | select(.node == $kind)] | length)
            | @tsv
        ' <<<"$out" 2>/dev/null)"; then
        warn "unparseable SurfaceMap JSON on $project"
        record_failure "$project (invalid JSON)" \
            "$(printf '{"project":%s,"status":"invalid-json"}' "$project_json")"
        continue
    fi
    read -r entry_count ds_count es_count dl_count <<<"$counts"
    sink_count=$((ds_count + es_count + dl_count))

    if [[ "$entry_count" -lt 1 ]]; then
        warn "no EntryPoint nodes on $project"
        record_failure "$project (no entry-points)" \
            "$(printf '{"project":%s,"status":"no-entry-points","entry_count":%d}' \
                "$project_json" "$entry_count")"
        continue
    fi
    if [[ "$sink_count" -lt 1 ]]; then
        warn "no DataStore / ExternalService / DangerousLocal nodes on $project"
        record_failure "$project (no sinks: ds=$ds_count es=$es_count dl=$dl_count)" \
            "$(printf '{"project":%s,"status":"no-sinks","entry_count":%d,"ds":%d,"es":%d,"dl":%d}' \
                "$project_json" "$entry_count" "$ds_count" "$es_count" "$dl_count")"
        continue
    fi

    info "  ok: ${entry_count} entry-points, ${ds_count} data stores, ${es_count} external, ${dl_count} dangerous"
    PASS_COUNT=$((PASS_COUNT + 1))
    REPORT_ROWS+=("$(printf '{"project":%s,"status":"ok","entry_count":%d,"ds":%d,"es":%d,"dl":%d}' \
        "$project_json" "$entry_count" "$ds_count" "$es_count" "$dl_count")")
done
|
||||
|
||||
# Optional JSON report: pass/fail totals plus one row per walked project.
if [[ -n "$REPORT_FILE" ]]; then
    last_row=$(( ${#REPORT_ROWS[@]} - 1 ))
    {
        printf '%s\n' "{"
        printf '  "pass": %s,\n' "$PASS_COUNT"
        printf '  "fail": %s,\n' "$FAIL_COUNT"
        printf '%s\n' '  "projects": ['
        for idx in "${!REPORT_ROWS[@]}"; do
            # Every row except the last is followed by a comma.
            if [[ $idx -eq $last_row ]]; then
                printf '    %s\n' "${REPORT_ROWS[$idx]}"
            else
                printf '    %s,\n' "${REPORT_ROWS[$idx]}"
            fi
        done
        printf '%s\n' "  ]"
        printf '%s\n' "}"
    } > "$REPORT_FILE"
    info "report written: $REPORT_FILE"
fi

# Human-readable summary. Any failed project turns the exit code into 2.
info ""
info "summary: ${PASS_COUNT} pass, ${FAIL_COUNT} fail (of $((PASS_COUNT + FAIL_COUNT)) projects)"
if [[ $FAIL_COUNT -eq 0 ]]; then
    exit 0
fi
for failed in "${FAIL_PROJECTS[@]}"; do
    info "  fail: $failed"
done
exit 2
|
||||
Loading…
Add table
Add a link
Reference in a new issue