mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-24 20:28:06 +02:00
Python FP and docs updates (#58)
* refactor: Update comments for clarity and add expectations.json files for performance metrics * feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks * feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks * refactor: Simplify code formatting for better readability in multiple files * refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration * feat: Update Java and Python patterns to include new security rules * refactor: Improve comment clarity and consistency across multiple Rust files * refactor: Simplify code formatting for improved readability in integration tests and module files * refactor: Improve comment formatting and enhance clarity in assertions across multiple files
This commit is contained in:
parent
4db0805de6
commit
a438886217
291 changed files with 9485 additions and 3851 deletions
|
|
@ -57,7 +57,7 @@ fn print_toml_with_highlights(toml_str: &str) {
|
|||
continue;
|
||||
}
|
||||
// key = value lines (but not `[xxx]`). Split on the first `=`
|
||||
// that isn't inside a quoted string — TOML keys don't contain
|
||||
// that isn't inside a quoted string, TOML keys don't contain
|
||||
// `=` outside quotes, so a leading-segment split is safe enough
|
||||
// for the common case. Continuation lines from multi-line
|
||||
// arrays/strings won't have `=` and fall through to plain.
|
||||
|
|
@ -149,7 +149,7 @@ fn prune_matching(effective: &toml::Value, defaults: &toml::Value) -> Option<tom
|
|||
}
|
||||
}
|
||||
None => {
|
||||
// Key absent in defaults — keep entirely.
|
||||
// Key absent in defaults, keep entirely.
|
||||
out.insert(k.clone(), v.clone());
|
||||
}
|
||||
}
|
||||
|
|
@ -160,9 +160,9 @@ fn prune_matching(effective: &toml::Value, defaults: &toml::Value) -> Option<tom
|
|||
Some(toml::Value::Table(out))
|
||||
}
|
||||
}
|
||||
// Identical leaf — drop.
|
||||
// Identical leaf, drop.
|
||||
_ if effective == defaults => None,
|
||||
// Differing leaf or shape change — keep the effective value.
|
||||
// Differing leaf or shape change, keep the effective value.
|
||||
_ => Some(effective.clone()),
|
||||
}
|
||||
}
|
||||
|
|
@ -180,13 +180,13 @@ fn count_top_level_keys(toml_str: &str) -> usize {
|
|||
continue;
|
||||
}
|
||||
if trimmed.starts_with('[') {
|
||||
// Section header — not an override on its own. Reset
|
||||
// Section header, not an override on its own. Reset
|
||||
// any stuck multi-line state defensively.
|
||||
in_multiline = false;
|
||||
continue;
|
||||
}
|
||||
if in_multiline {
|
||||
// Inside a multi-line array/inline table — closing bracket
|
||||
// Inside a multi-line array/inline table, closing bracket
|
||||
// ends it, intermediate lines don't count.
|
||||
if trimmed.starts_with(']') || trimmed.starts_with('}') {
|
||||
in_multiline = false;
|
||||
|
|
|
|||
|
|
@ -123,7 +123,7 @@ pub fn build_index_with_observer(
|
|||
logs: Option<&Arc<ScanLogCollector>>,
|
||||
) -> NyxResult<()> {
|
||||
// Pass 1 of the indexed scan reads persisted summaries produced here, so
|
||||
// framework context must be populated at index-build time — otherwise
|
||||
// framework context must be populated at index-build time, otherwise
|
||||
// framework-conditional label rules never contribute to the summaries
|
||||
// and indexed scans diverge from non-indexed ones. Matches the
|
||||
// auto-fill in scan_filesystem_with_observer /
|
||||
|
|
@ -152,7 +152,7 @@ pub fn build_index_with_observer(
|
|||
|
||||
let walk_start = std::time::Instant::now();
|
||||
let (rx, handle) = spawn_file_walker(project_path, config);
|
||||
// Drain the channel BEFORE joining — the bounded channel will deadlock
|
||||
// Drain the channel BEFORE joining: the bounded channel will deadlock
|
||||
// if we join first and the walker blocks on send.
|
||||
let paths: Vec<PathBuf> = rx.into_iter().flatten().collect();
|
||||
if let Err(err) = handle.join() {
|
||||
|
|
@ -205,7 +205,7 @@ pub fn build_index_with_observer(
|
|||
.try_for_each(|path| -> NyxResult<()> {
|
||||
let mut idx = Indexer::from_pool(project_name, &pool)?;
|
||||
|
||||
// Read once, hash once — pass bytes to both rule execution and
|
||||
// Read once, hash once; pass bytes to both rule execution and
|
||||
// summary extraction. Use pre-computed hash for upsert to avoid
|
||||
// a redundant file read inside upsert_file.
|
||||
let bytes = std::fs::read(&path)?;
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ pub fn handle_command(
|
|||
// Resolve engine options once for the whole process. Scan overlays CLI
|
||||
// flags below; other subcommands use the config values verbatim. The
|
||||
// install is a no-op after the first call, so Scan's overlay must happen
|
||||
// before we reach this point for its own call path — we delay the install
|
||||
// before we reach this point for its own call path, we delay the install
|
||||
// to the Scan arm and gate non-scan commands behind a fallback install of
|
||||
// the bare config values.
|
||||
let install_from_config = |config: &Config| {
|
||||
|
|
@ -378,7 +378,7 @@ fn print_engine_explanation(config: &Config, engine_profile: Option<EngineProfil
|
|||
use console::style;
|
||||
|
||||
// Plain-text on/off, padded to 3 chars so the trailing column aligns
|
||||
// regardless of which value is rendered. Colour is layered on top —
|
||||
// regardless of which value is rendered. Colour is layered on top;
|
||||
// the visible width stays 3 characters because `console::style` emits
|
||||
// zero-width ANSI codes (and nothing at all when NO_COLOR is set).
|
||||
fn onoff(b: bool) -> String {
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ fn record_persist_error(errors: &Arc<Mutex<Vec<String>>>, message: String) {
|
|||
/// When `enabled` is true, a panic inside `f` is caught, logged, and
|
||||
/// converted into a `NyxError::Msg`; callers that already match on
|
||||
/// `Err(_)` will gracefully skip the file. When `enabled` is false,
|
||||
/// the panic propagates unchanged — preserving the default behaviour
|
||||
/// the panic propagates unchanged, preserving the default behaviour
|
||||
/// for users who want to catch engine bugs loudly.
|
||||
///
|
||||
/// `AssertUnwindSafe` is load-bearing: closures over `&Config` /
|
||||
|
|
@ -222,7 +222,7 @@ fn is_false(b: &bool) -> bool {
|
|||
/// Framework detection drives framework-conditional label rules (e.g. actix /
|
||||
/// axum / rocket handler-arg sources, Rails route helpers) and auth-analysis
|
||||
/// extractors. If any scan entry point forgets to populate it, the indexed
|
||||
/// and non-indexed paths silently diverge — missing framework-specific
|
||||
/// and non-indexed paths silently diverge, missing framework-specific
|
||||
/// findings in whichever path skipped detection. This helper exists so the
|
||||
/// auto-fill stays consistent across `scan_filesystem_with_observer`,
|
||||
/// `scan_with_index_parallel_observer`, and `build_index_with_observer`.
|
||||
|
|
@ -239,7 +239,7 @@ pub(crate) fn ensure_framework_ctx(root: &Path, cfg: &Config) -> Option<Config>
|
|||
///
|
||||
/// Drives the one-time `preview-tier scan` banner in `handle()`. Tracks
|
||||
/// the extensions `lang_for_path` in `ast.rs` maps to the `"c"` and `"cpp"`
|
||||
/// slugs — keep this aligned with that mapping.
|
||||
/// slugs, keep this aligned with that mapping.
|
||||
pub(crate) fn is_preview_tier_path(path: &Path) -> bool {
|
||||
matches!(
|
||||
path.extension()
|
||||
|
|
@ -514,14 +514,14 @@ pub fn retain_converged_findings(diags: &mut Vec<Diag>) {
|
|||
/// the same function; tiebreak by source line asc, source col asc).
|
||||
///
|
||||
/// Rule IDs of the form `taint-unsanitised-flow (source L:C)` share a single
|
||||
/// base `taint-unsanitised-flow`. The grouping key is column-agnostic —
|
||||
/// base `taint-unsanitised-flow`. The grouping key is column-agnostic:
|
||||
/// multiple flows to the same sink line differing only in column or source
|
||||
/// are collapsed to one. The rule_id preserves the source location, so the
|
||||
/// kept representative still identifies which flow was reported.
|
||||
///
|
||||
/// The grouping key **includes the resolved sink capability bits** so that
|
||||
/// two different sinks on the same line (e.g. `sink_sql(x); sink_shell(x);`)
|
||||
/// are not collapsed into one finding — they represent materially different
|
||||
/// are not collapsed into one finding, they represent materially different
|
||||
/// vulnerabilities and must surface independently. Findings with different
|
||||
/// base rule IDs (e.g. `js.code_exec.eval`) or different severities are
|
||||
/// left untouched per guardrails.
|
||||
|
|
@ -560,7 +560,7 @@ pub(crate) fn deduplicate_taint_flows(diags: &mut Vec<Diag>) {
|
|||
let src_col = src.map(|s| s.col).unwrap_or(u32::MAX);
|
||||
// Same-function check: first flow_step (Source) and the step at the
|
||||
// sink share an `enclosing_func`. If flow_steps are absent or the
|
||||
// function markers are missing, treat as "unknown" — worse than a
|
||||
// function markers are missing, treat as "unknown", worse than a
|
||||
// confirmed same-function match but better than a confirmed mismatch.
|
||||
let same_function_flag: u32 = ev
|
||||
.and_then(|e| {
|
||||
|
|
@ -677,7 +677,7 @@ pub const SCC_UNCONVERGED_CROSS_FILE_NOTE_PREFIX: &str = "scc_unconverged:cross-
|
|||
/// [`GlobalSummaries::snapshot_caps`] results.
|
||||
///
|
||||
/// Used by the Phase-B worklist to derive the next iteration's dirty
|
||||
/// file set. Semantics match [`diff_cap_snapshots`] — a key that
|
||||
/// file set. Semantics match [`diff_cap_snapshots`], a key that
|
||||
/// appears or disappears counts as changed.
|
||||
fn changed_cap_keys_of(
|
||||
before: &HashMap<crate::symbol::FuncKey, (u16, u16, u16, Vec<usize>)>,
|
||||
|
|
@ -728,7 +728,7 @@ fn changed_ssa_keys_of(
|
|||
///
|
||||
/// Called once per unconverged batch (after the pass-2 rayon parallelism
|
||||
/// has collected `iteration_diags`) so the cost is O(n) over the batch's
|
||||
/// findings — much cheaper than a per-finding `warn!`.
|
||||
/// findings, much cheaper than a per-finding `warn!`.
|
||||
///
|
||||
/// Confidence is **capped** at `Low` rather than unconditionally set:
|
||||
/// upstream analysis may have proven something particularly strong about
|
||||
|
|
@ -795,7 +795,7 @@ fn tag_unconverged_findings(
|
|||
|
||||
/// Safety cap on SCC fixed-point iterations.
|
||||
///
|
||||
/// The convergence predicate is *snapshot equality* — we break as soon as
|
||||
/// The convergence predicate is *snapshot equality*: we break as soon as
|
||||
/// an iteration leaves both `snapshot_caps()` and `snapshot_ssa()`
|
||||
/// unchanged. The cap only triggers if something prevents monotone
|
||||
/// progress (e.g. a non-monotone SSA summary refinement or an SCC larger
|
||||
|
|
@ -809,7 +809,7 @@ fn tag_unconverged_findings(
|
|||
/// SCC with `k` functions arranged in a chain, fresh taint introduced at
|
||||
/// one end of the chain needs up to `k` iterations to reach the other
|
||||
/// end. A hard cap of 3 was silently truncating propagation for any
|
||||
/// SCC of 4+ cross-file functions — findings vanished with no warning.
|
||||
/// SCC of 4+ cross-file functions, findings vanished with no warning.
|
||||
///
|
||||
/// `FuncSummary` is a finite-height lattice (≤ 48 bits of caps + a
|
||||
/// bounded vector of parameter indices) and `insert()` is strictly
|
||||
|
|
@ -865,7 +865,7 @@ fn effective_scc_cap() -> usize {
|
|||
/// persisted by non-recursive topo batches in the most recent
|
||||
/// [`run_topo_batches`] invocation. Intended for the regression tests
|
||||
/// that prove the topo-refinement pipeline is wired and producing
|
||||
/// observable cross-batch state — see
|
||||
/// observable cross-batch state, see
|
||||
/// `tests/topo_pass2_refinement_tests.rs`. Cheap relaxed load.
|
||||
static LAST_TOPO_NONRECURSIVE_REFINEMENTS: AtomicUsize = AtomicUsize::new(0);
|
||||
|
||||
|
|
@ -905,7 +905,7 @@ fn topo_refine_enabled() -> bool {
|
|||
///
|
||||
/// When `call_graph` is missing an edge (e.g. a summary was inserted
|
||||
/// after graph construction), we conservatively fall back to
|
||||
/// re-analysing the full batch — correctness is preserved at the cost
|
||||
/// re-analysing the full batch, correctness is preserved at the cost
|
||||
/// of the worklist optimisation for that iteration.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn run_topo_batches(
|
||||
|
|
@ -1104,7 +1104,7 @@ fn run_topo_batches(
|
|||
// A file becomes dirty for iteration N+1 iff it
|
||||
// contains at least one caller of a FuncKey that
|
||||
// changed in iteration N. If no key changed, the
|
||||
// dirty set is empty — which implies convergence (and
|
||||
// dirty set is empty, which implies convergence (and
|
||||
// matches `iter_converged` above).
|
||||
let changed_cap_keys = changed_cap_keys_of(&snap_before, &snap_after);
|
||||
let changed_ssa_keys =
|
||||
|
|
@ -1124,7 +1124,7 @@ fn run_topo_batches(
|
|||
// changed key. Fall back to the full batch when the
|
||||
// call graph does not resolve any caller (e.g. all
|
||||
// changes happened in leaf functions that no one in
|
||||
// this batch calls — rare but must not regress to
|
||||
// this batch calls, rare but must not regress to
|
||||
// missed analysis).
|
||||
let namespaces_needing_reanalysis =
|
||||
crate::callgraph::namespaces_for_callers(call_graph, &all_changed_keys);
|
||||
|
|
@ -1165,7 +1165,7 @@ fn run_topo_batches(
|
|||
}
|
||||
if iter_converged {
|
||||
// Snapshots equal but dirty_files non-empty is
|
||||
// anomalous — log and treat as converged
|
||||
// anomalous, log and treat as converged
|
||||
// (snapshot equality is the correctness-preserving
|
||||
// signal).
|
||||
tracing::debug!(
|
||||
|
|
@ -1182,7 +1182,7 @@ fn run_topo_batches(
|
|||
// After the loop, flatten per-file diags into the
|
||||
// iteration_diags vector in batch order for deterministic
|
||||
// output. Files that were in the batch but never made
|
||||
// dirty (shouldn't happen — iter 0 runs all of them) are
|
||||
// dirty (shouldn't happen, iter 0 runs all of them) are
|
||||
// skipped silently.
|
||||
let mut iteration_diags: Vec<Diag> = Vec::new();
|
||||
for p in &batch.files {
|
||||
|
|
@ -1268,7 +1268,7 @@ fn run_topo_batches(
|
|||
// parallel section completes, persist those refinements into
|
||||
// `global_summaries` sequentially. Subsequent batches in
|
||||
// topo order (caller-most batches) then resolve their call
|
||||
// sites against the refined cross-file context — the final
|
||||
// sites against the refined cross-file context, the final
|
||||
// step in the callee-first topo pipeline that pass-2
|
||||
// sequencing was always meant to deliver.
|
||||
//
|
||||
|
|
@ -1455,7 +1455,7 @@ fn run_topo_batches(
|
|||
}
|
||||
}
|
||||
|
||||
// Orphan files (no functions in call graph) — process last, single pass.
|
||||
// Orphan files (no functions in call graph), process last, single pass.
|
||||
if !orphans.is_empty() {
|
||||
let orphan_diags: Vec<Diag> = orphans
|
||||
.par_iter()
|
||||
|
|
@ -2099,7 +2099,7 @@ pub fn scan_with_index_parallel_observer(
|
|||
if let Some(p) = &progress_ref {
|
||||
p.set_current_file(&path.to_string_lossy());
|
||||
}
|
||||
// Read once, hash once — use the hash for the change check
|
||||
// Read once, hash once; use the hash for the change check
|
||||
// to avoid a second file read inside should_scan.
|
||||
if let Ok(bytes) = std::fs::read(path) {
|
||||
let hash = Indexer::digest_bytes(&bytes);
|
||||
|
|
@ -2681,7 +2681,7 @@ pub fn scan_with_index_parallel_observer(
|
|||
// pipeline intends to produce (taint + cfg-* + state-* from state
|
||||
// analysis + auth.* when configured). A previous revision clipped this
|
||||
// to `taint*`/`cfg-*` only, silently dropping state-model findings and
|
||||
// breaking parity with `scan_filesystem` — fixed. Mode-scoped
|
||||
// breaking parity with `scan_filesystem`; this is now fixed. Mode-scoped
|
||||
// filtering, if ever needed, belongs in the analysis layer, not here.
|
||||
|
||||
let post_process_start = std::time::Instant::now();
|
||||
|
|
@ -3134,7 +3134,7 @@ mod dedup_taint_flow_tests {
|
|||
|
||||
#[test]
|
||||
fn dedup_collapses_same_line_different_columns() {
|
||||
// Two findings at line 10 but different columns — the widened key
|
||||
// Two findings at line 10 but different columns, the widened key
|
||||
// (path, line, severity) collapses them; the tighter source wins.
|
||||
let mut diags = vec![
|
||||
make_taint("a.rs", 10, 3, 4, 1),
|
||||
|
|
@ -3151,7 +3151,7 @@ mod dedup_taint_flow_tests {
|
|||
|
||||
#[test]
|
||||
fn dedup_does_not_drop_different_sink_caps_on_same_line() {
|
||||
// Two findings at line 10, same column, same severity — but with
|
||||
// Two findings at line 10, same column, same severity, but with
|
||||
// different resolved sink capability bits (SQL vs SHELL). They must
|
||||
// NOT collapse: different sink kinds are materially different
|
||||
// vulnerabilities. Regression guard.
|
||||
|
|
@ -3175,7 +3175,7 @@ mod dedup_taint_flow_tests {
|
|||
|
||||
#[test]
|
||||
fn dedup_collapses_same_sink_caps_on_same_line() {
|
||||
// Same line, same severity, same sink caps — this is the canonical
|
||||
// Same line, same severity, same sink caps, this is the canonical
|
||||
// dedup case (two flows to the same sink, differing only in source).
|
||||
let mut diags = vec![
|
||||
make_taint("a.rs", 10, 5, 3, 1),
|
||||
|
|
|
|||
|
|
@ -88,7 +88,7 @@ pub fn handle(
|
|||
|
||||
// Invalidate the findings cache whenever a scan finishes so the next
|
||||
// request rebuilds against fresh diags. The next-request rebuild keeps
|
||||
// this hot-path simple — we only clear the slot here, never recompute.
|
||||
// this hot-path simple, we only clear the slot here, never recompute.
|
||||
let cache_for_invalidate = Arc::clone(&state.findings_cache);
|
||||
let mut event_rx = event_tx.subscribe();
|
||||
tokio::spawn(async move {
|
||||
|
|
@ -152,7 +152,7 @@ async fn shutdown_signal() {
|
|||
.expect("failed to listen for Ctrl+C");
|
||||
eprintln!("\n Shutting down...");
|
||||
// SSE connections block graceful shutdown indefinitely.
|
||||
// Use a raw OS thread to force exit — tokio tasks may not
|
||||
// Use a raw OS thread to force exit, tokio tasks may not
|
||||
// run reliably during shutdown.
|
||||
std::thread::spawn(|| {
|
||||
std::thread::sleep(std::time::Duration::from_millis(250));
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue