Python FP and docs updates (#58)

* refactor: Update comments for clarity and add expectations.json files for performance metrics
* feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks
* feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks
* refactor: Simplify code formatting for better readability in multiple files
* refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration
* feat: Update Java and Python patterns to include new security rules
* refactor: Improve comment clarity and consistency across multiple Rust files
* refactor: Simplify code formatting for improved readability in integration tests and module files
* refactor: Improve comment formatting and enhance clarity in assertions across multiple files
2026-06-24 20:28:06 +02:00 · 2026-04-29 19:53:34 -04:00 · 2026-04-29 19:53:34 -04:00 · a438886217
commit a438886217
parent 4db0805de6
291 changed files with 9485 additions and 3851 deletions
--- a/src/commands/config.rs
+++ b/src/commands/config.rs
@ -57,7 +57,7 @@ fn print_toml_with_highlights(toml_str: &str) {
            continue;
        }
        // key = value lines (but not `[xxx]`).  Split on the first `=`
-        // that isn't inside a quoted string — TOML keys don't contain
+        // that isn't inside a quoted string, TOML keys don't contain
        // `=` outside quotes, so a leading-segment split is safe enough
        // for the common case.  Continuation lines from multi-line
        // arrays/strings won't have `=` and fall through to plain.
@ -149,7 +149,7 @@ fn prune_matching(effective: &toml::Value, defaults: &toml::Value) -> Option<tom
                        }
                    }
                    None => {
-                        // Key absent in defaults — keep entirely.
+                        // Key absent in defaults, keep entirely.
                        out.insert(k.clone(), v.clone());
                    }
                }
@ -160,9 +160,9 @@ fn prune_matching(effective: &toml::Value, defaults: &toml::Value) -> Option<tom
                Some(toml::Value::Table(out))
            }
        }
-        // Identical leaf — drop.
+        // Identical leaf, drop.
        _ if effective == defaults => None,
-        // Differing leaf or shape change — keep the effective value.
+        // Differing leaf or shape change, keep the effective value.
        _ => Some(effective.clone()),
    }
 }
@ -180,13 +180,13 @@ fn count_top_level_keys(toml_str: &str) -> usize {
            continue;
        }
        if trimmed.starts_with('[') {
-            // Section header — not an override on its own.  Reset
+            // Section header, not an override on its own.  Reset
            // any stuck multi-line state defensively.
            in_multiline = false;
            continue;
        }
        if in_multiline {
-            // Inside a multi-line array/inline table — closing bracket
+            // Inside a multi-line array/inline table, closing bracket
            // ends it, intermediate lines don't count.
            if trimmed.starts_with(']') || trimmed.starts_with('}') {
                in_multiline = false;
--- a/src/commands/index.rs
+++ b/src/commands/index.rs
@ -123,7 +123,7 @@ pub fn build_index_with_observer(
    logs: Option<&Arc<ScanLogCollector>>,
 ) -> NyxResult<()> {
    // Pass 1 of the indexed scan reads persisted summaries produced here, so
-    // framework context must be populated at index-build time — otherwise
+    // framework context must be populated at index-build time, otherwise
    // framework-conditional label rules never contribute to the summaries
    // and indexed scans diverge from non-indexed ones.  Matches the
    // auto-fill in scan_filesystem_with_observer /
@ -152,7 +152,7 @@ pub fn build_index_with_observer(

    let walk_start = std::time::Instant::now();
    let (rx, handle) = spawn_file_walker(project_path, config);
-    // Drain the channel BEFORE joining — the bounded channel will deadlock
+    // Drain the channel BEFORE joining, the bounded channel will deadlock
    // if we join first and the walker blocks on send.
    let paths: Vec<PathBuf> = rx.into_iter().flatten().collect();
    if let Err(err) = handle.join() {
@ -205,7 +205,7 @@ pub fn build_index_with_observer(
        .try_for_each(|path| -> NyxResult<()> {
            let mut idx = Indexer::from_pool(project_name, &pool)?;

-            // Read once, hash once — pass bytes to both rule execution and
+            // Read once, hash once, pass bytes to both rule execution and
            // summary extraction.  Use pre-computed hash for upsert to avoid
            // a redundant file read inside upsert_file.
            let bytes = std::fs::read(&path)?;
--- a/src/commands/mod.rs
+++ b/src/commands/mod.rs
@ -21,7 +21,7 @@ pub fn handle_command(
    // Resolve engine options once for the whole process.  Scan overlays CLI
    // flags below; other subcommands use the config values verbatim.  The
    // install is a no-op after the first call, so Scan's overlay must happen
-    // before we reach this point for its own call path — we delay the install
+    // before we reach this point for its own call path, we delay the install
    // to the Scan arm and gate non-scan commands behind a fallback install of
    // the bare config values.
    let install_from_config = |config: &Config| {
@ -378,7 +378,7 @@ fn print_engine_explanation(config: &Config, engine_profile: Option<EngineProfil
    use console::style;

    // Plain-text on/off, padded to 3 chars so the trailing column aligns
-    // regardless of which value is rendered.  Colour is layered on top —
+    // regardless of which value is rendered.  Colour is layered on top;
    // the visible width stays 3 characters because `console::style` emits
    // zero-width ANSI codes (and nothing at all when NO_COLOR is set).
    fn onoff(b: bool) -> String {
--- a/src/commands/scan.rs
+++ b/src/commands/scan.rs
@ -54,7 +54,7 @@ fn record_persist_error(errors: &Arc<Mutex<Vec<String>>>, message: String) {
 /// When `enabled` is true, a panic inside `f` is caught, logged, and
 /// converted into a `NyxError::Msg`; callers that already match on
 /// `Err(_)` will gracefully skip the file.  When `enabled` is false,
-/// the panic propagates unchanged — preserving the default behaviour
+/// the panic propagates unchanged, preserving the default behaviour
 /// for users who want to catch engine bugs loudly.
 ///
 /// `AssertUnwindSafe` is load-bearing: closures over `&Config` /
@ -222,7 +222,7 @@ fn is_false(b: &bool) -> bool {
 /// Framework detection drives framework-conditional label rules (e.g. actix /
 /// axum / rocket handler-arg sources, Rails route helpers) and auth-analysis
 /// extractors.  If any scan entry point forgets to populate it, the indexed
-/// and non-indexed paths silently diverge — missing framework-specific
+/// and non-indexed paths silently diverge, missing framework-specific
 /// findings in whichever path skipped detection.  This helper exists so the
 /// auto-fill stays consistent across `scan_filesystem_with_observer`,
 /// `scan_with_index_parallel_observer`, and `build_index_with_observer`.
@ -239,7 +239,7 @@ pub(crate) fn ensure_framework_ctx(root: &Path, cfg: &Config) -> Option<Config>
 ///
 /// Drives the one-time `preview-tier scan` banner in `handle()`.  Tracks
 /// the extensions `lang_for_path` in `ast.rs` maps to the `"c"` and `"cpp"`
-/// slugs — keep this aligned with that mapping.
+/// slugs, keep this aligned with that mapping.
 pub(crate) fn is_preview_tier_path(path: &Path) -> bool {
    matches!(
        path.extension()
@ -514,14 +514,14 @@ pub fn retain_converged_findings(diags: &mut Vec<Diag>) {
 /// the same function; tiebreak by source line asc, source col asc).
 ///
 /// Rule IDs of the form `taint-unsanitised-flow (source L:C)` share a single
-/// base `taint-unsanitised-flow`. The grouping key is column-agnostic —
+/// base `taint-unsanitised-flow`. The grouping key is column-agnostic:
 /// multiple flows to the same sink line differing only in column or source
 /// are collapsed to one. The rule_id preserves the source location, so the
 /// kept representative still identifies which flow was reported.
 ///
 /// The grouping key **includes the resolved sink capability bits** so that
 /// two different sinks on the same line (e.g. `sink_sql(x); sink_shell(x);`)
-/// are not collapsed into one finding — they represent materially different
+/// are not collapsed into one finding, they represent materially different
 /// vulnerabilities and must surface independently. Findings with different
 /// base rule IDs (e.g. `js.code_exec.eval`) or different severities are
 /// left untouched per guardrails.
@ -560,7 +560,7 @@ pub(crate) fn deduplicate_taint_flows(diags: &mut Vec<Diag>) {
        let src_col = src.map(|s| s.col).unwrap_or(u32::MAX);
        // Same-function check: first flow_step (Source) and the step at the
        // sink share an `enclosing_func`. If flow_steps are absent or the
-        // function markers are missing, treat as "unknown" — worse than a
+        // function markers are missing, treat as "unknown", worse than a
        // confirmed same-function match but better than a confirmed mismatch.
        let same_function_flag: u32 = ev
            .and_then(|e| {
@ -677,7 +677,7 @@ pub const SCC_UNCONVERGED_CROSS_FILE_NOTE_PREFIX: &str = "scc_unconverged:cross-
 /// [`GlobalSummaries::snapshot_caps`] results.
 ///
 /// Used by the Phase-B worklist to derive the next iteration's dirty
-/// file set.  Semantics match [`diff_cap_snapshots`] — a key that
+/// file set.  Semantics match [`diff_cap_snapshots`], a key that
 /// appears or disappears counts as changed.
 fn changed_cap_keys_of(
    before: &HashMap<crate::symbol::FuncKey, (u16, u16, u16, Vec<usize>)>,
@ -728,7 +728,7 @@ fn changed_ssa_keys_of(
 ///
 /// Called once per unconverged batch (after the pass-2 rayon parallelism
 /// has collected `iteration_diags`) so the cost is O(n) over the batch's
-/// findings — much cheaper than a per-finding `warn!`.
+/// findings, much cheaper than a per-finding `warn!`.
 ///
 /// Confidence is **capped** at `Low` rather than unconditionally set:
 /// upstream analysis may have proven something particularly strong about
@ -795,7 +795,7 @@ fn tag_unconverged_findings(

 /// Safety cap on SCC fixed-point iterations.
 ///
-/// The convergence predicate is *snapshot equality* — we break as soon as
+/// The convergence predicate is *snapshot equality*, we break as soon as
 /// an iteration leaves both `snapshot_caps()` and `snapshot_ssa()`
 /// unchanged.  The cap only triggers if something prevents monotone
 /// progress (e.g. a non-monotone SSA summary refinement or an SCC larger
@ -809,7 +809,7 @@ fn tag_unconverged_findings(
 /// SCC with `k` functions arranged in a chain, fresh taint introduced at
 /// one end of the chain needs up to `k` iterations to reach the other
 /// end.  A hard cap of 3 was silently truncating propagation for any
-/// SCC of 4+ cross-file functions — findings vanished with no warning.
+/// SCC of 4+ cross-file functions, findings vanished with no warning.
 ///
 /// `FuncSummary` is a finite-height lattice (≤ 48 bits of caps + a
 /// bounded vector of parameter indices) and `insert()` is strictly
@ -865,7 +865,7 @@ fn effective_scc_cap() -> usize {
 /// persisted by non-recursive topo batches in the most recent
 /// [`run_topo_batches`] invocation.  Intended for the regression tests
 /// that prove the topo-refinement pipeline is wired and producing
-/// observable cross-batch state — see
+/// observable cross-batch state, see
 /// `tests/topo_pass2_refinement_tests.rs`.  Cheap relaxed load.
 static LAST_TOPO_NONRECURSIVE_REFINEMENTS: AtomicUsize = AtomicUsize::new(0);

@ -905,7 +905,7 @@ fn topo_refine_enabled() -> bool {
 ///
 /// When `call_graph` is missing an edge (e.g. a summary was inserted
 /// after graph construction), we conservatively fall back to
-/// re-analysing the full batch — correctness is preserved at the cost
+/// re-analysing the full batch, correctness is preserved at the cost
 /// of the worklist optimisation for that iteration.
 #[allow(clippy::too_many_arguments)]
 fn run_topo_batches(
@ -1104,7 +1104,7 @@ fn run_topo_batches(
                // A file becomes dirty for iteration N+1 iff it
                // contains at least one caller of a FuncKey that
                // changed in iteration N.  If no key changed, the
-                // dirty set is empty — which implies convergence (and
+                // dirty set is empty, which implies convergence (and
                // matches `iter_converged` above).
                let changed_cap_keys = changed_cap_keys_of(&snap_before, &snap_after);
                let changed_ssa_keys =
@ -1124,7 +1124,7 @@ fn run_topo_batches(
                // changed key.  Fall back to the full batch when the
                // call graph does not resolve any caller (e.g. all
                // changes happened in leaf functions that no one in
-                // this batch calls — rare but must not regress to
+                // this batch calls, rare but must not regress to
                // missed analysis).
                let namespaces_needing_reanalysis =
                    crate::callgraph::namespaces_for_callers(call_graph, &all_changed_keys);
@ -1165,7 +1165,7 @@ fn run_topo_batches(
                }
                if iter_converged {
                    // Snapshots equal but dirty_files non-empty is
-                    // anomalous — log and treat as converged
+                    // anomalous, log and treat as converged
                    // (snapshot equality is the correctness-preserving
                    // signal).
                    tracing::debug!(
@ -1182,7 +1182,7 @@ fn run_topo_batches(
            // After the loop, flatten per-file diags into the
            // iteration_diags vector in batch order for deterministic
            // output.  Files that were in the batch but never made
-            // dirty (shouldn't happen — iter 0 runs all of them) are
+            // dirty (shouldn't happen, iter 0 runs all of them) are
            // skipped silently.
            let mut iteration_diags: Vec<Diag> = Vec::new();
            for p in &batch.files {
@ -1268,7 +1268,7 @@ fn run_topo_batches(
            // parallel section completes, persist those refinements into
            // `global_summaries` sequentially.  Subsequent batches in
            // topo order (caller-most batches) then resolve their call
-            // sites against the refined cross-file context — the final
+            // sites against the refined cross-file context, the final
            // step in the callee-first topo pipeline that pass-2
            // sequencing was always meant to deliver.
            //
@ -1455,7 +1455,7 @@ fn run_topo_batches(
        }
    }

-    // Orphan files (no functions in call graph) — process last, single pass.
+    // Orphan files (no functions in call graph), process last, single pass.
    if !orphans.is_empty() {
        let orphan_diags: Vec<Diag> = orphans
            .par_iter()
@ -2099,7 +2099,7 @@ pub fn scan_with_index_parallel_observer(
                if let Some(p) = &progress_ref {
                    p.set_current_file(&path.to_string_lossy());
                }
-                // Read once, hash once — use the hash for the change check
+                // Read once, hash once, use the hash for the change check
                // to avoid a second file read inside should_scan.
                if let Ok(bytes) = std::fs::read(path) {
                    let hash = Indexer::digest_bytes(&bytes);
@ -2681,7 +2681,7 @@ pub fn scan_with_index_parallel_observer(
    // pipeline intends to produce (taint + cfg-* + state-* from state
    // analysis + auth.* when configured).  A previous revision clipped this
    // to `taint*`/`cfg-*` only, silently dropping state-model findings and
-    // breaking parity with `scan_filesystem` — fixed.  Mode-scoped
+    // breaking parity with `scan_filesystem`, fixed.  Mode-scoped
    // filtering, if ever needed, belongs in the analysis layer, not here.

    let post_process_start = std::time::Instant::now();
@ -3134,7 +3134,7 @@ mod dedup_taint_flow_tests {

    #[test]
    fn dedup_collapses_same_line_different_columns() {
-        // Two findings at line 10 but different columns — the widened key
+        // Two findings at line 10 but different columns, the widened key
        // (path, line, severity) collapses them; the tighter source wins.
        let mut diags = vec![
            make_taint("a.rs", 10, 3, 4, 1),
@ -3151,7 +3151,7 @@ mod dedup_taint_flow_tests {

    #[test]
    fn dedup_does_not_drop_different_sink_caps_on_same_line() {
-        // Two findings at line 10, same column, same severity — but with
+        // Two findings at line 10, same column, same severity, but with
        // different resolved sink capability bits (SQL vs SHELL). They must
        // NOT collapse: different sink kinds are materially different
        // vulnerabilities. Regression guard.
@ -3175,7 +3175,7 @@ mod dedup_taint_flow_tests {

    #[test]
    fn dedup_collapses_same_sink_caps_on_same_line() {
-        // Same line, same severity, same sink caps — this is the canonical
+        // Same line, same severity, same sink caps, this is the canonical
        // dedup case (two flows to the same sink, differing only in source).
        let mut diags = vec![
            make_taint("a.rs", 10, 5, 3, 1),
--- a/src/commands/serve.rs
+++ b/src/commands/serve.rs
@ -88,7 +88,7 @@ pub fn handle(

        // Invalidate the findings cache whenever a scan finishes so the next
        // request rebuilds against fresh diags. The next-request rebuild keeps
-        // this hot-path simple — we only clear the slot here, never recompute.
+        // this hot-path simple, we only clear the slot here, never recompute.
        let cache_for_invalidate = Arc::clone(&state.findings_cache);
        let mut event_rx = event_tx.subscribe();
        tokio::spawn(async move {
@ -152,7 +152,7 @@ async fn shutdown_signal() {
        .expect("failed to listen for Ctrl+C");
    eprintln!("\n  Shutting down...");
    // SSE connections block graceful shutdown indefinitely.
-    // Use a raw OS thread to force exit — tokio tasks may not
+    // Use a raw OS thread to force exit, tokio tasks may not
    // run reliably during shutdown.
    std::thread::spawn(|| {
        std::thread::sleep(std::time::Duration::from_millis(250));