Feat/configurable sanitizers and js precision (#32)

* chore: Exclude CLAUDE.md from Cargo.toml
* feat: Add configurable analysis rules and CLI commands for custom sanitizers and terminators
* feat: Enhance resource management and analysis efficiency
  - Implemented parallel summary merging in `scan_filesystem` using rayon for improved performance.
  - Introduced `GlobalSummaries::merge()` for efficient merging of summaries.
  - Optimized file reading and hashing to eliminate redundant I/O operations.
  - Added `should_scan_with_hash()` and `upsert_file_with_hash()` methods to streamline file processing.
  - Enhanced taint analysis with in-place mutations to reduce memory allocations.
  - Updated resource acquisition patterns to exclude false positives for `freopen` and wrapper functions.
* feat: Implement severity downgrade for findings in non-production paths and add source kind inference
* feat: Update versioning information in SECURITY.md for new stable line
* feat: Update categories in Cargo.toml to include parser-implementations and text-processing
* feat: Update dependencies in Cargo.lock for improved compatibility and performance
* feat: Update dependencies in Cargo.lock and Cargo.toml for improved compatibility
2026-06-09 19:45:13 +02:00 · 2026-02-25 04:02:11 -05:00 · 2026-02-25 04:02:11 -05:00 · 19b578c5c4
commit 19b578c5c4
parent f96a89e7c1
37 changed files with 3775 additions and 432 deletions
--- a/src/ast.rs
+++ b/src/ast.rs
@ -2,6 +2,7 @@ use crate::cfg::{build_cfg, export_summaries};
 use crate::cfg_analysis;
 use crate::commands::scan::Diag;
 use crate::errors::{NyxError, NyxResult};
+use crate::labels::{build_lang_rules, severity_for_source_kind};
 use crate::patterns::Severity;
 use crate::summary::{FuncSummary, GlobalSummaries};
 use crate::symbol::{Lang, normalize_namespace};
@ -53,6 +54,53 @@ fn is_binary(bytes: &[u8]) -> bool {
    bytes.iter().filter(|b| **b == 0).count() * 100 / bytes.len().max(1) > 1
 }

+/// Returns `true` when `path` looks like non-production code: its file name is `build.rs` or
+/// ends with `.min.js`, or any normal path component exactly equals a `NONPROD_DIRS` entry.
+/// Used to downgrade severity for findings in paths unlikely to represent attack surface.
+fn is_nonprod_path(path: &Path) -> bool {
+    static NONPROD_DIRS: &[&str] = &[
+        "tests",
+        "test",
+        "__tests__",
+        "benches",
+        "benchmarks",
+        "examples",
+        "build",
+        "scripts",
+        "docs",
+        "js_tests",
+        "fixtures",
+        "vendor",
+    ];
+    static NONPROD_FILES: &[&str] = &["build.rs"]; // compared against the final component only
+
+    if let Some(name) = path.file_name().and_then(|n| n.to_str()) // non-UTF-8 names are skipped
+        && (NONPROD_FILES.contains(&name) || name.ends_with(".min.js"))
+    {
+        return true;
+    }
+
+    for component in path.components() { // NOTE(review): also visits ancestors of an absolute path
+        if let std::path::Component::Normal(c) = component // root, prefix, `.` and `..` never match
+            && let Some(s) = c.to_str()
+            && NONPROD_DIRS.contains(&s) // exact, case-sensitive comparison
+        {
+            return true;
+        }
+    }
+
+    false
+}
+
+/// Lowers `s` by one tier and returns the result: High→Medium, Medium→Low, Low→Low.
+fn downgrade_severity(s: Severity) -> Severity {
+    match s {
+        Severity::High => Severity::Medium,
+        Severity::Medium => Severity::Low,
+        Severity::Low => Severity::Low, // nothing below Low, so it is returned unchanged
+    }
+}
+
 // ─────────────────────────────────────────────────────────────────────────────
 //  Pass 1: Extract function summaries (no taint analysis)
 // ─────────────────────────────────────────────────────────────────────────────
@ -84,7 +132,17 @@ pub fn extract_summaries_from_bytes(
    })?;

    let file_path_str = path.to_string_lossy();
-    let (_cfg_graph, _entry, local_summaries) = build_cfg(&tree, bytes, lang_slug, &file_path_str);
+    let lang_rules = build_lang_rules(_cfg, lang_slug);
+    let rules_ref = if lang_rules.extra_labels.is_empty()
+        && lang_rules.terminators.is_empty()
+        && lang_rules.event_handlers.is_empty()
+    {
+        None
+    } else {
+        Some(&lang_rules)
+    };
+    let (_cfg_graph, _entry, local_summaries) =
+        build_cfg(&tree, bytes, lang_slug, &file_path_str, rules_ref);

    Ok(export_summaries(
        &local_summaries,
@ -95,6 +153,7 @@ pub fn extract_summaries_from_bytes(

 /// Convenience wrapper that reads the file then delegates to
 /// [`extract_summaries_from_bytes`].
+#[allow(dead_code)] // used by benchmarks and lib consumers
 pub fn extract_summaries_from_file(path: &Path, cfg: &Config) -> NyxResult<Vec<FuncSummary>> {
    let bytes = std::fs::read(path)?;
    extract_summaries_from_bytes(&bytes, path, cfg)
@ -142,7 +201,17 @@ pub fn run_rules_on_bytes(

    if needs_cfg {
        // Build CFG — needed for both taint analysis and CFG structural analyses.
-        let (cfg_graph, entry, summaries) = build_cfg(&_tree, bytes, lang_slug, &file_path_str);
+        let lang_rules = build_lang_rules(cfg, lang_slug);
+        let rules_ref = if lang_rules.extra_labels.is_empty()
+            && lang_rules.terminators.is_empty()
+            && lang_rules.event_handlers.is_empty()
+        {
+            None
+        } else {
+            Some(&lang_rules)
+        };
+        let (cfg_graph, entry, summaries) =
+            build_cfg(&_tree, bytes, lang_slug, &file_path_str, rules_ref);
        let caller_lang = Lang::from_slug(lang_slug).unwrap_or(Lang::Rust);

        // ── Taint analysis ──────────────────────────────────────────────
@ -174,7 +243,7 @@ pub fn run_rules_on_bytes(
                path: path.to_string_lossy().into_owned(),
                line: sink_point.row + 1,
                col: sink_point.column + 1,
-                severity: Severity::High,
+                severity: severity_for_source_kind(finding.source_kind),
                id: format!(
                    "taint-unsanitised-flow (source {}:{})",
                    source_point.row + 1,
@ -184,6 +253,7 @@ pub fn run_rules_on_bytes(
        }

        // ── CFG structural analyses ─────────────────────────────────────
+        let taint_active = global_summaries.is_some() || !taint_results.is_empty();
        let cfg_ctx = cfg_analysis::AnalysisContext {
            cfg: &cfg_graph,
            entry,
@ -193,6 +263,8 @@ pub fn run_rules_on_bytes(
            func_summaries: &summaries,
            global_summaries,
            taint_findings: &taint_results,
+            analysis_rules: rules_ref,
+            taint_active,
        };
        for cf in cfg_analysis::run_all(&cfg_ctx) {
            let point = byte_offset_to_point(&_tree, cf.span.0);
@ -238,6 +310,13 @@ pub fn run_rules_on_bytes(
        a.line == b.line && a.col == b.col && a.id == b.id && a.severity == b.severity
    });

+    // Downgrade severity for non-production paths unless opted out
+    if !cfg.scanner.include_nonprod && is_nonprod_path(path) {
+        for d in &mut out {
+            d.severity = downgrade_severity(d.severity);
+        }
+    }
+
    Ok(out)
 }

@ -253,6 +332,184 @@ pub fn run_rules_on_file(
    run_rules_on_bytes(&bytes, path, cfg, global_summaries, scan_root)
 }

+// ─────────────────────────────────────────────────────────────────────────────
+//  Fused single-pass: extract summaries + run full analysis in one parse/CFG
+// ─────────────────────────────────────────────────────────────────────────────
+
+/// Output of one fused analysis pass over a single file: function summaries plus diagnostics.
+pub struct FusedResult {
+    pub summaries: Vec<FuncSummary>, // exported function summaries, used for cross-file merging
+    pub diags: Vec<Diag>, // findings collected for the file by the enabled analyses
+}
+
+/// Parses `bytes` once, builds the CFG once, and returns both the exported function summaries
+/// (for cross-file resolution) and the diagnostics enabled by `cfg.scanner.mode` (taint + CFG
+/// structural analyses and/or AST queries).  Binary input and paths that `lang_for_path` does
+/// not recognise yield an empty result rather than an error.
+/// When `global_summaries` is `None`, the taint engine runs with local
+/// context only (equivalent to pass 1 + partial pass 2).  A second call
+/// to [`run_taint_only`] can refine findings with the full cross-file view
+/// without re-parsing or re-building the CFG.  Errors only if tree-sitter setup or parsing fails.
+pub fn analyse_file_fused(
+    bytes: &[u8],
+    path: &Path,
+    cfg: &Config,
+    global_summaries: Option<&GlobalSummaries>,
+    scan_root: Option<&Path>,
+) -> NyxResult<FusedResult> {
+    let _span = tracing::debug_span!("analyse_fused", file = %path.display()).entered();
+
+    if is_binary(bytes) { // binary input: nothing to analyse
+        return Ok(FusedResult {
+            summaries: vec![],
+            diags: vec![],
+        });
+    }
+
+    let Some((ts_lang, lang_slug)) = lang_for_path(path) else { // no language for this path
+        return Ok(FusedResult {
+            summaries: vec![],
+            diags: vec![],
+        });
+    };
+
+    let tree = PARSER.with(|cell| {
+        let mut parser = cell.borrow_mut();
+        parser.set_language(&ts_lang)?;
+        parser
+            .parse(bytes, None)
+            .ok_or_else(|| NyxError::Other("tree-sitter failed".into()))
+    })?; // the only fallible step in this function
+
+    let file_path_str = path.to_string_lossy();
+
+    // Build language-specific analysis rules once; pass `None` downstream when all are empty
+    let lang_rules = build_lang_rules(cfg, lang_slug);
+    let rules_ref = if lang_rules.extra_labels.is_empty()
+        && lang_rules.terminators.is_empty()
+        && lang_rules.event_handlers.is_empty()
+    {
+        None
+    } else {
+        Some(&lang_rules)
+    };
+
+    // Build CFG once (in every mode, including AST-only) for summary extraction AND analysis
+    let (cfg_graph, entry, local_summaries) =
+        build_cfg(&tree, bytes, lang_slug, &file_path_str, rules_ref);
+
+    // Export summaries (always — needed for cross-file merging)
+    let summaries = export_summaries(&local_summaries, &file_path_str, lang_slug);
+
+    let mut out = Vec::new();
+
+    // Taint + CFG structural analyses run only in Full and Taint modes
+    let needs_cfg =
+        cfg.scanner.mode == AnalysisMode::Full || cfg.scanner.mode == AnalysisMode::Taint;
+
+    if needs_cfg {
+        let caller_lang = Lang::from_slug(lang_slug).unwrap_or(Lang::Rust); // Rust if slug unknown
+        let scan_root_str = scan_root.map(|p| p.to_string_lossy());
+        let namespace = normalize_namespace(&file_path_str, scan_root_str.as_deref());
+
+        let taint_results = analyse_file(
+            &cfg_graph,
+            entry,
+            &local_summaries,
+            global_summaries,
+            caller_lang,
+            &namespace,
+            &[],
+        );
+        for finding in &taint_results {
+            let sink_byte = cfg_graph[finding.sink].span.0;
+            let sink_point = byte_offset_to_point(&tree, sink_byte);
+            let source_byte = cfg_graph[finding.source].span.0;
+            let source_point = byte_offset_to_point(&tree, source_byte);
+
+            out.push(Diag { // reported at the sink; the source position is embedded in the id
+                path: path.to_string_lossy().into_owned(),
+                line: sink_point.row + 1, // +1 on row/column: presumably 0-based points — confirm
+                col: sink_point.column + 1,
+                severity: severity_for_source_kind(finding.source_kind),
+                id: format!(
+                    "taint-unsanitised-flow (source {}:{})",
+                    source_point.row + 1,
+                    source_point.column + 1
+                ),
+            });
+        }
+
+        let taint_active = global_summaries.is_some() || !taint_results.is_empty(); // either suffices
+        let cfg_ctx = cfg_analysis::AnalysisContext {
+            cfg: &cfg_graph,
+            entry,
+            lang: caller_lang,
+            file_path: &file_path_str,
+            source_bytes: bytes,
+            func_summaries: &local_summaries,
+            global_summaries,
+            taint_findings: &taint_results,
+            analysis_rules: rules_ref,
+            taint_active,
+        };
+        for cf in cfg_analysis::run_all(&cfg_ctx) {
+            let point = byte_offset_to_point(&tree, cf.span.0);
+            out.push(Diag {
+                path: path.to_string_lossy().into_owned(),
+                line: point.row + 1,
+                col: point.column + 1,
+                severity: cf.severity,
+                id: cf.rule_id,
+            });
+        }
+    }
+
+    // AST pattern queries run only in Full and Ast modes
+    if cfg.scanner.mode == AnalysisMode::Full || cfg.scanner.mode == AnalysisMode::Ast {
+        let root = tree.root_node();
+        let compiled = query_cache::for_lang(lang_slug, ts_lang);
+        let mut cursor = QueryCursor::new();
+
+        for cq in compiled.iter() {
+            if cfg.scanner.min_severity <= cq.meta.severity { // NOTE(review): depends on `Severity` ordering — confirm
+                continue;
+            }
+            let mut matches = cursor.matches(&cq.query, root, bytes);
+            while let Some(m) = matches.next() {
+                if let Some(cap) = m.captures.iter().find(|c| c.index == 0) { // capture 0 locates the finding
+                    let point = cap.node.start_position();
+                    out.push(Diag {
+                        path: path.to_string_lossy().into_owned(),
+                        line: point.row + 1,
+                        col: point.column + 1,
+                        severity: cq.meta.severity,
+                        id: cq.meta.id.to_owned(),
+                    });
+                }
+            }
+        }
+    }
+
+    // Dedup: sort by (line, col, id, severity) so identical diagnostics are adjacent, then drop repeats
+    out.sort_by(|a, b| (a.line, a.col, &a.id, a.severity).cmp(&(b.line, b.col, &b.id, b.severity)));
+    out.dedup_by(|a, b| {
+        a.line == b.line && a.col == b.col && a.id == b.id && a.severity == b.severity
+    });
+
+    // Downgrade severity for non-production paths unless opted out (runs after dedup)
+    if !cfg.scanner.include_nonprod && is_nonprod_path(path) {
+        for d in &mut out {
+            d.severity = downgrade_severity(d.severity); // NOTE(review): may recreate duplicates (Medium+Low → Low+Low)
+        }
+    }
+
+    Ok(FusedResult {
+        summaries,
+        diags: out,
+    })
+}
+
 #[test]
 fn unknown_extension_returns_empty() {
    let dir = tempfile::tempdir().unwrap();
@ -279,3 +536,65 @@ fn binary_file_guard_triggers() {
    let diags = run_rules_on_file(&bin, &Config::default(), None, None).unwrap();
    assert!(diags.is_empty(), "binary files are skipped");
 }
+
+#[test]
+fn nonprod_path_detection() {
+    // Paths that is_nonprod_path must flag: listed directory names and special file names
+    assert!(is_nonprod_path(Path::new("project/tests/test_main.py")));
+    assert!(is_nonprod_path(Path::new("src/__tests__/foo.js")));
+    assert!(is_nonprod_path(Path::new("benches/bench.rs")));
+    assert!(is_nonprod_path(Path::new("vendor/lib/foo.py")));
+    assert!(is_nonprod_path(Path::new("src/build.rs"))); // matched by file name, not directory
+    assert!(is_nonprod_path(Path::new("dist/app.min.js"))); // matched by the `.min.js` suffix
+    assert!(is_nonprod_path(Path::new("examples/demo.py")));
+    assert!(is_nonprod_path(Path::new("fixtures/data.json")));
+
+    // Paths with no listed directory component or special file name must NOT match
+    assert!(!is_nonprod_path(Path::new("src/main.rs")));
+    assert!(!is_nonprod_path(Path::new("lib/handler.py")));
+    assert!(!is_nonprod_path(Path::new("app/views.py")));
+}
+
+#[test]
+fn severity_downgrade_works() { // pins the one-tier mapping for every `Severity` variant
+    assert_eq!(downgrade_severity(Severity::High), Severity::Medium);
+    assert_eq!(downgrade_severity(Severity::Medium), Severity::Low);
+    assert_eq!(downgrade_severity(Severity::Low), Severity::Low); // Low stays Low
+}
+
+#[test]
+fn nonprod_path_downgrades_findings() {
+    let dir = tempfile::tempdir().unwrap();
+    // Create a Python file under a "tests" directory so the path counts as non-production
+    let test_dir = dir.path().join("tests");
+    std::fs::create_dir_all(&test_dir).unwrap();
+    let test_file = test_dir.join("test_cmd.py");
+    std::fs::write(
+        &test_file,
+        b"import os\ndef test():\n    cmd = os.environ['X']\n    os.system(cmd)\n", // env value reaches os.system
+    )
+    .unwrap();
+
+    let default_cfg = Config::default(); // presumably include_nonprod = false by default — confirm
+    let diags = run_rules_on_file(&test_file, &default_cfg, None, None).unwrap();
+
+    // No finding in tests/ may remain HIGH (NOTE(review): passes vacuously if there are no findings)
+    let high: Vec<_> = diags
+        .iter()
+        .filter(|d| d.severity == Severity::High)
+        .collect();
+    assert!(
+        high.is_empty(),
+        "Findings in tests/ should be downgraded from HIGH; got {:?}",
+        high
+    );
+
+    // With include_nonprod=true the downgrade step is skipped, so original severities are kept
+    let mut prod_cfg = Config::default();
+    prod_cfg.scanner.include_nonprod = true;
+    let diags_prod = run_rules_on_file(&test_file, &prod_cfg, None, None).unwrap();
+
+    // NOTE(review): nothing is asserted about `diags_prod`; this only checks that the scan
+    // succeeds with include_nonprod=true.  TODO: assert that severities are not downgraded.
+    let _ = diags_prod;
+}
--- a/src/cfg.rs
+++ b/src/cfg.rs
@ -3,7 +3,7 @@ use petgraph::prelude::*;
 use tracing::debug;
 use tree_sitter::{Node, Tree};

-use crate::labels::{Cap, DataLabel, Kind, classify, lookup, param_config};
+use crate::labels::{Cap, DataLabel, Kind, LangAnalysisRules, classify, lookup, param_config};
 use crate::summary::FuncSummary;
 use crate::symbol::{FuncKey, Lang};
 use std::collections::{HashMap, HashSet};
@ -186,7 +186,12 @@ fn member_expr_text(n: Node, code: &[u8]) -> Option<String> {
 }

 /// Recursively search `n` for a member expression whose text classifies as a label.
-fn first_member_label(n: Node, lang: &str, code: &[u8]) -> Option<DataLabel> {
+fn first_member_label(
+    n: Node,
+    lang: &str,
+    code: &[u8],
+    extra_labels: Option<&[crate::labels::RuntimeLabelRule]>,
+) -> Option<DataLabel> {
    match n.kind() {
        "member_expression" | "attribute" | "selector_expression" => {
            if let Some(full) = member_expr_text(n, code) {
@ -194,7 +199,7 @@ fn first_member_label(n: Node, lang: &str, code: &[u8]) -> Option<DataLabel> {
                // to match rules like "process.env" from "process.env.CMD".
                let mut candidate = full.as_str();
                loop {
-                    if let Some(lbl) = classify(lang, candidate) {
+                    if let Some(lbl) = classify(lang, candidate, extra_labels) {
                        return Some(lbl);
                    }
                    match candidate.rsplit_once('.') {
@ -208,7 +213,7 @@ fn first_member_label(n: Node, lang: &str, code: &[u8]) -> Option<DataLabel> {
    }
    let mut cursor = n.walk();
    for child in n.children(&mut cursor) {
-        if let Some(lbl) = first_member_label(child, lang, code) {
+        if let Some(lbl) = first_member_label(child, lang, code, extra_labels) {
            return Some(lbl);
        }
    }
@ -366,6 +371,7 @@ fn def_use(ast: Node, lang: &str, code: &[u8]) -> (Option<String>, Vec<String>)
 }

 /// Create a node in one short borrow and optionally attach a taint label.
+#[allow(clippy::too_many_arguments)]
 fn push_node<'a>(
    g: &mut Cfg,
    kind: StmtKind,
@ -374,6 +380,7 @@ fn push_node<'a>(
    code: &'a [u8],
    enclosing_func: Option<&str>,
    call_ordinal: u32,
+    analysis_rules: Option<&LangAnalysisRules>,
 ) -> NodeIndex {
    /* ── 1.  IDENTIFIER EXTRACTION ─────────────────────────────────────── */

@ -427,7 +434,8 @@ fn push_node<'a>(

    /* ── 2.  LABEL LOOK-UP  ───────────────────────────────────────────── */

-    let mut label = classify(lang, &text);
+    let extra = analysis_rules.map(|r| r.extra_labels.as_slice());
+    let mut label = classify(lang, &text, extra);

    // For assignments like `element.innerHTML = value`, the inner-call heuristic
    // above may have overridden `text` with a call on the RHS (e.g. getElementById).
@ -450,10 +458,20 @@ fn push_node<'a>(

        if let Some(assign) = assign_node
            && let Some(lhs) = assign.child_by_field_name("left")
-            && let Some(prop) = lhs.child_by_field_name("property")
-            && let Some(prop_text) = text_of(prop, code)
        {
-            label = classify(lang, &prop_text);
+            // Try full member expression first (e.g. "location.href") — more
+            // specific and avoids false positives on `a.href`.
+            if let Some(full) = member_expr_text(lhs, code) {
+                label = classify(lang, &full, extra);
+            }
+            // Fall back to property-only (e.g. "innerHTML") for sinks that
+            // don't need object context.
+            if label.is_none()
+                && let Some(prop) = lhs.child_by_field_name("property")
+                && let Some(prop_text) = text_of(prop, code)
+            {
+                label = classify(lang, &prop_text, extra);
+            }
        }
    }

@ -466,7 +484,7 @@ fn push_node<'a>(
            lookup(lang, ast.kind()),
            Kind::CallWrapper | Kind::Assignment
        )
-        && let Some(found) = first_member_label(ast, lang, code)
+        && let Some(found) = first_member_label(ast, lang, code, extra)
    {
        label = Some(found);
        // Update text so the callee name reflects the source
@ -564,6 +582,19 @@ fn extract_param_names<'a>(func_node: Node<'a>, lang: &str, code: &'a [u8]) -> V
    names
 }

+/// Returns `true` if `callee` equals any configured terminator, ignoring ASCII case; `false` when no rules are given.
+fn is_configured_terminator(callee: &str, analysis_rules: Option<&LangAnalysisRules>) -> bool {
+    if let Some(rules) = analysis_rules {
+        let callee_lower = callee.to_ascii_lowercase(); // lowercased once, outside the loop
+        rules
+            .terminators
+            .iter()
+            .any(|t| callee_lower == t.to_ascii_lowercase()) // NOTE(review): `eq_ignore_ascii_case` would avoid this allocation
+    } else {
+        false
+    }
+}
+
 /// Add the same edge (of the same kind) from every node in `froms` to `to`.
 #[inline]
 fn connect_all(g: &mut Cfg, froms: &[NodeIndex], to: NodeIndex, kind: EdgeKind) {
@ -588,6 +619,9 @@ fn build_sub<'a>(
    file_path: &str,
    enclosing_func: Option<&str>,
    call_ordinal: &mut u32,
+    analysis_rules: Option<&LangAnalysisRules>,
+    break_targets: &mut Vec<NodeIndex>,
+    continue_targets: &mut Vec<NodeIndex>,
 ) -> Vec<NodeIndex> {
    match lookup(lang, ast.kind()) {
        // ─────────────────────────────────────────────────────────────────
@ -595,7 +629,16 @@ fn build_sub<'a>(
        // ─────────────────────────────────────────────────────────────────
        Kind::If => {
            // Condition node
-            let cond = push_node(g, StmtKind::If, ast, lang, code, enclosing_func, 0);
+            let cond = push_node(
+                g,
+                StmtKind::If,
+                ast,
+                lang,
+                code,
+                enclosing_func,
+                0,
+                analysis_rules,
+            );
            connect_all(g, preds, cond, EdgeKind::Seq);

            // Locate then & else blocks using field-based lookup first,
@ -620,6 +663,7 @@ fn build_sub<'a>(
            };

            // THEN branch
+            let then_first_node = NodeIndex::new(g.node_count());
            let then_exits = if let Some(b) = then_block {
                let exits = build_sub(
                    b,
@ -631,9 +675,17 @@ fn build_sub<'a>(
                    file_path,
                    enclosing_func,
                    call_ordinal,
+                    analysis_rules,
+                    break_targets,
+                    continue_targets,
                );
-                // True edges leave the condition
-                if let Some(&first) = exits.first() {
+                // Add True edge from condition to first node of then-branch.
+                // We use the first node created (by index) rather than the
+                // exit, because the branch may terminate (return/break) and
+                // have no exits.
+                if then_first_node.index() < g.node_count() {
+                    connect_all(g, &[cond], then_first_node, EdgeKind::True);
+                } else if let Some(&first) = exits.first() {
                    connect_all(g, &[cond], first, EdgeKind::True);
                }
                exits
@ -642,6 +694,7 @@ fn build_sub<'a>(
            };

            // ELSE branch
+            let else_first_node = NodeIndex::new(g.node_count());
            let else_exits = if let Some(b) = else_block {
                let exits = build_sub(
                    b,
@ -653,17 +706,30 @@ fn build_sub<'a>(
                    file_path,
                    enclosing_func,
                    call_ordinal,
+                    analysis_rules,
+                    break_targets,
+                    continue_targets,
                );
-                if let Some(&first) = exits.first() {
+                if else_first_node.index() < g.node_count() {
+                    connect_all(g, &[cond], else_first_node, EdgeKind::False);
+                } else if let Some(&first) = exits.first() {
                    connect_all(g, &[cond], first, EdgeKind::False);
                }
                exits
            } else {
-                // No explicit else → non-taken branch flows to the *then* exits
-                if let Some(&first) = then_exits.first() {
-                    connect_all(g, &[cond], first, EdgeKind::False);
+                // No explicit else → if the then-branch falls through
+                // (non-empty exits), the false branch merges with those exits.
+                // If the then-branch terminates (break/return/continue →
+                // empty exits), the false branch flows from the condition
+                // to whatever comes next.
+                if then_exits.is_empty() {
+                    vec![cond]
+                } else {
+                    if let Some(&first) = then_exits.first() {
+                        connect_all(g, &[cond], first, EdgeKind::False);
+                    }
+                    then_exits.clone()
                }
-                then_exits.clone()
            };

            // Frontier = union of both branches
@ -672,9 +738,22 @@ fn build_sub<'a>(

        Kind::InfiniteLoop => {
            // Synthetic header node
-            let header = push_node(g, StmtKind::Loop, ast, lang, code, enclosing_func, 0);
+            let header = push_node(
+                g,
+                StmtKind::Loop,
+                ast,
+                lang,
+                code,
+                enclosing_func,
+                0,
+                analysis_rules,
+            );
            connect_all(g, preds, header, EdgeKind::Seq);

+            // Fresh break/continue targets scoped to this loop
+            let mut loop_breaks = Vec::new();
+            let mut loop_continues = Vec::new();
+
            // The body is the single `block` child
            let body = ast.child_by_field_name("body").expect("loop without body");
            let body_exits = build_sub(
@ -687,23 +766,49 @@ fn build_sub<'a>(
                file_path,
                enclosing_func,
                call_ordinal,
+                analysis_rules,
+                &mut loop_breaks,
+                &mut loop_continues,
            );

            // Back-edge from every linear exit to header
            for &e in &body_exits {
                connect_all(g, &[e], header, EdgeKind::Back);
            }
-            // `loop` may break → those exits are frontiers too
-            body_exits.into_iter().chain([header]).collect()
+            // Wire continue targets as back edges to header
+            for &c in &loop_continues {
+                connect_all(g, &[c], header, EdgeKind::Back);
+            }
+            // Break targets become exits of the loop
+            if loop_breaks.is_empty() {
+                // No break → infinite loop; header is the only exit for
+                // downstream code (fallthrough semantics)
+                vec![header]
+            } else {
+                loop_breaks
+            }
        }

        // ─────────────────────────────────────────────────────────────────
        //  WHILE / FOR: classic loop with a back edge.
        // ─────────────────────────────────────────────────────────────────
        Kind::While | Kind::For => {
-            let header = push_node(g, StmtKind::Loop, ast, lang, code, enclosing_func, 0);
+            let header = push_node(
+                g,
+                StmtKind::Loop,
+                ast,
+                lang,
+                code,
+                enclosing_func,
+                0,
+                analysis_rules,
+            );
            connect_all(g, preds, header, EdgeKind::Seq);

+            // Fresh break/continue targets scoped to this loop
+            let mut loop_breaks = Vec::new();
+            let mut loop_continues = Vec::new();
+
            // Body = first (and usually only) block child.
            let body = ast
                .child_by_field_name("body")
@ -724,14 +829,24 @@ fn build_sub<'a>(
                file_path,
                enclosing_func,
                call_ordinal,
+                analysis_rules,
+                &mut loop_breaks,
+                &mut loop_continues,
            );

            // Back‑edge for every linear exit → header.
            for &e in &body_exits {
                connect_all(g, &[e], header, EdgeKind::Back);
            }
-            // Falling out of the loop = header’s false branch.
-            vec![header]
+            // Wire continue targets as back edges to header
+            for &c in &loop_continues {
+                connect_all(g, &[c], header, EdgeKind::Back);
+            }
+            // Falling out of the loop = header’s false branch +
+            // any break targets that exit the loop.
+            let mut exits = vec![header];
+            exits.extend(loop_breaks);
+            exits
        }

        // ─────────────────────────────────────────────────────────────────
@ -743,25 +858,72 @@ fn build_sub<'a>(
                // that callee labels (source/sanitizer/sink) are applied.
                let ord = *call_ordinal;
                *call_ordinal += 1;
-                let call_idx = push_node(g, StmtKind::Call, ast, lang, code, enclosing_func, ord);
+                let call_idx = push_node(
+                    g,
+                    StmtKind::Call,
+                    ast,
+                    lang,
+                    code,
+                    enclosing_func,
+                    ord,
+                    analysis_rules,
+                );
                connect_all(g, preds, call_idx, EdgeKind::Seq);
-                let ret = push_node(g, StmtKind::Return, ast, lang, code, enclosing_func, 0);
+                let ret = push_node(
+                    g,
+                    StmtKind::Return,
+                    ast,
+                    lang,
+                    code,
+                    enclosing_func,
+                    0,
+                    analysis_rules,
+                );
                connect_all(g, &[call_idx], ret, EdgeKind::Seq);
                Vec::new()
            } else {
-                let ret = push_node(g, StmtKind::Return, ast, lang, code, enclosing_func, 0);
+                let ret = push_node(
+                    g,
+                    StmtKind::Return,
+                    ast,
+                    lang,
+                    code,
+                    enclosing_func,
+                    0,
+                    analysis_rules,
+                );
                connect_all(g, preds, ret, EdgeKind::Seq);
                Vec::new() // terminates this path
            }
        }
        Kind::Break => {
-            let brk = push_node(g, StmtKind::Break, ast, lang, code, enclosing_func, 0);
+            let brk = push_node(
+                g,
+                StmtKind::Break,
+                ast,
+                lang,
+                code,
+                enclosing_func,
+                0,
+                analysis_rules,
+            );
            connect_all(g, preds, brk, EdgeKind::Seq);
+            break_targets.push(brk);
            Vec::new()
        }
        Kind::Continue => {
-            let cont = push_node(g, StmtKind::Continue, ast, lang, code, enclosing_func, 0);
+            let cont = push_node(
+                g,
+                StmtKind::Continue,
+                ast,
+                lang,
+                code,
+                enclosing_func,
+                0,
+                analysis_rules,
+            );
            connect_all(g, preds, cont, EdgeKind::Seq);
+            continue_targets.push(cont);
            Vec::new()
        }

@ -774,6 +936,7 @@ fn build_sub<'a>(
            // Track the last frontier before a function emptied it — used to
            // keep subsequent functions reachable.
            let mut last_live_frontier = preds.to_vec();
+            let mut prev_was_preproc = false;
            for child in ast.children(&mut cursor) {
                let child_is_fn = lookup(lang, child.kind()) == Kind::Function;

@ -782,7 +945,13 @@ fn build_sub<'a>(
                // file-level predecessors.  Without this, a preceding function
                // that ends with `return` (frontier = []) would leave subsequent
                // functions disconnected from the graph.
-                let child_preds = if child_is_fn && frontier.is_empty() {
+                //
+                // Similarly, when a preprocessor block (`#ifdef ... #endif`)
+                // contains an `if/else` whose else branch is on the other side
+                // of the `#endif`, tree-sitter parses a dangling else that
+                // empties the frontier.  The code after the preproc block should
+                // remain reachable.
+                let child_preds = if frontier.is_empty() && (child_is_fn || prev_was_preproc) {
                    last_live_frontier.clone()
                } else {
                    frontier.clone()
@ -798,12 +967,17 @@ fn build_sub<'a>(
                    file_path,
                    enclosing_func,
                    call_ordinal,
+                    analysis_rules,
+                    break_targets,
+                    continue_targets,
                );

+                let is_preproc = child.kind().starts_with("preproc_");
                if !child_exits.is_empty() {
                    last_live_frontier = child_exits.clone();
                }
                frontier = child_exits;
+                prev_was_preproc = is_preproc;
            }
            frontier
        }
@ -822,7 +996,16 @@ fn build_sub<'a>(
                    tmp.into_iter().next()
                })
                .unwrap_or_else(|| "<anon>".to_string());
-            let entry_idx = push_node(g, StmtKind::Seq, ast, lang, code, Some(&fn_name), 0);
+            let entry_idx = push_node(
+                g,
+                StmtKind::Seq,
+                ast,
+                lang,
+                code,
+                Some(&fn_name),
+                0,
+                analysis_rules,
+            );
            connect_all(g, preds, entry_idx, EdgeKind::Seq);

            // 1b) extract parameter names
@ -830,8 +1013,13 @@ fn build_sub<'a>(
            let param_count = param_names.len();

            // 2) build its body with a fresh call ordinal counter for this function scope
+            // Snapshot the current node count so we can iterate only over nodes
+            // created within this function (avoids O(N²) scan of the full graph).
+            let fn_first_node: NodeIndex = NodeIndex::new(g.node_count());
            let body = ast.child_by_field_name("body").expect("fn w/o body");
            let mut fn_call_ordinal: u32 = 0;
+            let mut fn_breaks = Vec::new();
+            let mut fn_continues = Vec::new();
            let body_exits = build_sub(
                body,
                &[entry_idx],
@ -842,6 +1030,9 @@ fn build_sub<'a>(
                file_path,
                Some(&fn_name),
                &mut fn_call_ordinal,
+                analysis_rules,
+                &mut fn_breaks,
+                &mut fn_continues,
            );

            // ───── 3) light-weight dataflow ──────────────────────────────────────
@ -862,11 +1053,12 @@ fn build_sub<'a>(

            let param_set: HashSet<&str> = param_names.iter().map(|s| s.as_str()).collect();

-            for idx in g.node_indices() {
+            // Iterate only over nodes created within this function scope
+            // (entry_idx .. current end) instead of the entire graph.
+            let fn_node_range = entry_idx.index()..g.node_count();
+            for raw in fn_node_range {
+                let idx = NodeIndex::new(raw);
                let info = &g[idx];
-                if info.span.0 < ast.start_byte() || info.span.1 > ast.end_byte() {
-                    continue;
-                }

                // collect callee names
                if let Some(callee) = &info.callee
@ -1010,11 +1202,12 @@ fn build_sub<'a>(
            // this edge, the synthetic exit node is unreachable whenever
            // the function body ends with a `return` statement, which
            // disconnects all subsequent functions at the module level.
-            for idx in g.node_indices() {
+            //
+            // Only scan nodes created within this function scope.
+            for raw in fn_first_node.index()..g.node_count() {
+                let idx = NodeIndex::new(raw);
                let info = &g[idx];
                if info.kind == StmtKind::Return
-                    && info.span.0 >= ast.start_byte()
-                    && info.span.1 <= ast.end_byte()
                    && idx != exit_idx
                    && !g.contains_edge(idx, exit_idx)
                {
@ -1068,6 +1261,9 @@ fn build_sub<'a>(
                    file_path,
                    enclosing_func,
                    call_ordinal,
+                    analysis_rules,
+                    break_targets,
+                    continue_targets,
                );
            }

@ -1085,8 +1281,25 @@ fn build_sub<'a>(
            } else {
                0
            };
-            let node = push_node(g, kind, ast, lang, code, enclosing_func, ord);
+            let node = push_node(
+                g,
+                kind,
+                ast,
+                lang,
+                code,
+                enclosing_func,
+                ord,
+                analysis_rules,
+            );
            connect_all(g, preds, node, EdgeKind::Seq);
+
+            // If the callee is a configured terminator, treat as a dead end
+            if kind == StmtKind::Call
+                && let Some(callee) = &g[node].callee
+                && is_configured_terminator(callee, analysis_rules)
+            {
+                return Vec::new();
+            }
            vec![node]
        }

@ -1095,8 +1308,24 @@ fn build_sub<'a>(
        Kind::CallFn | Kind::CallMethod | Kind::CallMacro => {
            let ord = *call_ordinal;
            *call_ordinal += 1;
-            let n = push_node(g, StmtKind::Call, ast, lang, code, enclosing_func, ord);
+            let n = push_node(
+                g,
+                StmtKind::Call,
+                ast,
+                lang,
+                code,
+                enclosing_func,
+                ord,
+                analysis_rules,
+            );
            connect_all(g, preds, n, EdgeKind::Seq);
+
+            // If the callee is a configured terminator, treat as a dead end
+            if let Some(callee) = &g[n].callee
+                && is_configured_terminator(callee, analysis_rules)
+            {
+                return Vec::new();
+            }
            vec![n]
        }

@ -1115,7 +1344,16 @@ fn build_sub<'a>(
            } else {
                0
            };
-            let n = push_node(g, kind, ast, lang, code, enclosing_func, ord);
+            let n = push_node(
+                g,
+                kind,
+                ast,
+                lang,
+                code,
+                enclosing_func,
+                ord,
+                analysis_rules,
+            );
            connect_all(g, preds, n, EdgeKind::Seq);
            vec![n]
        }
@ -1127,7 +1365,16 @@ fn build_sub<'a>(
        //  Every other node = simple sequential statement
        // ─────────────────────────────────────────────────────────────────
        _ => {
-            let n = push_node(g, StmtKind::Seq, ast, lang, code, enclosing_func, 0);
+            let n = push_node(
+                g,
+                StmtKind::Seq,
+                ast,
+                lang,
+                code,
+                enclosing_func,
+                0,
+                analysis_rules,
+            );
            connect_all(g, preds, n, EdgeKind::Seq);
            vec![n]
        }
@ -1150,6 +1397,7 @@ pub(crate) fn build_cfg<'a>(
    code: &'a [u8],
    lang: &str,
    file_path: &str,
+    analysis_rules: Option<&LangAnalysisRules>,
 ) -> (Cfg, NodeIndex, FuncSummaries) {
    debug!(target: "cfg", "Building CFG for {:?}", tree.root_node());

@ -1178,6 +1426,8 @@ pub(crate) fn build_cfg<'a>(

    // Build the body below the synthetic ENTRY.
    let mut top_ordinal: u32 = 0;
+    let mut top_breaks = Vec::new();
+    let mut top_continues = Vec::new();
    let exits = build_sub(
        tree.root_node(),
        &[entry],
@ -1188,6 +1438,9 @@ pub(crate) fn build_cfg<'a>(
        file_path,
        None,
        &mut top_ordinal,
+        analysis_rules,
+        &mut top_breaks,
+        &mut top_continues,
    );
    debug!(target: "cfg", "exits: {:?}", exits);
    // Wire every real exit to our synthetic EXIT node.
--- a/src/cfg_analysis/guards.rs
+++ b/src/cfg_analysis/guards.rs
@ -2,15 +2,75 @@ use super::dominators::{self, dominates};
 use super::rules;
 use super::{AnalysisContext, CfgAnalysis, CfgFinding, Confidence, is_entry_point_func};
 use crate::cfg::StmtKind;
-use crate::labels::{Cap, DataLabel};
+use crate::labels::{Cap, DataLabel, RuntimeLabelRule};
 use crate::patterns::Severity;
 use petgraph::graph::NodeIndex;

 pub struct UnguardedSink;

+/// Decide whether **every** argument reaching the sink is a constant, i.e. no
+/// taint-capable variable flows into it.
+///
+/// A used identifier counts as constant when either:
+/// - it is one of the `.`/`:`-separated segments of the callee text itself
+///   (e.g. `Command` in `Command::new`), or
+/// - it has at least one defining node in the sink's enclosing function and
+///   **all** of those definitions are constant bindings: the defining node
+///   has empty `uses` and carries no `Source` label
+///   (e.g. `let cmd = "git"; Command::new(cmd)`).
+///
+/// The trace is a single hop and flow-insensitive: definitions are matched by
+/// name anywhere in the same function.  Requiring every definition to be
+/// constant keeps a reassignment such as `cmd = "git"; cmd = read_input();`
+/// from being suppressed merely because one of the bindings is a literal.
+///
+/// Returns `true` for a sink whose `uses` list is empty.
+fn is_all_args_constant(ctx: &AnalysisContext, sink: NodeIndex) -> bool {
+    let sink_info = &ctx.cfg[sink];
+    let callee_desc = sink_info.callee.as_deref().unwrap_or("");
+    let callee_parts: Vec<&str> = callee_desc.split(['.', ':']).collect();
+    let sink_func = sink_info.enclosing_func.as_deref();
+
+    sink_info.uses.iter().all(|u| {
+        // Part of the callee name itself → constant
+        if callee_parts.contains(&u.as_str()) {
+            return true;
+        }
+
+        // One-hop trace over every definition of `u` in the same function.
+        let mut has_definition = false;
+        for idx in ctx.cfg.node_indices() {
+            let info = &ctx.cfg[idx];
+            if info.enclosing_func.as_deref() != sink_func
+                || info.defines.as_deref() != Some(u.as_str())
+            {
+                continue;
+            }
+            let is_constant_binding =
+                info.uses.is_empty() && !matches!(info.label, Some(DataLabel::Source(_)));
+            if !is_constant_binding {
+                // At least one binding of this name is computed or
+                // source-derived, so the value at the sink is not provably
+                // constant.
+                return false;
+            }
+            has_definition = true;
+        }
+
+        // No definition found in this function (e.g. a parameter) → unknown,
+        // so not constant.
+        has_definition
+    })
+}
+
+/// Look up `callee` in the configured runtime label rules and return the
+/// capability of the first sanitizer rule that matches it.
+///
+/// Only rules labelled `DataLabel::Sanitizer` are considered.  Matching is
+/// ASCII case-insensitive: a matcher ending in `_` is compared as a prefix of
+/// the callee, any other matcher as a suffix.  Returns `None` when no
+/// sanitizer rule matches.
+fn match_config_sanitizer(callee: &str, extra: &[RuntimeLabelRule]) -> Option<Cap> {
+    let needle = callee.to_ascii_lowercase();
+
+    extra.iter().find_map(|rule| {
+        // Non-sanitizer rules never contribute a capability.
+        let DataLabel::Sanitizer(cap) = rule.label else {
+            return None;
+        };
+        let matched = rule.matchers.iter().any(|matcher| {
+            let pattern = matcher.to_ascii_lowercase();
+            if pattern.ends_with('_') {
+                needle.starts_with(&pattern)
+            } else {
+                needle.ends_with(&pattern)
+            }
+        });
+        matched.then_some(cap)
+    })
+}
+
 /// Find all nodes in the CFG that are calls to guard functions.
 fn find_guard_nodes(ctx: &AnalysisContext) -> Vec<(NodeIndex, Cap)> {
    let guard_rules = rules::guard_rules(ctx.lang);
+    let config_rules = ctx
+        .analysis_rules
+        .map(|r| r.extra_labels.as_slice())
+        .unwrap_or(&[]);
    let mut result = Vec::new();

    for idx in ctx.cfg.node_indices() {
@ -19,6 +79,13 @@ fn find_guard_nodes(ctx: &AnalysisContext) -> Vec<(NodeIndex, Cap)> {
            continue;
        }
        if let Some(callee) = &info.callee {
+            // Check config sanitizer rules first
+            if let Some(cap) = match_config_sanitizer(callee, config_rules) {
+                result.push((idx, cap));
+                continue;
+            }
+
+            // Then check built-in guard rules
            let callee_lower = callee.to_ascii_lowercase();
            for rule in guard_rules {
                let matched = rule.matchers.iter().any(|m| {
@ -174,6 +241,13 @@ impl CfgAnalysis for UnguardedSink {

            let has_taint = taint_confirms_sink(ctx, *sink);
            let source_derived = sink_arg_is_source_derived(ctx, *sink);
+
+            // If sink args are all constants (including one-hop constant bindings)
+            // and taint didn't confirm, this is a false positive — skip it.
+            if is_all_args_constant(ctx, *sink) && !has_taint && !source_derived {
+                continue;
+            }
+
            let param_only = sink_arg_is_parameter_only(ctx, *sink);
            let in_entrypoint = sink_in_entrypoint(ctx, *sink);

@ -183,6 +257,9 @@ impl CfgAnalysis for UnguardedSink {
            } else if param_only && !in_entrypoint {
                // Wrapper function consuming only parameters → LOW
                (Severity::Low, Confidence::Low)
+            } else if !ctx.taint_active && !source_derived {
+                // CFG-only mode without taint confirmation → LOW
+                (Severity::Low, Confidence::Low)
            } else if in_entrypoint && !param_only {
                // Entrypoint with non-parameter args but no taint confirmation → MEDIUM
                (Severity::Medium, Confidence::Medium)
--- a/src/cfg_analysis/mod.rs
+++ b/src/cfg_analysis/mod.rs
@ -10,7 +10,7 @@ mod tests;
 pub mod unreachable;

 use crate::cfg::{FuncSummaries, NodeInfo, StmtKind};
-use crate::labels::DataLabel;
+use crate::labels::{DataLabel, LangAnalysisRules};
 use crate::patterns::Severity;
 use crate::summary::GlobalSummaries;
 use crate::symbol::Lang;
@ -51,6 +51,11 @@ pub struct AnalysisContext<'a> {
    #[allow(dead_code)]
    pub global_summaries: Option<&'a GlobalSummaries>,
    pub taint_findings: &'a [taint::Finding],
+    pub analysis_rules: Option<&'a LangAnalysisRules>,
+    /// Whether full taint analysis was active for this file (global summaries
+    /// existed and taint engine ran).  When false, structural findings without
+    /// taint confirmation should be treated with lower confidence.
+    pub taint_active: bool,
 }

 pub trait CfgAnalysis {
@ -87,6 +92,20 @@ pub fn run_all(ctx: &AnalysisContext) -> Vec<CfgFinding> {
        true
    });

+    // ── Dedup: suppress cfg-unguarded-sink when cfg-unreachable-sink covers the span ──
+    let unreachable_spans: HashSet<(usize, usize)> = findings
+        .iter()
+        .filter(|f| f.rule_id == "cfg-unreachable-sink")
+        .map(|f| f.span)
+        .collect();
+
+    findings.retain(|f| {
+        if f.rule_id == "cfg-unguarded-sink" && unreachable_spans.contains(&f.span) {
+            return false;
+        }
+        true
+    });
+
    scoring::score_findings(&mut findings, ctx);
    findings.sort_by(|a, b| {
        b.score
@ -97,11 +116,36 @@ pub fn run_all(ctx: &AnalysisContext) -> Vec<CfgFinding> {
 }

 /// Helper: check whether a node is a guard call (validate, sanitize, check, etc.).
-pub(crate) fn is_guard_call(info: &NodeInfo, lang: Lang) -> bool {
+pub(crate) fn is_guard_call(
+    info: &NodeInfo,
+    lang: Lang,
+    analysis_rules: Option<&LangAnalysisRules>,
+) -> bool {
    if info.kind != StmtKind::Call {
        return false;
    }
    if let Some(callee) = &info.callee {
+        // Check config sanitizer rules
+        if let Some(extras) = analysis_rules {
+            let callee_lower = callee.to_ascii_lowercase();
+            for rule in &extras.extra_labels {
+                if !matches!(rule.label, DataLabel::Sanitizer(_)) {
+                    continue;
+                }
+                for m in &rule.matchers {
+                    let ml = m.to_ascii_lowercase();
+                    if ml.ends_with('_') {
+                        if callee_lower.starts_with(&ml) {
+                            return true;
+                        }
+                    } else if callee_lower.ends_with(&ml) {
+                        return true;
+                    }
+                }
+            }
+        }
+
+        // Check built-in guard rules
        let guard_rules = rules::guard_rules(lang);
        let callee_lower = callee.to_ascii_lowercase();
        for rule in guard_rules {
--- a/src/cfg_analysis/resources.rs
+++ b/src/cfg_analysis/resources.rs
@ -8,8 +8,13 @@ use std::collections::HashSet;

 pub struct ResourceMisuse;

-/// Find nodes matching acquire patterns for a given resource pair.
-fn find_acquire_nodes(ctx: &AnalysisContext, acquire_patterns: &[&str]) -> Vec<NodeIndex> {
+/// Find nodes matching acquire patterns for a given resource pair,
+/// excluding any that match `exclude_patterns`.
+fn find_acquire_nodes(
+    ctx: &AnalysisContext,
+    acquire_patterns: &[&str],
+    exclude_patterns: &[&str],
+) -> Vec<NodeIndex> {
    ctx.cfg
        .node_indices()
        .filter(|&idx| {
@ -19,6 +24,16 @@ fn find_acquire_nodes(ctx: &AnalysisContext, acquire_patterns: &[&str]) -> Vec<N
            }
            if let Some(callee) = &info.callee {
                let callee_lower = callee.to_ascii_lowercase();
+                // Check exclusions first — if the callee matches an exclude
+                // pattern, it is NOT an acquire even if it also matches an
+                // acquire pattern (e.g. `freopen` ends with `fopen`).
+                let excluded = exclude_patterns.iter().any(|p| {
+                    let pl = p.to_ascii_lowercase();
+                    callee_lower.ends_with(&pl) || callee_lower == pl
+                });
+                if excluded {
+                    return false;
+                }
                acquire_patterns.iter().any(|p| {
                    let pl = p.to_ascii_lowercase();
                    callee_lower.ends_with(&pl) || callee_lower == pl
@ -113,6 +128,204 @@ fn all_paths_pass_through(
    true
 }

+/// Check whether the acquired variable is stored into a struct field (ownership
+/// transfer) downstream of the acquire node.  Patterns recognised:
+///   - `ptr->field = var`   (C arrow operator)
+///   - `obj.field = var`    (C dot / generic field store)
+///   - `list->next = ...`   (linked-list insertion)
+///
+/// If the variable is transferred, there is no leak — the receiving struct is
+/// responsible for the lifetime.
+///
+/// The search is a breadth-first walk over CFG successors of `acquire`.  A
+/// node that references the variable (in `uses` or `defines`) and whose source
+/// span contains `->` or a `.field =` store ends the search with `true`.  A
+/// node that redefines the variable without such a pattern ends that path.
+/// Returns `false` when the acquire node defines no variable.
+///
+/// NOTE(review): the `->` test is purely textual — any `->` inside the span of
+/// a node that references the variable counts, including reads such as
+/// `var->field`.  Confirm this breadth is intended.
+fn is_ownership_transferred(ctx: &AnalysisContext, acquire: NodeIndex) -> bool {
+    use std::collections::VecDeque;
+
+    let Some(acquired_var) = ctx.cfg[acquire].defines.as_deref() else {
+        return false;
+    };
+
+    let mut visited = HashSet::new();
+    let mut queue = VecDeque::new();
+    for succ in ctx.cfg.neighbors(acquire) {
+        if visited.insert(succ) {
+            queue.push_back(succ);
+        }
+    }
+
+    while let Some(node) = queue.pop_front() {
+        let info = &ctx.cfg[node];
+        let redefines_var = info.defines.as_deref() == Some(acquired_var);
+        let references_var =
+            redefines_var || info.uses.iter().any(|u| u.as_str() == acquired_var);
+
+        if references_var {
+            let (start, end) = info.span;
+            // Only inspect spans that are non-empty and inside the source.
+            if start < end && end <= ctx.source_bytes.len() {
+                let span_text = &ctx.source_bytes[start..end];
+                // `->` anywhere in the span, or a `.field = ...` store (not `==`).
+                if span_text.windows(2).any(|w| w == b"->")
+                    || has_dot_field_assignment(span_text)
+                {
+                    return true;
+                }
+            }
+        }
+
+        // A node that redefines the variable and was not recognised as a
+        // field store above is a genuine redefinition — stop this path.
+        if redefines_var {
+            continue;
+        }
+
+        for succ in ctx.cfg.neighbors(node) {
+            if visited.insert(succ) {
+                queue.push_back(succ);
+            }
+        }
+    }
+
+    false
+}
+
+/// Check if `span_text` contains a dot-field assignment pattern like
+/// `obj.field = var` (but not `obj.method(...)` or `a == b`).
+///
+/// A match is a `.` followed by at least one identifier byte (ASCII
+/// alphanumeric or `_`), optional ASCII whitespace, and then a `=` that is not
+/// the start of `==` or `=>`.  Requiring a non-empty field name keeps range
+/// syntax such as `0..=n` from matching, and rejecting `=>` keeps arrows and
+/// hash rockets (`obj.key => value`) from being read as stores.
+///
+/// Returns `false` for empty input.
+fn has_dot_field_assignment(span_text: &[u8]) -> bool {
+    let is_ident_byte = |b: u8| b.is_ascii_alphanumeric() || b == b'_';
+
+    span_text
+        .iter()
+        .enumerate()
+        .filter(|&(_, &byte)| byte == b'.')
+        .any(|(dot, _)| {
+            let after_dot = &span_text[dot + 1..];
+            let name_len = after_dot.iter().take_while(|&&b| is_ident_byte(b)).count();
+            if name_len == 0 {
+                // `..=`, `. =` and similar: there is no field name here.
+                return false;
+            }
+            let after_name = &after_dot[name_len..];
+            let gap = after_name
+                .iter()
+                .take_while(|b| b.is_ascii_whitespace())
+                .count();
+            match &after_name[gap..] {
+                // Comparison or arrow, not an assignment.
+                [b'=', b'=' | b'>', ..] => false,
+                [b'=', ..] => true,
+                _ => false,
+            }
+        })
+}
+
+/// Report whether the handle produced by `acquire` is later passed to a call
+/// that takes ownership of it (e.g. `FileResponse(f)`, `send_file(f)`), in
+/// which case the missing release is not treated as a leak.
+///
+/// CFG successors of `acquire` are walked breadth-first.  A node that uses
+/// the handle counts as a consumer when either
+/// - it is a `Call` whose callee (ASCII case-insensitive) ends with one of
+///   the consuming sink names, or
+/// - its source span contains one of those names (ASCII case-insensitive) —
+///   this covers calls embedded in another statement such as
+///   `return FileResponse(f)`.
+///
+/// Returns `false` when the acquire node defines no variable.
+fn is_consumed_by_owner(ctx: &AnalysisContext, acquire: NodeIndex) -> bool {
+    use std::collections::VecDeque;
+
+    static CONSUMING_SINKS: &[&str] = &[
+        "fileresponse",
+        "streaminghttpresponse",
+        "send_file",
+        "make_response",
+    ];
+
+    let Some(handle) = ctx.cfg[acquire].defines.as_deref() else {
+        return false;
+    };
+
+    let mut seen = HashSet::new();
+    let mut pending = VecDeque::new();
+    for next in ctx.cfg.neighbors(acquire) {
+        if seen.insert(next) {
+            pending.push_back(next);
+        }
+    }
+
+    while let Some(current) = pending.pop_front() {
+        let info = &ctx.cfg[current];
+
+        // Both checks below only apply to nodes that read the handle.
+        if info.uses.iter().any(|u| u.as_str() == handle) {
+            // Direct call to a consuming sink.
+            let calls_sink = info.kind == StmtKind::Call
+                && info.callee.as_deref().is_some_and(|callee| {
+                    let callee_lower = callee.to_ascii_lowercase();
+                    CONSUMING_SINKS
+                        .iter()
+                        .any(|sink| callee_lower.ends_with(*sink))
+                });
+            if calls_sink {
+                return true;
+            }
+
+            // Sink name anywhere in the node's source text.  The sink names
+            // are lowercase ASCII, so a case-insensitive window compare is
+            // the same as lowercasing the span first.
+            let (start, end) = info.span;
+            if start < end && end <= ctx.source_bytes.len() {
+                let span_text = &ctx.source_bytes[start..end];
+                let mentions_sink = CONSUMING_SINKS.iter().any(|sink| {
+                    span_text
+                        .windows(sink.len())
+                        .any(|window| window.eq_ignore_ascii_case(sink.as_bytes()))
+                });
+                if mentions_sink {
+                    return true;
+                }
+            }
+        }
+
+        for next in ctx.cfg.neighbors(current) {
+            if seen.insert(next) {
+                pending.push_back(next);
+            }
+        }
+    }
+
+    false
+}
+
+/// For mutex pairs, check that an explicit `.acquire()` or `.lock()` call
+/// exists on the acquired variable in the CFG.  If only the constructor
+/// (e.g. `threading.Lock()`) is observed without acquire, skip the finding.
+///
+/// A callee counts as a lock call when, compared ASCII case-insensitively, it
+/// ends with `.acquire` or `.lock`, or equals `pthread_mutex_lock`.
+///
+/// Two situations are handled:
+/// - The acquire node defines a variable: return `true` when any `Call` node
+///   in the CFG is a lock call that uses that variable.
+/// - The acquire node defines no variable: there is no handle to trace, so
+///   the acquire node's own callee is checked instead.  This keeps pairs
+///   whose acquire pattern is the locking call itself (`pthread_mutex_lock`,
+///   `.Lock`, `.lock`) from being dropped unconditionally.
+fn has_explicit_lock_acquire(ctx: &AnalysisContext, acquire: NodeIndex) -> bool {
+    let is_lock_call = |callee: &str| {
+        let callee_lower = callee.to_ascii_lowercase();
+        callee_lower.ends_with(".acquire")
+            || callee_lower.ends_with(".lock")
+            || callee_lower == "pthread_mutex_lock"
+    };
+
+    let acquire_info = &ctx.cfg[acquire];
+    let Some(acquired_var) = acquire_info.defines.as_deref() else {
+        // Nothing is bound by the acquire node, so it can only qualify by
+        // being the explicit lock operation itself.
+        return acquire_info
+            .callee
+            .as_deref()
+            .is_some_and(|callee| is_lock_call(callee));
+    };
+
+    ctx.cfg.node_indices().any(|idx| {
+        let info = &ctx.cfg[idx];
+        info.kind == StmtKind::Call
+            && info
+                .callee
+                .as_deref()
+                .is_some_and(|callee| is_lock_call(callee))
+            && info.uses.iter().any(|u| u.as_str() == acquired_var)
+    })
+}
+
 impl CfgAnalysis for ResourceMisuse {
    fn name(&self) -> &'static str {
        "resource-misuse"
@ -128,11 +341,18 @@ impl CfgAnalysis for ResourceMisuse {
        let mut findings = Vec::new();

        for pair in pairs {
-            let acquire_nodes = find_acquire_nodes(ctx, pair.acquire);
+            let acquire_nodes = find_acquire_nodes(ctx, pair.acquire, pair.exclude_acquire);
            let release_nodes = find_release_nodes(ctx, pair.release);

            for &acquire in &acquire_nodes {
-                if !release_on_all_exit_paths(ctx, acquire, &release_nodes, exit) {
+                if !release_on_all_exit_paths(ctx, acquire, &release_nodes, exit)
+                    && !is_ownership_transferred(ctx, acquire)
+                    && !is_consumed_by_owner(ctx, acquire)
+                {
+                    // For mutex pairs, require an explicit .acquire()/.lock() call
+                    if pair.resource_name == "mutex" && !has_explicit_lock_acquire(ctx, acquire) {
+                        continue;
+                    }
                    let info = &ctx.cfg[acquire];
                    let callee_desc = info.callee.as_deref().unwrap_or("(acquire)");

--- a/src/cfg_analysis/rules.rs
+++ b/src/cfg_analysis/rules.rs
@ -21,6 +21,9 @@ pub struct EntryPointRule {
 pub struct ResourcePair {
    pub acquire: &'static [&'static str],
    pub release: &'static [&'static str],
+    /// Patterns that look like acquire calls (e.g. `freopen` ends with `fopen`)
+    /// but should NOT be treated as acquisitions.
+    pub exclude_acquire: &'static [&'static str],
    pub resource_name: &'static str,
 }

@ -47,6 +50,16 @@ static COMMON_GUARDS: &[GuardRule] = &[
        matchers: &["url_encode", "encode_uri", "urlencode"],
        applies_to_sink_caps: Cap::URL_ENCODE,
    },
+    GuardRule {
+        matchers: &[
+            "which",
+            "resolve_binary",
+            "find_program",
+            "lookup_path",
+            "shutil.which",
+        ],
+        applies_to_sink_caps: Cap::SHELL_ESCAPE,
+    },
 ];

 pub fn guard_rules(_lang: Lang) -> &'static [GuardRule] {
@ -168,21 +181,25 @@ static C_RESOURCES: &[ResourcePair] = &[
    ResourcePair {
        acquire: &["malloc", "calloc", "realloc"],
        release: &["free"],
+        exclude_acquire: &[],
        resource_name: "memory",
    },
    ResourcePair {
-        acquire: &["fopen"],
-        release: &["fclose"],
+        acquire: &["fopen", "fdopen", "curlx_fopen", "curlx_fdopen"],
+        release: &["fclose", "curlx_fclose"],
+        exclude_acquire: &["freopen", "curlx_freopen"],
        resource_name: "file handle",
    },
    ResourcePair {
        acquire: &["open"],
        release: &["close"],
+        exclude_acquire: &["freopen", "curlx_freopen"],
        resource_name: "file descriptor",
    },
    ResourcePair {
        acquire: &["pthread_mutex_lock"],
        release: &["pthread_mutex_unlock"],
+        exclude_acquire: &[],
        resource_name: "mutex",
    },
 ];
@ -191,11 +208,13 @@ static GO_RESOURCES: &[ResourcePair] = &[
    ResourcePair {
        acquire: &["os.Open", "os.Create", "os.OpenFile"],
        release: &[".Close"],
+        exclude_acquire: &[],
        resource_name: "file handle",
    },
    ResourcePair {
        acquire: &[".Lock"],
        release: &[".Unlock"],
+        exclude_acquire: &[],
        resource_name: "mutex",
    },
 ];
@ -205,6 +224,7 @@ static RUST_RESOURCES: &[ResourcePair] = &[
    ResourcePair {
        acquire: &["alloc"],
        release: &["dealloc"],
+        exclude_acquire: &[],
        resource_name: "raw memory",
    },
 ];
@ -217,10 +237,93 @@ static JAVA_RESOURCES: &[ResourcePair] = &[ResourcePair {
        "openConnection",
    ],
    release: &[".close"],
+    exclude_acquire: &[],
    resource_name: "stream/connection",
 }];

-static EMPTY_RESOURCES: &[ResourcePair] = &[];
+/// Acquire/release pairs checked for Python sources.
+///
+/// NOTE(review): `find_acquire_nodes` (cfg_analysis/resources.rs) compares
+/// acquire and exclude patterns case-insensitively against the *end* of the
+/// callee text, so the bare `open` pattern presumably also matches callees
+/// such as `os.popen` or `urlopen` — confirm whether more `exclude_acquire`
+/// entries are needed.
+static PYTHON_RESOURCES: &[ResourcePair] = &[
+    ResourcePair {
+        acquire: &["open"],
+        release: &[".close"],
+        exclude_acquire: &[],
+        resource_name: "file handle",
+    },
+    ResourcePair {
+        // NOTE(review): with suffix matching, `socket.socket` looks subsumed
+        // by the shorter `socket` pattern — verify before relying on both.
+        acquire: &["socket.socket", "socket"],
+        release: &[".close"],
+        exclude_acquire: &[],
+        resource_name: "socket",
+    },
+    ResourcePair {
+        acquire: &["connect", "cursor"],
+        release: &[".close"],
+        // NOTE(review): a callee ending in `.register` cannot also end in
+        // `connect` or `cursor`, so that exclusion appears to have no effect
+        // under suffix matching — confirm intent.
+        exclude_acquire: &["signal.connect", "event.connect", ".register"],
+        resource_name: "db connection",
+    },
+    ResourcePair {
+        // These are the lock constructors; for pairs named "mutex" the
+        // resource-misuse analysis additionally requires an explicit
+        // `.acquire()`/`.lock()` call before reporting.
+        acquire: &["threading.Lock", "threading.RLock"],
+        release: &[".release"],
+        exclude_acquire: &[],
+        resource_name: "mutex",
+    },
+];
+
+/// Acquire/release pairs checked for Ruby sources.
+static RUBY_RESOURCES: &[ResourcePair] = &[
+    ResourcePair {
+        // NOTE(review): acquire patterns are matched as case-insensitive
+        // suffixes of the callee, so `File.open` looks subsumed by `open` —
+        // verify before relying on both.
+        acquire: &["File.open", "open"],
+        release: &[".close"],
+        exclude_acquire: &[],
+        resource_name: "file handle",
+    },
+    ResourcePair {
+        acquire: &["TCPSocket.new", "UDPSocket.new"],
+        release: &[".close"],
+        exclude_acquire: &[],
+        resource_name: "socket",
+    },
+    ResourcePair {
+        // Here the acquire pattern is the locking call itself, not a
+        // constructor.
+        acquire: &[".lock"],
+        release: &[".unlock"],
+        exclude_acquire: &[],
+        resource_name: "mutex",
+    },
+];
+
+/// Acquire/release pairs checked for PHP sources.
+static PHP_RESOURCES: &[ResourcePair] = &[
+    ResourcePair {
+        acquire: &["fopen"],
+        release: &["fclose"],
+        // `freopen` ends with `fopen`, so it is excluded explicitly.
+        exclude_acquire: &["freopen"],
+        resource_name: "file handle",
+    },
+    ResourcePair {
+        acquire: &["mysqli_connect"],
+        release: &["mysqli_close"],
+        exclude_acquire: &[],
+        resource_name: "db connection",
+    },
+    ResourcePair {
+        acquire: &["curl_init"],
+        release: &["curl_close"],
+        exclude_acquire: &[],
+        resource_name: "curl handle",
+    },
+];
+
+/// Acquire/release pairs checked for JavaScript and TypeScript sources
+/// (both languages map to this table in `resource_pairs`).
+static JS_RESOURCES: &[ResourcePair] = &[
+    ResourcePair {
+        acquire: &["fs.open", "fs.openSync"],
+        release: &["fs.close", "fs.closeSync"],
+        exclude_acquire: &[],
+        resource_name: "file descriptor",
+    },
+    ResourcePair {
+        acquire: &["createReadStream", "createWriteStream"],
+        release: &[".close", ".destroy"],
+        exclude_acquire: &[],
+        resource_name: "stream",
+    },
+];

 pub fn resource_pairs(lang: Lang) -> &'static [ResourcePair] {
    match lang {
@ -229,6 +332,9 @@ pub fn resource_pairs(lang: Lang) -> &'static [ResourcePair] {
        Lang::Go => GO_RESOURCES,
        Lang::Rust => RUST_RESOURCES,
        Lang::Java => JAVA_RESOURCES,
-        _ => EMPTY_RESOURCES,
+        Lang::Python => PYTHON_RESOURCES,
+        Lang::Ruby => RUBY_RESOURCES,
+        Lang::Php => PHP_RESOURCES,
+        Lang::JavaScript | Lang::TypeScript => JS_RESOURCES,
    }
 }
--- a/src/cfg_analysis/tests.rs
+++ b/src/cfg_analysis/tests.rs
@ -14,7 +14,7 @@ fn parse_and_analyse<A: CfgAnalysis>(
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&ts_lang).unwrap();
    let tree = parser.parse(src, None).unwrap();
-    let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs");
+    let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs", None);
    let lang = Lang::from_slug(lang_str).unwrap();
    let ctx = AnalysisContext {
        cfg: &cfg,
@ -25,6 +25,8 @@ fn parse_and_analyse<A: CfgAnalysis>(
        func_summaries: &summaries,
        global_summaries: None,
        taint_findings: &[],
+        analysis_rules: None,
+        taint_active: true,
    };
    analysis.run(&ctx)
 }
@ -34,7 +36,7 @@ fn parse_and_run_all(src: &[u8], lang_str: &str, ts_lang: Language) -> Vec<CfgFi
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&ts_lang).unwrap();
    let tree = parser.parse(src, None).unwrap();
-    let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs");
+    let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs", None);
    let lang = Lang::from_slug(lang_str).unwrap();
    let ctx = AnalysisContext {
        cfg: &cfg,
@ -45,6 +47,8 @@ fn parse_and_run_all(src: &[u8], lang_str: &str, ts_lang: Language) -> Vec<CfgFi
        func_summaries: &summaries,
        global_summaries: None,
        taint_findings: &[],
+        analysis_rules: None,
+        taint_active: true,
    };
    run_all(&ctx)
 }
@ -59,7 +63,7 @@ fn parse_and_run_all_with_taint(
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&ts_lang).unwrap();
    let tree = parser.parse(src, None).unwrap();
-    let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs");
+    let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs", None);
    let lang = Lang::from_slug(lang_str).unwrap();
    let ctx = AnalysisContext {
        cfg: &cfg,
@ -70,6 +74,8 @@ fn parse_and_run_all_with_taint(
        func_summaries: &summaries,
        global_summaries: None,
        taint_findings,
+        analysis_rules: None,
+        taint_active: true,
    };
    run_all(&ctx)
 }
@ -144,7 +150,7 @@ fn unreachable_detects_orphaned_nodes() {
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
-    let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs");
+    let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs", None);

    // All nodes in linear code should be reachable
    let reachable = dominators::reachable_set(&cfg, entry);
@ -469,7 +475,7 @@ fn reachable_set_contains_all_connected_nodes() {
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
-    let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs");
+    let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs", None);

    let reachable = dominators::reachable_set(&cfg, entry);

@ -493,7 +499,7 @@ fn find_exit_node_exists() {
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
-    let (cfg, _, _) = build_cfg(&tree, src, "rust", "test.rs");
+    let (cfg, _, _) = build_cfg(&tree, src, "rust", "test.rs", None);

    let exit = dominators::find_exit_node(&cfg);
    assert!(exit.is_some(), "Should find an exit node");
@ -512,7 +518,7 @@ fn shortest_distance_basic() {
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
-    let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs");
+    let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs", None);

    let exit = dominators::find_exit_node(&cfg).unwrap();
    let dist = dominators::shortest_distance(&cfg, entry, exit);
@ -656,7 +662,7 @@ fn taint_and_unguarded_sink_deduped() {
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
-    let (cfg_graph, entry, _summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let (cfg_graph, entry, _summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
    let _lang = Lang::from_slug("rust").unwrap();

    // Find a sink node to create a synthetic taint finding
@ -674,6 +680,7 @@ fn taint_and_unguarded_sink_deduped() {
        sink: sink_node,
        source: entry,
        path: vec![entry, sink_node],
+        source_kind: crate::labels::SourceKind::UserInput,
    }];

    let findings = parse_and_run_all_with_taint(
@ -719,3 +726,831 @@ fn process_star_without_web_params_no_auth_gap() {
        auth_findings
    );
 }
+
+// ─── Resource leak tests (additional languages) ────────────────────
+
+/// A Python file opened with `open()` and never closed must be reported as
+/// `cfg-resource-leak`.
+#[test]
+fn resource_leak_python_open_without_close() {
+    let src = br#"
+def process():
+    f = open("data.txt")
+    data = f.read()
+"#;
+
+    let python = Language::from(tree_sitter_python::LANGUAGE);
+    let findings = parse_and_analyse(&resources::ResourceMisuse, src, "python", python);
+
+    let leak_reported = findings.iter().any(|f| f.rule_id == "cfg-resource-leak");
+    assert!(
+        leak_reported,
+        "Should detect open() without close() in Python"
+    );
+}
+
+/// A PHP `fopen()` handle that is read but never passed to `fclose()` must be
+/// reported as `cfg-resource-leak`.
+#[test]
+fn resource_leak_php_fopen_without_fclose() {
+    let src = br#"<?php
+function read_file() {
+    $fp = fopen("data.txt", "r");
+    $data = fread($fp, 1024);
+}
+"#;
+
+    let php = Language::from(tree_sitter_php::LANGUAGE_PHP);
+    let findings = parse_and_analyse(&resources::ResourceMisuse, src, "php", php);
+
+    let leak_reported = findings.iter().any(|f| f.rule_id == "cfg-resource-leak");
+    assert!(
+        leak_reported,
+        "Should detect fopen() without fclose() in PHP"
+    );
+}
+
+/// A descriptor from `fs.openSync()` that is never closed must be reported as
+/// `cfg-resource-leak`.
+#[test]
+fn resource_leak_js_open_without_close() {
+    let src = br#"
+function readFile() {
+    var fd = fs.openSync("data.txt", "r");
+    var data = fs.readSync(fd, buf, 0, 100, 0);
+}
+"#;
+
+    let javascript = Language::from(tree_sitter_javascript::LANGUAGE);
+    let findings = parse_and_analyse(&resources::ResourceMisuse, src, "javascript", javascript);
+
+    let leak_reported = findings.iter().any(|f| f.rule_id == "cfg-resource-leak");
+    assert!(
+        leak_reported,
+        "Should detect fs.openSync() without fs.closeSync() in JS"
+    );
+}
+
+// ─── JS CFG precision tests ────────────────────────────────────────
+
+/// `throw` must behave as a terminator: it becomes a Return-kind node and any
+/// statement following it in the same block is unreachable.
+#[test]
+fn js_throw_terminates_block() {
+    let src = br#"
+        function fail() {
+            throw new Error("fatal");
+            eval("dead code");
+        }
+    "#;
+
+    let mut parser = tree_sitter::Parser::new();
+    parser
+        .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
+        .unwrap();
+    let tree = parser.parse(src as &[u8], None).unwrap();
+    let (cfg, entry, _) = build_cfg(&tree, src, "javascript", "test.js", None);
+
+    // At least one Return-kind node must start at the `throw` keyword.
+    let has_throw_node = cfg.node_indices().any(|idx| {
+        let node = &cfg[idx];
+        let start = node.span.0;
+        node.kind == crate::cfg::StmtKind::Return
+            && start > 0
+            && src[start..].starts_with(b"throw")
+    });
+    assert!(
+        has_throw_node,
+        "throw statement should create a Return-kind node"
+    );
+
+    // The CFG may omit the dead `eval` entirely; if it is present, it must
+    // not be reachable from the entry node.
+    let reachable = crate::cfg_analysis::dominators::reachable_set(&cfg, entry);
+    for idx in cfg.node_indices() {
+        let is_eval = cfg[idx].callee.as_deref().is_some_and(|c| c == "eval");
+        if is_eval {
+            assert!(
+                !reachable.contains(&idx),
+                "eval after throw should be unreachable"
+            );
+        }
+    }
+}
+
+/// A call configured as a terminator (`process.exit`) must cut control flow,
+/// leaving the statement after it unreachable.
+#[test]
+fn configured_terminator_stops_flow() {
+    let src = br#"
+        function handler() {
+            process.exit(1);
+            eval("dangerous");
+        }
+    "#;
+
+    // Only a terminator is configured; no extra labels or event handlers.
+    let rules = crate::labels::LangAnalysisRules {
+        extra_labels: Vec::new(),
+        terminators: vec!["process.exit".into()],
+        event_handlers: Vec::new(),
+    };
+
+    let mut parser = tree_sitter::Parser::new();
+    parser
+        .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
+        .unwrap();
+    let tree = parser.parse(src as &[u8], None).unwrap();
+    let (cfg, entry, _) = build_cfg(&tree, src, "javascript", "test.js", Some(&rules));
+
+    let reachable = crate::cfg_analysis::dominators::reachable_set(&cfg, entry);
+
+    // The CFG is allowed to drop the `eval` node altogether (it follows a
+    // terminator); when the node exists it must be unreachable.
+    for idx in cfg.node_indices() {
+        let is_eval = cfg[idx].callee.as_deref().is_some_and(|c| c == "eval");
+        if is_eval {
+            assert!(
+                !reachable.contains(&idx),
+                "eval should be unreachable after process.exit terminator"
+            );
+        }
+    }
+}
+
+// ─── Href classification tests ─────────────────────────────────────
+
+/// Assigning to `location.href` must yield at least one Sink-labelled CFG node.
+#[test]
+fn location_href_assignment_is_sink() {
+    let src = br#"
+        function redirect(url) {
+            location.href = url;
+        }
+    "#;
+
+    let mut parser = tree_sitter::Parser::new();
+    parser
+        .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
+        .unwrap();
+    let tree = parser.parse(src as &[u8], None).unwrap();
+    let (cfg, _entry, _summaries) = build_cfg(&tree, src, "javascript", "test.js", None);
+
+    let sink_node = cfg
+        .node_indices()
+        .find(|&idx| matches!(cfg[idx].label, Some(crate::labels::DataLabel::Sink(_))));
+    assert!(
+        sink_node.is_some(),
+        "location.href = url should produce a Sink node"
+    );
+}
+
+/// Assigning a constant to `el.href` on an arbitrary element must not yield
+/// any Sink-labelled CFG node.
+#[test]
+fn a_href_assignment_is_not_sink() {
+    let src = br#"
+        function setLink(el) {
+            el.href = "/about";
+        }
+    "#;
+
+    let mut parser = tree_sitter::Parser::new();
+    parser
+        .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
+        .unwrap();
+    let tree = parser.parse(src as &[u8], None).unwrap();
+    let (cfg, _entry, _summaries) = build_cfg(&tree, src, "javascript", "test.js", None);
+
+    let sink_node = cfg
+        .node_indices()
+        .find(|&idx| matches!(cfg[idx].label, Some(crate::labels::DataLabel::Sink(_))));
+    assert!(
+        sink_node.is_none(),
+        "el.href = '/about' should NOT produce a Sink node"
+    );
+}
+
+// ─── Config sanitizer tests ────────────────────────────────────────
+
+/// With `escapeHtml` configured as an HTML-escape sanitizer, passing input
+/// through it before the `innerHTML` assignment must not raise
+/// `cfg-unguarded-sink`.
+#[test]
+fn config_sanitizer_suppresses_unguarded_sink() {
+    let src = br#"
+        function render(input) {
+            var safe = escapeHtml(input);
+            document.body.innerHTML = safe;
+        }
+    "#;
+
+    // Runtime rule that labels `escapeHtml` as a sanitizer.
+    let escape_rule = crate::labels::RuntimeLabelRule {
+        matchers: vec!["escapeHtml".into()],
+        label: crate::labels::DataLabel::Sanitizer(crate::labels::Cap::HTML_ESCAPE),
+    };
+    let rules = crate::labels::LangAnalysisRules {
+        extra_labels: vec![escape_rule],
+        terminators: Vec::new(),
+        event_handlers: Vec::new(),
+    };
+
+    let mut parser = tree_sitter::Parser::new();
+    parser
+        .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
+        .unwrap();
+    let tree = parser.parse(src as &[u8], None).unwrap();
+    let (cfg, entry, summaries) = build_cfg(&tree, src, "javascript", "test.rs", Some(&rules));
+
+    // The same rules go to both the CFG builder and the analysis context.
+    let ctx = AnalysisContext {
+        cfg: &cfg,
+        entry,
+        lang: Lang::from_slug("javascript").unwrap(),
+        file_path: "test.rs",
+        source_bytes: src,
+        func_summaries: &summaries,
+        global_summaries: None,
+        taint_findings: &[],
+        analysis_rules: Some(&rules),
+        taint_active: true,
+    };
+    let findings = run_all(&ctx);
+
+    let unguarded: Vec<_> = findings
+        .iter()
+        .filter(|f| f.rule_id == "cfg-unguarded-sink")
+        .collect();
+    assert!(
+        unguarded.is_empty(),
+        "escapeHtml config sanitizer should suppress cfg-unguarded-sink; got {:?}",
+        unguarded
+    );
+}
+
+// ─── Python precision tests ────────────────────────────────────────
+
+/// `subprocess.run` called with a list of string literals must not raise
+/// `cfg-unguarded-sink`.
+#[test]
+fn python_constant_subprocess_no_finding() {
+    let src = br#"
+import subprocess
+
+def build():
+    subprocess.run(["make", "clean"])
+"#;
+
+    let python = Language::from(tree_sitter_python::LANGUAGE);
+    let findings = parse_and_run_all(src, "python", python);
+
+    let flagged = findings
+        .iter()
+        .filter(|f| f.rule_id == "cfg-unguarded-sink")
+        .collect::<Vec<_>>();
+    assert!(
+        flagged.is_empty(),
+        "subprocess.run with constant list args should not be flagged; got {:?}",
+        flagged
+    );
+}
+
+/// `subprocess.run(["git", "status"])` uses only literal arguments and must
+/// not raise `cfg-unguarded-sink`.
+#[test]
+fn python_constant_git_status_no_finding() {
+    let src = br#"
+import subprocess
+
+def check():
+    subprocess.run(["git", "status"])
+"#;
+
+    let python = Language::from(tree_sitter_python::LANGUAGE);
+    let findings = parse_and_run_all(src, "python", python);
+
+    let flagged = findings
+        .iter()
+        .filter(|f| f.rule_id == "cfg-unguarded-sink")
+        .collect::<Vec<_>>();
+    assert!(
+        flagged.is_empty(),
+        "subprocess.run with constant git args should not be flagged; got {:?}",
+        flagged
+    );
+}
+
+/// A value read from `sys.argv` and passed to `os.system` must produce a
+/// HIGH-severity `cfg-unguarded-sink` finding.
+#[test]
+fn python_tainted_os_system_produces_finding() {
+    let src = br#"
+import sys
+import os
+
+def run():
+    cmd = sys.argv[1]
+    os.system(cmd)
+"#;
+
+    let python = Language::from(tree_sitter_python::LANGUAGE);
+    let findings = parse_and_run_all(src, "python", python);
+
+    let has_high_sink = findings.iter().any(|f| {
+        f.rule_id == "cfg-unguarded-sink" && f.severity == crate::patterns::Severity::High
+    });
+    assert!(
+        has_high_sink,
+        "Source-derived os.system should produce a HIGH finding"
+    );
+}
+
+// ─── C/C++ precision tests ─────────────────────────────────────────
+
+/// Streaming a literal to `std::cout` must not raise `cfg-unguarded-sink`.
+#[test]
+fn cpp_cout_not_a_sink() {
+    let src = br#"
+#include <iostream>
+int main() {
+    std::cout << "hello" << std::endl;
+    return 0;
+}
+"#;
+
+    let cpp = Language::from(tree_sitter_cpp::LANGUAGE);
+    let findings = parse_and_run_all(src, "cpp", cpp);
+
+    let flagged = findings
+        .iter()
+        .filter(|f| f.rule_id == "cfg-unguarded-sink")
+        .collect::<Vec<_>>();
+    assert!(
+        flagged.is_empty(),
+        "std::cout should not produce an unguarded-sink finding; got {:?}",
+        flagged
+    );
+}
+
+/// `printf` with only a literal format string (parsed with the C grammar)
+/// must not raise `cfg-unguarded-sink`.
+#[test]
+fn cpp_printf_constant_no_finding() {
+    let src = br#"
+#include <stdio.h>
+int main() {
+    printf("hello\n");
+    return 0;
+}
+"#;
+
+    let c_grammar = Language::from(tree_sitter_c::LANGUAGE);
+    let findings = parse_and_run_all(src, "c", c_grammar);
+
+    let flagged = findings
+        .iter()
+        .filter(|f| f.rule_id == "cfg-unguarded-sink")
+        .collect::<Vec<_>>();
+    assert!(
+        flagged.is_empty(),
+        "printf with constant args should be suppressed; got {:?}",
+        flagged
+    );
+}
+
+/// Passing the result of `getenv` to `system` (parsed with the C grammar)
+/// must raise `cfg-unguarded-sink`.
+#[test]
+fn cpp_system_with_getenv_produces_finding() {
+    let src = br#"
+#include <stdlib.h>
+int main() {
+    char* input = getenv("USER_CMD");
+    system(input);
+    return 0;
+}
+"#;
+
+    let c_grammar = Language::from(tree_sitter_c::LANGUAGE);
+    let findings = parse_and_run_all(src, "c", c_grammar);
+
+    let sink_reported = findings.iter().any(|f| f.rule_id == "cfg-unguarded-sink");
+    assert!(
+        sink_reported,
+        "system(getenv(...)) should produce an unguarded-sink finding"
+    );
+}
+
+// ─── Unreachable + unguarded dedup test ─────────────────────────────
+
+/// A sink sitting in dead code should be reported only as
+/// `cfg-unreachable-sink`; no `cfg-unguarded-sink` may share its span.
+#[test]
+fn unreachable_sink_suppresses_unguarded() {
+    let src = br#"
+fn main() {
+    return;
+    std::process::Command::new("sh").arg("x").status().unwrap();
+}
+"#;
+
+    let rust = Language::from(tree_sitter_rust::LANGUAGE);
+    let findings = parse_and_run_all(src, "rust", rust);
+
+    let unreachable: Vec<_> = findings
+        .iter()
+        .filter(|f| f.rule_id == "cfg-unreachable-sink")
+        .collect();
+
+    // Gather unguarded-sink findings whose span matches an unreachable-sink one.
+    let mut unguarded_at_same_span = Vec::new();
+    for f in &findings {
+        let overlaps_unreachable = unreachable.iter().any(|u| u.span == f.span);
+        if f.rule_id == "cfg-unguarded-sink" && overlaps_unreachable {
+            unguarded_at_same_span.push(f);
+        }
+    }
+    assert!(
+        unguarded_at_same_span.is_empty(),
+        "cfg-unguarded-sink should be suppressed when cfg-unreachable-sink fires on same span; got {:?}",
+        unguarded_at_same_span
+    );
+}
+
+// ─── Fix 3: Wrapper resource names (curlx_fopen/curlx_fclose) ──────
+
+/// The wrapper pair `curlx_fopen` / `curlx_fclose` must be recognised as a
+/// matched acquire/release, producing no `cfg-resource-leak`.
+#[test]
+fn curlx_fopen_with_curlx_fclose_no_leak() {
+    let src = br#"
+void process() {
+    FILE *fp = curlx_fopen("file.txt", "r");
+    curlx_fclose(fp);
+}
+"#;
+
+    let c_grammar = Language::from(tree_sitter_c::LANGUAGE);
+    let findings = parse_and_analyse(&resources::ResourceMisuse, src, "c", c_grammar);
+
+    let leaks = findings
+        .iter()
+        .filter(|f| f.rule_id == "cfg-resource-leak")
+        .collect::<Vec<_>>();
+    assert!(
+        leaks.is_empty(),
+        "curlx_fopen + curlx_fclose should not produce a resource leak; got {:?}",
+        leaks
+    );
+}
+
+// ─── Fix 4: freopen exclusion ───────────────────────────────────────
+
+/// A bare `freopen` call must not count as a resource acquisition, so no
+/// `cfg-resource-leak` is expected.
+#[test]
+fn freopen_not_treated_as_acquire() {
+    let src = br#"
+void redirect_stderr() {
+    freopen("/dev/null", "w", stderr);
+}
+"#;
+
+    let c_grammar = Language::from(tree_sitter_c::LANGUAGE);
+    let findings = parse_and_analyse(&resources::ResourceMisuse, src, "c", c_grammar);
+
+    let leaks = findings
+        .iter()
+        .filter(|f| f.rule_id == "cfg-resource-leak")
+        .collect::<Vec<_>>();
+    assert!(
+        leaks.is_empty(),
+        "freopen should not produce a resource leak finding; got {:?}",
+        leaks
+    );
+}
+
+// ─── Fix 5: Struct field ownership transfer ─────────────────────────
+
+/// Storing an opened `FILE *` into a struct field hands ownership to the
+/// struct, so no `cfg-resource-leak` is expected.
+#[test]
+fn struct_field_ownership_transfer_no_leak() {
+    let src = br#"
+void open_stream(struct session *s) {
+    FILE *fp = fopen("data.txt", "r");
+    s->stream = fp;
+    s->fopened = 1;
+}
+"#;
+
+    let c_grammar = Language::from(tree_sitter_c::LANGUAGE);
+    let findings = parse_and_analyse(&resources::ResourceMisuse, src, "c", c_grammar);
+
+    let leaks = findings
+        .iter()
+        .filter(|f| f.rule_id == "cfg-resource-leak")
+        .collect::<Vec<_>>();
+    assert!(
+        leaks.is_empty(),
+        "Struct field ownership transfer should suppress resource leak; got {:?}",
+        leaks
+    );
+}
+
+// ─── Fix 6: Linked-list / global insertion ──────────────────────────
+
+/// A `malloc`ed node linked into a list owned by another struct must not be
+/// reported as `cfg-resource-leak`.
+#[test]
+fn linked_list_insertion_no_leak() {
+    let src = br#"
+void add_var(struct config *cfg, const char *name) {
+    struct var *p = malloc(sizeof(struct var));
+    p->next = cfg->variables;
+    cfg->variables = p;
+}
+"#;
+
+    let c_grammar = Language::from(tree_sitter_c::LANGUAGE);
+    let findings = parse_and_analyse(&resources::ResourceMisuse, src, "c", c_grammar);
+
+    let leaks = findings
+        .iter()
+        .filter(|f| f.rule_id == "cfg-resource-leak")
+        .collect::<Vec<_>>();
+    assert!(
+        leaks.is_empty(),
+        "Linked-list insertion should suppress resource leak; got {:?}",
+        leaks
+    );
+}
+
+// ─── Fix 2: Preproc dangling-else CFG recovery ─────────────────────
+
+/// An `#ifdef` block ending in a dangling `else` must not disconnect the code
+/// that follows it: every CFG node has to stay reachable from the entry.
+#[test]
+fn preproc_ifdef_does_not_orphan_subsequent_code() {
+    let src = br#"
+void process() {
+    int x = 1;
+#ifdef _WIN32
+    if (x) {
+        x = 2;
+    } else
+#endif
+    {
+        x = 3;
+    }
+    free(x);
+}
+"#;
+
+    let mut parser = tree_sitter::Parser::new();
+    parser
+        .set_language(&Language::from(tree_sitter_c::LANGUAGE))
+        .unwrap();
+    let tree = parser.parse(src as &[u8], None).unwrap();
+    let (cfg, entry, _) = build_cfg(&tree, src, "c", "test.c", None);
+
+    // Compare the reachable set's size against the total node count.
+    let reachable = dominators::reachable_set(&cfg, entry);
+    let total_nodes = cfg.node_count();
+    let unreachable_count = total_nodes - reachable.len();
+    assert!(
+        unreachable_count == 0,
+        "Expected all nodes reachable after preproc block, but {} nodes are unreachable",
+        unreachable_count
+    );
+}
+
+// ─── Fix 1: Break in loop keeps post-loop code reachable ────────────
+
+/// A `break` inside `while(1)` must connect to the code after the loop, so
+/// every CFG node (including the trailing `free(x)`) stays reachable.
+#[test]
+fn break_in_loop_post_loop_reachable() {
+    let src = br#"
+void process() {
+    int x = 0;
+    while(1) {
+        if(x) break;
+        x = x + 1;
+    }
+    free(x);
+}
+"#;
+
+    let mut parser = tree_sitter::Parser::new();
+    parser
+        .set_language(&Language::from(tree_sitter_c::LANGUAGE))
+        .unwrap();
+    let tree = parser.parse(src as &[u8], None).unwrap();
+    let (cfg, entry, _) = build_cfg(&tree, src, "c", "test.c", None);
+
+    // Compare the reachable set's size against the total node count.
+    let reachable = dominators::reachable_set(&cfg, entry);
+    let total_nodes = cfg.node_count();
+    let unreachable_count = total_nodes - reachable.len();
+    assert!(
+        unreachable_count == 0,
+        "Expected all nodes reachable after break in loop, but {} nodes are unreachable",
+        unreachable_count
+    );
+}
+
+// ─── PART 2A: One-hop constant binding trace ────────────────────────
+
+/// A variable bound to a string literal one assignment earlier counts as
+/// constant, so `subprocess.run([cmd, "status"])` must not raise
+/// `cfg-unguarded-sink`.
+#[test]
+fn python_one_hop_constant_binding_no_finding() {
+    let src = br#"
+import subprocess
+
+def check():
+    cmd = "git"
+    subprocess.run([cmd, "status"])
+"#;
+
+    let python = Language::from(tree_sitter_python::LANGUAGE);
+    let findings = parse_and_run_all(src, "python", python);
+
+    let flagged = findings
+        .iter()
+        .filter(|f| f.rule_id == "cfg-unguarded-sink")
+        .collect::<Vec<_>>();
+    assert!(
+        flagged.is_empty(),
+        "One-hop constant binding should suppress cfg-unguarded-sink; got {:?}",
+        flagged
+    );
+}
+
+// ─── PART 2B: Exec-path guard rules ─────────────────────────────────
+
+/// Calling `resolve_binary(&bin)` before `bin` is passed as an argument to
+/// `Command::new("sh")` acts as a guard, so no `cfg-unguarded-sink` is
+/// expected.
+#[test]
+fn exec_path_guard_suppresses_unguarded_sink() {
+    let src = br#"
+        use std::process::Command;
+        fn main() {
+            let bin = std::env::var("BIN").unwrap();
+            resolve_binary(&bin);
+            Command::new("sh").arg(&bin).status().unwrap();
+        }"#;
+
+    let rust = Language::from(tree_sitter_rust::LANGUAGE);
+    let findings = parse_and_analyse(&guards::UnguardedSink, src, "rust", rust);
+
+    let flagged = findings
+        .iter()
+        .filter(|f| f.rule_id == "cfg-unguarded-sink")
+        .collect::<Vec<_>>();
+    assert!(
+        flagged.is_empty(),
+        "resolve_binary guard should suppress cfg-unguarded-sink; got {:?}",
+        flagged
+    );
+}
+
+// ─── PART 2C: Evidence-based severity in cfg-only mode ──────────────
+
+/// In cfg-only mode (`taint_active: false`) with no source-derived evidence,
+/// `cfg-unguarded-sink` findings must never be MEDIUM or HIGH.
+#[test]
+fn cfg_only_no_taint_produces_low_severity() {
+    let src = br#"
+        use std::process::Command;
+        fn process_data() {
+            let x = compute_something();
+            Command::new("sh").arg(&x).status().unwrap();
+        }"#;
+
+    let mut parser = tree_sitter::Parser::new();
+    parser
+        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+        .unwrap();
+    let tree = parser.parse(src as &[u8], None).unwrap();
+    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
+
+    let ctx = AnalysisContext {
+        cfg: &cfg,
+        entry,
+        lang: Lang::from_slug("rust").unwrap(),
+        file_path: "test.rs",
+        source_bytes: src,
+        func_summaries: &summaries,
+        global_summaries: None,
+        taint_findings: &[],
+        analysis_rules: None,
+        // cfg-only mode: taint analysis is switched off
+        taint_active: false,
+    };
+    let findings = guards::UnguardedSink.run(&ctx);
+
+    // Keep only unguarded-sink findings rated above LOW.
+    let too_severe: Vec<_> = findings
+        .iter()
+        .filter(|f| f.rule_id == "cfg-unguarded-sink")
+        .filter(|f| {
+            f.severity == crate::patterns::Severity::Medium
+                || f.severity == crate::patterns::Severity::High
+        })
+        .collect();
+    assert!(
+        too_severe.is_empty(),
+        "cfg-only mode without taint should produce LOW severity, not MEDIUM/HIGH; got {:?}",
+        too_severe
+    );
+}
+
+// ─── PART 4B: FileResponse ownership transfer ──────────────────────
+
+/// Returning an opened file wrapped in `FileResponse` transfers ownership, so
+/// no `cfg-resource-leak` is expected.
+#[test]
+fn file_response_ownership_transfer_no_leak() {
+    let src = br#"
+def serve_file():
+    f = open("report.pdf", "rb")
+    return FileResponse(f)
+"#;
+
+    let python = Language::from(tree_sitter_python::LANGUAGE);
+    let findings = parse_and_analyse(&resources::ResourceMisuse, src, "python", python);
+
+    let leaks = findings
+        .iter()
+        .filter(|f| f.rule_id == "cfg-resource-leak")
+        .collect::<Vec<_>>();
+    assert!(
+        leaks.is_empty(),
+        "FileResponse should suppress cfg-resource-leak; got {:?}",
+        leaks
+    );
+}
+
+// ─── PART 4C: Lock-not-released refinement ──────────────────────────
+
+/// Constructing a `threading.Lock()` without ever acquiring it must not raise
+/// `cfg-lock-not-released`.
+#[test]
+fn python_lock_constructor_only_no_finding() {
+    let src = br#"
+import threading
+
+def setup():
+    lock = threading.Lock()
+    do_work()
+"#;
+
+    let python = Language::from(tree_sitter_python::LANGUAGE);
+    let findings = parse_and_analyse(&resources::ResourceMisuse, src, "python", python);
+
+    let unreleased = findings
+        .iter()
+        .filter(|f| f.rule_id == "cfg-lock-not-released")
+        .collect::<Vec<_>>();
+    assert!(
+        unreleased.is_empty(),
+        "Lock constructor without acquire should not produce cfg-lock-not-released; got {:?}",
+        unreleased
+    );
+}
+
+// ─── PART 4A: signal.connect exclusion ──────────────────────────────
+
+/// `signal.connect(handler)` is not a database connection and must not raise
+/// `cfg-resource-leak`.
+#[test]
+fn python_signal_connect_not_treated_as_db_acquire() {
+    let src = br#"
+def setup():
+    signal.connect(handler)
+    do_work()
+"#;
+
+    let python = Language::from(tree_sitter_python::LANGUAGE);
+    let findings = parse_and_analyse(&resources::ResourceMisuse, src, "python", python);
+
+    let leaks = findings
+        .iter()
+        .filter(|f| f.rule_id == "cfg-resource-leak")
+        .collect::<Vec<_>>();
+    assert!(
+        leaks.is_empty(),
+        "signal.connect should not be treated as db acquire; got {:?}",
+        leaks
+    );
+}
--- a/src/cfg_analysis/unreachable.rs
+++ b/src/cfg_analysis/unreachable.rs
@ -3,9 +3,40 @@ use super::{AnalysisContext, CfgAnalysis, CfgFinding, Confidence};
 use crate::cfg::StmtKind;
 use crate::labels::DataLabel;
 use crate::patterns::Severity;
+use std::collections::HashSet;

 pub struct UnreachableCode;

+/// Collect the names used as arguments by calls to configured event-handler
+/// registration functions.
+///
+/// Every `StmtKind::Call` node whose callee ends with one of the configured
+/// `event_handlers` entries (compared ASCII case-insensitively) contributes
+/// all of its `uses` to the returned set. Any of those names may be a
+/// callback function, so the set is a deliberate over-approximation.
+///
+/// Returns an empty set when the context carries no analysis rules or the
+/// rules configure no event handlers.
+fn event_handler_callbacks(ctx: &AnalysisContext) -> HashSet<String> {
+    let handlers = match ctx.analysis_rules {
+        Some(rules) if !rules.event_handlers.is_empty() => &rules.event_handlers,
+        _ => return HashSet::new(),
+    };
+
+    // Lowercase the configured suffixes once, rather than re-allocating them
+    // for every call node visited below.
+    let handler_suffixes: Vec<String> = handlers
+        .iter()
+        .map(|h| h.to_ascii_lowercase())
+        .collect();
+
+    let mut callbacks = HashSet::new();
+    for idx in ctx.cfg.node_indices() {
+        let info = &ctx.cfg[idx];
+        if info.kind != StmtKind::Call {
+            continue;
+        }
+        let Some(callee) = &info.callee else {
+            continue;
+        };
+
+        let callee_lower = callee.to_ascii_lowercase();
+        let is_handler = handler_suffixes
+            .iter()
+            .any(|suffix| callee_lower.ends_with(suffix.as_str()));
+        if is_handler {
+            // The callback function is typically used within the call — any
+            // name that appears in `uses` of this call node is a potential
+            // callback.
+            for u in &info.uses {
+                callbacks.insert(u.clone());
+            }
+        }
+    }
+    callbacks
+}
+
 impl CfgAnalysis for UnreachableCode {
    fn name(&self) -> &'static str {
        "unreachable-code"
@ -13,6 +44,7 @@ impl CfgAnalysis for UnreachableCode {

    fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding> {
        let reachable = dominators::reachable_set(ctx.cfg, ctx.entry);
+        let handler_callbacks = event_handler_callbacks(ctx);
        let mut findings = Vec::new();

        for idx in ctx.cfg.node_indices() {
@ -27,6 +59,13 @@ impl CfgAnalysis for UnreachableCode {
                continue;
            }

+            // Suppress findings for nodes inside event handler callbacks
+            if let Some(func_name) = &info.enclosing_func
+                && handler_callbacks.contains(func_name)
+            {
+                continue;
+            }
+
            let (rule_id, title, severity) = match info.label {
                Some(DataLabel::Sanitizer(_)) => (
                    "cfg-unreachable-sanitizer",
@ -43,7 +82,9 @@ impl CfgAnalysis for UnreachableCode {
                ),
                _ => {
                    // Check if it's a guard/auth call
-                    if super::is_guard_call(info, ctx.lang) || super::is_auth_call(info, ctx.lang) {
+                    if super::is_guard_call(info, ctx.lang, ctx.analysis_rules)
+                        || super::is_auth_call(info, ctx.lang)
+                    {
                        (
                            "cfg-unreachable-guard",
                            "Unreachable guard/auth check",
--- a/src/cli.rs
+++ b/src/cli.rs
@ -9,6 +9,14 @@ pub struct Cli {
    pub(crate) command: Commands,
 }

+impl Commands {
+    /// Reports whether this command emits structured (machine-readable)
+    /// output on stdout, in which case human status messages must be
+    /// suppressed entirely.
+    ///
+    /// Only a `scan` whose `format` is exactly `"json"` or `"sarif"` counts;
+    /// every other command and format yields `false`.
+    pub fn is_structured_output(&self) -> bool {
+        match self {
+            Commands::Scan { format, .. } => matches!(format.as_str(), "json" | "sarif"),
+            _ => false,
+        }
+    }
+}
+
 #[derive(Subcommand)]
 pub enum Commands {
    /// Scan project for vulnerabilities
@ -25,8 +33,8 @@ pub enum Commands {
        #[arg(long)]
        rebuild_index: bool,

-        /// Output format
-        #[arg(short, long, value_enum, default_value = "")]
+        /// Output format (console, json, sarif)
+        #[arg(short, long, default_value = "")]
        format: String,

        /// Show only high severity issues
@ -41,6 +49,11 @@ pub enum Commands {

        #[arg(long)]
        all_targets: bool,
+
+        /// Include findings from test/vendor/build paths at original severity
+        /// (by default these are downgraded)
+        #[arg(long)]
+        include_nonprod: bool,
    },

    /// Manage project indexes
@ -65,6 +78,51 @@ pub enum Commands {
        #[arg(long)]
        all: bool,
    },
+
+    /// Manage analysis configuration
+    Config {
+        #[command(subcommand)]
+        action: ConfigAction,
+    },
+}
+
+// Actions available under the `config` subcommand (`Commands::Config`).
+//
+// NOTE(review): the `///` comments on the variants and fields below are
+// presumably picked up by the clap derive as `--help` text, so treat them as
+// user-facing strings rather than internal documentation — confirm.
+//
+// `kind` and `cap` are accepted as free-form strings here; they are validated
+// later by `commands::config::add_rule`, which rejects unknown values.
+#[derive(Subcommand)]
+pub enum ConfigAction {
+    /// Print effective merged configuration as TOML
+    Show,
+
+    /// Print configuration directory path
+    Path,
+
+    /// Add a label rule to nyx.local
+    AddRule {
+        /// Language slug (e.g. javascript, rust, python)
+        #[arg(long)]
+        lang: String,
+
+        /// Function or property name to match
+        #[arg(long)]
+        matcher: String,
+
+        /// Rule kind: source, sanitizer, or sink
+        #[arg(long)]
+        kind: String,
+
+        /// Capability: env_var, html_escape, shell_escape, url_encode, json_parse, file_io, or all
+        #[arg(long)]
+        cap: String,
+    },
+
+    /// Add a terminator function to nyx.local
+    AddTerminator {
+        /// Language slug (e.g. javascript, rust, python)
+        #[arg(long)]
+        lang: String,
+
+        /// Function name that terminates execution (e.g. process.exit)
+        #[arg(long)]
+        name: String,
+    },
 }

 #[derive(Subcommand)]
--- a/src/commands/config.rs
+++ b/src/commands/config.rs
@ -0,0 +1,213 @@
+use crate::errors::NyxResult;
+use crate::utils::config::{AnalysisRulesConfig, Config, ConfigLabelRule};
+use console::style;
+use std::fs;
+use std::path::Path;
+
+/// Print the effective merged configuration to stdout as pretty-printed TOML.
+///
+/// # Errors
+///
+/// Returns a "Failed to serialize config" error when `config` cannot be
+/// rendered as TOML.
+pub fn show(config: &Config) -> NyxResult<()> {
+    match toml::to_string_pretty(config) {
+        Ok(rendered) => {
+            println!("{rendered}");
+            Ok(())
+        }
+        Err(e) => Err(format!("Failed to serialize config: {e}").into()),
+    }
+}
+
+/// Print the configuration directory path to stdout on its own line.
+///
+/// Always returns `Ok(())`.
+pub fn path(config_dir: &Path) -> NyxResult<()> {
+    let shown = config_dir.display();
+    println!("{shown}");
+    Ok(())
+}
+
+/// Add a label rule for `lang` to `nyx.local` inside `config_dir`.
+///
+/// `kind` must be `source`, `sanitizer` or `sink`, and `cap` must be a name
+/// accepted by `crate::labels::parse_cap`; anything else is rejected before
+/// the file is touched. An existing `nyx.local` is parsed first so earlier
+/// analysis entries are kept; a missing file starts from `Config::default()`.
+///
+/// When an identical rule is already present, nothing is written and the
+/// status line says so instead of claiming that a rule was added.
+///
+/// # Errors
+///
+/// Returns an error for an invalid `kind` or `cap`, when `nyx.local` cannot
+/// be read or parsed, or when writing the updated file fails.
+pub fn add_rule(
+    config_dir: &Path,
+    lang: &str,
+    matcher: &str,
+    kind: &str,
+    cap: &str,
+) -> NyxResult<()> {
+    // Validate kind
+    if !["source", "sanitizer", "sink"].contains(&kind) {
+        return Err(
+            format!("Invalid kind '{kind}'. Must be one of: source, sanitizer, sink").into(),
+        );
+    }
+
+    // Validate cap
+    if crate::labels::parse_cap(cap).is_none() {
+        return Err(format!(
+            "Invalid cap '{cap}'. Must be one of: env_var, html_escape, shell_escape, url_encode, json_parse, file_io, all"
+        )
+        .into());
+    }
+
+    let local_path = config_dir.join("nyx.local");
+    let mut config: Config = if local_path.exists() {
+        let content = fs::read_to_string(&local_path)?;
+        toml::from_str(&content)?
+    } else {
+        Config::default()
+    };
+
+    let lang_cfg = config
+        .analysis
+        .languages
+        .entry(lang.to_string())
+        .or_default();
+
+    let new_rule = ConfigLabelRule {
+        matchers: vec![matcher.to_string()],
+        kind: kind.to_string(),
+        cap: cap.to_string(),
+    };
+
+    // Dedup: an identical rule means there is nothing to persist, so leave
+    // the file untouched and report the no-op truthfully.
+    if lang_cfg.rules.contains(&new_rule) {
+        println!(
+            "{}: {} rule for `{}` ({}) already present in {}",
+            style("ok").green().bold(),
+            kind,
+            matcher,
+            cap,
+            lang
+        );
+        return Ok(());
+    }
+    lang_cfg.rules.push(new_rule);
+
+    write_local_config(&local_path, &config)?;
+
+    println!(
+        "{}: Added {} rule for `{}` ({}) in {}",
+        style("ok").green().bold(),
+        kind,
+        matcher,
+        cap,
+        lang
+    );
+    Ok(())
+}
+
+/// Add a terminator function name for `lang` to `nyx.local` inside
+/// `config_dir`.
+///
+/// An existing `nyx.local` is parsed first so earlier analysis entries are
+/// kept; a missing file starts from `Config::default()`. When `name` is
+/// already listed for `lang`, nothing is written and the status line says so
+/// instead of claiming that a terminator was added.
+///
+/// # Errors
+///
+/// Returns an error when `nyx.local` cannot be read or parsed, or when
+/// writing the updated file fails.
+pub fn add_terminator(config_dir: &Path, lang: &str, name: &str) -> NyxResult<()> {
+    let local_path = config_dir.join("nyx.local");
+    let mut config: Config = if local_path.exists() {
+        let content = fs::read_to_string(&local_path)?;
+        toml::from_str(&content)?
+    } else {
+        Config::default()
+    };
+
+    let lang_cfg = config
+        .analysis
+        .languages
+        .entry(lang.to_string())
+        .or_default();
+
+    // Compare as `&str` so the membership test does not allocate.
+    if lang_cfg.terminators.iter().any(|t| t.as_str() == name) {
+        println!(
+            "{}: Terminator `{}` already present for {}",
+            style("ok").green().bold(),
+            name,
+            lang
+        );
+        return Ok(());
+    }
+    lang_cfg.terminators.push(name.to_string());
+
+    write_local_config(&local_path, &config)?;
+
+    println!(
+        "{}: Added terminator `{}` for {}",
+        style("ok").green().bold(),
+        name,
+        lang
+    );
+    Ok(())
+}
+
+/// Serialize the analysis rules of `config` to `path` as pretty TOML.
+///
+/// The value written is a `Config` whose `analysis` section is copied from
+/// `config` and whose remaining fields come from `Config::default()`.
+/// Language entries with no rules, terminators or event handlers are
+/// dropped first; if none remain, the whole analysis section is reset to
+/// `AnalysisRulesConfig::default()`.
+///
+/// NOTE(review): because the other sections are taken from the defaults and
+/// the full struct is serialized, any non-analysis settings that were
+/// previously customised in the file look like they get replaced — confirm
+/// this is intended.
+///
+/// # Errors
+///
+/// Returns an error when serialization fails or the file cannot be written.
+fn write_local_config(path: &Path, config: &Config) -> NyxResult<()> {
+    let mut analysis = config.analysis.clone();
+
+    // Drop language entries that carry no configuration at all.
+    analysis.languages.retain(|_, entry| {
+        !(entry.rules.is_empty()
+            && entry.terminators.is_empty()
+            && entry.event_handlers.is_empty())
+    });
+
+    // Nothing left for any language: fall back to the default analysis section.
+    if analysis.languages.is_empty() {
+        analysis = AnalysisRulesConfig::default();
+    }
+
+    let local = Config {
+        analysis,
+        ..Config::default()
+    };
+
+    let rendered =
+        toml::to_string_pretty(&local).map_err(|e| format!("Failed to serialize config: {e}"))?;
+    fs::write(path, rendered)?;
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Parse the `nyx.local` that the command under test wrote into `dir`.
+    fn load_local(dir: &Path) -> Config {
+        let raw = fs::read_to_string(dir.join("nyx.local")).unwrap();
+        toml::from_str(&raw).unwrap()
+    }
+
+    /// Register the `escapeHtml` sanitizer rule shared by several tests.
+    fn add_escape_html(dir: &Path) {
+        add_rule(dir, "javascript", "escapeHtml", "sanitizer", "html_escape").unwrap();
+    }
+
+    #[test]
+    fn add_rule_writes_valid_toml() {
+        let tmp = tempfile::tempdir().unwrap();
+        add_escape_html(tmp.path());
+
+        let parsed = load_local(tmp.path());
+        let js = parsed.analysis.languages.get("javascript").unwrap();
+        assert_eq!(js.rules.len(), 1);
+
+        let rule = &js.rules[0];
+        assert_eq!(rule.matchers, vec!["escapeHtml"]);
+        assert_eq!(rule.kind, "sanitizer");
+        assert_eq!(rule.cap, "html_escape");
+    }
+
+    #[test]
+    fn add_rule_deduplicates() {
+        let tmp = tempfile::tempdir().unwrap();
+        // Adding the very same rule twice must leave a single entry behind.
+        add_escape_html(tmp.path());
+        add_escape_html(tmp.path());
+
+        let parsed = load_local(tmp.path());
+        let js = parsed.analysis.languages.get("javascript").unwrap();
+        assert_eq!(js.rules.len(), 1);
+    }
+
+    #[test]
+    fn add_terminator_works() {
+        let tmp = tempfile::tempdir().unwrap();
+        add_terminator(tmp.path(), "javascript", "process.exit").unwrap();
+
+        let parsed = load_local(tmp.path());
+        let js = parsed.analysis.languages.get("javascript").unwrap();
+        assert_eq!(js.terminators, vec!["process.exit"]);
+    }
+
+    #[test]
+    fn add_rule_rejects_invalid_kind() {
+        let tmp = tempfile::tempdir().unwrap();
+        let outcome = add_rule(tmp.path(), "javascript", "foo", "invalid_kind", "all");
+        assert!(outcome.is_err());
+    }
+
+    #[test]
+    fn add_rule_rejects_invalid_cap() {
+        let tmp = tempfile::tempdir().unwrap();
+        let outcome = add_rule(tmp.path(), "javascript", "foo", "sanitizer", "invalid_cap");
+        assert!(outcome.is_err());
+    }
+}
--- a/src/commands/index.rs
+++ b/src/commands/index.rs
@ -5,10 +5,10 @@ use crate::patterns::Severity;
 use crate::utils::Config;
 use crate::utils::project::get_project_info;
 use crate::walk::spawn_file_walker;
-use blake3;
 use bytesize::ByteSize;
 use chrono::{DateTime, Local};
 use console::style;
+use indicatif::{ProgressBar, ProgressStyle};
 use rayon::prelude::*;
 use std::fs;
 use std::path::PathBuf;
@ -25,7 +25,13 @@ pub fn handle(
            let (project_name, db_path) = get_project_info(&build_path, database_dir)?;

            if force || !db_path.exists() {
-                build_index(&project_name, &build_path, &db_path, config)?;
+                build_index(
+                    &project_name,
+                    &build_path,
+                    &db_path,
+                    config,
+                    !config.output.quiet,
+                )?;
                println!(
                    "✔ {} {}",
                    style("Index built:").green(),
@ -84,6 +90,7 @@ pub fn build_index(
    project_path: &std::path::Path,
    db_path: &std::path::Path,
    config: &Config,
+    show_progress: bool,
 ) -> NyxResult<()> {
    tracing::debug!("Building index for: {}", project_name);
    fs::File::create(db_path)?;
@ -97,10 +104,27 @@ pub fn build_index(
    tracing::debug!("Cleaned index for: {}", project_name);

    let (rx, handle) = spawn_file_walker(project_path, config);
+    // Drain the channel BEFORE joining — the bounded channel will deadlock
+    // if we join first and the walker blocks on send.
+    let paths: Vec<PathBuf> = rx.into_iter().flatten().collect();
    if let Err(err) = handle.join() {
        tracing::error!("walker thread panicked: {:#?}", err);
    }
-    let paths: Vec<PathBuf> = rx.into_iter().flatten().collect();
+
+    let pb = if show_progress {
+        let pb = ProgressBar::new(paths.len() as u64);
+        pb.set_style(
+            ProgressStyle::with_template(
+                "{spinner:.green} {msg} [{bar:30.cyan/blue}] {pos}/{len} ({eta})",
+            )
+            .unwrap()
+            .progress_chars("##-"),
+        );
+        pb.set_message("Indexing files");
+        pb
+    } else {
+        ProgressBar::hidden()
+    };

    paths
        .into_par_iter()
@ -108,18 +132,15 @@ pub fn build_index(
            let mut idx = Indexer::from_pool(project_name, &pool)?;

            // Read once, hash once — pass bytes to both rule execution and
-            // summary extraction.
+            // summary extraction.  Use pre-computed hash for upsert to avoid
+            // a redundant file read inside upsert_file.
            let bytes = std::fs::read(&path)?;
-            let hash = {
-                let mut hasher = blake3::Hasher::new();
-                hasher.update(&bytes);
-                hasher.finalize().as_bytes().to_vec()
-            };
+            let hash = Indexer::digest_bytes(&bytes);

            // Run AST-only rules (no taint yet — summaries come later in scan)
            let issues =
                crate::commands::scan::run_rules_on_bytes(&bytes, &path, config, None, None)?;
-            let file_id = idx.upsert_file(&path)?;
+            let file_id = idx.upsert_file_with_hash(&path, &hash)?;

            let rows: Vec<IssueRow> = issues
                .iter()
@ -144,8 +165,10 @@ pub fn build_index(
                idx.replace_summaries_for_file(&path, &hash, &sums)?;
            }

+            pb.inc(1);
            Ok(())
        })?;
+    pb.finish_and_clear();

    {
        let idx = Indexer::from_pool(project_name, &pool)?;
@ -170,7 +193,7 @@ fn build_index_creates_db_and_registers_files() {

    let db_path = td.path().join("proj.sqlite");

-    build_index("proj", &project_dir, &db_path, &cfg).expect("index build should succeed");
+    build_index("proj", &project_dir, &db_path, &cfg, false).expect("index build should succeed");

    // ── Assert ────────────────────────────────────────────────────────────────
    assert!(db_path.is_file(), "SQLite file must exist");
--- a/src/commands/mod.rs
+++ b/src/commands/mod.rs
@ -1,4 +1,5 @@
 pub mod clean;
+pub mod config;
 pub mod index;
 pub mod list;
 pub mod scan;
@ -12,6 +13,7 @@ use std::path::Path;
 pub fn handle_command(
    command: Commands,
    database_dir: &Path,
+    config_dir: &Path,
    config: &mut Config,
 ) -> NyxResult<()> {
    match command {
@ -24,6 +26,7 @@ pub fn handle_command(
            ast_only,
            cfg_only,
            all_targets,
+            include_nonprod,
        } => {
            if high_only {
                config.scanner.min_severity = Severity::High
@ -41,10 +44,37 @@ pub fn handle_command(
                config.scanner.mode = AnalysisMode::Full
            };

-            scan::handle(&path, no_index, rebuild_index, format, database_dir, config)
+            if include_nonprod {
+                config.scanner.include_nonprod = true
+            };
+
+            scan::handle(&path, no_index, rebuild_index, format, database_dir, config)?;
+        }
+        Commands::Index { action } => {
+            index::handle(action, database_dir, config)?;
+        }
+        Commands::List { verbose } => {
+            list::handle(verbose, database_dir)?;
+        }
+        Commands::Clean { project, all } => {
+            clean::handle(project, all, database_dir)?;
+        }
+        Commands::Config { action } => {
+            use crate::cli::ConfigAction;
+            match action {
+                ConfigAction::Show => self::config::show(config)?,
+                ConfigAction::Path => self::config::path(config_dir)?,
+                ConfigAction::AddRule {
+                    lang,
+                    matcher,
+                    kind,
+                    cap,
+                } => self::config::add_rule(config_dir, &lang, &matcher, &kind, &cap)?,
+                ConfigAction::AddTerminator { lang, name } => {
+                    self::config::add_terminator(config_dir, &lang, &name)?
+                }
+            }
        }
-        Commands::Index { action } => index::handle(action, database_dir, config),
-        Commands::List { verbose } => list::handle(verbose, database_dir),
-        Commands::Clean { project, all } => clean::handle(project, all, database_dir),
    }
+    Ok(())
 }
--- a/src/commands/scan.rs
+++ b/src/commands/scan.rs
@ -1,16 +1,16 @@
 pub(crate) use crate::ast::{
-    extract_summaries_from_bytes, extract_summaries_from_file, run_rules_on_bytes,
-    run_rules_on_file,
+    analyse_file_fused, extract_summaries_from_bytes, run_rules_on_bytes, run_rules_on_file,
 };
 use crate::database::index::{Indexer, IssueRow};
 use crate::errors::NyxResult;
 use crate::patterns::Severity;
-use crate::summary::{self, FuncSummary, GlobalSummaries};
+use crate::summary::{self, GlobalSummaries};
 use crate::utils::config::Config;
 use crate::utils::project::get_project_info;
 use crate::walk::spawn_file_walker;
 use console::style;
 use dashmap::DashMap;
+use indicatif::{ProgressBar, ProgressStyle};
 use r2d2::Pool;
 use r2d2_sqlite::SqliteConnectionManager;
 use rayon::prelude::*;
@ -18,6 +18,22 @@ use std::collections::BTreeMap;
 use std::path::{Path, PathBuf};
 use std::sync::Arc;

+/// Build the progress bar used by the scan passes.
+///
+/// When `show` is false a hidden bar is returned. Otherwise the bar is
+/// created for `len` items, given the spinner/bar/ETA template below, and
+/// labelled with `msg`.
+fn make_progress_bar(len: u64, msg: &str, show: bool) -> ProgressBar {
+    if show {
+        let bar = ProgressBar::new(len);
+        let bar_style = ProgressStyle::with_template(
+            "{spinner:.green} {msg} [{bar:30.cyan/blue}] {pos}/{len} ({eta})",
+        )
+        .unwrap()
+        .progress_chars("##-");
+        bar.set_style(bar_style);
+        bar.set_message(msg.to_string());
+        bar
+    } else {
+        ProgressBar::hidden()
+    }
+}
+
 #[derive(Debug, Clone, serde::Serialize)]
 pub struct Diag {
    pub path: String,
@ -39,22 +55,37 @@ pub fn handle(
    let scan_path = Path::new(path).canonicalize()?;
    let (project_name, db_path) = get_project_info(&scan_path, database_dir)?;

-    println!(
-        "{} {}...\n",
-        style("Checking").green().bold(),
-        &project_name
-    );
+    let suppress_status = config.output.quiet || format == "json" || format == "sarif";
+    if !suppress_status {
+        println!(
+            "{} {}...\n",
+            style("Checking").green().bold(),
+            &project_name
+        );
+    }
+
+    let show_progress = format != "json" && format != "sarif" && !config.output.quiet;

    let diags: Vec<Diag> = if no_index {
-        scan_filesystem(&scan_path, config)?
+        scan_filesystem(&scan_path, config, show_progress)?
    } else {
        if rebuild_index || !db_path.exists() {
            tracing::debug!("Scanning filesystem index filesystem");
-            crate::commands::index::build_index(&project_name, &scan_path, &db_path, config)?;
+            crate::commands::index::build_index(
+                &project_name,
+                &scan_path,
+                &db_path,
+                config,
+                show_progress,
+            )?;
        }

        let pool = Indexer::init(&db_path)?;
-        scan_with_index_parallel(&project_name, pool, config)?
+        if config.database.vacuum_on_startup {
+            let idx = Indexer::from_pool(&project_name, &pool)?;
+            idx.vacuum()?;
+        }
+        scan_with_index_parallel(&project_name, pool, config, show_progress)?
    };

    tracing::debug!("Found {:?} issues.", diags.len());
@ -66,6 +97,14 @@ pub fn handle(
        return Ok(());
    }

+    if format == "sarif" {
+        let sarif = crate::output::build_sarif(&diags, &scan_path);
+        let json = serde_json::to_string_pretty(&sarif)
+            .map_err(|e| crate::errors::NyxError::Msg(e.to_string()))?;
+        println!("{json}");
+        return Ok(());
+    }
+
    if format == "console" || (format.is_empty() && config.output.default_format == "console") {
        tracing::debug!("Printing to console");
        let mut grouped: BTreeMap<&str, Vec<&Diag>> = BTreeMap::new();
@ -77,10 +116,10 @@ pub fn handle(
            println!("{}", style(path).blue().underlined());
            for d in issues {
                println!(
-                    "  {:>4}:{:<4}  [{:}]  {:}",
+                    "  {:>4}:{:<4}  {}  {}",
                    d.line,
                    d.col,
-                    d.severity,
+                    d.severity.colored_tag(),
                    style(&d.id).bold()
                );
            }
@ -109,55 +148,144 @@ pub fn handle(
 ///               merged cross‑file summaries.
 ///
 /// AST pattern queries are run during pass 2 (they don't depend on summaries).
-pub(crate) fn scan_filesystem(root: &Path, cfg: &Config) -> NyxResult<Vec<Diag>> {
+pub(crate) fn scan_filesystem(
+    root: &Path,
+    cfg: &Config,
+    show_progress: bool,
+) -> NyxResult<Vec<Diag>> {
    // ── Collect file list ────────────────────────────────────────────────
    let all_paths: Vec<PathBuf> = {
        let _span = tracing::info_span!("walk_files").entered();
        let (rx, handle) = spawn_file_walker(root, cfg);
+        // Drain the channel BEFORE joining the walker thread.
+        // The channel is bounded, so joining first would deadlock once
+        // the walker fills it and blocks on send.
+        let paths: Vec<PathBuf> = rx.into_iter().flatten().collect();
        if let Err(err) = handle.join() {
            tracing::error!("walker thread panicked: {:#?}", err);
        }
-        rx.into_iter().flatten().collect()
+        paths
    };
    tracing::info!(file_count = all_paths.len(), "file walk complete");

-    // ── Pass 1: extract summaries ────────────────────────────────────────
    let needs_taint = cfg.scanner.mode == crate::utils::config::AnalysisMode::Full
        || cfg.scanner.mode == crate::utils::config::AnalysisMode::Taint;

-    let global_summaries: Option<GlobalSummaries> = if needs_taint {
-        let _span = tracing::info_span!("pass1_summaries", files = all_paths.len()).entered();
+    if !needs_taint {
+        // ── AST-only: single fused pass (no cross-file context needed) ──
+        let _span = tracing::info_span!("ast_only_analysis", files = all_paths.len()).entered();
+        let pb = make_progress_bar(all_paths.len() as u64, "Running analysis", show_progress);

-        let collected: Vec<FuncSummary> = all_paths
+        let mut diags: Vec<Diag> = all_paths
            .par_iter()
-            .flat_map_iter(|path| match extract_summaries_from_file(path, cfg) {
-                Ok(sums) => sums,
-                Err(e) => {
-                    tracing::warn!("pass 1: failed to summarise {}: {e}", path.display());
-                    vec![]
-                }
+            .flat_map_iter(|path| {
+                let result = match analyse_file_fused(
+                    &std::fs::read(path).unwrap_or_default(),
+                    path,
+                    cfg,
+                    None,
+                    Some(root),
+                ) {
+                    Ok(r) => r.diags,
+                    Err(e) => {
+                        tracing::warn!("analysis: {}: {e}", path.display());
+                        vec![]
+                    }
+                };
+                pb.inc(1);
+                result
            })
            .collect();
+        pb.finish_and_clear();

-        tracing::info!(summaries = collected.len(), "pass 1 complete");
-        let _merge_span = tracing::info_span!("merge_summaries").entered();
+        if let Some(max) = cfg.output.max_results {
+            diags.truncate(max as usize);
+        }
+        return Ok(diags);
+    }
+
+    // ── Taint mode: two-pass with fused pass 1 ──────────────────────────
+    //
+    // Pass 1 (fused): parse + CFG (once!) → extract summaries + run
+    //   AST queries + local taint + CFG structural analyses.
+    //   Summaries are collected for the cross-file merge.
+    //
+    // Pass 2: re-run full analysis with global summaries injected.
+    //   This requires a second parse+CFG, but ONLY for taint-mode files
+    //   that need cross-file context.  For repos where most functions
+    //   don't have unresolved callees, pass 1 results are already correct.
+
+    // ── Pass 1: fused summary extraction + parallel merge ──────────────
+    //
+    // Each rayon thread builds a local `GlobalSummaries` from its chunk,
+    // then the per-thread maps are merged in a binary reduce tree.
+    // This eliminates the serial merge_summaries bottleneck.
+    let global_summaries: GlobalSummaries = {
+        let _span = tracing::info_span!("pass1_fused", files = all_paths.len()).entered();
+        let pb = make_progress_bar(
+            all_paths.len() as u64,
+            "Pass 1: Extracting summaries",
+            show_progress,
+        );
        let root_str = root.to_string_lossy();
-        Some(summary::merge_summaries(collected, Some(&root_str)))
-    } else {
-        None
+
+        let gs = all_paths
+            .par_iter()
+            .fold(GlobalSummaries::new, |mut local_gs, path| {
+                if let Ok(bytes) = std::fs::read(path) {
+                    match analyse_file_fused(&bytes, path, cfg, None, Some(root)) {
+                        Ok(r) => {
+                            for s in r.summaries {
+                                let key = s.func_key(Some(&root_str));
+                                local_gs.insert(key, s);
+                            }
+                        }
+                        Err(e) => {
+                            tracing::warn!("pass 1: {}: {e}", path.display());
+                        }
+                    }
+                } else {
+                    tracing::warn!("pass 1: cannot read {}", path.display());
+                }
+                pb.inc(1);
+                local_gs
+            })
+            .reduce(GlobalSummaries::new, |mut a, b| {
+                a.merge(b);
+                a
+            });
+
+        pb.finish_and_clear();
+        tracing::info!("pass 1 complete");
+        gs
    };

-    // ── Pass 2: full analysis with cross‑file context ────────────────────
+    // ── Pass 2: re-run with cross-file global summaries ──────────────────
    let mut diags: Vec<Diag> = {
        let _span = tracing::info_span!("pass2_analysis", files = all_paths.len()).entered();
+        let pb = make_progress_bar(
+            all_paths.len() as u64,
+            "Pass 2: Running analysis",
+            show_progress,
+        );

-        all_paths
+        let result: Vec<Diag> = all_paths
            .par_iter()
-            .map(|path| run_rules_on_file(path, cfg, global_summaries.as_ref(), Some(root)))
-            .try_reduce(Vec::new, |mut a, mut b| {
-                a.append(&mut b);
-                Ok(a)
-            })?
+            .flat_map_iter(|path| {
+                let result = match run_rules_on_file(path, cfg, Some(&global_summaries), Some(root))
+                {
+                    Ok(d) => d,
+                    Err(e) => {
+                        tracing::warn!("pass 2: {}: {e}", path.display());
+                        vec![]
+                    }
+                };
+                pb.inc(1);
+                result
+            })
+            .collect();
+        pb.finish_and_clear();
+        result
    };
    tracing::info!(diags = diags.len(), "pass 2 complete");

@ -187,6 +315,7 @@ pub fn scan_with_index_parallel(
    project: &str,
    pool: Arc<Pool<SqliteConnectionManager>>,
    cfg: &Config,
+    show_progress: bool,
 ) -> NyxResult<Vec<Diag>> {
    let files = {
        let idx = Indexer::from_pool(project, &pool)?;
@ -199,39 +328,37 @@ pub fn scan_with_index_parallel(
    // ── Pass 1: ensure summaries are up‑to‑date ──────────────────────────
    if needs_taint {
        let _span = tracing::info_span!("pass1_indexed", files = files.len()).entered();
+        let pb = make_progress_bar(
+            files.len() as u64,
+            "Pass 1: Extracting summaries",
+            show_progress,
+        );

        files.par_iter().for_each_init(
            || Indexer::from_pool(project, &pool).expect("db pool"),
            |idx, path| {
-                let needs_scan = idx.should_scan(path).unwrap_or(true);
-                if !needs_scan {
-                    return; // summaries in DB are still valid
-                }
-
-                // Read once, hash once, extract summaries from bytes.
-                let bytes = match std::fs::read(path) {
-                    Ok(b) => b,
-                    Err(e) => {
-                        tracing::warn!("pass 1: cannot read {}: {e}", path.display());
-                        return;
-                    }
-                };
-                let hash = {
-                    let mut h = blake3::Hasher::new();
-                    h.update(&bytes);
-                    h.finalize().as_bytes().to_vec()
-                };
-
-                match extract_summaries_from_bytes(&bytes, path, cfg) {
-                    Ok(sums) => {
-                        idx.replace_summaries_for_file(path, &hash, &sums).ok();
-                    }
-                    Err(e) => {
-                        tracing::warn!("pass 1: {}: {e}", path.display());
-                    }
+                // Read once, hash once — use the hash for the change check
+                // to avoid a second file read inside should_scan.
+                if let Ok(bytes) = std::fs::read(path) {
+                    let hash = Indexer::digest_bytes(&bytes);
+                    let needs_scan = idx.should_scan_with_hash(path, &hash).unwrap_or(true);
+                    if needs_scan {
+                        match extract_summaries_from_bytes(&bytes, path, cfg) {
+                            Ok(sums) => {
+                                idx.replace_summaries_for_file(path, &hash, &sums).ok();
+                            }
+                            Err(e) => {
+                                tracing::warn!("pass 1: {}: {e}", path.display());
+                            }
+                        }
+                    }
+                } else {
+                    tracing::warn!("pass 1: cannot read {}", path.display());
                }
+                pb.inc(1);
            },
        );
+        pb.finish_and_clear();
    }

    // ── Load global summaries ────────────────────────────────────────────
@ -247,26 +374,47 @@ pub fn scan_with_index_parallel(

    // ── Pass 2: full analysis ────────────────────────────────────────────
    let _span = tracing::info_span!("pass2_indexed").entered();
+    let pb2 = make_progress_bar(
+        files.len() as u64,
+        "Pass 2: Running analysis",
+        show_progress,
+    );
    let diag_map: DashMap<String, Vec<Diag>> = DashMap::new();

    files.into_par_iter().for_each_init(
        || Indexer::from_pool(project, &pool).expect("db pool"),
        |idx, path| {
+            // Read file once for both change-detection and analysis.
+            let bytes_opt = std::fs::read(&path).ok();
+            let hash = bytes_opt.as_ref().map(|b| Indexer::digest_bytes(b));
+
            // In pass 2 we always re-analyse when taint is enabled because
            // global summaries may have changed even if this file didn't.
            // For AST-only mode, we can still use the cached issues.
            let needs_scan = if needs_taint {
                true // conservative: always re-analyse in taint mode
            } else {
-                idx.should_scan(&path).unwrap_or(true)
+                match (&hash, &bytes_opt) {
+                    (Some(h), _) => idx.should_scan_with_hash(&path, h).unwrap_or(true),
+                    _ => true,
+                }
            };

            let mut diags = if needs_scan {
-                let d = run_rules_on_file(&path, cfg, global_summaries.as_ref(), None)
-                    .unwrap_or_default();
+                let d = match &bytes_opt {
+                    Some(bytes) => {
+                        run_rules_on_bytes(bytes, &path, cfg, global_summaries.as_ref(), None)
+                            .unwrap_or_default()
+                    }
+                    None => run_rules_on_file(&path, cfg, global_summaries.as_ref(), None)
+                        .unwrap_or_default(),
+                };

-                // Persist issues + update file record
-                let file_id = idx.upsert_file(&path).unwrap_or_default();
+                // Persist issues + update file record (use pre-computed hash)
+                let file_id = match &hash {
+                    Some(h) => idx.upsert_file_with_hash(&path, h).unwrap_or_default(),
+                    None => idx.upsert_file(&path).unwrap_or_default(),
+                };
                idx.replace_issues(
                    file_id,
                    d.iter().map(|d| IssueRow {
@ -298,8 +446,10 @@ pub fn scan_with_index_parallel(
                    .or_default()
                    .append(&mut diags);
            }
+            pb2.inc(1);
        },
    );
+    pb2.finish_and_clear();

    let mut diags: Vec<Diag> = diag_map.into_iter().flat_map(|(_, v)| v).collect();

@ -323,7 +473,8 @@ fn scan_with_index_parallel_uses_existing_index_without_rescanning() {
    std::fs::write(project_dir.join("foo.txt"), "abc").unwrap();

    let (project_name, db_path) = get_project_info(&project_dir, td.path()).unwrap();
-    crate::commands::index::build_index(&project_name, &project_dir, &db_path, &cfg).unwrap();
+    crate::commands::index::build_index(&project_name, &project_dir, &db_path, &cfg, false)
+        .unwrap();

    let pool = Indexer::init(&db_path).unwrap();

@ -336,7 +487,7 @@ fn scan_with_index_parallel_uses_existing_index_without_rescanning() {
        1
    );

-    let diags = scan_with_index_parallel(&project_name, Arc::clone(&pool), &cfg)
+    let diags = scan_with_index_parallel(&project_name, Arc::clone(&pool), &cfg, false)
        .expect("scan should succeed");

    assert!(diags.is_empty());
--- a/src/database.rs
+++ b/src/database.rs
@ -68,9 +68,13 @@ pub mod index {
    impl Indexer {
        pub fn init(database_path: &Path) -> NyxResult<Arc<Pool<SqliteConnectionManager>>> {
            let _span = tracing::info_span!("db_init", path = %database_path.display()).entered();
+            // NO_MUTEX is safe because r2d2 ensures each pooled connection
+            // is only ever used by one thread at a time.  Combined with WAL
+            // mode this allows concurrent readers + a single writer without
+            // the global serialization that FULL_MUTEX causes.
            let flags = OpenFlags::SQLITE_OPEN_READ_WRITE
                | OpenFlags::SQLITE_OPEN_CREATE
-                | OpenFlags::SQLITE_OPEN_FULL_MUTEX;
+                | OpenFlags::SQLITE_OPEN_NO_MUTEX;
            let manager = SqliteConnectionManager::file(database_path).with_flags(flags);
            let pool = Arc::new(Pool::new(manager)?);

@ -132,10 +136,13 @@ pub mod index {
        }

        /// Return true when the file *content* or *mtime* changed since the last scan.
+        ///
+        /// Short-circuits on mtime: if the stored mtime differs from the
+        /// filesystem mtime, the file is re-scanned without hashing; the
+        /// content hash is only computed and compared when the mtimes match.
+        #[allow(dead_code)] // used in tests and by should_scan_with_hash callers may fall back
        pub fn should_scan(&self, path: &Path) -> NyxResult<bool> {
            let meta = fs::metadata(path)?;
            let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
-            let digest = Self::digest_file(path)?;

            let row: Option<(Vec<u8>, i64)> = self
                .conn
@ -147,18 +154,56 @@ pub mod index {
                .optional()?;

            Ok(match row {
-                Some((stored_hash, stored_mtime)) => stored_hash != digest || stored_mtime != mtime,
+                Some((stored_hash, stored_mtime)) => {
+                    if stored_mtime != mtime {
+                        // mtime changed — must re-scan
+                        true
+                    } else {
+                        // mtime matches — fall back to a content-hash comparison,
+                        // which re-reads the file from disk (callers that already
+                        // hold the bytes should use should_scan_with_hash instead)
+                        let digest = Self::digest_file(path)?;
+                        stored_hash != digest
+                    }
+                }
+                None => true,
+            })
+        }
+
+        /// Like [`should_scan`] but accepts a pre-computed hash to avoid
+        /// redundant file reads.
+        ///
+        /// Only the stored content hash is consulted: this method neither
+        /// touches the filesystem nor compares mtimes.
+        ///
+        /// Returns `Ok(true)` when no row exists for `path` in this project or
+        /// when the stored hash differs from `hash`; `Ok(false)` when they match.
+        ///
+        /// # Errors
+        ///
+        /// Propagates any error from the underlying SQLite query.
+        pub fn should_scan_with_hash(&self, path: &Path, hash: &[u8]) -> NyxResult<bool> {
+            let row: Option<Vec<u8>> = self
+                .conn
+                .query_row(
+                    "SELECT hash FROM files WHERE project = ?1 AND path = ?2",
+                    params![self.project, path.to_string_lossy()],
+                    |r| r.get(0),
+                )
+                .optional()?;
+
+            Ok(match row {
+                Some(stored_hash) => stored_hash != hash,
                None => true,
            })
        }

        /// Insert or update the `files` row and return its id.
        pub fn upsert_file(&self, path: &Path) -> NyxResult<i64> {
+            let bytes = fs::read(path)?;
+            let hash = Self::digest_bytes(&bytes);
+            self.upsert_file_with_hash(path, &hash)
+        }
+
+        /// Insert or update the `files` row using a pre-computed hash.
+        /// Avoids redundant file reads when the caller already has the hash.
+        pub fn upsert_file_with_hash(&self, path: &Path, hash: &[u8]) -> NyxResult<i64> {
            let meta = fs::metadata(path)?;
            let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
            let scanned_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;
-            let digest = Self::digest_file(path)?;
+            let path_str = path.to_string_lossy();

+            // Upsert the row, then look up its id with a follow-up query.
            self.c().execute(
                "INSERT INTO files (project, path, hash, mtime, scanned_at)
                 VALUES (?1, ?2, ?3, ?4, ?5)
@ -166,18 +211,12 @@ pub mod index {
                 SET hash = excluded.hash,
                     mtime = excluded.mtime,
                     scanned_at = excluded.scanned_at",
-                params![
-                    self.project,
-                    path.to_string_lossy(),
-                    digest,
-                    mtime,
-                    scanned_at
-                ],
+                params![self.project, path_str, hash, mtime, scanned_at],
            )?;

            let id: i64 = self.c().query_row(
                "SELECT id FROM files WHERE project = ?1 AND path = ?2",
-                params![self.project, path.to_string_lossy()],
+                params![self.project, path_str],
                |r| r.get(0),
            )?;
            Ok(id)
@ -287,24 +326,38 @@ pub mod index {
        }

        /// Load every function summary for this project.
+        ///
+        /// Reads all JSON strings from SQLite in one pass, then deserializes
+        /// them — in parallel with rayon for large result sets, sequentially
+        /// otherwise.
+        ///
+        /// # Errors
+        ///
+        /// Returns an error if the query fails, if any row cannot be read, or
+        /// if any stored summary fails to deserialize.  Rows are never dropped
+        /// silently: the caller either gets every stored summary or an error.
        pub fn load_all_summaries(&self) -> NyxResult<Vec<crate::summary::FuncSummary>> {
            let mut stmt = self
                .c()
                .prepare("SELECT summary FROM function_summaries WHERE project = ?1")?;

-            let iter = stmt.query_map([&self.project], |row| {
-                let json: String = row.get(0)?;
-                Ok(json)
-            })?;
+            // Collecting into `Result` stops at the first unreadable row and
+            // propagates its error instead of skipping the row.
+            let jsons = stmt
+                .query_map([&self.project], |row| row.get::<_, String>(0))?
+                .collect::<Result<Vec<String>, _>>()?;

-            let mut out = Vec::new();
-            for row in iter {
-                let json = row?;
-                let s: crate::summary::FuncSummary = serde_json::from_str(&json)
-                    .map_err(|e| rusqlite::Error::ToSqlConversionFailure(Box::new(e)))?;
-                out.push(s);
+            // Same error mapping the sequential loader used before this change.
+            let to_err =
+                |e: serde_json::Error| rusqlite::Error::ToSqlConversionFailure(Box::new(e));
+
+            // Parallel JSON deserialization for large sets
+            if jsons.len() > 256 {
+                use rayon::prelude::*;
+                let parsed = jsons
+                    .par_iter()
+                    .map(|json| serde_json::from_str::<crate::summary::FuncSummary>(json))
+                    .collect::<Result<Vec<_>, _>>()
+                    .map_err(to_err)?;
+                Ok(parsed)
+            } else {
+                let parsed = jsons
+                    .iter()
+                    .map(|json| serde_json::from_str::<crate::summary::FuncSummary>(json))
+                    .collect::<Result<Vec<_>, _>>()
+                    .map_err(to_err)?;
+                Ok(parsed)
            }
-            Ok(out)
        }

        /// gets files from the database
@ -351,12 +404,20 @@ pub mod index {
        // -------------------------------------------------------------------------
        // Helpers
        // -------------------------------------------------------------------------
+        #[allow(dead_code)] // used by should_scan() and tests
        fn digest_file(path: &Path) -> NyxResult<Vec<u8>> {
            let mut hasher = blake3::Hasher::new();
            let mut file = fs::File::open(path)?;
            std::io::copy(&mut file, &mut hasher)?;
            Ok(hasher.finalize().as_bytes().to_vec())
        }
+
+        /// Compute the BLAKE3 digest of an in-memory buffer.
+        ///
+        /// Lets callers that already hold a file's contents obtain its hash
+        /// without reading the file again.
+        pub fn digest_bytes(bytes: &[u8]) -> Vec<u8> {
+            blake3::hash(bytes).as_bytes().to_vec()
+        }
    }
 }

--- a/src/labels/c.rs
+++ b/src/labels/c.rs
@ -24,9 +24,13 @@ pub static RULES: &[LabelRule] = &[
        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
    },
    LabelRule {
-        matchers: &["printf", "fprintf", "sprintf", "strcpy", "strcat"],
+        matchers: &["sprintf", "strcpy", "strcat"],
        label: DataLabel::Sink(Cap::HTML_ESCAPE),
    },
+    LabelRule {
+        matchers: &["printf", "fprintf"],
+        label: DataLabel::Sink(Cap::FMT_STRING),
+    },
 ];

 pub static KINDS: Map<&'static str, Kind> = phf_map! {
--- a/src/labels/cpp.rs
+++ b/src/labels/cpp.rs
@ -22,16 +22,13 @@ pub static RULES: &[LabelRule] = &[
        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
    },
    LabelRule {
-        matchers: &[
-            "printf",
-            "fprintf",
-            "sprintf",
-            "strcpy",
-            "strcat",
-            "std::cout",
-        ],
+        matchers: &["sprintf", "strcpy", "strcat"],
        label: DataLabel::Sink(Cap::HTML_ESCAPE),
    },
+    LabelRule {
+        matchers: &["printf", "fprintf"],
+        label: DataLabel::Sink(Cap::FMT_STRING),
+    },
 ];

 pub static KINDS: Map<&'static str, Kind> = phf_map! {
--- a/src/labels/javascript.rs
+++ b/src/labels/javascript.rs
@ -38,6 +38,14 @@ pub static RULES: &[LabelRule] = &[
        matchers: &["innerHTML"],
        label: DataLabel::Sink(Cap::HTML_ESCAPE),
    },
+    LabelRule {
+        matchers: &[
+            "location.href",
+            "window.location.href",
+            "document.location.href",
+        ],
+        label: DataLabel::Sink(Cap::URL_ENCODE),
+    },
    LabelRule {
        matchers: &[
            "child_process.exec",
@ -56,6 +64,7 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
    "for_in_statement"      => Kind::For,

    "return_statement"      => Kind::Return,
+    "throw_statement"       => Kind::Return,
    "break_statement"       => Kind::Break,
    "continue_statement"    => Kind::Continue,

--- a/src/labels/mod.rs
+++ b/src/labels/mod.rs
@ -31,7 +31,7 @@ bitflags! {
        const URL_ENCODE   = 0b0000_1000;
        const JSON_PARSE   = 0b0001_0000;
        const FILE_IO      = 0b0010_0000;
-        // todo: add more if needed
+        const FMT_STRING   = 0b0100_0000;
    }
 }

@ -195,6 +195,147 @@ pub fn lookup(lang: &str, raw: &str) -> Kind {
        .unwrap_or(Kind::Other)
 }

+/// The kind of taint source, used to refine finding severity.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum SourceKind {
+    /// Direct user input (request params, argv, stdin, form data)
+    UserInput,
+    /// Environment variables and configuration
+    EnvironmentConfig,
+    /// File system reads
+    FileSystem,
+    /// Database query results
+    Database,
+    /// Could not determine — treat conservatively
+    Unknown,
+}
+
+/// Infer the source kind from capabilities and callee name.
+///
+/// Matching is a case-insensitive (ASCII) substring search on `callee`.
+/// Categories are tried in a fixed priority order and the first hit wins:
+///
+/// 1. user input
+/// 2. environment / configuration
+/// 3. file system — only when `caps` contains [`Cap::FILE_IO`]
+/// 4. database
+///
+/// Because user input is checked first, any callee containing `query`
+/// (`req.query` as well as `db.query`) is classified as
+/// [`SourceKind::UserInput`], never as [`SourceKind::Database`].
+///
+/// Returns [`SourceKind::Unknown`] when nothing matches.
+pub fn infer_source_kind(caps: Cap, callee: &str) -> SourceKind {
+    // Needles that are implied by a shorter needle in the same list
+    // ("getenv"/"environ" by "env", "fopen" by "open") are left out, as is
+    // "query" for databases: the user-input list already claims it.
+    const USER_INPUT: &[&str] = &[
+        "argv", "stdin", "request", "form", "query", "params", "input", "body", "header",
+        "cookie",
+    ];
+    const ENVIRONMENT: &[&str] = &["env", "config"];
+    const FILE_SYSTEM: &[&str] = &["read", "open"];
+    const DATABASE: &[&str] = &["fetchone", "fetchall", "fetch_row", "execute"];
+
+    let cl = callee.to_ascii_lowercase();
+    let has_any = |needles: &[&str]| needles.iter().any(|needle| cl.contains(*needle));
+
+    if has_any(USER_INPUT) {
+        return SourceKind::UserInput;
+    }
+    if has_any(ENVIRONMENT) {
+        return SourceKind::EnvironmentConfig;
+    }
+    // Distinguish from db reads — file reads typically have FILE_IO cap
+    if has_any(FILE_SYSTEM) && caps.contains(Cap::FILE_IO) {
+        return SourceKind::FileSystem;
+    }
+    if has_any(DATABASE) {
+        // Calls that read results back from a database
+        return SourceKind::Database;
+    }
+
+    SourceKind::Unknown
+}
+
+/// Translate a [`SourceKind`] into the severity used for findings that
+/// originate from it.
+///
+/// File-system and database sources are rated `Medium`; every other kind,
+/// including `Unknown`, is rated `High`.
+pub fn severity_for_source_kind(kind: SourceKind) -> crate::patterns::Severity {
+    use crate::patterns::Severity;
+
+    match kind {
+        SourceKind::FileSystem | SourceKind::Database => Severity::Medium,
+        SourceKind::UserInput | SourceKind::EnvironmentConfig | SourceKind::Unknown => {
+            Severity::High
+        }
+    }
+}
+
+/// A runtime (config-derived) label rule with owned matchers.
+///
+/// Mirrors the built-in static label rules, but owns its matcher strings
+/// because they come from configuration rather than `'static` tables.
+#[derive(Debug, Clone)]
+pub struct RuntimeLabelRule {
+    // Names to match against call/identifier text.  In `classify`, a matcher
+    // ending in `_` is treated as a prefix; any other matcher must match the
+    // whole text or a suffix preceded by `.` or `:` (ASCII case-insensitive).
+    pub matchers: Vec<String>,
+    // Label returned by `classify` when one of `matchers` matches.
+    pub label: DataLabel,
+}
+
+/// Resolve a capability name to its [`Cap`] flag.
+///
+/// The lookup is ASCII case-insensitive.  The special name `"all"` yields
+/// every capability; an unrecognised name yields `None`.
+pub fn parse_cap(s: &str) -> Option<Cap> {
+    let name = s.to_ascii_lowercase();
+    let cap = match name.as_str() {
+        "env_var" => Cap::ENV_VAR,
+        "html_escape" => Cap::HTML_ESCAPE,
+        "shell_escape" => Cap::SHELL_ESCAPE,
+        "url_encode" => Cap::URL_ENCODE,
+        "json_parse" => Cap::JSON_PARSE,
+        "file_io" => Cap::FILE_IO,
+        "fmt_string" => Cap::FMT_STRING,
+        "all" => Cap::all(),
+        _ => return None,
+    };
+    Some(cap)
+}
+
+/// Pre-built analysis rules for a specific language, derived from config.
+/// Built once per file and threaded through the pipeline.
+///
+/// The `Default` value has every list empty; `build_lang_rules` returns it
+/// when the config has no entry for the requested language.
+#[derive(Debug, Clone, Default)]
+pub struct LangAnalysisRules {
+    // Config-defined source / sanitizer / sink rules (see `build_lang_rules`).
+    pub extra_labels: Vec<RuntimeLabelRule>,
+    // Copied verbatim from the language's `terminators` config list.
+    pub terminators: Vec<String>,
+    // Copied verbatim from the language's `event_handlers` config list.
+    pub event_handlers: Vec<String>,
+}
+
+/// Assemble the [`LangAnalysisRules`] for `lang_slug` from `config`.
+///
+/// Returns the default (empty) rule set when the config has no section for
+/// that language.  Config rules whose `cap` is not understood by
+/// [`parse_cap`], or whose `kind` is not one of `"source"`, `"sanitizer"` or
+/// `"sink"`, are skipped.
+pub fn build_lang_rules(
+    config: &crate::utils::config::Config,
+    lang_slug: &str,
+) -> LangAnalysisRules {
+    let lang_cfg = match config.analysis.languages.get(lang_slug) {
+        Some(found) => found,
+        None => return LangAnalysisRules::default(),
+    };
+
+    let mut extra_labels = Vec::new();
+    for rule in lang_cfg.rules.iter() {
+        // Unknown capability name: ignore this rule.
+        let Some(cap) = parse_cap(&rule.cap) else {
+            continue;
+        };
+        let label = match rule.kind.as_str() {
+            "source" => DataLabel::Source(cap),
+            "sanitizer" => DataLabel::Sanitizer(cap),
+            "sink" => DataLabel::Sink(cap),
+            // Unknown rule kind: ignore this rule.
+            _ => continue,
+        };
+        extra_labels.push(RuntimeLabelRule {
+            matchers: rule.matchers.clone(),
+            label,
+        });
+    }
+
+    let terminators = lang_cfg.terminators.clone();
+    let event_handlers = lang_cfg.event_handlers.clone();
+
+    LangAnalysisRules {
+        extra_labels,
+        terminators,
+        event_handlers,
+    }
+}
+
 /// Case-insensitive suffix check (ASCII).
 #[inline]
 fn ends_with_ignore_case(haystack: &[u8], needle: &[u8]) -> bool {
@ -223,29 +364,58 @@ fn starts_with_ignore_case(haystack: &[u8], needle: &[u8]) -> bool {
 /// Try to classify a piece of syntax text.
 /// `lang` is the canonicalised language key ("rust", "javascript", ...).
 ///
+/// If `extra` runtime rules are provided, they are checked **first** (config
+/// takes priority over built-in rules).
+///
 /// **Two-pass matching** -- exact / suffix matches are checked across *all*
 /// rules before any prefix (`foo_`) match is attempted.  This prevents a
 /// greedy prefix like `sanitize_` from shadowing a more specific exact
 /// match like `sanitize_shell`.
-pub fn classify(lang: &str, text: &str) -> Option<DataLabel> {
-    // Lang slugs are already lowercase; try direct lookup first to avoid
-    // allocating a lowercased copy.
+pub fn classify(lang: &str, text: &str, extra: Option<&[RuntimeLabelRule]>) -> Option<DataLabel> {
+    let head = text.split(['(', '<']).next().unwrap_or("");
+    let trimmed = head.trim().as_bytes();
+
+    // ── Check runtime (config) rules first — they take priority ──────
+    if let Some(extras) = extra {
+        // Pass 1: exact / suffix
+        for rule in extras {
+            for raw in &rule.matchers {
+                let m = raw.as_bytes();
+                if m.last() == Some(&b'_') {
+                    continue;
+                }
+                if ends_with_ignore_case(trimmed, m) {
+                    let start = trimmed.len() - m.len();
+                    let ok = start == 0 || matches!(trimmed[start - 1], b'.' | b':');
+                    if ok {
+                        return Some(rule.label);
+                    }
+                }
+            }
+        }
+        // Pass 2: prefix
+        for rule in extras {
+            for raw in &rule.matchers {
+                let m = raw.as_bytes();
+                if m.last() == Some(&b'_') && starts_with_ignore_case(trimmed, m) {
+                    return Some(rule.label);
+                }
+            }
+        }
+    }
+
+    // ── Built-in static rules ────────────────────────────────────────
    let rules = REGISTRY.get(lang).or_else(|| {
        let key = lang.to_ascii_lowercase();
        REGISTRY.get(key.as_str())
    })?;

-    let head = text.split(['(', '<']).next().unwrap_or("");
-    let trimmed = head.trim().as_bytes();
-
    // Pass 1: exact / suffix matches (high confidence)
-    // Matchers are already lowercase &'static str, so we compare with
-    // case-insensitive byte helpers — zero heap allocations.
    for rule in *rules {
        for raw in rule.matchers {
            let m = raw.as_bytes();
            if m.last() == Some(&b'_') {
-                continue; // skip prefix matchers in pass 1
+                continue;
            }
            if ends_with_ignore_case(trimmed, m) {
                let start = trimmed.len() - m.len();
@ -269,3 +439,72 @@ pub fn classify(lang: &str, text: &str) -> Option<DataLabel> {

    None
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn classify_none_extra_unchanged() {
+        // Built-in rule: innerHTML → Sink(HTML_ESCAPE)
+        let result = classify("javascript", "innerHTML", None);
+        assert_eq!(result, Some(DataLabel::Sink(Cap::HTML_ESCAPE)));
+
+        // Non-existent should still be None
+        let result = classify("javascript", "myCustomFunc", None);
+        assert_eq!(result, None);
+    }
+
+    #[test]
+    fn classify_extra_rules_take_priority() {
+        let extras = vec![RuntimeLabelRule {
+            matchers: vec!["escapeHtml".into()],
+            label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
+        }];
+
+        let result = classify("javascript", "escapeHtml", Some(&extras));
+        assert_eq!(result, Some(DataLabel::Sanitizer(Cap::HTML_ESCAPE)));
+
+        // Built-in rules still work
+        let result = classify("javascript", "innerHTML", Some(&extras));
+        assert_eq!(result, Some(DataLabel::Sink(Cap::HTML_ESCAPE)));
+    }
+
+    #[test]
+    fn classify_extra_overrides_builtin() {
+        // Override innerHTML to be a sanitizer (contrived but tests priority)
+        let extras = vec![RuntimeLabelRule {
+            matchers: vec!["innerHTML".into()],
+            label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
+        }];
+
+        let result = classify("javascript", "innerHTML", Some(&extras));
+        assert_eq!(result, Some(DataLabel::Sanitizer(Cap::HTML_ESCAPE)));
+    }
+
+    #[test]
+    fn classify_location_href_is_sink() {
+        let result = classify("javascript", "location.href", None);
+        assert_eq!(result, Some(DataLabel::Sink(Cap::URL_ENCODE)));
+    }
+
+    #[test]
+    fn classify_bare_href_is_none() {
+        // Bare "href" should NOT be a sink — only "location.href" and variants
+        let result = classify("javascript", "href", None);
+        assert_eq!(result, None);
+    }
+
+    /// Every documented capability name parses, matching is case-insensitive,
+    /// and unknown names are rejected.
+    #[test]
+    fn parse_cap_works() {
+        assert_eq!(parse_cap("html_escape"), Some(Cap::HTML_ESCAPE));
+        assert_eq!(parse_cap("shell_escape"), Some(Cap::SHELL_ESCAPE));
+        assert_eq!(parse_cap("url_encode"), Some(Cap::URL_ENCODE));
+        assert_eq!(parse_cap("json_parse"), Some(Cap::JSON_PARSE));
+        assert_eq!(parse_cap("env_var"), Some(Cap::ENV_VAR));
+        assert_eq!(parse_cap("file_io"), Some(Cap::FILE_IO));
+        // The newest capability must be reachable from config as well.
+        assert_eq!(parse_cap("fmt_string"), Some(Cap::FMT_STRING));
+        assert_eq!(parse_cap("Fmt_String"), Some(Cap::FMT_STRING));
+        assert_eq!(parse_cap("all"), Some(Cap::all()));
+        assert_eq!(parse_cap("ALL"), Some(Cap::all()));
+        assert_eq!(parse_cap("invalid"), None);
+        assert_eq!(parse_cap(""), None);
+    }
+}
--- a/src/labels/python.rs
+++ b/src/labels/python.rs
@ -22,6 +22,19 @@ pub static RULES: &[LabelRule] = &[
        matchers: &["sys.argv"],
        label: DataLabel::Source(Cap::all()),
    },
+    LabelRule {
+        matchers: &["open"],
+        label: DataLabel::Source(Cap::all()),
+    },
+    LabelRule {
+        matchers: &[
+            "argparse.parse_args",
+            "urllib.request.urlopen",
+            "requests.get",
+            "requests.post",
+        ],
+        label: DataLabel::Source(Cap::all()),
+    },
    // ───────── Sanitizers ──────────
    LabelRule {
        matchers: &["html.escape"],
--- a/src/lib.rs
+++ b/src/lib.rs
@ -11,6 +11,7 @@ pub mod database;
 pub mod errors;
 pub mod interop;
 pub mod labels;
+pub mod output;
 pub mod patterns;
 pub mod summary;
 pub mod symbol;
@ -25,5 +26,5 @@ use utils::config::Config;
 /// Run a two-pass scan without index (filesystem only).
 /// This is the primary entry point for integration tests.
 pub fn scan_no_index(root: &Path, cfg: &Config) -> NyxResult<Vec<commands::scan::Diag>> {
-    commands::scan::scan_filesystem(root, cfg)
+    commands::scan::scan_filesystem(root, cfg, false)
 }
--- a/src/main.rs
+++ b/src/main.rs
@ -7,6 +7,7 @@ mod database;
 mod errors;
 mod interop;
 mod labels;
+mod output;
 mod patterns;
 mod summary;
 mod symbol;
@ -65,19 +66,28 @@ fn main() -> NyxResult<()> {
    let database_dir = proj_dirs.data_local_dir();
    fs::create_dir_all(database_dir)?;

-    let mut config = Config::load(config_dir)?;
+    let (mut config, config_note) = Config::load(config_dir)?;

    rayon::ThreadPoolBuilder::new()
        .stack_size(config.performance.rayon_thread_stack_size)
        .build_global()
        .expect("set rayon stack size");

-    commands::handle_command(cli.command, database_dir, &mut config)?;
+    let quiet = config.output.quiet || cli.command.is_structured_output();

-    println!(
-        "{} in {:.3}s.",
-        style("Finished").green().bold(),
-        now.elapsed().as_secs_f32()
-    );
+    // Print config note before scanning (human-readable mode only).
+    if let Some(note) = config_note.filter(|_| !quiet) {
+        eprint!("{note}");
+    }
+
+    commands::handle_command(cli.command, database_dir, config_dir, &mut config)?;
+
+    if !quiet {
+        println!(
+            "{} in {:.3}s.",
+            style("Finished").green().bold(),
+            now.elapsed().as_secs_f32()
+        );
+    }
    Ok(())
 }
--- a/src/output.rs
+++ b/src/output.rs
@ -0,0 +1,152 @@
+use crate::commands::scan::Diag;
+use crate::patterns::{self, Severity};
+use once_cell::sync::Lazy;
+use serde_json::{Value, json};
+use std::collections::HashMap;
+use std::path::Path;
+
+/// Global pattern-ID → description table, built on first access from the
+/// pattern registry of every supported language.
+///
+/// When several languages define the same pattern ID, the description from
+/// the language listed first is kept.
+static PATTERN_DESCRIPTIONS: Lazy<HashMap<&'static str, &'static str>> = Lazy::new(|| {
+    const LANGS: &[&str] = &[
+        "rust",
+        "c",
+        "cpp",
+        "java",
+        "go",
+        "php",
+        "python",
+        "ruby",
+        "javascript",
+        "typescript",
+    ];
+
+    let mut descriptions = HashMap::new();
+    for lang in LANGS {
+        for pattern in patterns::load(lang) {
+            // First registration wins; later duplicates are ignored.
+            descriptions.entry(pattern.id).or_insert(pattern.description);
+        }
+    }
+    descriptions
+});
+
+/// Describe a CFG-analysis rule, i.e. one that has no entry in the pattern
+/// registry.  Returns `None` for any ID that is not a known CFG rule.
+fn cfg_rule_description(id: &str) -> Option<&'static str> {
+    const DESCRIPTIONS: &[(&str, &str)] = &[
+        (
+            "cfg-unguarded-sink",
+            "Dangerous sink reachable without prior guard or sanitizer",
+        ),
+        ("cfg-unreachable-sink", "Sink in unreachable code"),
+        (
+            "cfg-auth-gap",
+            "Entry-point handler reaches sink without authentication check",
+        ),
+        (
+            "cfg-error-fallthrough",
+            "Error check does not terminate; dangerous call follows on error path",
+        ),
+        (
+            "cfg-resource-leak",
+            "Resource acquired but not released on all exit paths",
+        ),
+        (
+            "cfg-lock-not-released",
+            "Lock acquired but not released on all exit paths",
+        ),
+    ];
+
+    DESCRIPTIONS
+        .iter()
+        .find(|(rule, _)| *rule == id)
+        .map(|&(_, text)| text)
+}
+
+/// Resolve a rule ID to human-readable text.
+///
+/// Any ID beginning with `taint-` is looked up as `taint-unsanitised-flow`.
+/// Sources are consulted in order: the pattern registry, the CFG rule table,
+/// then the built-in taint description.  When none applies, the ID itself is
+/// returned so callers always have something to display.
+fn rule_description(id: &str) -> &str {
+    const TAINT_RULE: &str = "taint-unsanitised-flow";
+
+    // Taint IDs may carry a per-finding suffix; collapse them to the base rule.
+    let base_id = if id.starts_with("taint-") { TAINT_RULE } else { id };
+
+    PATTERN_DESCRIPTIONS
+        .get(base_id)
+        .copied()
+        .or_else(|| cfg_rule_description(base_id))
+        .or_else(|| (base_id == TAINT_RULE).then_some("Unsanitised data flows from source to sink"))
+        .unwrap_or(id)
+}
+
+/// Map a finding severity onto the SARIF `level` string emitted by
+/// [`build_sarif`]: `Low` → `"note"`, `Medium` → `"warning"`, `High` → `"error"`.
+fn severity_to_level(sev: Severity) -> &'static str {
+    match sev {
+        Severity::Low => "note",
+        Severity::Medium => "warning",
+        Severity::High => "error",
+    }
+}
+
+/// Build a SARIF 2.1.0 JSON value from a list of diagnostics.
+///
+/// * `diags` — findings to report; each becomes one SARIF `result`.
+/// * `scan_root` — paths under this directory are emitted relative to it;
+///   paths outside it are emitted unchanged.
+///
+/// Every diagnostic whose ID starts with `taint-` is reported under the
+/// single rule `taint-unsanitised-flow`.  Rules appear in the driver's
+/// `rules` array in order of first occurrence, and each result's `ruleIndex`
+/// points into that array.
+///
+/// Artifact URIs always use `/` as separator: on platforms whose native
+/// separator is `\`, separators are rewritten, since a backslash is not
+/// valid in a URI reference.
+pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value {
+    /// Collapse per-finding taint IDs onto the shared taint rule ID.
+    fn sarif_rule_id(id: &str) -> &str {
+        if id.starts_with("taint-") {
+            "taint-unsanitised-flow"
+        } else {
+            id
+        }
+    }
+
+    // Deduplicate rule IDs, remembering the index of each in `rule_ids`.
+    let mut rule_ids: Vec<&str> = Vec::new();
+    let mut rule_index_map: HashMap<&str, usize> = HashMap::new();
+
+    for d in diags {
+        let base = sarif_rule_id(&d.id);
+        rule_index_map.entry(base).or_insert_with(|| {
+            rule_ids.push(base);
+            rule_ids.len() - 1
+        });
+    }
+
+    let rules: Vec<Value> = rule_ids
+        .iter()
+        .map(|id| {
+            json!({
+                "id": id,
+                "shortDescription": { "text": rule_description(id) },
+            })
+        })
+        .collect();
+
+    let results: Vec<Value> = diags
+        .iter()
+        .map(|d| {
+            let base = sarif_rule_id(&d.id);
+            // Every base ID was registered by the loop above.
+            let rule_index = rule_index_map[base];
+
+            // Make path relative to scan root if possible
+            let relative = Path::new(&d.path)
+                .strip_prefix(scan_root)
+                .map(|p| p.to_string_lossy().into_owned())
+                .unwrap_or_else(|_| d.path.clone());
+            // Only rewrite where `\` is the separator; elsewhere it can be a
+            // legitimate file-name character.
+            let uri = if std::path::MAIN_SEPARATOR == '\\' {
+                relative.replace('\\', "/")
+            } else {
+                relative
+            };
+
+            json!({
+                "ruleId": base,
+                "ruleIndex": rule_index,
+                "level": severity_to_level(d.severity),
+                "message": { "text": rule_description(base) },
+                "locations": [{
+                    "physicalLocation": {
+                        "artifactLocation": { "uri": uri },
+                        "region": {
+                            "startLine": d.line,
+                            "startColumn": d.col
+                        }
+                    }
+                }]
+            })
+        })
+        .collect();
+
+    json!({
+        "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/main/sarif-2.1/schema/sarif-schema-2.1.0.json",
+        "version": "2.1.0",
+        "runs": [{
+            "tool": {
+                "driver": {
+                    "name": "nyx",
+                    "version": env!("CARGO_PKG_VERSION"),
+                    "informationUri": env!("CARGO_PKG_HOMEPAGE"),
+                    "rules": rules
+                }
+            },
+            "results": results
+        }]
+    })
+}
--- a/src/patterns/javascript.rs
+++ b/src/patterns/javascript.rs
@ -54,9 +54,10 @@ pub const PATTERNS: &[Pattern] = &[
        description: "Assignment to window.location / location.href",
        query: "(assignment_expression
               left: (member_expression
-                        object: (identifier)? @obj
+                        object: (identifier) @obj
+                        (#match? @obj \"^(window|location|document|self|top|parent|frames)$\")
                        property: (property_identifier) @prop
-                        (#match? @prop \"location|href\"))) @vuln",
+                        (#match? @prop \"^(location|href)$\"))) @vuln",
        severity: Severity::High,
    },
    Pattern {
@ -77,7 +78,7 @@ pub const PATTERNS: &[Pattern] = &[
               left: (member_expression
                        property: (property_identifier) @prop
                        (#eq? @prop \"__proto__\"))) @vuln",
-        severity: Severity::High,
+        severity: Severity::Low,
    },
    Pattern {
        id: "weak_hash_md5",
--- a/src/patterns/mod.rs
+++ b/src/patterns/mod.rs
@ -23,14 +23,33 @@ pub enum Severity {
    Low,
 }

+impl Severity {
+    /// Render the severity as a bracketed, colored tag of fixed visible width.
+    ///
+    /// The brackets are left unstyled and only the label is colored.  The tag
+    /// is right-padded with spaces to 8 visible characters — the width of
+    /// `"[MEDIUM]"`, the longest tag — so whatever follows it lines up across
+    /// severities (e.g. `"[HIGH]  "`, `"[LOW]   "`).
+    pub fn colored_tag(self) -> String {
+        const TAG_WIDTH: usize = 8;
+
+        let label = match self {
+            Severity::High => "HIGH",
+            Severity::Medium => "MEDIUM",
+            Severity::Low => "LOW",
+        };
+        let painted = match self {
+            Severity::High => style(label).red().bold(),
+            Severity::Medium => style(label).yellow().bold(),
+            Severity::Low => style(label).cyan().bold(),
+        }
+        .to_string();
+
+        // Pad by visible width: the label plus its two brackets.  The styled
+        // string is not measured because it may contain escape sequences.
+        let padding = " ".repeat(TAG_WIDTH.saturating_sub(label.len() + 2));
+        format!("[{painted}]{padding}")
+    }
+}
+
 impl fmt::Display for Severity {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let s = match *self {
+        let styled = match *self {
            Severity::High => style("HIGH").red().bold().to_string(),
            Severity::Medium => style("MEDIUM").yellow().bold().to_string(),
            Severity::Low => style("LOW").cyan().bold().to_string(),
        };
-        f.write_str(&s)
+        f.write_str(&styled)
    }
 }

--- a/src/summary/mod.rs
+++ b/src/summary/mod.rs
@ -209,6 +209,13 @@ impl GlobalSummaries {
            .unwrap_or_default()
    }

+    /// Absorb every entry of `other` into `self`, consuming `other`.
+    ///
+    /// Each entry goes through [`insert`](Self::insert), so handling of keys
+    /// present on both sides is whatever `insert` does.  Intended as the
+    /// combine step of a parallel fold/reduce.
+    pub fn merge(&mut self, other: GlobalSummaries) {
+        other.by_key.into_iter().for_each(|(key, summary)| {
+            self.insert(key, summary);
+        });
+    }
+
    #[allow(dead_code)]
    pub fn is_empty(&self) -> bool {
        self.by_key.is_empty()
--- a/src/taint/mod.rs
+++ b/src/taint/mod.rs
@ -1,6 +1,6 @@
 use crate::cfg::{Cfg, FuncSummaries, NodeInfo, StmtKind};
 use crate::interop::InteropEdge;
-use crate::labels::{Cap, DataLabel};
+use crate::labels::{Cap, DataLabel, SourceKind};
 use crate::summary::GlobalSummaries;
 use crate::symbol::Lang;
 use petgraph::graph::NodeIndex;
@ -18,18 +18,28 @@ pub struct Finding {
    /// The full path from source to sink through the CFG.
    #[allow(dead_code)] // used for future detailed diagnostics / path display
    pub path: Vec<NodeIndex>,
+    /// The kind of source that originated the taint.
+    pub source_kind: SourceKind,
 }

+/// Order-independent hash of a taint map.
+///
+/// Uses XOR of per-entry hashes so the result is the same regardless of
+/// iteration order — no allocation or sorting required.
+///
+/// An empty map hashes to `0` (the accumulator is returned untouched).
+///
+/// NOTE(review): this replaces a BLAKE3 digest with a much weaker mix. The
+/// `seen` set in `analyse_file` keys on `(node, taint_hash)`, so a collision
+/// would make two different taint maps look like the same state — confirm
+/// that risk is acceptable for the speed-up.
 fn taint_hash(taint: &HashMap<String, Cap>) -> u64 {
-    let mut v: Vec<_> = taint.iter().collect();
-    v.sort_by_key(|(k, _)| k.as_str());
-    let mut hasher = blake3::Hasher::new();
-    for (k, bits) in v {
-        hasher.update(k.as_bytes());
-        hasher.update(&bits.bits().to_le_bytes());
+    let mut h: u64 = 0;
+    for (k, bits) in taint {
+        // Per-entry hash: FNV-1a-style mixing of key bytes + cap bits.
+        let mut entry_h: u64 = 0xcbf2_9ce4_8422_2325; // FNV offset basis
+        for b in k.as_bytes() {
+            entry_h ^= *b as u64;
+            entry_h = entry_h.wrapping_mul(0x0100_0000_01b3); // FNV prime
+        }
+        // Fold the capability bits in as one extra mixing round.
+        entry_h ^= bits.bits() as u64;
+        entry_h = entry_h.wrapping_mul(0x0100_0000_01b3);
+        // XOR is commutative, which is what makes the result order-independent.
+        h ^= entry_h;
    }
-    let digest = hasher.finalize();
-    u64::from_le_bytes(digest.as_bytes()[0..8].try_into().unwrap())
+    h
 }

 /// Resolved summary for a callee — a uniform view regardless of whether the
@ -140,18 +150,21 @@ fn resolve_callee(
    None
 }

+/// Apply taint transfer for a single node, mutating `out` in place.
+///
+/// Callers should clone the taint map before calling if they need
+/// the original state preserved.
 fn apply_taint(
    node: &NodeInfo,
-    taint: &HashMap<String, Cap>,
+    out: &mut HashMap<String, Cap>,
    local_summaries: &FuncSummaries,
    global_summaries: Option<&GlobalSummaries>,
    caller_lang: Lang,
    caller_namespace: &str,
    interop_edges: &[InteropEdge],
-) -> HashMap<String, Cap> {
+) {
    debug!(target: "taint", "Applying taint to node: {:?}", node);
-    debug!(target: "taint", "Taint: {:?}", taint);
-    let mut out = taint.clone();
+    debug!(target: "taint", "Taint: {:?}", out);

    let caller_func = node.enclosing_func.as_deref().unwrap_or("");

@ -236,7 +249,7 @@ fn apply_taint(
                // ── Sink behaviour: handled in the main analysis loop
                //    (checked via node.label or resolved summary) ──

-                return out;
+                return;
            }

            // Unresolved call — fall through to default gen/kill below
@ -264,8 +277,6 @@ fn apply_taint(
            out.insert(d.clone(), combined);
        }
    }
-
-    out
 }

 /// Run taint analysis on a single file's CFG.
@ -309,9 +320,10 @@ pub fn analyse_file(

    while let Some(Item { node, taint }) = q.pop_front() {
        let caller_func = cfg[node].enclosing_func.as_deref().unwrap_or("");
-        let out = apply_taint(
+        let mut out = taint.clone();
+        apply_taint(
            &cfg[node],
-            &taint,
+            &mut out,
            local_summaries,
            global_summaries,
            caller_lang,
@ -398,26 +410,44 @@ pub fn analyse_file(
                }

                path.reverse();
+
+                // Infer the source kind from the source node's label and callee
+                let source_kind = match cfg[source_node].label {
+                    Some(DataLabel::Source(caps)) => {
+                        let callee = cfg[source_node].callee.as_deref().unwrap_or("");
+                        crate::labels::infer_source_kind(caps, callee)
+                    }
+                    _ => SourceKind::Unknown,
+                };
+
                findings.push(Finding {
                    sink: sink_node,
                    source: source_node,
                    path,
+                    source_kind,
                });
            }
        }

-        // enqueue successors
-        for succ in cfg.neighbors(node) {
-            let h = taint_hash(&out);
-            let key = (succ, h);
+        // enqueue successors — cache hashes to avoid recomputation
+        let out_h = taint_hash(&out);
+        let in_h = taint_hash(&taint);
+        let succs: Vec<_> = cfg.neighbors(node).collect();
+        for (i, succ) in succs.iter().enumerate() {
+            let key = (*succ, out_h);
            if !seen.contains(&key) {
                seen.insert(key);
-                pred.insert(key, (node, taint_hash(&taint)));
-                let item = Item {
-                    node: succ,
-                    taint: out.clone(),
+                pred.insert(key, (node, in_h));
+                // Move the map into the last successor to avoid a clone
+                let taint_for_succ = if i + 1 == succs.len() {
+                    std::mem::take(&mut out)
+                } else {
+                    out.clone()
                };
-                q.push_back(item);
+                q.push_back(Item {
+                    node: *succ,
+                    taint: taint_for_succ,
+                });
            }
        }
    }
--- a/src/taint/tests.rs
+++ b/src/taint/tests.rs
@ -20,7 +20,7 @@ fn env_to_arg_is_flagged() {
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

-    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);

    assert_eq!(findings.len(), 1); // exactly one unsanitised Source→Sink
@ -49,7 +49,7 @@ fn taint_through_if_else() {
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

-    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);

    // exactly one path (via the True branch) should be flagged
@ -76,7 +76,7 @@ fn taint_through_while_loop() {
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

-    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
    assert_eq!(findings.len(), 1);
 }
@ -102,7 +102,7 @@ fn taint_killed_by_matching_sanitizer() {
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

-    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
    assert!(
        findings.is_empty(),
@ -131,7 +131,7 @@ fn wrong_sanitizer_preserves_taint() {
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

-    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
    assert_eq!(
        findings.len(),
@ -160,7 +160,7 @@ fn taint_breaks_out_of_loop() {
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

-    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
    assert_eq!(findings.len(), 1);
 }
@ -189,7 +189,7 @@ fn test_two_sources_one_sanitised() {
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

-    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
    assert_eq!(
        findings.len(),
@ -222,7 +222,7 @@ fn test_two_sources_wrong_sanitiser_both_flagged() {
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

-    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
    assert_eq!(
        findings.len(),
@ -250,7 +250,7 @@ fn test_should_not_panic_on_empty_function() {
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

-    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
    assert!(findings.is_empty());
 }
@ -374,7 +374,7 @@ fn parse_rust(src: &[u8]) -> (Cfg, NodeIndex, FuncSummaries) {
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src, None).unwrap();
-    build_cfg(&tree, src, "rust", "test.rs")
+    build_cfg(&tree, src, "rust", "test.rs", None)
 }

 /// Parse Rust source bytes, build CFG, and export cross-file summaries.
@ -1089,7 +1089,7 @@ fn parse_lang(
        "ruby" => "test.rb",
        _ => "test.txt",
    };
-    build_cfg(&tree, src, slug, ext)
+    build_cfg(&tree, src, slug, ext, None)
 }

 #[test]
@ -2206,7 +2206,7 @@ fn return_call_recognized_as_source() {
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
-    let (_, _, summaries) = build_cfg(&tree, src, "rust", "test.rs");
+    let (_, _, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
    let exported = export_summaries(&summaries, "test.rs", "rust");

    let foo = exported
--- a/src/utils/config.rs
+++ b/src/utils/config.rs
@ -2,6 +2,7 @@ use crate::errors::NyxResult;
 use crate::patterns::Severity;
 use console::style;
 use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
 use std::fs;
 use std::path::Path;
 use toml;
@ -55,6 +56,11 @@ pub struct ScannerConfig {

    /// Whether to scan hidden files or not.
    pub scan_hidden_files: bool,
+
+    /// Whether to include findings from non-production paths (tests, vendor,
+    /// benchmarks, etc.) at their original severity.  When false (default),
+    /// findings in these paths are downgraded by one severity tier.
+    pub include_nonprod: bool,
 }
 impl Default for ScannerConfig {
    fn default() -> Self {
@ -87,6 +93,7 @@ impl Default for ScannerConfig {
            one_file_system: false,
            follow_symlinks: false,
            scan_hidden_files: false,
+            include_nonprod: false,
        }
    }
 }
@ -103,7 +110,7 @@ pub struct DatabaseConfig {
    /// The maximum size of the database, in megabytes. TODO: IMPLEMENT
    pub max_db_size_mb: u64,

-    /// Whether to run a VACUUM on startup or not. TODO: IMPLEMENT
+    /// Whether to run a VACUUM on startup or not.
    pub vacuum_on_startup: bool,
 }
 impl Default for DatabaseConfig {
@ -120,10 +127,10 @@ impl Default for DatabaseConfig {
 #[derive(Debug, Serialize, Deserialize, Clone)]
 #[serde(default)]
 pub struct OutputConfig {
-    /// The default output format. TODO: IMPLEMENT others
+    /// The default output format.
    pub default_format: String,

-    /// Whether to print anything to the console or not. TODO: IMPLEMENT
+    /// Whether to print anything to the console or not.
    pub quiet: bool,

    /// The maximum number of results to show.
@ -147,10 +154,10 @@ pub struct PerformanceConfig {
    ///
    /// A depth of `1` includes all files under the current directory, a depth of `2` also includes
    /// all files under subdirectories of the current directory, etc.
-    pub max_depth: Option<usize>, // TODO: IMPLEMENT
+    pub max_depth: Option<usize>,

    /// The minimum depth for reported entries, or `None`.
-    pub min_depth: Option<usize>, // TODO: IMPLEMENT
+    pub min_depth: Option<usize>,

    /// Whether to stop traversing into matching directories.
    pub prune: bool,
@ -190,6 +197,33 @@ impl Default for PerformanceConfig {
    }
 }

+/// A single user-defined label rule from config.
+///
+/// `kind` and `cap` are stored as free-form strings; nothing in this type
+/// restricts them to the values listed on the fields below.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
+pub struct ConfigLabelRule {
+    /// Names this rule applies to, e.g. `"escapeHtml"`.
+    /// NOTE(review): the matching semantics (exact / suffix / pattern) are
+    /// decided by whatever consumes this struct — confirm there.
+    pub matchers: Vec<String>,
+    /// "source", "sanitizer", or "sink"
+    pub kind: String,
+    /// Capability name: "html_escape", "shell_escape", "url_encode", "json_parse",
+    /// "env_var", "file_io", or "all"
+    pub cap: String,
+}
+
+/// Per-language analysis configuration from config file.
+///
+/// Because of `#[serde(default)]`, any of the three lists may be omitted from
+/// the config file and then deserializes as an empty `Vec`.
+#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq, Eq)]
+#[serde(default)]
+pub struct LanguageAnalysisConfig {
+    /// User-defined label rules for this language.
+    pub rules: Vec<ConfigLabelRule>,
+    /// Extra terminator names, e.g. `"process.exit"`.
+    /// NOTE(review): presumably calls that end control flow — confirm
+    /// against the code that consumes this list.
+    pub terminators: Vec<String>,
+    /// Extra event-handler names, e.g. `"addEventListener"`.
+    /// NOTE(review): how these are interpreted is not visible here — confirm.
+    pub event_handlers: Vec<String>,
+}
+
+/// Top-level analysis rules config, keyed by language slug.
+///
+/// In TOML this is the `[analysis.languages.<slug>]` table family, e.g.
+/// `[analysis.languages.javascript]`.
+#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq, Eq)]
+#[serde(default)]
+pub struct AnalysisRulesConfig {
+    /// Language slug (such as `"javascript"`) → that language's settings.
+    pub languages: HashMap<String, LanguageAnalysisConfig>,
+}
+
 #[derive(Debug, Serialize, Deserialize, Clone)]
 #[serde(default)]
 #[derive(Default)]
@ -198,10 +232,16 @@ pub struct Config {
    pub database: DatabaseConfig,
    pub output: OutputConfig,
    pub performance: PerformanceConfig,
+    pub analysis: AnalysisRulesConfig,
 }

 impl Config {
-    pub fn load(config_dir: &Path) -> NyxResult<Self> {
+    /// Load config and return `(config, optional_note)`.
+    ///
+    /// The note is a formatted status message about which config file was
+    /// loaded (or that defaults are in use).  The caller decides whether to
+    /// print it based on output format / quiet mode.
+    pub fn load(config_dir: &Path) -> NyxResult<(Self, Option<String>)> {
        let mut config = Config::default();

        let default_config_path = config_dir.join("nyx.conf");
@ -210,33 +250,33 @@ impl Config {
        }

        let user_config_path = config_dir.join("nyx.local");
-        if user_config_path.exists() {
+        let note = if user_config_path.exists() {
            let user_config_content = fs::read_to_string(&user_config_path)?;
            let user_config: Config = toml::from_str(&user_config_content)?;

            config = merge_configs(config, user_config);

-            println!(
+            Some(format!(
                "{}: Loaded user config from: {}\n",
                style("note").green().bold(),
                style(user_config_path.display())
                    .underlined()
                    .white()
                    .bold()
-            );
+            ))
        } else {
-            println!(
-                "{}: Using {} configuration.\n      Create file in '{}'to customize.\n",
+            Some(format!(
+                "{}: Using {} configuration.\n      Create file in '{}' to customize.\n",
                style("note").green().bold(),
                style("default").bold(),
                style(user_config_path.display())
                    .underlined()
                    .white()
                    .bold()
-            );
-        }
+            ))
+        };

-        Ok(config)
+        Ok((config, note))
    }
 }

@ -262,6 +302,7 @@ fn merge_configs(mut default: Config, user: Config) -> Config {
    default.scanner.one_file_system = user.scanner.one_file_system;
    default.scanner.follow_symlinks = user.scanner.follow_symlinks;
    default.scanner.scan_hidden_files = user.scanner.scan_hidden_files;
+    default.scanner.include_nonprod = user.scanner.include_nonprod;

    // Merge exclusion lists (default ⊔ user), then sort & dedupe
    default
@ -299,6 +340,32 @@ fn merge_configs(mut default: Config, user: Config) -> Config {
    default.performance.scan_timeout_secs = user.performance.scan_timeout_secs;
    default.performance.memory_limit_mb = user.performance.memory_limit_mb;

+    // --- AnalysisRulesConfig ---
+    for (lang, user_lang_cfg) in user.analysis.languages {
+        let entry = default.analysis.languages.entry(lang).or_default();
+
+        // Union-merge rules with dedup
+        for rule in user_lang_cfg.rules {
+            if !entry.rules.contains(&rule) {
+                entry.rules.push(rule);
+            }
+        }
+
+        // Union-merge terminators with dedup
+        for t in user_lang_cfg.terminators {
+            if !entry.terminators.contains(&t) {
+                entry.terminators.push(t);
+            }
+        }
+
+        // Union-merge event_handlers with dedup
+        for eh in user_lang_cfg.event_handlers {
+            if !entry.event_handlers.contains(&eh) {
+                entry.event_handlers.push(eh);
+            }
+        }
+    }
+
    default
 }

@ -318,6 +385,72 @@ fn merge_configs_dedupes_and_keeps_order() {
    );
 }

+// `merge_configs` must union the per-language analysis lists: entries present
+// in both configs (the `escapeHtml` rule, the `process.exit` terminator) end
+// up once, and user-only entries are appended after the default ones.
+#[test]
+fn merge_analysis_rules_unions_and_dedupes() {
+    // Default side: one rule, one terminator, no event handlers.
+    let mut default_cfg = Config::default();
+    default_cfg.analysis.languages.insert(
+        "javascript".into(),
+        LanguageAnalysisConfig {
+            rules: vec![ConfigLabelRule {
+                matchers: vec!["escapeHtml".into()],
+                kind: "sanitizer".into(),
+                cap: "html_escape".into(),
+            }],
+            terminators: vec!["process.exit".into()],
+            event_handlers: vec![],
+        },
+    );
+
+    // User side: repeats the default rule/terminator and adds one new entry
+    // to each list.
+    let mut user_cfg = Config::default();
+    user_cfg.analysis.languages.insert(
+        "javascript".into(),
+        LanguageAnalysisConfig {
+            rules: vec![
+                ConfigLabelRule {
+                    matchers: vec!["escapeHtml".into()],
+                    kind: "sanitizer".into(),
+                    cap: "html_escape".into(),
+                },
+                ConfigLabelRule {
+                    matchers: vec!["sanitizeUrl".into()],
+                    kind: "sanitizer".into(),
+                    cap: "url_encode".into(),
+                },
+            ],
+            terminators: vec!["process.exit".into(), "abort".into()],
+            event_handlers: vec!["addEventListener".into()],
+        },
+    );
+
+    let merged = merge_configs(default_cfg, user_cfg);
+    let js = merged.analysis.languages.get("javascript").unwrap();
+    assert_eq!(js.rules.len(), 2); // deduped
+    assert_eq!(js.terminators, vec!["process.exit", "abort"]);
+    assert_eq!(js.event_handlers, vec!["addEventListener"]);
+}
+
+// Checks that the `[analysis.languages.<slug>]` TOML layout deserializes into
+// `Config.analysis`. Despite the name, only the TOML → struct direction is
+// exercised; nothing is serialized back.
+#[test]
+fn analysis_config_toml_roundtrip() {
+    let toml_str = r#"
+[analysis.languages.javascript]
+terminators = ["process.exit"]
+event_handlers = ["addEventListener"]
+
+[[analysis.languages.javascript.rules]]
+matchers = ["escapeHtml"]
+kind = "sanitizer"
+cap = "html_escape"
+    "#;
+    let cfg: Config = toml::from_str(toml_str).unwrap();
+    let js = cfg.analysis.languages.get("javascript").unwrap();
+    assert_eq!(js.rules.len(), 1);
+    assert_eq!(js.rules[0].matchers, vec!["escapeHtml"]);
+    assert_eq!(js.rules[0].kind, "sanitizer");
+    assert_eq!(js.rules[0].cap, "html_escape");
+    assert_eq!(js.terminators, vec!["process.exit"]);
+    assert_eq!(js.event_handlers, vec!["addEventListener"]);
+}
+
 #[test]
 fn load_creates_example_and_reads_user_overrides() {
    let cfg_dir = tempfile::tempdir().unwrap();
@ -333,7 +466,7 @@ fn load_creates_example_and_reads_user_overrides() {
    "#;
    fs::write(cfg_path.join("nyx.local"), user_toml).unwrap();

-    let cfg = Config::load(cfg_path).expect("Config::load should succeed");
+    let (cfg, _note) = Config::load(cfg_path).expect("Config::load should succeed");

    assert!(cfg_path.join("nyx.conf").is_file());

--- a/src/walk.rs
+++ b/src/walk.rs
@ -61,6 +61,11 @@ fn build_overrides(root: &Path, cfg: &Config) -> ignore::overrides::Override {
            tracing::warn!("invalid exclude‐dir pattern ‘{dir}’: {e}");
        }
    }
+    for file in &cfg.scanner.excluded_files {
+        if let Err(e) = ob.add(&format!("!{file}")) {
+            tracing::warn!("invalid exclude‐file pattern ‘{file}’: {e}");
+        }
+    }

    ob.build().unwrap_or_else(|e| {
        tracing::error!("failed to build ignore overrides: {e}");
@ -83,6 +88,9 @@ pub fn spawn_file_walker(root: &Path, cfg: &Config) -> (Receiver<Paths>, JoinHan
    let follow = cfg.scanner.follow_symlinks;
    let max_bytes = cfg.scanner.max_file_size_mb.unwrap_or(0) * 1_048_576;
    let batch_size = cfg.performance.batch_size;
+    let max_depth = cfg.performance.max_depth;
+    let same_file_system = cfg.scanner.one_file_system;
+    let require_git = cfg.scanner.require_git_to_read_vcsignore;

    // ----- 3  the background walker thread ---------------------------------
    let handle = thread::spawn(move || {
@ -96,11 +104,18 @@ pub fn spawn_file_walker(root: &Path, cfg: &Config) -> (Receiver<Paths>, JoinHan
            "starting directory walk"
        );

-        WalkBuilder::new(root)
+        let mut builder = WalkBuilder::new(root);
+        builder
            .hidden(!scan_hidden)
            .follow_links(follow)
            .threads(workers)
            .overrides(overrides)
+            .same_file_system(same_file_system)
+            .require_git(require_git);
+        if let Some(depth) = max_depth {
+            builder.max_depth(Some(depth));
+        }
+        builder
            .filter_entry(|e| {
                e.file_type()
                    .map(|ft| ft.is_dir() || ft.is_file())