mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
* feat: Introduce function-scoped variable interning for state analysis with new tests and fixtures * feat: Add Phase 26 symbolic execution enhancements with bitwise operator support, abstract interpretation refinements, and new taint analysis tests * feat: Refine state analysis to handle factory-pattern resource returns with mixed-path tests and leak detection enhancements * feat: Add Phase 27 debug views with symbolic execution, abstract interpretation, SSA, and call graph viewers; integrate with debug layout and styles * feat: Add Phase 31 type-qualified symbolic resolution with receiver-based callee disambiguation and testing * feat: Extend symbolic execution with state iteration, enhanced debug views, and debounced input handling * feat: Add Phase 13 resource and auth pattern extensions with new tests and fixtures * feat: Introduce CFG debug graph renderer with compact mode, toolbar, and DAG layout integration * feat: Add Phase 28 encoding and decoding transform modeling with structural symex enhancements and new taint analysis tests * feat: Extend abstract interpretation with type facts and constant value tracking in debug views and server logic * feat: Add linear path handling and witness extraction to symbolic execution with Phase 28 transform mismatch detection * feat: Refine Go auth and sanitizer handling with enhanced rules, state updates, and benchmark improvements * feat: Enable auth-state analysis by default and update relevant tests in benchmark config * test: Update state_tests to reflect default enablement of auth-state analysis and add auth suppression test * docs: update CHANGELOG.md * feat: Introduce per-index taint tracking in `HeapState` with `HeapSlot`, overflow handling, and revised SSA transfers * feat: Introduce C/C++ language labels and refine heap state tracking in SSA transfers * feat: Implement per-index array slot tracking in symbolic heap with overflow collapse * feat: Add implicit definition handling for uninitialized declarations 
in SSA value allocation * feat: Refactor function parameters and constants for improved clarity and maintainability * refactor: Reorder module imports and improve formatting for consistency * refactor: Fix formatting erorrs * refactor: Fix clippy warnings * refactor: Fix fmt warnings (again) * chore: Update dependencies and improve feature configuration * Add comprehensive tests for undertested modules (#36) (COPILOT) * Add comprehensive tests for undertested modules Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/f3fc877e-f386-49ba-9793-fc93d3805083 * Add comprehensive tests for ext, project, walk, and errors modules Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/f3fc877e-f386-49ba-9793-fc93d3805083 --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> * chore: Update dependencies and improve feature configuration * fix: formatting errors in new tests * chore: Update license list in about.toml * chore: made functions input inline * chore: updated cfg graph to take up the full page * chore: add Prettier configuration and update code formatting * Add frontend test suite with Vitest (111 tests) (#37) * Add Vitest test suite for frontend - 111 tests across utils, components, hooks, and graph utilities Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/7cf0dba2-ecff-4740-ba4d-92717e74a0b7 * ci: add frontend test step to CI workflow Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/5bc0ac9f-0a32-4d03-9cb7-7a15aea53fca --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: elicpeter 
<54954007+elicpeter@users.noreply.github.com> * chore: simplify array initialization in test files for consistency * ran typecheck * feat: add AnalysisWorkspace component and integrate it into CfgViewerPage * feat: update routing in AppLayout and improve empty state message in ExplorerPage * feat: enhance scan progress tracking with additional metrics and stages * feat: update license information and add license check script * feat: implement cross-file symbolic execution with callee body persistence * feat: replace dagre graphs with Graphology + ELK + Sigma for more advanced call stack and cfg rendering * feat: ensure CFG function view is scoped to the selected function, preventing bleed into sibling functions * feat: enhance resource tracking with proxy method summaries and improve finding extraction * feat: add terminal function exit detection for accurate resource leak analysis * feat: add warnings for loops and functions without bodies to improve error recovery * feat: update lambda expression handling to ensure proper function classification and control flow * feat: remove bounded formatting/string ops and add JSON.parse sanitizer for improved data handling * feat: add inline return taint analysis and regression tests for improved security checks * feat: add engine version management and migration handling for database schema updates * feat: enhance first_call_ident to skip nested function bodies and add regression tests * feat: enhance callee name resolution with two-segment normalization and disambiguation * feat: add cross-file context flags and debug assertions for taint analysis * feat: refactor taint analysis structure to unify context handling and improve clarity * feat: enhance dead code elimination to preserve Sink, Source, and Sanitizer labels with new tests * docs: updated CHANGELOG.md * fmt: formatting fixes * fix: fixed frontend formatting and lint warnings * fix: optimized ci * fix: optimized ci * Add comprehensive multi-file test coverage to 
Nyx (#38) * Initial checklist for multi-file test suite expansion Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/e550cb88-9767-4442-94d4-101bf5bb0e23 Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> * Add 12 new multi-file test fixtures with TP/TN/near-miss coverage Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/e550cb88-9767-4442-94d4-101bf5bb0e23 Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> * deleted root repo * rebuilt to test for regressions --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> Co-authored-by: elipeter <elicpeter@gmail.com> * feat: enhance import alias resolution and taint tracking * feat: implement security hardening with CSRF protection and path validation * feat: add support for import alias bindings in Python, PHP, and Rust * feat: enhance CFG analysis modes and improve code readability * feat: add detection for parameterized SQL queries to enhance security * feat: add safe internal redirect handling and enhance session destroy validation * feat: implement security improvements by addressing vulnerabilities in execAsync, session management, and file downloads * feat: enhance taint detection by adding support for inline source member expressions in call arguments * feat: implement pre-emission of Source nodes for inline source member expressions in call arguments * feat: add support for Throw statement in control flow and error handling * feat: add debug and echo endpoints with potential information leakage * feat: implement internal redirect suppression and enhance taint detection * feat: implement module alias tracking for dynamic dispatch in JS/TS * feat: add authorization analysis module with Express support * feat: add authorization analysis module with Express support * feat: add tests for admin guard requirements and clean checks in authorization 
analysis * feat: integrate Koa and Fastify frameworks into authorization analysis * feat: add Flask and Django support to authorization analysis module * feat: add support for Rails and Sinatra frameworks in authorization analysis * feat: add support for Axum, ActixWeb, and Rocket frameworks in authorization analysis * feat: add support for ActixWeb, Axum, and Rocket frameworks in authorization analysis * feat: add support for Rails and Sinatra in authorization analysis * chore: add .DS_Store to .gitignore * refactor: simplify conditional checks and improve readability in multiple files * refactor: update usage of Option methods for improved clarity and consistency * refactor: improve code readability by simplifying conditional checks and formatting * refactor: improve code formatting and readability by simplifying conditional checks * refactor: simplify conditional checks and improve readability in multiple files * refactor: simplify conditional checks in axum.rs for improved readability * feat: add CodeQL analysis configuration for enhanced security scanning * test: add comprehensive tests for `src/output.rs` SARIF builder (#39) * chore: start test coverage improvement work Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/cd7ff398-134e-4728-a5e7-0353a0744423 Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> * test: add comprehensive tests for src/output.rs SARIF builder Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/cd7ff398-134e-4728-a5e7-0353a0744423 Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> * refactor: improve code formatting and readability in output.rs --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> Co-authored-by: elipeter <elicpeter@gmail.com> * refactor: improve code formatting and readability in output.rs * Potential fix for code scanning alert no. 
210: Uncontrolled data used in path expression Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> * Potential fix for code scanning alert no. 211: Uncontrolled data used in path expression Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> * refactor: enhance triage file path handling with improved error management and validation * refactor: updated func summaries for richer detail * refactor: update SSA summary extraction to use canonical FuncKey for distinct entries * refactor: enhance callee metadata structure to support arity, receiver, and qualifier for better overload resolution * refactor: add support for keyword arguments in function calls and enhance receiver extraction for method-style calls * refactor: implement new Flask routes for safe and unsafe shell command execution * refactor: separate receiver handling in SSA operations and enhance taint propagation * refactor: improve arity handling by using arg_uses for positional argument count and enhance witness scoring for tainted arguments * refactor: implement auth decorator extraction and classification for multiple languages * refactor: enhance Rust module path resolution and use map handling for cross-file disambiguation * refactor: introduce CalleeQuery struct for structured callee resolution and enhance resolver logic * refactor: implement same-file identity collision handling for `runTask` to ensure correct resolver behavior * refactor: standardize default struct initialization across multiple files * feat: add scripts for formatting checks and auto-fixes with test summaries * refactor: simplify character splitting and enhance namespace qualifier handling * refactor: improve documentation clarity and enhance code readability in resolver logic * refactor: replace default struct initialization with explicit field assignments for clarity * feat: enhance anonymous function naming by 
deriving context-based bindings * refactor: streamline match expressions for improved readability and performance * refactor: streamline match expressions for improved readability and performance * refactor: replace loop with while let for improved clarity and performance * feat: add SSA constant propagation support to analysis context for improved accuracy * feat: add SSA constant propagation support to analysis context for improved accuracy * feat: implement shell metacharacter validation and bounded-length checks in Rust analysis * feat: add static map analysis for command injection suppression and type safety * refactor: simplify match statements and reduce line breaks for improved readability * feat(summary): phase 1/5 SinkSite data model for primary sink-location attribution Introduce SinkSite (file_rel, line, col, snippet, cap) carrying the primary sink source-location through function summaries. Swap SsaFuncSummary.param_to_sink and FuncSummary.param_to_sink from a coarse Cap map to a deduped SmallVec<[SinkSite; 1]> per parameter, with a backward-compatible cap_sites() helper and serde defaults so pre-phase-1 on-disk rows continue to deserialise cleanly. Extraction: SinkSiteLocator bundles the tree/bytes/file_rel needed by extract_ssa_func_summary; ParsedFile::extract_ssa_artifacts wires the locator in for the persisted pass-1 path, while pass-2 intra-file transient summaries fall back to cap-only sites (behavior unchanged). Merge: GlobalSummaries::insert now unions sink sites with (file_rel, line, col, cap) dedup via shared union_param_sink_sites helper. Database: JSON-serialised summary columns carry the new shape automatically; no schema change needed. Phase 2 will consume SinkSite in build_taint_diag() to overwrite the caller-site Finding.line with the callee's sink line when resolved via summary. Phase 1 keeps behavior unchanged: scanning tests/benchmark/corpus/rust/cmdi/cmdi_indirect.rs still produces the same (wrong) line 10 finding. 
Adds round-trip tests covering SinkSite solo, SsaFuncSummary with sink sites, legacy-JSON default handling for both summary types, and merge dedup. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * feat(taint): phase 2/5 thread SinkSite into SsaTaintEvent and Finding Plumb Phase 1's SinkSite through the event pipeline into Findings, no output change yet. SsaTaintEvent gains `primary_sink_site: Option<SinkSite>`; when the main or callback sink-emission path has non-empty `param_to_sink_sites`, filter to sites whose `(line != 0) && (cap ∩ sink_caps != ∅)` and emit one event per distinct site — the multi-primary collapse keeps each downstream Finding single-primary. Resolution: ResolvedSummary and SinkInfo gain mirror `param_to_sink_sites` fields, populated from `SsaFuncSummary.param_to_sink` (SSA + callback paths) and `FuncSummary.param_to_sink` (global paths). Label, local-summary, and interop resolution paths leave the field empty — they only ever had cap-level info to begin with. Finding: new `primary_location: Option<SinkLocation>` with `file_rel/line/col`. `ssa_events_to_findings` maps `event.primary_sink_site` → `Finding.primary_location`, filtering cap-only sites (`line == 0`) to `None` so the (0,0) sentinel never leaks to formatters. Dedup key extended with the primary location so multi-site events aren't collapsed back together. Invariants (debug_assert!): * every SinkSite reaching emission has `line != 0 && cap ∩ sink_caps != ∅` — enforced by the pick_primary_sink_sites* filters; * every populated Finding.primary_location has `line != 0` AND non-empty `file_rel` — the cap-only → None translation upstream guarantees this. Deliberately independent of `uses_summary`: that flag tracks whether the *taint chain* used a summary, whereas primary attribution requires only that the *sink* itself was summary-resolved. 
A local source reaching a cross-file sink produces `uses_summary=false` alongside a populated primary_location — documented on Finding.primary_location, covered by `cross_file_sink_finding_carries_primary_location`. build_taint_diag, SARIF/JSON/explanation formatters, and the benchmark scorer remain untouched: finding.line still comes from `cfg_graph[finding.sink]`, so cmdi_indirect.rs still reports line 10 and the benchmark's rs-cmdi-003 row still shows FN in the LOC column. Tests: `cross_file_sink_finding_carries_primary_location` (proves plumbing via a synthetic FuncSummary carrying a SinkSite at 42:5) and `cross_file_sink_cap_only_site_leaves_primary_location_none` (regression guard against cap-only sites surfacing). All 1566 lib tests + integration tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * feat(output): phase 3/5 consume primary sink location in diag + SARIF When a finding's primary_location (populated in phase 2 from a callee summary's SinkSite) names the dangerous instruction inside a callee body, attribute the diagnostic line to that location instead of the caller's call site. The call site is demoted to a Call step in flow_steps, and a synthetic Sink step at the primary location is appended so analysts still see the full trace. Changes: - Add scan_root parameter to build_taint_diag so file_rel can be resolved back to an absolute path via a shared resolve_file_rel helper. Empty file_rel (single-file scans where namespace == "") resolves to the file under analysis. - Extend SinkLocation with snippet, carried from the upstream SinkSite so the formatter needs no second file read. - Relax the ssa_events_to_findings debug_assert to allow empty file_rel, which is valid when scan root equals the file itself. - SARIF: emit data-flow as codeFlows[0].threadFlows[0].locations[]; locations[0] already reflects the primary sink position via the updated diag line/col. 
Acceptance: scan on tests/benchmark/corpus/rust/cmdi/cmdi_indirect.rs now reports line 5 (Command::new) as the primary sink, with the call site at line 10 visible in flow_steps. Two expect.json fixtures updated (must_match line_range widened): - javascript/taint/context_sensitive_call: 12-14 -> 7-14 (line 8 is the real sink inside run()). - rust/cfg/closure_async: 10-10 -> 10-11 (line 11 is Command::new inside the closure). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * feat(bench): phase 4/5 validate primary sink attribution across corpus Extend the benchmark scorer and ground truth to lock in phase 3's primary-location behavior, and add fixtures that exercise the new capability end-to-end. Scorer (tests/benchmark_test.rs): - Add optional `expected_call_site_lines: Option<Vec<[usize; 2]>>` on Case. When present, score_location_level additionally requires at least one flow_step in the finding's evidence trace to fall within ±2 of the call-site range. When absent, the check is skipped — fully forward-compatible with existing fixtures. - Retain ±2 tolerance on expected_sink_lines (compared against the now-primary Diag.line post-phase-3). Ground truth edits: - rs-cmdi-cross-001: expected_sink_lines [8,8] -> [9,9]. Line 8 is the transform::wrap call site (a cross-file propagator, not a sink); line 9 is Command::new, the real sink. The ±2 tolerance happened to mask this stale attribution but it was semantically wrong — phase 4 is the right time to correct it. Also adds expected_call_site_lines [8,8] so the new field is exercised on an existing cross-file case. - rs-cmdi-003: adds expected_call_site_lines [10,10] (run_cmd call). This fixture's sink (Command::new inside run_cmd at line 5) was the motivating case for phases 1-3; adding the call-site assertion guards against regression to caller-line attribution. 
New fixtures: - rust/cmdi/cmdi_indirect_multisink.rs (rs-cmdi-009): helper run_both takes two tainted params and invokes two Command sinks on consecutive lines. Locks in that primary line lands inside the helper (lines 5-6), not at the caller (line 12). Notes document that SinkSite is currently one-per-callee so both findings today collapse onto the first sink; expected_sink_lines=[5,6] and expected_call_site_lines=[12,12] stay valid either way. - python/cmdi/cross_indirect_sink/{app.py,helper.py} (py-cmdi-cross- 004): sink os.system lives in helper.py (cross-file), caller in app.py reads env source and calls run_cmd. Verifies phase 3's cross-file primary attribution: Diag.path = helper.py, Diag.line = 5, with app.py:7 recorded in flow_steps as a Call step. Acceptance: - `cargo test --test benchmark_test -- --ignored --nocapture` passes. - rs-cmdi-003 is TP/TP/TP (the target flip FN->TP at LOC). All pre-existing TP/TP/TP fixtures remain TP/TP/TP; 2 new fixtures are TP/TP/TP. - Aggregate rule-level: TP=158 FP=10 FN=1 TN=97, P=0.940 R=0.994 F1=0.966 on the 266-case corpus (was TP=156 FP=10 FN=1 TN=97 on 264 pre-phase-4, delta is the +2 new cases both resolving TP). - Full `cargo test` green (1566 lib tests + all integration tests). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * feat(taint): phase 5/5 lock Finding.primary_location contract via regression test Add a regression test in src/taint/ssa_transfer.rs that wires up a synthetic SsaFuncSummary with a SinkSite at other.rs:42:10 and drives the three emission stages (pick_primary_sink_sites → emit_ssa_taint_events → ssa_events_to_findings) against a minimal caller SSA body. Asserts the resulting Finding.primary_location is exactly that triple. The existing integration tests in src/taint/tests.rs cover the coarse FuncSummary path end-to-end through analyse_file. 
This test locks in the lower-level SSA-side plumbing so a future refactor that silently drops the site between pick → emit → findings fails here rather than only at the benchmark layer. Also refreshes tests/benchmark/results/latest.json (timestamp only; rs-cmdi-003 remains TP/TP/TP and the aggregate P/R/F1 are unchanged from phase 4). Closes the primary sink-location attribution feature (phases 1-5/5): * Phase 1 — SinkSite data model on summaries. * Phase 2 — SinkSite threaded into SsaTaintEvent and Finding. * Phase 3 — diag + SARIF consume primary_location. * Phase 4 — benchmark validates primary_call_site_lines across corpus. * Phase 5 — regression test locks the event→finding contract. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * refactor: clean up formatting and improve readability in multiple files * refactor: simplify type definition for deduplication key in findings * test(harness): add must_not_match expectation for FP regression guards Extends ExpectedFinding with must_not_match field that asserts a diagnostic must NOT fire — presence is a hard failure. Non-consuming scan so it coexists with must_match entries on the same rule_id. Adds forbidden_violations accumulator and updates summary line. 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * feat(regression): update expectations to ensure must_not_match for various taint and resource leak rules * feat: implement auto-seeding for JS/TS handler parameters to enhance taint tracking * feat: update switch statement handling to improve control flow analysis * feat: implement promisify alias handling for JS/TS to enhance taint tracking * feat: enhance taint tracking by refining expectation handling and adding mode filtering * feat: refine SQL handling in stream processing and enhance auto-seeding for handler parameters * feat: update taint tracking rules to enforce full mode matching and improve flow analysis * feat: enhance Ruby subshell handling to improve taint tracking and flow analysis * feat: update xss_response expectations to refine taint flow analysis and enhance regression guarding * feat: refine framework detection and update expectation handling for Echo and Sinatra * feat: implement max_count for taint tracking expectations and deduplicate findings * feat: add strict_unexpected handling for taint-unsanitised-flow in expectation files * feat: enhance deduplication of taint-unsanitised-flow findings by collapsing based on line and severity * feat: add strict_unexpected handling for taint-unsanitised-flow in multiple expectation files * feat: add structural invariant checks for SSA bodies * feat: ensure deterministic phi emission order using BTreeSet * feat: enhance handling of terminators to ensure authoritative flow through successor edges * feat: enhance Goto terminator handling to ensure all successors are marked executable * feat: refactor code for improved readability and organization * feat: simplify predicate checks and enhance readability in SSA handling * feat: implement per-file parse timeout and enhance file size handling * feat: migrate analysis engine toggles from environment variables to configuration file * feat: remove unnecessary whitespace in 
hostile_input_tests.rs * feat: remove unnecessary whitespace in hostile_input_tests.rs * feat: update dependencies and enhance documentation on language maturity * feat: enhance security headers and improve request body limits * feat: implement sink capability bits for deduplication and enhance evidence tagging * feat: implement dynamic activation handling for gated sinks and enhance validation logic * feat: enhance configuration documentation and clarify inline analysis cache behavior * feat: implement panic recovery during analysis to continue scans past errors * feat: add expectations configuration for taint analysis and performance metrics * feat: enhance error handling and logging during file reading and mutex locking * feat: add cross-file body loading tests and plumbing for CF-1 phase * feat: implement cross-file k=1 context-sensitive inline taint analysis with new tests and fixtures * feat: implement indexed-scan parity in cross-file inline analysis with new dropdown and copy functionality * feat: enhance classification span handling in CFG and AST for improved source attribution * feat: add new Express routes for handling user input and telemetry data * feat: implement ternary expression handling in CFG with diamond structure for JS/TS * feat: implement Phase CF-3 abstract-domain transfer channels in summaries * feat: add support for string-prefix transfer in cross-file calls and update tests * docs: reduce RESULTS.md doc size * feat: implement Phase CF-4 per-return-path summary decomposition with tests * feat: update parameter handling in pass1 and refactor SsaFuncSummary initialization * feat: implement Phase CF-5 for cross-file SCC joint fixed-point convergence with new flags and tests * feat: implement Phase CF-6 with parameter-granularity points-to summaries and associated tests * refactor: update comments and documentation for clarity and consistency * style: format code for consistency and readability * refactor: simplify verdict handling and 
improve edge checking logic * refactor: optimize path and identifier collection by avoiding unnecessary cloning * chore: update Cargo.toml for Rust version 1.85 and add ignored files; modify CHANGELOG and README for clarity on state analysis defaults * refactor: update documentation and improve clarity in configuration files * refactor: update documentation and improve clarity in configuration files * feat: add JS/TS pass-2 convergence tests and expectations configuration * feat: add Phase 5 regression tests for inline cache origin attribution and update related logic * feat: implement Phase 7 deduplication and alternative path linking for taint findings * feat: implement structural DFS index for anonymous functions and update naming conventions * feat: add Phase 8 regression tests for container-element taint in JS and Python * feat: add engine-depth profiles and explain-engine option for CLI * feat: update expectations and add new README fixtures for multi-file scan regression * feat: implement Phase 11 callback-alias and factory patterns with regression tests * feat: implement Terminator::Switch for multi-way dispatch and add regression tests * feat: add real-CVE benchmark fixtures for CVE-2023-48022, CVE-2019-14939, and CVE-2023-26159 with corresponding patched variants * refactor: extract cfg and ssa_transfer to submodules * refactor: cargo fmt * refactor: remove unnecessary blank line in cfg_tests.rs * refactor: remove unnecessary planning file * chore: update Rust version to 1.88 and bump dependencies in Cargo files * feat: enhance triage UI with new layout and controls, update README for clarity * feat: enhance triage UI with new layout and controls, update README for clarity * chore: remove outdated section from README for version 0.5.0 * docs: improve clarity and consistency in README content * chore: add "GPL-3.0-or-later" to license options in about.toml * chore: update license handling in about.toml and check-licenses.mjs * style: format code for 
improved readability in TriagePage component * style: format code for improved readability in TriagePage component * chore: enhance license handling and improve body_id scoping in seed lookup * feat: introduce owner and parent body IDs for enhanced seed scoping * feat: implement direction-aware engine provenance with new CLI flag for strict CI gating * feat: add Undef SSA operation for improved control-flow handling * style: improve code formatting for consistency and readability in multiple files * feat: add 16-function chain SCC across multiple files for enhanced analysis * style: simplify code formatting for improved readability in multiple files * fix: update CapHitReason default implementation and improve README clarity * docs: enhance README with detailed explanations of taint analysis and limitations * docs: refine README for clarity and consistency in taint analysis section * style: improve code formatting for better readability in NewScanModal and scans * fix: update cargo-about command to use --offline for deterministic license generation * fix: update cargo-about command to use --offline for deterministic license generation * ci: add step to prime cargo registry cache for deterministic license generation * feat: add support for non-sink collections in authorization analysis * feat: enhance authorization checks with row-level ownership equality and binding tracking * feat: implement self-scoped user handling and enhance ownership checks * refactor: simplify assertions and formatting in authorization analysis tests * fix: normalize line endings in THIRDPARTY-LICENSES.html generation and update README with AI disclosure * docs: update AI disclosure section for clarity and conciseness * feat: add AI Contribution Policy and update contributing guidelines for AI assistance disclosure * feat: enhance authorization analysis with SSA-derived variable type classification * feat: implement auth_finding_to_diag function for enhanced security diagnostics * feat: add 
args_value_refs to CallSite struct for enhanced argument tracking * feat: add args_value_refs to CallSite struct for enhanced argument tracking * feat: add direction-aware engine provenance with LossDirection classification and new CLI flag * feat: simplify strip_cap_from_call_args call by removing unnecessary line breaks * feat: enhance error message handling in cli_validation_tests for better Windows compatibility * feat: optimize release profile settings in Cargo.toml and update CodeQL configuration * feat: enhance release build process with SBOM generation and SLSA provenance * feat: update actions/checkout and actions/setup-node to v6, enhance CLI options, and improve auth-check summaries * feat: introduce PathFact handling for path safety checks and rejection logic * feat: introduce PathFact handling for path safety checks and rejection logic * feat: update benchmark data and enhance path sanitization logic with new safety checks * feat: document AI assistance in frontend UI development and human review process * feat: add return path facts for enhanced path safety checks and update documentation * chore: update release date for version 0.5.0 in CHANGELOG.md * chore: clean up ci.yml by removing outdated comments and clarifying steps * feat: implement cross-language path sanitizers and validators for enhanced security * feat: enhance SSA value usage tracking by including block terminators and improve path safety checks * feat: enhance switch statement handling by adding per-case path constraints and support for exclusive cases * refactor: simplify conditional formatting and improve code readability in executor and lower modules * feat: add vulnerable examples for various languages demonstrating authentication and sanitization issues * feat: enhance actor context recognition for self-actor identifiers and add support for global non-sink receivers * feat: enhance actor context recognition for self-actor identifiers and add support for global non-sink receivers * 
feat: add transform classifiers for Java, Go, and Ruby with corresponding tests * refactor: clarify comments on reassign-to-constant idiom and sink behavior in guards.rs --------- Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com> Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
1323 lines
49 KiB
Rust
1323 lines
49 KiB
Rust
//! Structured evidence and confidence types for scan diagnostics.
//!
//! These types capture the provenance of findings (source locations,
//! sanitizer/guard info, state-machine transitions) in a structured form
//! that can be serialized to JSON and consumed by ranking, filtering,
//! and downstream tooling.

#![allow(clippy::collapsible_if)]

use crate::commands::scan::Diag;
use crate::patterns::Severity;
use serde::{Deserialize, Serialize};
use std::fmt;
use std::str::FromStr;

// ─────────────────────────────────────────────────────────────────────────────
// Confidence
// ─────────────────────────────────────────────────────────────────────────────

/// Confidence level for a diagnostic finding.
|
||
///
|
||
/// Ordered Low < Medium < High so that `>=` comparisons work naturally
|
||
/// for filtering (e.g. `--min-confidence medium` keeps Medium and High).
|
||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
|
||
pub enum Confidence {
|
||
Low,
|
||
Medium,
|
||
High,
|
||
}
|
||
|
||
impl fmt::Display for Confidence {
|
||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||
match self {
|
||
Self::Low => write!(f, "Low"),
|
||
Self::Medium => write!(f, "Medium"),
|
||
Self::High => write!(f, "High"),
|
||
}
|
||
}
|
||
}
|
||
|
||
impl FromStr for Confidence {
|
||
type Err = String;
|
||
|
||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||
match s.to_ascii_lowercase().as_str() {
|
||
"low" => Ok(Self::Low),
|
||
"medium" | "med" => Ok(Self::Medium),
|
||
"high" => Ok(Self::High),
|
||
_ => Err(format!(
|
||
"unknown confidence level: {s:?} (expected low, medium, high)"
|
||
)),
|
||
}
|
||
}
|
||
}
|
||
|
||
// ─────────────────────────────────────────────────────────────────────────────
// Flow Steps
// ─────────────────────────────────────────────────────────────────────────────

/// The kind of operation at a flow step.
|
||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||
#[serde(rename_all = "snake_case")]
|
||
pub enum FlowStepKind {
|
||
Source,
|
||
Assignment,
|
||
Call,
|
||
Phi,
|
||
Sink,
|
||
}
|
||
|
||
impl fmt::Display for FlowStepKind {
|
||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||
match self {
|
||
Self::Source => write!(f, "source"),
|
||
Self::Assignment => write!(f, "assignment"),
|
||
Self::Call => write!(f, "call"),
|
||
Self::Phi => write!(f, "phi"),
|
||
Self::Sink => write!(f, "sink"),
|
||
}
|
||
}
|
||
}
|
||
|
||
/// A single step in a taint flow path (display-ready).
|
||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||
pub struct FlowStep {
|
||
pub step: u32,
|
||
pub kind: FlowStepKind,
|
||
pub file: String,
|
||
pub line: u32,
|
||
pub col: u32,
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
pub snippet: Option<String>,
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
pub variable: Option<String>,
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
pub callee: Option<String>,
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
pub function: Option<String>,
|
||
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
|
||
pub is_cross_file: bool,
|
||
}
|
||
|
||
// ─────────────────────────────────────────────────────────────────────────────
// Symbolic verdict
// ─────────────────────────────────────────────────────────────────────────────

/// Symbolic verification verdict for a taint path.
|
||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||
#[serde(rename_all = "snake_case")]
|
||
pub enum Verdict {
|
||
/// Constraint solver confirmed the path is feasible.
|
||
Confirmed,
|
||
/// Constraint solver proved the path is infeasible.
|
||
Infeasible,
|
||
/// Constraint solver could not determine feasibility.
|
||
Inconclusive,
|
||
/// No symbolic analysis was attempted for this finding.
|
||
NotAttempted,
|
||
}
|
||
|
||
/// Summary of symbolic constraint analysis for a finding.
|
||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||
pub struct SymbolicVerdict {
|
||
/// The outcome of symbolic path feasibility analysis.
|
||
pub verdict: Verdict,
|
||
/// Number of path constraints checked during analysis.
|
||
#[serde(default)]
|
||
pub constraints_checked: u32,
|
||
/// Number of distinct paths explored from source to sink.
|
||
#[serde(default)]
|
||
pub paths_explored: u32,
|
||
/// Human-readable witness or proof sketch.
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
pub witness: Option<String>,
|
||
/// Interprocedural call chains leading to callee-internal sinks.
|
||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||
pub interproc_call_chains: Vec<Vec<String>>,
|
||
/// Cutoff/fallback reasons that limited analysis precision.
|
||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||
pub cutoff_notes: Vec<String>,
|
||
}
|
||
|
||
// ─────────────────────────────────────────────────────────────────────────────
// Evidence
// ─────────────────────────────────────────────────────────────────────────────

/// Structured evidence for a diagnostic finding.
|
||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||
pub struct Evidence {
|
||
/// Where tainted data originated.
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
pub source: Option<SpanEvidence>,
|
||
|
||
/// Where the dangerous operation happens.
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
pub sink: Option<SpanEvidence>,
|
||
|
||
/// Validation guards protecting this path.
|
||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||
pub guards: Vec<SpanEvidence>,
|
||
|
||
/// Sanitizers applied to this path.
|
||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||
pub sanitizers: Vec<SpanEvidence>,
|
||
|
||
/// State-machine evidence (resource lifecycle / auth).
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
pub state: Option<StateEvidence>,
|
||
|
||
/// Free-form notes for ranking and display.
|
||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||
pub notes: Vec<String>,
|
||
|
||
/// Kind of taint source (structured; replaces "source_kind:..." in notes).
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
pub source_kind: Option<crate::labels::SourceKind>,
|
||
|
||
/// Number of SSA blocks between source and sink.
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
pub hop_count: Option<u16>,
|
||
|
||
/// Whether this finding was resolved via a cross-function summary.
|
||
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
|
||
pub uses_summary: bool,
|
||
|
||
/// Number of matching capability bits between source and sink.
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
pub cap_specificity: Option<u8>,
|
||
|
||
/// Step-by-step taint flow from source to sink.
|
||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||
pub flow_steps: Vec<FlowStep>,
|
||
|
||
/// Human-readable explanation of the finding.
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
pub explanation: Option<String>,
|
||
|
||
/// Reasons why confidence is not higher.
|
||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||
pub confidence_limiters: Vec<String>,
|
||
|
||
/// Symbolic constraint analysis verdict for this finding's taint path.
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
pub symbolic: Option<SymbolicVerdict>,
|
||
|
||
/// Resolved sink capability bits (u16 from `Cap::bits()`).
|
||
///
|
||
/// Used by deduplication to distinguish findings that share a
|
||
/// `(path, line, severity)` key but target different sinks (e.g.
|
||
/// `sink_sql(x); sink_shell(x);` on the same line). 0 when the sink
|
||
/// caps could not be resolved at the CFG node (e.g. pure summary
|
||
/// resolution where the caller's sink node carries no label).
|
||
#[serde(default, skip_serializing_if = "is_zero_u16")]
|
||
pub sink_caps: u16,
|
||
|
||
/// Engine provenance notes attached to this finding (e.g. "worklist
|
||
/// iteration budget was hit before convergence"), propagated from
|
||
/// [`crate::taint::Finding::engine_notes`]. Empty for typical
|
||
/// under-budget findings and skipped during serialization in that case.
|
||
#[serde(default, skip_serializing_if = "smallvec::SmallVec::is_empty")]
|
||
pub engine_notes: smallvec::SmallVec<[crate::engine_notes::EngineNote; 2]>,
|
||
}
|
||
|
||
/// Serde `skip_serializing_if` predicate: `true` when the value is zero.
///
/// Takes a reference because serde passes the field by reference.
fn is_zero_u16(v: &u16) -> bool {
    *v == 0
}

impl Evidence {
|
||
/// Returns `true` if the evidence contains no useful data.
|
||
pub fn is_empty(&self) -> bool {
|
||
self.source.is_none()
|
||
&& self.sink.is_none()
|
||
&& self.guards.is_empty()
|
||
&& self.sanitizers.is_empty()
|
||
&& self.state.is_none()
|
||
&& self.notes.is_empty()
|
||
&& self.source_kind.is_none()
|
||
&& self.hop_count.is_none()
|
||
&& !self.uses_summary
|
||
&& self.cap_specificity.is_none()
|
||
&& self.flow_steps.is_empty()
|
||
&& self.explanation.is_none()
|
||
&& self.confidence_limiters.is_empty()
|
||
&& self.symbolic.is_none()
|
||
&& self.sink_caps == 0
|
||
&& self.engine_notes.is_empty()
|
||
}
|
||
}
|
||
|
||
/// A source-location evidence span.
|
||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||
pub struct SpanEvidence {
|
||
pub path: String,
|
||
pub line: u32,
|
||
pub col: u32,
|
||
/// One of: `"source"`, `"sink"`, `"guard"`, `"sanitizer"`.
|
||
pub kind: String,
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
pub snippet: Option<String>,
|
||
}
|
||
|
||
/// Evidence from a state-machine analysis (resource lifecycle / auth).
|
||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||
pub struct StateEvidence {
|
||
/// The state machine: `"resource"` or `"auth"`.
|
||
pub machine: String,
|
||
/// Variable name if available.
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
pub subject: Option<String>,
|
||
/// State before the event.
|
||
pub from_state: String,
|
||
/// State after the event.
|
||
pub to_state: String,
|
||
}
|
||
|
||
// ─────────────────────────────────────────────────────────────────────────────
// compute_confidence
// ─────────────────────────────────────────────────────────────────────────────

/// Derive a confidence level for `diag` based on its rule ID, severity,
|
||
/// evidence, and analysis kind.
|
||
///
|
||
/// This is called as a post-pass after all findings are collected; findings
|
||
/// that already have a confidence set (e.g. from CFG analysis) are preserved.
|
||
///
|
||
/// When the finding carries engine provenance notes whose
|
||
/// [`crate::engine_notes::LossDirection`] is `OverReport` or `Bail`,
|
||
/// the computed confidence is capped at `Medium` regardless of the
|
||
/// points-based taint score. `OverReport` means precision was widened
|
||
/// (validation guards may have been lost, so the finding is more
|
||
/// likely to be a false positive); `Bail` means analysis of the body
|
||
/// aborted before producing a trustworthy result. `UnderReport` notes
|
||
/// (e.g. `WorklistCapped`) do *not* cap confidence — the reported flow
|
||
/// is still real, just surrounded by an incomplete result set.
|
||
pub fn compute_confidence(diag: &Diag) -> Confidence {
|
||
// Degraded analysis caps confidence
|
||
if let Some(ev) = &diag.evidence
|
||
&& ev.notes.iter().any(|n| n.starts_with("degraded:"))
|
||
{
|
||
return Confidence::Low;
|
||
}
|
||
|
||
let id = &diag.id;
|
||
|
||
let base = if id.starts_with("taint-") {
|
||
compute_taint_confidence(diag)
|
||
} else if id.starts_with("state-") {
|
||
match id.as_str() {
|
||
"state-use-after-close" => Confidence::High,
|
||
"state-double-close" => Confidence::High,
|
||
"state-unauthed-access" => Confidence::High,
|
||
"state-resource-leak" => Confidence::Medium,
|
||
"state-resource-leak-possible" => Confidence::Low,
|
||
_ => Confidence::Medium,
|
||
}
|
||
} else if id.starts_with("cfg-") {
|
||
// If CFG conversion already set confidence, preserve it
|
||
diag.confidence.unwrap_or(Confidence::Medium)
|
||
} else if diag.severity == Severity::High {
|
||
// AST patterns: High severity → Medium confidence, else Low
|
||
Confidence::Medium
|
||
} else {
|
||
Confidence::Low
|
||
};
|
||
|
||
apply_engine_notes_cap(diag, base)
|
||
}
|
||
|
||
/// Cap `base` at `Medium` when the finding carries any engine note
|
||
/// whose direction is [`crate::engine_notes::LossDirection::OverReport`]
|
||
/// or [`crate::engine_notes::LossDirection::Bail`].
|
||
///
|
||
/// Returns `base` unchanged when no evidence is present, no notes are
|
||
/// attached, or only `Informational` / `UnderReport` notes are present.
|
||
fn apply_engine_notes_cap(diag: &Diag, base: Confidence) -> Confidence {
|
||
let Some(ev) = &diag.evidence else {
|
||
return base;
|
||
};
|
||
let Some(worst) = crate::engine_notes::worst_direction(&ev.engine_notes) else {
|
||
return base;
|
||
};
|
||
match worst {
|
||
crate::engine_notes::LossDirection::OverReport
|
||
| crate::engine_notes::LossDirection::Bail => base.min(Confidence::Medium),
|
||
// UnderReport: result set is a lower bound, but the emitted
|
||
// finding itself remains as credible as the analysis decided.
|
||
// Do not cap — the rank completeness penalty is the right lever
|
||
// for that case (see rank.rs::completeness_penalty).
|
||
crate::engine_notes::LossDirection::UnderReport => base,
|
||
// Informational is filtered out upstream by `worst_direction`,
|
||
// but keep the arm to force a decision if the enum grows.
|
||
crate::engine_notes::LossDirection::Informational => base,
|
||
}
|
||
}
|
||
|
||
/// Points-based confidence scoring for taint findings.
|
||
///
|
||
/// Uses evidence metadata (source kind, path length, validation, cap
|
||
/// specificity, summary resolution) to produce a nuanced confidence level
|
||
/// instead of the previous flat High assignment.
|
||
fn compute_taint_confidence(diag: &Diag) -> Confidence {
|
||
let ev = match &diag.evidence {
|
||
Some(e) => e,
|
||
None => return Confidence::High, // no evidence struct → conservative High
|
||
};
|
||
|
||
let mut score: i32 = 0;
|
||
|
||
// Source kind (prefer structured field, fall back to notes)
|
||
score += match ev.source_kind {
|
||
Some(kind) => structured_source_kind_score(kind),
|
||
None => source_kind_score(&ev.notes),
|
||
};
|
||
|
||
// Evidence completeness
|
||
let has_source = ev.source.is_some();
|
||
let has_sink = ev.sink.is_some();
|
||
let has_snippet = ev.source.as_ref().is_some_and(|s| s.snippet.is_some())
|
||
|| ev.sink.as_ref().is_some_and(|s| s.snippet.is_some());
|
||
score += if has_source && has_sink && has_snippet {
|
||
3
|
||
} else if has_source && has_sink {
|
||
2
|
||
} else {
|
||
1
|
||
};
|
||
|
||
// Hop count penalty (prefer structured field)
|
||
score += match ev.hop_count {
|
||
Some(count) => match count {
|
||
0..=3 => 0,
|
||
4..=8 => -1,
|
||
_ => -2,
|
||
},
|
||
None => hop_count_score(&ev.notes),
|
||
};
|
||
|
||
// Path validation penalty (use Diag field directly)
|
||
if diag.path_validated {
|
||
score -= 3;
|
||
}
|
||
|
||
// Cap specificity bonus (prefer structured field)
|
||
score += match ev.cap_specificity {
|
||
Some(count) => {
|
||
if count == 1 {
|
||
1
|
||
} else {
|
||
0
|
||
}
|
||
}
|
||
None => cap_specificity_score(&ev.notes),
|
||
};
|
||
|
||
// Summary resolution penalty (prefer structured field)
|
||
if ev.uses_summary || ev.notes.iter().any(|n| n == "uses_summary") {
|
||
score -= 1;
|
||
}
|
||
|
||
// Symbolic verdict adjustments
|
||
if let Some(ref sv) = ev.symbolic {
|
||
match sv.verdict {
|
||
Verdict::Infeasible => score -= 5,
|
||
Verdict::Confirmed => {
|
||
// Stronger bonus when extract_witness produced a concrete payload
|
||
// (contains "flows to" or "reaches"); raw Display-only fallback
|
||
// from get_sink_witness does not contain these phrases.
|
||
if sv
|
||
.witness
|
||
.as_ref()
|
||
.is_some_and(|w| w.contains("flows to") || w.contains("reaches"))
|
||
{
|
||
score += 3;
|
||
} else {
|
||
score += 2;
|
||
}
|
||
}
|
||
Verdict::Inconclusive | Verdict::NotAttempted => {}
|
||
}
|
||
|
||
// Backwards-driven corroboration / infeasibility. We
|
||
// deliberately use a smaller magnitude than the symex verdict so
|
||
// symex (which reasons about concrete payloads) stays the stronger
|
||
// signal; backwards is a structural agreement check.
|
||
use crate::taint::backwards::{NOTE_BUDGET, NOTE_CONFIRMED, NOTE_INFEASIBLE};
|
||
if sv.cutoff_notes.iter().any(|n| n == NOTE_CONFIRMED) {
|
||
score += 1;
|
||
}
|
||
if sv.cutoff_notes.iter().any(|n| n == NOTE_INFEASIBLE) {
|
||
score -= 3;
|
||
}
|
||
let _ = NOTE_BUDGET;
|
||
}
|
||
|
||
match score {
|
||
5.. => Confidence::High,
|
||
2..=4 => Confidence::Medium,
|
||
_ => Confidence::Low,
|
||
}
|
||
}
|
||
|
||
/// Score a structured `SourceKind` value.
|
||
///
|
||
/// UserInput=+3, EnvironmentConfig=+2, Unknown/FileSystem=+1, Database/CaughtException=0.
|
||
fn structured_source_kind_score(kind: crate::labels::SourceKind) -> i32 {
|
||
use crate::labels::SourceKind;
|
||
match kind {
|
||
SourceKind::UserInput => 3,
|
||
SourceKind::EnvironmentConfig => 2,
|
||
SourceKind::Unknown | SourceKind::FileSystem => 1,
|
||
SourceKind::Database | SourceKind::CaughtException => 0,
|
||
}
|
||
}
|
||
|
||
/// Extract source_kind from evidence notes and return points (legacy fallback).
///
/// UserInput=+3, EnvironmentConfig=+2, Unknown/FileSystem=+1, Database/CaughtException=0.
///
/// Only the first note with a `source_kind:` prefix is considered; an
/// unrecognised kind there scores 0.  With no such note the score is 1.
fn source_kind_score(notes: &[String]) -> i32 {
    let first_kind = notes
        .iter()
        .find_map(|note| note.strip_prefix("source_kind:"));

    match first_kind {
        Some("UserInput") => 3,
        Some("EnvironmentConfig") => 2,
        Some("Unknown" | "FileSystem") => 1,
        Some(_) => 0, // Database, CaughtException, etc.
        None => 1,    // conservative default if missing
    }
}

/// Extract hop_count from evidence notes and return penalty.
///
/// 0–3 blocks = 0, 4–8 = −1, 9+ = −2.
///
/// Uses the first `hop_count:` note whose value parses as a `u16`;
/// unparseable notes are skipped.  Returns 0 when none qualifies.
fn hop_count_score(notes: &[String]) -> i32 {
    let first_valid = notes
        .iter()
        .find_map(|note| note.strip_prefix("hop_count:")?.parse::<u16>().ok());

    match first_valid {
        Some(0..=3) => 0,
        Some(4..=8) => -1,
        Some(_) => -2,
        None => 0, // no hop info → no penalty
    }
}

/// Extract cap_specificity from evidence notes and return bonus.
///
/// 1 bit (exact match) = +1, otherwise 0.
///
/// Uses the first `cap_specificity:` note whose value parses as a `u8`;
/// unparseable notes are skipped.
fn cap_specificity_score(notes: &[String]) -> i32 {
    let first_valid = notes
        .iter()
        .find_map(|note| note.strip_prefix("cap_specificity:")?.parse::<u8>().ok());

    i32::from(first_valid == Some(1))
}

// ─────────────────────────────────────────────────────────────────────────────
// Explanation & Confidence Limiters
// ─────────────────────────────────────────────────────────────────────────────

/// Generate a human-readable explanation of a taint finding from its evidence.
|
||
pub fn generate_explanation(diag: &Diag) -> Option<String> {
|
||
let ev = diag.evidence.as_ref()?;
|
||
let source = ev.source.as_ref()?;
|
||
let sink = ev.sink.as_ref()?;
|
||
|
||
let source_callee = source.snippet.as_deref().unwrap_or("(unknown source)");
|
||
let sink_callee = sink.snippet.as_deref().unwrap_or("(unknown sink)");
|
||
|
||
// Extract source kind label (prefer structured field)
|
||
let source_kind_label = if let Some(kind) = ev.source_kind {
|
||
use crate::labels::SourceKind;
|
||
match kind {
|
||
SourceKind::UserInput => "user input",
|
||
SourceKind::EnvironmentConfig => "environment/config",
|
||
SourceKind::Database => "database",
|
||
SourceKind::FileSystem => "file system",
|
||
SourceKind::CaughtException => "caught exception",
|
||
SourceKind::Unknown => "unclassified",
|
||
}
|
||
} else {
|
||
// Legacy fallback: parse from notes
|
||
let kind_str = ev
|
||
.notes
|
||
.iter()
|
||
.find_map(|n| n.strip_prefix("source_kind:"))
|
||
.unwrap_or("unknown");
|
||
match kind_str {
|
||
"UserInput" => "user input",
|
||
"EnvironmentConfig" => "environment/config",
|
||
"Database" => "database",
|
||
"FileSystem" => "file system",
|
||
"CaughtException" => "caught exception",
|
||
_ => "unclassified",
|
||
}
|
||
};
|
||
|
||
// Extract category from rule ID
|
||
let category = diag
|
||
.id
|
||
.strip_prefix("taint-unsanitised-flow")
|
||
.map(|_| extract_category_from_id(&diag.id))
|
||
.unwrap_or_else(|| "injection".to_string());
|
||
|
||
let step_count = ev.flow_steps.len();
|
||
let mut explanation = if step_count > 2 {
|
||
format!(
|
||
"Unsanitised {source_kind_label} data flows from {source_callee} (line {}) through {} steps to {sink_callee} (line {}), creating a potential {category} vulnerability.",
|
||
source.line,
|
||
step_count - 2, // exclude source and sink themselves
|
||
sink.line,
|
||
)
|
||
} else {
|
||
format!(
|
||
"Unsanitised {source_kind_label} data flows from {source_callee} (line {}) to {sink_callee} (line {}), creating a potential {category} vulnerability.",
|
||
source.line, sink.line,
|
||
)
|
||
};
|
||
|
||
// Conditional addenda
|
||
if diag.path_validated {
|
||
if let Some(ref guard) = diag.guard_kind {
|
||
explanation.push_str(&format!(
|
||
" A {guard} guard was detected but may not be sufficient."
|
||
));
|
||
}
|
||
}
|
||
if ev.uses_summary || ev.notes.iter().any(|n| n == "uses_summary") {
|
||
explanation.push_str(" The flow crosses function boundaries via summary resolution.");
|
||
}
|
||
|
||
Some(explanation)
|
||
}
|
||
|
||
/// Extract a vulnerability category label from a rule ID (used in
/// explanation text).
///
/// Rule IDs look like "taint-unsanitised-flow (source 3:1)" — the category
/// really comes from the finding category field, so this is only an
/// approximation based on substrings of the ID.  Matching is limited to
/// the all-lowercase and all-uppercase spellings (`sql`/`SQL`, then
/// `xss`/`XSS`); anything else yields the generic `"injection"`.
fn extract_category_from_id(id: &str) -> String {
    let mentions = |needles: [&str; 2]| needles.iter().any(|needle| id.contains(*needle));

    let label = if mentions(["sql", "SQL"]) {
        "SQL injection"
    } else if mentions(["xss", "XSS"]) {
        "XSS"
    } else {
        "injection"
    };
    label.to_string()
}

/// Compute reasons why confidence is not higher.
|
||
pub fn compute_confidence_limiters(diag: &Diag) -> Vec<String> {
|
||
let mut limiters = Vec::new();
|
||
let ev = match &diag.evidence {
|
||
Some(e) => e,
|
||
None => return limiters,
|
||
};
|
||
|
||
// Hop count (prefer structured field)
|
||
let hop = ev.hop_count.or_else(|| {
|
||
ev.notes
|
||
.iter()
|
||
.find_map(|n| n.strip_prefix("hop_count:")?.parse::<u16>().ok())
|
||
});
|
||
if let Some(count) = hop {
|
||
if count >= 4 {
|
||
limiters.push(format!(
|
||
"Taint path spans {count} blocks, increasing chance of intermediate sanitization"
|
||
));
|
||
}
|
||
}
|
||
|
||
// Summary resolution (prefer structured field)
|
||
if ev.uses_summary || ev.notes.iter().any(|n| n == "uses_summary") {
|
||
limiters.push("Flow resolved via cross-function summary (may be imprecise)".into());
|
||
}
|
||
|
||
// Path validated (use Diag field directly)
|
||
if diag.path_validated {
|
||
limiters.push("Validation guard detected on path (may provide protection)".into());
|
||
}
|
||
|
||
// Cap specificity (prefer structured field)
|
||
let cap_spec = ev.cap_specificity.or_else(|| {
|
||
ev.notes
|
||
.iter()
|
||
.find_map(|n| n.strip_prefix("cap_specificity:")?.parse::<u8>().ok())
|
||
});
|
||
if cap_spec == Some(0) {
|
||
limiters.push("Source and sink capability types do not match specifically".into());
|
||
}
|
||
|
||
// Source kind unknown (prefer structured field)
|
||
let is_unknown = ev.source_kind == Some(crate::labels::SourceKind::Unknown)
|
||
|| ev.notes.iter().any(|n| n == "source_kind:Unknown");
|
||
if is_unknown {
|
||
limiters.push("Source type is unclassified (lower exploitation confidence)".into());
|
||
}
|
||
|
||
// Symbolic verdict
|
||
if let Some(ref sv) = ev.symbolic {
|
||
if sv.verdict == Verdict::Infeasible {
|
||
limiters.push("Symbolic analysis proved this path is infeasible".into());
|
||
}
|
||
}
|
||
|
||
// Demand-driven backwards analysis notes (stored on
|
||
// `symbolic.cutoff_notes` so the evidence pipeline already plumbs
|
||
// them). When the backwards walk proved the flow infeasible or ran
|
||
// out of budget, surface a user-readable limiter.
|
||
if let Some(ref sv) = ev.symbolic {
|
||
use crate::taint::backwards::{NOTE_BUDGET, NOTE_CONFIRMED, NOTE_INFEASIBLE};
|
||
if sv.cutoff_notes.iter().any(|n| n == NOTE_INFEASIBLE) {
|
||
limiters.push("Backwards demand-driven analysis proved this flow infeasible".into());
|
||
} else if sv.cutoff_notes.iter().any(|n| n == NOTE_BUDGET) {
|
||
limiters.push(
|
||
"Backwards demand-driven analysis exceeded its budget (verdict not reached)".into(),
|
||
);
|
||
}
|
||
// Confirmation is *not* a limiter — it is a positive signal. The
|
||
// taint-confidence scorer picks it up separately.
|
||
let _ = NOTE_CONFIRMED;
|
||
}
|
||
|
||
limiters
|
||
}
|
||
|
||
// ─────────────────────────────────────────────────────────────────────────────
// Tests
// ─────────────────────────────────────────────────────────────────────────────

#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
use crate::labels::SourceKind;
|
||
|
||
fn make_diag(id: &str, severity: Severity) -> Diag {
|
||
Diag {
|
||
path: "test.rs".into(),
|
||
line: 1,
|
||
col: 1,
|
||
severity,
|
||
id: id.into(),
|
||
category: crate::patterns::FindingCategory::Security,
|
||
path_validated: false,
|
||
guard_kind: None,
|
||
message: None,
|
||
labels: vec![],
|
||
confidence: None,
|
||
evidence: None,
|
||
rank_score: None,
|
||
rank_reason: None,
|
||
suppressed: false,
|
||
suppression: None,
|
||
rollup: None,
|
||
finding_id: String::new(),
|
||
alternative_finding_ids: Vec::new(),
|
||
}
|
||
}
|
||
|
||
#[test]
|
||
fn compute_confidence_taint_strong_path() {
|
||
// UserInput(+3) + source+sink+snippet(+3) + short path(0) + cap_specificity:1(+1) = 7 → High
|
||
let mut d = make_diag("taint-unsanitised-flow (source 1:1)", Severity::High);
|
||
d.evidence = Some(Evidence {
|
||
source: Some(SpanEvidence {
|
||
path: "test.rs".into(),
|
||
line: 1,
|
||
col: 1,
|
||
kind: "source".into(),
|
||
snippet: Some("env::var(\"X\")".into()),
|
||
}),
|
||
sink: Some(SpanEvidence {
|
||
path: "test.rs".into(),
|
||
line: 10,
|
||
col: 5,
|
||
kind: "sink".into(),
|
||
snippet: Some("exec()".into()),
|
||
}),
|
||
guards: vec![],
|
||
sanitizers: vec![],
|
||
state: None,
|
||
notes: vec![
|
||
"source_kind:UserInput".into(),
|
||
"hop_count:1".into(),
|
||
"cap_specificity:1".into(),
|
||
],
|
||
source_kind: Some(crate::labels::SourceKind::UserInput),
|
||
hop_count: Some(1),
|
||
cap_specificity: Some(1),
|
||
..Default::default()
|
||
});
|
||
assert_eq!(compute_confidence(&d), Confidence::High);
|
||
}
|
||
|
||
#[test]
|
||
fn compute_confidence_taint_medium_path() {
|
||
// EnvironmentConfig(+2) + source+sink no snippet(+2) + hop_count:5(−1) = 3 → Medium
|
||
let mut d = make_diag("taint-unsanitised-flow (source 1:1)", Severity::High);
|
||
d.evidence = Some(Evidence {
|
||
source: Some(SpanEvidence {
|
||
path: "test.rs".into(),
|
||
line: 1,
|
||
col: 1,
|
||
kind: "source".into(),
|
||
snippet: None,
|
||
}),
|
||
sink: Some(SpanEvidence {
|
||
path: "test.rs".into(),
|
||
line: 10,
|
||
col: 5,
|
||
kind: "sink".into(),
|
||
snippet: None,
|
||
}),
|
||
guards: vec![],
|
||
sanitizers: vec![],
|
||
state: None,
|
||
notes: vec!["source_kind:EnvironmentConfig".into(), "hop_count:5".into()],
|
||
source_kind: Some(crate::labels::SourceKind::EnvironmentConfig),
|
||
hop_count: Some(5),
|
||
..Default::default()
|
||
});
|
||
assert_eq!(compute_confidence(&d), Confidence::Medium);
|
||
}
|
||
|
||
#[test]
|
||
fn compute_confidence_taint_weak_path() {
|
||
// Database(0) + source+sink no snippet(+2) + hop_count:12(−2) + uses_summary(−1) = −1 → Low
|
||
let mut d = make_diag("taint-unsanitised-flow (source 1:1)", Severity::High);
|
||
d.evidence = Some(Evidence {
|
||
source: Some(SpanEvidence {
|
||
path: "test.rs".into(),
|
||
line: 1,
|
||
col: 1,
|
||
kind: "source".into(),
|
||
snippet: None,
|
||
}),
|
||
sink: Some(SpanEvidence {
|
||
path: "test.rs".into(),
|
||
line: 20,
|
||
col: 5,
|
||
kind: "sink".into(),
|
||
snippet: None,
|
||
}),
|
||
guards: vec![],
|
||
sanitizers: vec![],
|
||
state: None,
|
||
notes: vec![
|
||
"source_kind:Database".into(),
|
||
"hop_count:12".into(),
|
||
"uses_summary".into(),
|
||
],
|
||
source_kind: Some(crate::labels::SourceKind::Database),
|
||
hop_count: Some(12),
|
||
uses_summary: true,
|
||
..Default::default()
|
||
});
|
||
assert_eq!(compute_confidence(&d), Confidence::Low);
|
||
}
|
||
|
||
#[test]
|
||
fn compute_confidence_taint_validated_with_source() {
|
||
// UserInput(+3) + source+sink+snippet(+3) + path_validated(−3) = 3 → Medium
|
||
let mut d = make_diag("taint-unsanitised-flow (source 1:1)", Severity::High);
|
||
d.path_validated = true;
|
||
d.evidence = Some(Evidence {
|
||
source: Some(SpanEvidence {
|
||
path: "test.rs".into(),
|
||
line: 1,
|
||
col: 1,
|
||
kind: "source".into(),
|
||
snippet: Some("req.query".into()),
|
||
}),
|
||
sink: Some(SpanEvidence {
|
||
path: "test.rs".into(),
|
||
line: 10,
|
||
col: 5,
|
||
kind: "sink".into(),
|
||
snippet: Some("exec()".into()),
|
||
}),
|
||
guards: vec![],
|
||
sanitizers: vec![],
|
||
state: None,
|
||
notes: vec!["path_validated".into(), "source_kind:UserInput".into()],
|
||
source_kind: Some(crate::labels::SourceKind::UserInput),
|
||
..Default::default()
|
||
});
|
||
assert_eq!(compute_confidence(&d), Confidence::Medium);
|
||
}
|
||
|
||
#[test]
|
||
fn compute_confidence_taint_no_evidence() {
|
||
// No Evidence struct → conservative High
|
||
let d = make_diag("taint-unsanitised-flow (source 1:1)", Severity::High);
|
||
assert_eq!(compute_confidence(&d), Confidence::High);
|
||
}
|
||
|
||
#[test]
|
||
fn compute_confidence_degraded_caps_to_low() {
|
||
let mut d = make_diag("taint-unsanitised-flow (source 1:1)", Severity::High);
|
||
d.evidence = Some(Evidence {
|
||
source: None,
|
||
sink: None,
|
||
guards: vec![],
|
||
sanitizers: vec![],
|
||
state: None,
|
||
notes: vec!["degraded:budget_exceeded".into()],
|
||
..Default::default()
|
||
});
|
||
assert_eq!(compute_confidence(&d), Confidence::Low);
|
||
}
|
||
|
||
#[test]
|
||
fn compute_confidence_state_rules() {
|
||
assert_eq!(
|
||
compute_confidence(&make_diag("state-use-after-close", Severity::High)),
|
||
Confidence::High,
|
||
);
|
||
assert_eq!(
|
||
compute_confidence(&make_diag("state-double-close", Severity::Medium)),
|
||
Confidence::High,
|
||
);
|
||
assert_eq!(
|
||
compute_confidence(&make_diag("state-unauthed-access", Severity::High)),
|
||
Confidence::High,
|
||
);
|
||
assert_eq!(
|
||
compute_confidence(&make_diag("state-resource-leak", Severity::Medium)),
|
||
Confidence::Medium,
|
||
);
|
||
assert_eq!(
|
||
compute_confidence(&make_diag("state-resource-leak-possible", Severity::Low)),
|
||
Confidence::Low,
|
||
);
|
||
}
|
||
|
||
#[test]
|
||
fn compute_confidence_cfg_preserves_existing() {
|
||
let mut d = make_diag("cfg-unguarded-sink", Severity::High);
|
||
d.confidence = Some(Confidence::Low);
|
||
assert_eq!(compute_confidence(&d), Confidence::Low);
|
||
}
|
||
|
||
#[test]
|
||
fn compute_confidence_ast_low() {
|
||
let d = make_diag("rs.code_exec.eval", Severity::Medium);
|
||
assert_eq!(compute_confidence(&d), Confidence::Low);
|
||
}
|
||
|
||
#[test]
|
||
fn compute_confidence_ast_high_severity_medium() {
|
||
let d = make_diag("rs.code_exec.eval", Severity::High);
|
||
assert_eq!(compute_confidence(&d), Confidence::Medium);
|
||
}
|
||
|
||
// ── engine_notes direction-aware capping ────────────────────────
|
||
|
||
fn taint_high_confidence_diag() -> Diag {
|
||
// A known-High taint configuration: UserInput + source+sink+snippet +
|
||
// short path + cap_specificity=1 → score 7 → High. Re-used as the
|
||
// "clean" baseline for every engine-notes cap test.
|
||
let mut d = make_diag("taint-unsanitised-flow (source 1:1)", Severity::High);
|
||
d.evidence = Some(Evidence {
|
||
source: Some(SpanEvidence {
|
||
path: "test.rs".into(),
|
||
line: 1,
|
||
col: 1,
|
||
kind: "source".into(),
|
||
snippet: Some("req.query.id".into()),
|
||
}),
|
||
sink: Some(SpanEvidence {
|
||
path: "test.rs".into(),
|
||
line: 5,
|
||
col: 1,
|
||
kind: "sink".into(),
|
||
snippet: Some("exec(id)".into()),
|
||
}),
|
||
source_kind: Some(SourceKind::UserInput),
|
||
cap_specificity: Some(1),
|
||
hop_count: Some(1),
|
||
..Default::default()
|
||
});
|
||
d
|
||
}
|
||
|
||
fn with_notes(mut d: Diag, notes: Vec<crate::engine_notes::EngineNote>) -> Diag {
|
||
let mut ev = d.evidence.clone().unwrap_or_default();
|
||
ev.engine_notes = smallvec::SmallVec::from_vec(notes);
|
||
d.evidence = Some(ev);
|
||
d
|
||
}
|
||
|
||
#[test]
fn confidence_uncapped_without_engine_notes() {
    // Sanity check on the shared baseline used by the cap tests.
    let baseline = compute_confidence(&taint_high_confidence_diag());
    assert_eq!(
        baseline,
        Confidence::High,
        "baseline must be High so cap tests have something to cap"
    );
}
|
||
|
||
#[test]
fn confidence_not_capped_by_under_report() {
    use crate::engine_notes::EngineNote;

    // An UnderReport note says OTHER findings may have been missed. The
    // finding that *was* emitted is still sound, so it keeps its High score.
    let under_report = vec![EngineNote::WorklistCapped { iterations: 100 }];
    let diag = with_notes(taint_high_confidence_diag(), under_report);

    assert_eq!(compute_confidence(&diag), Confidence::High);
}
|
||
|
||
#[test]
fn confidence_capped_at_medium_by_over_report() {
    use crate::engine_notes::EngineNote;

    // OverReport (PredicateStateWidened): validation predicates were lost,
    // so the emitted finding is more likely to be spurious and is capped.
    let over_report = vec![EngineNote::PredicateStateWidened];
    let diag = with_notes(taint_high_confidence_diag(), over_report);

    assert_eq!(compute_confidence(&diag), Confidence::Medium);
}
|
||
|
||
#[test]
fn confidence_capped_at_medium_by_bail() {
    use crate::engine_notes::EngineNote;

    // A ParseTimeout note on the High baseline caps the result at Medium.
    let bail = vec![EngineNote::ParseTimeout { timeout_ms: 1000 }];
    let diag = with_notes(taint_high_confidence_diag(), bail);

    assert_eq!(compute_confidence(&diag), Confidence::Medium);
}
|
||
|
||
#[test]
fn confidence_cap_does_not_upgrade_low() {
    use crate::engine_notes::EngineNote;

    // The cap is `base.min(Medium)`, so it must never *raise* a Low baseline
    // to Medium. Start from a taint finding whose weak evidence makes the
    // points scorer return Low, then attach a Bail note.
    let weak_evidence = Evidence {
        source: None,
        sink: None,
        source_kind: Some(SourceKind::Database),
        hop_count: Some(10),
        ..Default::default()
    };
    let mut weak = make_diag("taint-unsanitised-flow (source 1:1)", Severity::Low);
    weak.evidence = Some(weak_evidence);

    let capped = with_notes(weak, vec![EngineNote::ParseTimeout { timeout_ms: 100 }]);

    assert_eq!(
        compute_confidence(&capped),
        Confidence::Low,
        "Bail cap must never raise Low → Medium"
    );
}
|
||
|
||
#[test]
fn confidence_not_capped_by_informational() {
    use crate::engine_notes::EngineNote;

    // An InlineCacheReused note must leave the High baseline untouched.
    let informational = vec![EngineNote::InlineCacheReused];
    let diag = with_notes(taint_high_confidence_diag(), informational);

    assert_eq!(compute_confidence(&diag), Confidence::High);
}
|
||
|
||
#[test]
fn confidence_cap_applies_to_state_findings_too() {
    use crate::engine_notes::EngineNote;

    // state-use-after-close scores High by default; an OverReport note must
    // cap it to Medium exactly as it does on the taint path.
    let state_diag = make_diag("state-use-after-close", Severity::High);
    let capped = with_notes(state_diag, vec![EngineNote::PredicateStateWidened]);

    assert_eq!(compute_confidence(&capped), Confidence::Medium);
}
|
||
|
||
#[test]
fn confidence_cap_chooses_worst_when_mixed() {
    use crate::engine_notes::EngineNote;

    // UnderReport on its own does not cap, OverReport does. When both are
    // present the cap must still apply (worst direction wins).
    let mixed = vec![
        EngineNote::WorklistCapped { iterations: 10 },
        EngineNote::PredicateStateWidened,
    ];
    let diag = with_notes(taint_high_confidence_diag(), mixed);

    assert_eq!(compute_confidence(&diag), Confidence::Medium);
}
|
||
|
||
#[test]
fn evidence_is_empty() {
    // A default-constructed Evidence reports itself as empty.
    let blank = Evidence::default();
    assert!(blank.is_empty());

    // Populating only the source span is enough to make it non-empty.
    let source_span = SpanEvidence {
        path: "x.rs".into(),
        line: 1,
        col: 1,
        kind: "source".into(),
        snippet: None,
    };
    let with_source = Evidence {
        source: Some(source_span),
        ..Default::default()
    };
    assert!(!with_source.is_empty());
}
|
||
|
||
#[test]
fn confidence_ord() {
    // Ordering must be Low < Medium < High, including the transitive pair.
    let (low, medium, high) = (Confidence::Low, Confidence::Medium, Confidence::High);
    assert!(low < medium);
    assert!(medium < high);
    assert!(low < high);
}
|
||
|
||
#[test]
fn confidence_display_and_parse() {
    // Display renders the capitalised variant name.
    let rendered = [
        (Confidence::Low, "Low"),
        (Confidence::Medium, "Medium"),
        (Confidence::High, "High"),
    ];
    for (level, text) in rendered {
        assert_eq!(level.to_string(), text);
    }

    // Parsing accepts lower-, upper- and mixed-case spellings.
    let parsed = [
        ("low", Confidence::Low),
        ("MEDIUM", Confidence::Medium),
        ("High", Confidence::High),
    ];
    for (input, expected) in parsed {
        assert_eq!(input.parse::<Confidence>().unwrap(), expected);
    }

    // Anything else is rejected.
    let rejected = "invalid".parse::<Confidence>();
    assert!(rejected.is_err());
}
|
||
|
||
#[test]
fn compute_confidence_does_not_override_preset() {
    // AST patterns set confidence directly; compute_confidence must not
    // overwrite it.
    let mut preset_diag = make_diag("rs.quality.expect", Severity::Low);
    preset_diag.confidence = Some(Confidence::High);

    // The post-pass only runs when confidence is None. Show that
    // compute_confidence on its own would yield a different answer (Low for
    // AST + Low severity), which is why the guard in scan.rs is necessary.
    let recomputed = compute_confidence(&preset_diag);
    assert_eq!(recomputed, Confidence::Low);

    // The actual guard: confidence is already Some, so scan.rs skips
    // compute_confidence and the preset value is still in place.
    assert_eq!(preset_diag.confidence, Some(Confidence::High));
}
|
||
|
||
#[test]
fn json_omits_none_fields() {
    // A default Evidence must serialise to an empty JSON object.
    let encoded = serde_json::to_string(&Evidence::default()).unwrap();
    assert_eq!(encoded, "{}");
}
|
||
|
||
#[test]
fn symbolic_verdict_serde_round_trip() {
    let witness_text = "x=null forces false branch";
    let all_verdicts = [
        Verdict::Confirmed,
        Verdict::Infeasible,
        Verdict::Inconclusive,
        Verdict::NotAttempted,
    ];

    // Each verdict variant must survive a JSON encode/decode cycle together
    // with the counters and the witness string.
    for verdict in all_verdicts {
        let original = SymbolicVerdict {
            verdict,
            constraints_checked: 42,
            paths_explored: 7,
            witness: Some(witness_text.into()),
            interproc_call_chains: Vec::new(),
            cutoff_notes: Vec::new(),
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: SymbolicVerdict = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.verdict, verdict);
        assert_eq!(decoded.constraints_checked, 42);
        assert_eq!(decoded.paths_explored, 7);
        assert_eq!(decoded.witness.as_deref(), Some(witness_text));
    }

    // Verify snake_case serialization
    let encoded = serde_json::to_string(&Verdict::NotAttempted).unwrap();
    assert_eq!(encoded, "\"not_attempted\"");
}
|
||
|
||
#[test]
fn evidence_with_symbolic_not_empty() {
    // Evidence carrying only a symbolic verdict must not count as empty.
    let confirmed = SymbolicVerdict {
        verdict: Verdict::Confirmed,
        constraints_checked: 1,
        paths_explored: 1,
        witness: None,
        interproc_call_chains: Vec::new(),
        cutoff_notes: Vec::new(),
    };
    let evidence = Evidence {
        symbolic: Some(confirmed),
        ..Default::default()
    };

    assert!(!evidence.is_empty());
}
|
||
|
||
#[test]
fn symbolic_witness_omitted_when_none() {
    // With `witness: None` the serialised JSON must not mention the key.
    let inconclusive = SymbolicVerdict {
        verdict: Verdict::Inconclusive,
        constraints_checked: 0,
        paths_explored: 0,
        witness: None,
        interproc_call_chains: Vec::new(),
        cutoff_notes: Vec::new(),
    };

    let encoded = serde_json::to_string(&inconclusive).unwrap();
    let mentions_witness = encoded.contains("witness");
    assert!(!mentions_witness);
}
|
||
|
||
#[test]
fn compute_confidence_structured_fields_only() {
    use crate::labels::SourceKind;

    // Structured fields without notes → same result as with notes.
    // UserInput(+3) + source+sink+snippet(+3) + hop_count:1(0)
    //   + cap_specificity:1(+1) = 7 → High
    let source = SpanEvidence {
        path: "test.rs".into(),
        line: 1,
        col: 1,
        kind: "source".into(),
        snippet: Some("req.query".into()),
    };
    let sink = SpanEvidence {
        path: "test.rs".into(),
        line: 10,
        col: 5,
        kind: "sink".into(),
        snippet: Some("exec()".into()),
    };

    let mut diag = make_diag("taint-unsanitised-flow (source 1:1)", Severity::High);
    diag.evidence = Some(Evidence {
        source: Some(source),
        sink: Some(sink),
        source_kind: Some(SourceKind::UserInput),
        hop_count: Some(1),
        cap_specificity: Some(1),
        ..Default::default()
    });

    assert_eq!(compute_confidence(&diag), Confidence::High);
}
|
||
|
||
#[test]
fn compute_confidence_notes_only_backward_compat() {
    // Notes only (no structured fields) → backward compatible.
    // EnvironmentConfig(+2) + source+sink(+2) + hop_count:5(−1) = 3 → Medium
    let source = SpanEvidence {
        path: "test.rs".into(),
        line: 1,
        col: 1,
        kind: "source".into(),
        snippet: None,
    };
    let sink = SpanEvidence {
        path: "test.rs".into(),
        line: 10,
        col: 5,
        kind: "sink".into(),
        snippet: None,
    };

    let mut diag = make_diag("taint-unsanitised-flow (source 1:1)", Severity::High);
    diag.evidence = Some(Evidence {
        source: Some(source),
        sink: Some(sink),
        notes: vec!["source_kind:EnvironmentConfig".into(), "hop_count:5".into()],
        ..Default::default()
    });

    assert_eq!(compute_confidence(&diag), Confidence::Medium);
}
|
||
|
||
#[test]
fn compute_confidence_symbolic_infeasible_demotes() {
    use crate::labels::SourceKind;

    // UserInput(+3) + source+sink+snippet(+3) + Infeasible(−5) = 1 → Low
    let source = SpanEvidence {
        path: "test.rs".into(),
        line: 1,
        col: 1,
        kind: "source".into(),
        snippet: Some("req.query".into()),
    };
    let sink = SpanEvidence {
        path: "test.rs".into(),
        line: 10,
        col: 5,
        kind: "sink".into(),
        snippet: Some("exec()".into()),
    };
    let infeasible = SymbolicVerdict {
        verdict: Verdict::Infeasible,
        constraints_checked: 3,
        paths_explored: 1,
        witness: None,
        interproc_call_chains: Vec::new(),
        cutoff_notes: Vec::new(),
    };

    let mut diag = make_diag("taint-unsanitised-flow (source 1:1)", Severity::High);
    diag.evidence = Some(Evidence {
        source: Some(source),
        sink: Some(sink),
        source_kind: Some(SourceKind::UserInput),
        symbolic: Some(infeasible),
        ..Default::default()
    });

    assert_eq!(compute_confidence(&diag), Confidence::Low);
}
|
||
|
||
#[test]
fn compute_confidence_symbolic_confirmed_boosts() {
    use crate::labels::SourceKind;

    // EnvironmentConfig(+2) + source+sink(+2) + Confirmed(+2) = 6 → High
    let source = SpanEvidence {
        path: "test.rs".into(),
        line: 1,
        col: 1,
        kind: "source".into(),
        snippet: None,
    };
    let sink = SpanEvidence {
        path: "test.rs".into(),
        line: 10,
        col: 5,
        kind: "sink".into(),
        snippet: None,
    };
    let confirmed = SymbolicVerdict {
        verdict: Verdict::Confirmed,
        constraints_checked: 2,
        paths_explored: 1,
        witness: None,
        interproc_call_chains: Vec::new(),
        cutoff_notes: Vec::new(),
    };

    let mut diag = make_diag("taint-unsanitised-flow (source 1:1)", Severity::High);
    diag.evidence = Some(Evidence {
        source: Some(source),
        sink: Some(sink),
        source_kind: Some(SourceKind::EnvironmentConfig),
        symbolic: Some(confirmed),
        ..Default::default()
    });

    assert_eq!(compute_confidence(&diag), Confidence::High);
}
|
||
|
||
#[test]
fn evidence_with_structured_fields_not_empty() {
    use crate::labels::SourceKind;

    // Setting only `source_kind` makes the evidence non-empty.
    let kind_only = Evidence {
        source_kind: Some(SourceKind::UserInput),
        ..Default::default()
    };
    assert!(!kind_only.is_empty());

    // So does setting only the `uses_summary` flag.
    let summary_only = Evidence {
        uses_summary: true,
        ..Default::default()
    };
    assert!(!summary_only.is_empty());
}
|
||
|
||
#[test]
fn source_kind_serde_round_trip() {
    use crate::labels::SourceKind;

    // Every listed variant must decode back to itself after JSON encoding.
    let all_kinds = [
        SourceKind::UserInput,
        SourceKind::EnvironmentConfig,
        SourceKind::FileSystem,
        SourceKind::Database,
        SourceKind::CaughtException,
        SourceKind::Unknown,
    ];
    for kind in all_kinds {
        let encoded = serde_json::to_string(&kind).unwrap();
        let decoded: SourceKind = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded, kind);
    }

    // Verify snake_case serialization
    let encoded = serde_json::to_string(&SourceKind::UserInput).unwrap();
    assert_eq!(encoded, "\"user_input\"");
}
|
||
}
|