mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-24 20:28:06 +02:00
Release/0.5.0 (#35)
* feat: Introduce function-scoped variable interning for state analysis with new tests and fixtures * feat: Add Phase 26 symbolic execution enhancements with bitwise operator support, abstract interpretation refinements, and new taint analysis tests * feat: Refine state analysis to handle factory-pattern resource returns with mixed-path tests and leak detection enhancements * feat: Add Phase 27 debug views with symbolic execution, abstract interpretation, SSA, and call graph viewers; integrate with debug layout and styles * feat: Add Phase 31 type-qualified symbolic resolution with receiver-based callee disambiguation and testing * feat: Extend symbolic execution with state iteration, enhanced debug views, and debounced input handling * feat: Add Phase 13 resource and auth pattern extensions with new tests and fixtures * feat: Introduce CFG debug graph renderer with compact mode, toolbar, and DAG layout integration * feat: Add Phase 28 encoding and decoding transform modeling with structural symex enhancements and new taint analysis tests * feat: Extend abstract interpretation with type facts and constant value tracking in debug views and server logic * feat: Add linear path handling and witness extraction to symbolic execution with Phase 28 transform mismatch detection * feat: Refine Go auth and sanitizer handling with enhanced rules, state updates, and benchmark improvements * feat: Enable auth-state analysis by default and update relevant tests in benchmark config * test: Update state_tests to reflect default enablement of auth-state analysis and add auth suppression test * docs: update CHANGELOG.md * feat: Introduce per-index taint tracking in `HeapState` with `HeapSlot`, overflow handling, and revised SSA transfers * feat: Introduce C/C++ language labels and refine heap state tracking in SSA transfers * feat: Implement per-index array slot tracking in symbolic heap with overflow collapse * feat: Add implicit definition handling for uninitialized declarations 
in SSA value allocation * feat: Refactor function parameters and constants for improved clarity and maintainability * refactor: Reorder module imports and improve formatting for consistency * refactor: Fix formatting erorrs * refactor: Fix clippy warnings * refactor: Fix fmt warnings (again) * chore: Update dependencies and improve feature configuration * Add comprehensive tests for undertested modules (#36) (COPILOT) * Add comprehensive tests for undertested modules Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/f3fc877e-f386-49ba-9793-fc93d3805083 * Add comprehensive tests for ext, project, walk, and errors modules Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/f3fc877e-f386-49ba-9793-fc93d3805083 --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> * chore: Update dependencies and improve feature configuration * fix: formatting errors in new tests * chore: Update license list in about.toml * chore: made functions input inline * chore: updated cfg graph to take up the full page * chore: add Prettier configuration and update code formatting * Add frontend test suite with Vitest (111 tests) (#37) * Add Vitest test suite for frontend - 111 tests across utils, components, hooks, and graph utilities Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/7cf0dba2-ecff-4740-ba4d-92717e74a0b7 * ci: add frontend test step to CI workflow Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/5bc0ac9f-0a32-4d03-9cb7-7a15aea53fca --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: elicpeter 
<54954007+elicpeter@users.noreply.github.com> * chore: simplify array initialization in test files for consistency * ran typecheck * feat: add AnalysisWorkspace component and integrate it into CfgViewerPage * feat: update routing in AppLayout and improve empty state message in ExplorerPage * feat: enhance scan progress tracking with additional metrics and stages * feat: update license information and add license check script * feat: implement cross-file symbolic execution with callee body persistence * feat: replace dagre graphs with Graphology + ELK + Sigma for more advanced call stack and cfg rendering * feat: ensure CFG function view is scoped to the selected function, preventing bleed into sibling functions * feat: enhance resource tracking with proxy method summaries and improve finding extraction * feat: add terminal function exit detection for accurate resource leak analysis * feat: add warnings for loops and functions without bodies to improve error recovery * feat: update lambda expression handling to ensure proper function classification and control flow * feat: remove bounded formatting/string ops and add JSON.parse sanitizer for improved data handling * feat: add inline return taint analysis and regression tests for improved security checks * feat: add engine version management and migration handling for database schema updates * feat: enhance first_call_ident to skip nested function bodies and add regression tests * feat: enhance callee name resolution with two-segment normalization and disambiguation * feat: add cross-file context flags and debug assertions for taint analysis * feat: refactor taint analysis structure to unify context handling and improve clarity * feat: enhance dead code elimination to preserve Sink, Source, and Sanitizer labels with new tests * docs: updated CHANGELOG.md * fmt: formatting fixes * fix: fixed frontend formatting and lint warnings * fix: optimized ci * fix: optimized ci * Add comprehensive multi-file test coverage to 
Nyx (#38) * Initial checklist for multi-file test suite expansion Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/e550cb88-9767-4442-94d4-101bf5bb0e23 Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> * Add 12 new multi-file test fixtures with TP/TN/near-miss coverage Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/e550cb88-9767-4442-94d4-101bf5bb0e23 Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> * deleted root repo * rebuilt to test for regressions --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> Co-authored-by: elipeter <elicpeter@gmail.com> * feat: enhance import alias resolution and taint tracking * feat: implement security hardening with CSRF protection and path validation * feat: add support for import alias bindings in Python, PHP, and Rust * feat: enhance CFG analysis modes and improve code readability * feat: add detection for parameterized SQL queries to enhance security * feat: add safe internal redirect handling and enhance session destroy validation * feat: implement security improvements by addressing vulnerabilities in execAsync, session management, and file downloads * feat: enhance taint detection by adding support for inline source member expressions in call arguments * feat: implement pre-emission of Source nodes for inline source member expressions in call arguments * feat: add support for Throw statement in control flow and error handling * feat: add debug and echo endpoints with potential information leakage * feat: implement internal redirect suppression and enhance taint detection * feat: implement module alias tracking for dynamic dispatch in JS/TS * feat: add authorization analysis module with Express support * feat: add authorization analysis module with Express support * feat: add tests for admin guard requirements and clean checks in authorization 
analysis * feat: integrate Koa and Fastify frameworks into authorization analysis * feat: add Flask and Django support to authorization analysis module * feat: add support for Rails and Sinatra frameworks in authorization analysis * feat: add support for Axum, ActixWeb, and Rocket frameworks in authorization analysis * feat: add support for ActixWeb, Axum, and Rocket frameworks in authorization analysis * feat: add support for Rails and Sinatra in authorization analysis * chore: add .DS_Store to .gitignore * refactor: simplify conditional checks and improve readability in multiple files * refactor: update usage of Option methods for improved clarity and consistency * refactor: improve code readability by simplifying conditional checks and formatting * refactor: improve code formatting and readability by simplifying conditional checks * refactor: simplify conditional checks and improve readability in multiple files * refactor: simplify conditional checks in axum.rs for improved readability * feat: add CodeQL analysis configuration for enhanced security scanning * test: add comprehensive tests for `src/output.rs` SARIF builder (#39) * chore: start test coverage improvement work Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/cd7ff398-134e-4728-a5e7-0353a0744423 Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> * test: add comprehensive tests for src/output.rs SARIF builder Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/cd7ff398-134e-4728-a5e7-0353a0744423 Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> * refactor: improve code formatting and readability in output.rs --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> Co-authored-by: elipeter <elicpeter@gmail.com> * refactor: improve code formatting and readability in output.rs * Potential fix for code scanning alert no. 
210: Uncontrolled data used in path expression Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> * Potential fix for code scanning alert no. 211: Uncontrolled data used in path expression Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> * refactor: enhance triage file path handling with improved error management and validation * refactor: updated func summaries for richer detail * refactor: update SSA summary extraction to use canonical FuncKey for distinct entries * refactor: enhance callee metadata structure to support arity, receiver, and qualifier for better overload resolution * refactor: add support for keyword arguments in function calls and enhance receiver extraction for method-style calls * refactor: implement new Flask routes for safe and unsafe shell command execution * refactor: separate receiver handling in SSA operations and enhance taint propagation * refactor: improve arity handling by using arg_uses for positional argument count and enhance witness scoring for tainted arguments * refactor: implement auth decorator extraction and classification for multiple languages * refactor: enhance Rust module path resolution and use map handling for cross-file disambiguation * refactor: introduce CalleeQuery struct for structured callee resolution and enhance resolver logic * refactor: implement same-file identity collision handling for `runTask` to ensure correct resolver behavior * refactor: standardize default struct initialization across multiple files * feat: add scripts for formatting checks and auto-fixes with test summaries * refactor: simplify character splitting and enhance namespace qualifier handling * refactor: improve documentation clarity and enhance code readability in resolver logic * refactor: replace default struct initialization with explicit field assignments for clarity * feat: enhance anonymous function naming by 
deriving context-based bindings * refactor: streamline match expressions for improved readability and performance * refactor: streamline match expressions for improved readability and performance * refactor: replace loop with while let for improved clarity and performance * feat: add SSA constant propagation support to analysis context for improved accuracy * feat: add SSA constant propagation support to analysis context for improved accuracy * feat: implement shell metacharacter validation and bounded-length checks in Rust analysis * feat: add static map analysis for command injection suppression and type safety * refactor: simplify match statements and reduce line breaks for improved readability * feat(summary): phase 1/5 SinkSite data model for primary sink-location attribution Introduce SinkSite (file_rel, line, col, snippet, cap) carrying the primary sink source-location through function summaries. Swap SsaFuncSummary.param_to_sink and FuncSummary.param_to_sink from a coarse Cap map to a deduped SmallVec<[SinkSite; 1]> per parameter, with a backward-compatible cap_sites() helper and serde defaults so pre-phase-1 on-disk rows continue to deserialise cleanly. Extraction: SinkSiteLocator bundles the tree/bytes/file_rel needed by extract_ssa_func_summary; ParsedFile::extract_ssa_artifacts wires the locator in for the persisted pass-1 path, while pass-2 intra-file transient summaries fall back to cap-only sites (behavior unchanged). Merge: GlobalSummaries::insert now unions sink sites with (file_rel, line, col, cap) dedup via shared union_param_sink_sites helper. Database: JSON-serialised summary columns carry the new shape automatically; no schema change needed. Phase 2 will consume SinkSite in build_taint_diag() to overwrite the caller-site Finding.line with the callee's sink line when resolved via summary. Phase 1 keeps behavior unchanged: scanning tests/benchmark/corpus/rust/cmdi/cmdi_indirect.rs still produces the same (wrong) line 10 finding. 
Adds round-trip tests covering SinkSite solo, SsaFuncSummary with sink sites, legacy-JSON default handling for both summary types, and merge dedup. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * feat(taint): phase 2/5 thread SinkSite into SsaTaintEvent and Finding Plumb Phase 1's SinkSite through the event pipeline into Findings, no output change yet. SsaTaintEvent gains `primary_sink_site: Option<SinkSite>`; when the main or callback sink-emission path has non-empty `param_to_sink_sites`, filter to sites whose `(line != 0) && (cap ∩ sink_caps != ∅)` and emit one event per distinct site — the multi-primary collapse keeps each downstream Finding single-primary. Resolution: ResolvedSummary and SinkInfo gain mirror `param_to_sink_sites` fields, populated from `SsaFuncSummary.param_to_sink` (SSA + callback paths) and `FuncSummary.param_to_sink` (global paths). Label, local-summary, and interop resolution paths leave the field empty — they only ever had cap-level info to begin with. Finding: new `primary_location: Option<SinkLocation>` with `file_rel/line/col`. `ssa_events_to_findings` maps `event.primary_sink_site` → `Finding.primary_location`, filtering cap-only sites (`line == 0`) to `None` so the (0,0) sentinel never leaks to formatters. Dedup key extended with the primary location so multi-site events aren't collapsed back together. Invariants (debug_assert!): * every SinkSite reaching emission has `line != 0 && cap ∩ sink_caps != ∅` — enforced by the pick_primary_sink_sites* filters; * every populated Finding.primary_location has `line != 0` AND non-empty `file_rel` — the cap-only → None translation upstream guarantees this. Deliberately independent of `uses_summary`: that flag tracks whether the *taint chain* used a summary, whereas primary attribution requires only that the *sink* itself was summary-resolved. 
A local source reaching a cross-file sink produces `uses_summary=false` alongside a populated primary_location — documented on Finding.primary_location, covered by `cross_file_sink_finding_carries_primary_location`. build_taint_diag, SARIF/JSON/explanation formatters, and the benchmark scorer remain untouched: finding.line still comes from `cfg_graph[finding.sink]`, so cmdi_indirect.rs still reports line 10 and the benchmark's rs-cmdi-003 row still shows FN in the LOC column. Tests: `cross_file_sink_finding_carries_primary_location` (proves plumbing via a synthetic FuncSummary carrying a SinkSite at 42:5) and `cross_file_sink_cap_only_site_leaves_primary_location_none` (regression guard against cap-only sites surfacing). All 1566 lib tests + integration tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * feat(output): phase 3/5 consume primary sink location in diag + SARIF When a finding's primary_location (populated in phase 2 from a callee summary's SinkSite) names the dangerous instruction inside a callee body, attribute the diagnostic line to that location instead of the caller's call site. The call site is demoted to a Call step in flow_steps, and a synthetic Sink step at the primary location is appended so analysts still see the full trace. Changes: - Add scan_root parameter to build_taint_diag so file_rel can be resolved back to an absolute path via a shared resolve_file_rel helper. Empty file_rel (single-file scans where namespace == "") resolves to the file under analysis. - Extend SinkLocation with snippet, carried from the upstream SinkSite so the formatter needs no second file read. - Relax the ssa_events_to_findings debug_assert to allow empty file_rel, which is valid when scan root equals the file itself. - SARIF: emit data-flow as codeFlows[0].threadFlows[0].locations[]; locations[0] already reflects the primary sink position via the updated diag line/col. 
Acceptance: scan on tests/benchmark/corpus/rust/cmdi/cmdi_indirect.rs now reports line 5 (Command::new) as the primary sink, with the call site at line 10 visible in flow_steps. Two expect.json fixtures updated (must_match line_range widened): - javascript/taint/context_sensitive_call: 12-14 -> 7-14 (line 8 is the real sink inside run()). - rust/cfg/closure_async: 10-10 -> 10-11 (line 11 is Command::new inside the closure). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * feat(bench): phase 4/5 validate primary sink attribution across corpus Extend the benchmark scorer and ground truth to lock in phase 3's primary-location behavior, and add fixtures that exercise the new capability end-to-end. Scorer (tests/benchmark_test.rs): - Add optional `expected_call_site_lines: Option<Vec<[usize; 2]>>` on Case. When present, score_location_level additionally requires at least one flow_step in the finding's evidence trace to fall within ±2 of the call-site range. When absent, the check is skipped — fully forward-compatible with existing fixtures. - Retain ±2 tolerance on expected_sink_lines (compared against the now-primary Diag.line post-phase-3). Ground truth edits: - rs-cmdi-cross-001: expected_sink_lines [8,8] -> [9,9]. Line 8 is the transform::wrap call site (a cross-file propagator, not a sink); line 9 is Command::new, the real sink. The ±2 tolerance happened to mask this stale attribution but it was semantically wrong — phase 4 is the right time to correct it. Also adds expected_call_site_lines [8,8] so the new field is exercised on an existing cross-file case. - rs-cmdi-003: adds expected_call_site_lines [10,10] (run_cmd call). This fixture's sink (Command::new inside run_cmd at line 5) was the motivating case for phases 1-3; adding the call-site assertion guards against regression to caller-line attribution. 
New fixtures: - rust/cmdi/cmdi_indirect_multisink.rs (rs-cmdi-009): helper run_both takes two tainted params and invokes two Command sinks on consecutive lines. Locks in that primary line lands inside the helper (lines 5-6), not at the caller (line 12). Notes document that SinkSite is currently one-per-callee so both findings today collapse onto the first sink; expected_sink_lines=[5,6] and expected_call_site_lines=[12,12] stay valid either way. - python/cmdi/cross_indirect_sink/{app.py,helper.py} (py-cmdi-cross- 004): sink os.system lives in helper.py (cross-file), caller in app.py reads env source and calls run_cmd. Verifies phase 3's cross-file primary attribution: Diag.path = helper.py, Diag.line = 5, with app.py:7 recorded in flow_steps as a Call step. Acceptance: - `cargo test --test benchmark_test -- --ignored --nocapture` passes. - rs-cmdi-003 is TP/TP/TP (the target flip FN->TP at LOC). All pre-existing TP/TP/TP fixtures remain TP/TP/TP; 2 new fixtures are TP/TP/TP. - Aggregate rule-level: TP=158 FP=10 FN=1 TN=97, P=0.940 R=0.994 F1=0.966 on the 266-case corpus (was TP=156 FP=10 FN=1 TN=97 on 264 pre-phase-4, delta is the +2 new cases both resolving TP). - Full `cargo test` green (1566 lib tests + all integration tests). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * feat(taint): phase 5/5 lock Finding.primary_location contract via regression test Add a regression test in src/taint/ssa_transfer.rs that wires up a synthetic SsaFuncSummary with a SinkSite at other.rs:42:10 and drives the three emission stages (pick_primary_sink_sites → emit_ssa_taint_events → ssa_events_to_findings) against a minimal caller SSA body. Asserts the resulting Finding.primary_location is exactly that triple. The existing integration tests in src/taint/tests.rs cover the coarse FuncSummary path end-to-end through analyse_file. 
This test locks in the lower-level SSA-side plumbing so a future refactor that silently drops the site between pick → emit → findings fails here rather than only at the benchmark layer. Also refreshes tests/benchmark/results/latest.json (timestamp only; rs-cmdi-003 remains TP/TP/TP and the aggregate P/R/F1 are unchanged from phase 4). Closes the primary sink-location attribution feature (phases 1-5/5): * Phase 1 — SinkSite data model on summaries. * Phase 2 — SinkSite threaded into SsaTaintEvent and Finding. * Phase 3 — diag + SARIF consume primary_location. * Phase 4 — benchmark validates primary_call_site_lines across corpus. * Phase 5 — regression test locks the event→finding contract. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * refactor: clean up formatting and improve readability in multiple files * refactor: simplify type definition for deduplication key in findings * test(harness): add must_not_match expectation for FP regression guards Extends ExpectedFinding with must_not_match field that asserts a diagnostic must NOT fire — presence is a hard failure. Non-consuming scan so it coexists with must_match entries on the same rule_id. Adds forbidden_violations accumulator and updates summary line. 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * feat(regression): update expectations to ensure must_not_match for various taint and resource leak rules * feat: implement auto-seeding for JS/TS handler parameters to enhance taint tracking * feat: update switch statement handling to improve control flow analysis * feat: implement promisify alias handling for JS/TS to enhance taint tracking * feat: enhance taint tracking by refining expectation handling and adding mode filtering * feat: refine SQL handling in stream processing and enhance auto-seeding for handler parameters * feat: update taint tracking rules to enforce full mode matching and improve flow analysis * feat: enhance Ruby subshell handling to improve taint tracking and flow analysis * feat: update xss_response expectations to refine taint flow analysis and enhance regression guarding * feat: refine framework detection and update expectation handling for Echo and Sinatra * feat: implement max_count for taint tracking expectations and deduplicate findings * feat: add strict_unexpected handling for taint-unsanitised-flow in expectation files * feat: enhance deduplication of taint-unsanitised-flow findings by collapsing based on line and severity * feat: add strict_unexpected handling for taint-unsanitised-flow in multiple expectation files * feat: add structural invariant checks for SSA bodies * feat: ensure deterministic phi emission order using BTreeSet * feat: enhance handling of terminators to ensure authoritative flow through successor edges * feat: enhance Goto terminator handling to ensure all successors are marked executable * feat: refactor code for improved readability and organization * feat: simplify predicate checks and enhance readability in SSA handling * feat: implement per-file parse timeout and enhance file size handling * feat: migrate analysis engine toggles from environment variables to configuration file * feat: remove unnecessary whitespace in 
hostile_input_tests.rs * feat: remove unnecessary whitespace in hostile_input_tests.rs * feat: update dependencies and enhance documentation on language maturity * feat: enhance security headers and improve request body limits * feat: implement sink capability bits for deduplication and enhance evidence tagging * feat: implement dynamic activation handling for gated sinks and enhance validation logic * feat: enhance configuration documentation and clarify inline analysis cache behavior * feat: implement panic recovery during analysis to continue scans past errors * feat: add expectations configuration for taint analysis and performance metrics * feat: enhance error handling and logging during file reading and mutex locking * feat: add cross-file body loading tests and plumbing for CF-1 phase * feat: implement cross-file k=1 context-sensitive inline taint analysis with new tests and fixtures * feat: implement indexed-scan parity in cross-file inline analysis with new dropdown and copy functionality * feat: enhance classification span handling in CFG and AST for improved source attribution * feat: add new Express routes for handling user input and telemetry data * feat: implement ternary expression handling in CFG with diamond structure for JS/TS * feat: implement Phase CF-3 abstract-domain transfer channels in summaries * feat: add support for string-prefix transfer in cross-file calls and update tests * docs: reduce RESULTS.md doc size * feat: implement Phase CF-4 per-return-path summary decomposition with tests * feat: update parameter handling in pass1 and refactor SsaFuncSummary initialization * feat: implement Phase CF-5 for cross-file SCC joint fixed-point convergence with new flags and tests * feat: implement Phase CF-6 with parameter-granularity points-to summaries and associated tests * refactor: update comments and documentation for clarity and consistency * style: format code for consistency and readability * refactor: simplify verdict handling and 
improve edge checking logic * refactor: optimize path and identifier collection by avoiding unnecessary cloning * chore: update Cargo.toml for Rust version 1.85 and add ignored files; modify CHANGELOG and README for clarity on state analysis defaults * refactor: update documentation and improve clarity in configuration files * refactor: update documentation and improve clarity in configuration files * feat: add JS/TS pass-2 convergence tests and expectations configuration * feat: add Phase 5 regression tests for inline cache origin attribution and update related logic * feat: implement Phase 7 deduplication and alternative path linking for taint findings * feat: implement structural DFS index for anonymous functions and update naming conventions * feat: add Phase 8 regression tests for container-element taint in JS and Python * feat: add engine-depth profiles and explain-engine option for CLI * feat: update expectations and add new README fixtures for multi-file scan regression * feat: implement Phase 11 callback-alias and factory patterns with regression tests * feat: implement Terminator::Switch for multi-way dispatch and add regression tests * feat: add real-CVE benchmark fixtures for CVE-2023-48022, CVE-2019-14939, and CVE-2023-26159 with corresponding patched variants * refactor: extract cfg and ssa_transfer to submodules * refactor: cargo fmt * refactor: remove unnecessary blank line in cfg_tests.rs * refactor: remove unnecessary planning file * chore: update Rust version to 1.88 and bump dependencies in Cargo files * feat: enhance triage UI with new layout and controls, update README for clarity * feat: enhance triage UI with new layout and controls, update README for clarity * chore: remove outdated section from README for version 0.5.0 * docs: improve clarity and consistency in README content * chore: add "GPL-3.0-or-later" to license options in about.toml * chore: update license handling in about.toml and check-licenses.mjs * style: format code for 
improved readability in TriagePage component * style: format code for improved readability in TriagePage component * chore: enhance license handling and improve body_id scoping in seed lookup * feat: introduce owner and parent body IDs for enhanced seed scoping * feat: implement direction-aware engine provenance with new CLI flag for strict CI gating * feat: add Undef SSA operation for improved control-flow handling * style: improve code formatting for consistency and readability in multiple files * feat: add 16-function chain SCC across multiple files for enhanced analysis * style: simplify code formatting for improved readability in multiple files * fix: update CapHitReason default implementation and improve README clarity * docs: enhance README with detailed explanations of taint analysis and limitations * docs: refine README for clarity and consistency in taint analysis section * style: improve code formatting for better readability in NewScanModal and scans * fix: update cargo-about command to use --offline for deterministic license generation * fix: update cargo-about command to use --offline for deterministic license generation * ci: add step to prime cargo registry cache for deterministic license generation * feat: add support for non-sink collections in authorization analysis * feat: enhance authorization checks with row-level ownership equality and binding tracking * feat: implement self-scoped user handling and enhance ownership checks * refactor: simplify assertions and formatting in authorization analysis tests * fix: normalize line endings in THIRDPARTY-LICENSES.html generation and update README with AI disclosure * docs: update AI disclosure section for clarity and conciseness * feat: add AI Contribution Policy and update contributing guidelines for AI assistance disclosure * feat: enhance authorization analysis with SSA-derived variable type classification * feat: implement auth_finding_to_diag function for enhanced security diagnostics * feat: add 
args_value_refs to CallSite struct for enhanced argument tracking * feat: add args_value_refs to CallSite struct for enhanced argument tracking * feat: add direction-aware engine provenance with LossDirection classification and new CLI flag * feat: simplify strip_cap_from_call_args call by removing unnecessary line breaks * feat: enhance error message handling in cli_validation_tests for better Windows compatibility * feat: optimize release profile settings in Cargo.toml and update CodeQL configuration * feat: enhance release build process with SBOM generation and SLSA provenance * feat: update actions/checkout and actions/setup-node to v6, enhance CLI options, and improve auth-check summaries * feat: introduce PathFact handling for path safety checks and rejection logic * feat: introduce PathFact handling for path safety checks and rejection logic * feat: update benchmark data and enhance path sanitization logic with new safety checks * feat: document AI assistance in frontend UI development and human review process * feat: add return path facts for enhanced path safety checks and update documentation * chore: update release date for version 0.5.0 in CHANGELOG.md * chore: clean up ci.yml by removing outdated comments and clarifying steps * feat: implement cross-language path sanitizers and validators for enhanced security * feat: enhance SSA value usage tracking by including block terminators and improve path safety checks * feat: enhance switch statement handling by adding per-case path constraints and support for exclusive cases * refactor: simplify conditional formatting and improve code readability in executor and lower modules * feat: add vulnerable examples for various languages demonstrating authentication and sanitization issues * feat: enhance actor context recognition for self-actor identifiers and add support for global non-sink receivers * feat: enhance actor context recognition for self-actor identifiers and add support for global non-sink receivers * 
feat: add transform classifiers for Java, Go, and Ruby with corresponding tests * refactor: clarify comments on reassign-to-constant idiom and sink behavior in guards.rs --------- Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com> Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
c4ce08b452
commit
41128177d2
2144 changed files with 201812 additions and 8927 deletions
1119
src/taint/backwards.rs
Normal file
1119
src/taint/backwards.rs
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -1,19 +1,16 @@
|
|||
use crate::labels::{Cap, SourceKind};
|
||||
use crate::state::lattice::Lattice;
|
||||
use crate::state::symbol::SymbolId;
|
||||
use crate::taint::path_state::PredicateKind;
|
||||
use petgraph::graph::NodeIndex;
|
||||
use smallvec::SmallVec;
|
||||
|
||||
/// Maximum origins tracked per variable (bounded to prevent growth).
|
||||
const MAX_ORIGINS_PER_VAR: usize = 4;
|
||||
|
||||
/// Per-variable taint information.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct VarTaint {
|
||||
pub caps: Cap,
|
||||
/// Up to N origins that contributed taint (bounded).
|
||||
pub origins: SmallVec<[TaintOrigin; 2]>,
|
||||
/// Whether taint propagated through a function summary (cross-function).
|
||||
pub uses_summary: bool,
|
||||
}
|
||||
|
||||
/// A single taint origin — the node and classification of where taint came from.
|
||||
|
|
@ -21,9 +18,30 @@ pub struct VarTaint {
|
|||
pub struct TaintOrigin {
|
||||
pub node: NodeIndex,
|
||||
pub source_kind: SourceKind,
|
||||
/// Original source byte span, preserved when origin is remapped across
|
||||
/// body boundaries. `None` for intra-body origins (span can be looked
|
||||
/// up from `cfg[node].span`). `Some` for cross-body origins where
|
||||
/// `node` has been remapped to a body-local anchor.
|
||||
pub source_span: Option<(usize, usize)>,
|
||||
}
|
||||
|
||||
/// Compact bitset for up to 64 variables (indexed by SymbolId ordinal).
|
||||
///
|
||||
/// # Capacity limit
|
||||
///
|
||||
/// `SmallBitSet` is a fixed-size 64-slot bitset backed by a single `u64`.
|
||||
/// Inserting a `SymbolId` with ordinal ≥ 64 is a no-op — the bit is silently
|
||||
/// dropped. This is a deliberate precision-over-completeness trade: the
|
||||
/// bitset underpins predicate / validation tracking in the SSA taint engine,
|
||||
/// and functions with more than 64 distinct predicate-relevant variables are
|
||||
/// rare enough that the cost of a spill-out map is not worth the extra
|
||||
/// allocations on the common path.
|
||||
///
|
||||
/// When an out-of-range id is dropped, a `tracing::debug!` event is emitted
|
||||
/// under `target = "nyx::predicate_bitset"` so operators can detect the
|
||||
/// degraded-precision case. Path-sensitivity for variables beyond id 63
|
||||
/// degrades gracefully (no predicate bit recorded) rather than failing
|
||||
/// loudly.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub struct SmallBitSet(u64);
|
||||
|
||||
|
|
@ -32,14 +50,20 @@ impl SmallBitSet {
|
|||
Self(0)
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, id: SymbolId) {
|
||||
pub fn insert(&mut self, id: crate::state::symbol::SymbolId) {
|
||||
let idx = id.0;
|
||||
if idx < 64 {
|
||||
self.0 |= 1u64 << idx;
|
||||
} else {
|
||||
tracing::debug!(
|
||||
target: "nyx::predicate_bitset",
|
||||
id = idx,
|
||||
"SmallBitSet: dropped id >= 64; path-sensitivity degrades for this variable"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn contains(&self, id: SymbolId) -> bool {
|
||||
pub fn contains(&self, id: crate::state::symbol::SymbolId) -> bool {
|
||||
let idx = id.0;
|
||||
if idx < 64 {
|
||||
self.0 & (1u64 << idx) != 0
|
||||
|
|
@ -58,22 +82,24 @@ impl SmallBitSet {
|
|||
Self(self.0 & other.0)
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn is_empty(self) -> bool {
|
||||
self.0 == 0
|
||||
}
|
||||
|
||||
/// Whether self is a subset of other.
|
||||
#[allow(dead_code)] // used by Lattice::leq
|
||||
pub fn is_subset_of(self, other: Self) -> bool {
|
||||
self.0 & other.0 == self.0
|
||||
}
|
||||
|
||||
/// Whether self is a superset of other.
|
||||
#[allow(dead_code)] // used by Lattice::leq
|
||||
pub fn is_superset_of(self, other: Self) -> bool {
|
||||
other.is_subset_of(self)
|
||||
}
|
||||
|
||||
/// Raw bits for serialization/debug display.
|
||||
pub fn bits(self) -> u64 {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Monotone predicate summary per variable.
|
||||
|
|
@ -124,465 +150,10 @@ pub fn predicate_kind_bit(kind: PredicateKind) -> Option<u8> {
|
|||
}
|
||||
}
|
||||
|
||||
/// The abstract taint state at a program point.
|
||||
///
|
||||
/// Uses sorted SmallVec keyed by SymbolId for O(n) merge-join.
|
||||
/// Variables beyond the interner's capacity are naturally excluded.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct TaintState {
|
||||
/// Per-variable taint, sorted by SymbolId.
|
||||
pub vars: SmallVec<[(SymbolId, VarTaint); 16]>,
|
||||
|
||||
/// Variables validated on ALL paths (intersection on join).
|
||||
pub validated_must: SmallBitSet,
|
||||
|
||||
/// Variables validated on ANY path (union on join).
|
||||
pub validated_may: SmallBitSet,
|
||||
|
||||
/// Per-variable predicate summary (sorted by SymbolId).
|
||||
pub predicates: SmallVec<[(SymbolId, PredicateSummary); 4]>,
|
||||
}
|
||||
|
||||
impl TaintState {
|
||||
/// Create the initial state (no taint, no validation, no predicates).
|
||||
pub fn initial() -> Self {
|
||||
Self {
|
||||
vars: SmallVec::new(),
|
||||
validated_must: SmallBitSet::empty(),
|
||||
validated_may: SmallBitSet::empty(),
|
||||
predicates: SmallVec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Look up taint for a variable.
|
||||
pub fn get(&self, sym: SymbolId) -> Option<&VarTaint> {
|
||||
self.vars
|
||||
.binary_search_by_key(&sym, |(id, _)| *id)
|
||||
.ok()
|
||||
.map(|idx| &self.vars[idx].1)
|
||||
}
|
||||
|
||||
/// Insert or update taint for a variable.
|
||||
pub fn set(&mut self, sym: SymbolId, taint: VarTaint) {
|
||||
match self.vars.binary_search_by_key(&sym, |(id, _)| *id) {
|
||||
Ok(idx) => self.vars[idx].1 = taint,
|
||||
Err(idx) => self.vars.insert(idx, (sym, taint)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove taint for a variable.
|
||||
pub fn remove(&mut self, sym: SymbolId) {
|
||||
if let Ok(idx) = self.vars.binary_search_by_key(&sym, |(id, _)| *id) {
|
||||
self.vars.remove(idx);
|
||||
}
|
||||
}
|
||||
|
||||
/// Set a predicate summary for a variable.
|
||||
pub fn set_predicate(&mut self, sym: SymbolId, summary: PredicateSummary) {
|
||||
match self.predicates.binary_search_by_key(&sym, |(id, _)| *id) {
|
||||
Ok(idx) => self.predicates[idx].1 = summary,
|
||||
Err(idx) => self.predicates.insert(idx, (sym, summary)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get predicate summary for a variable.
|
||||
pub fn get_predicate(&self, sym: SymbolId) -> PredicateSummary {
|
||||
self.predicates
|
||||
.binary_search_by_key(&sym, |(id, _)| *id)
|
||||
.ok()
|
||||
.map(|idx| self.predicates[idx].1)
|
||||
.unwrap_or_else(PredicateSummary::empty)
|
||||
}
|
||||
|
||||
/// Check if any variable has contradictory predicates.
|
||||
pub fn has_contradiction(&self) -> bool {
|
||||
self.predicates.iter().any(|(_, s)| s.has_contradiction())
|
||||
}
|
||||
}
|
||||
|
||||
impl Lattice for TaintState {
|
||||
fn bot() -> Self {
|
||||
Self::initial()
|
||||
}
|
||||
|
||||
fn join(&self, other: &Self) -> Self {
|
||||
// Merge-join vars (sorted by SymbolId)
|
||||
let vars = merge_join_vars(&self.vars, &other.vars);
|
||||
|
||||
// validated_must = intersection (must hold on ALL paths)
|
||||
let validated_must = self.validated_must.intersection(other.validated_must);
|
||||
|
||||
// validated_may = union (holds on ANY path)
|
||||
let validated_may = self.validated_may.union(other.validated_may);
|
||||
|
||||
// predicates = per-key intersection of known_true/known_false bits
|
||||
let predicates = merge_join_predicates(&self.predicates, &other.predicates);
|
||||
|
||||
TaintState {
|
||||
vars,
|
||||
validated_must,
|
||||
validated_may,
|
||||
predicates,
|
||||
}
|
||||
}
|
||||
|
||||
fn leq(&self, other: &Self) -> bool {
|
||||
// Per-key Cap subset + origins subset
|
||||
if !vars_leq(&self.vars, &other.vars) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// validated_must: self ⊇ other (superset = less info = lower)
|
||||
if !self.validated_must.is_superset_of(other.validated_must) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// validated_may: self ⊆ other
|
||||
if !self.validated_may.is_subset_of(other.validated_may) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// predicates: self.known_true ⊇ other.known_true (more precise = lower)
|
||||
predicates_leq(&self.predicates, &other.predicates)
|
||||
}
|
||||
}
|
||||
|
||||
/// Merge-join two sorted var lists: per-key Cap OR + origins merge (bounded).
|
||||
fn merge_join_vars(
|
||||
a: &[(SymbolId, VarTaint)],
|
||||
b: &[(SymbolId, VarTaint)],
|
||||
) -> SmallVec<[(SymbolId, VarTaint); 16]> {
|
||||
let mut result = SmallVec::with_capacity(a.len().max(b.len()));
|
||||
let (mut i, mut j) = (0, 0);
|
||||
|
||||
while i < a.len() && j < b.len() {
|
||||
match a[i].0.cmp(&b[j].0) {
|
||||
std::cmp::Ordering::Less => {
|
||||
result.push(a[i].clone());
|
||||
i += 1;
|
||||
}
|
||||
std::cmp::Ordering::Greater => {
|
||||
result.push(b[j].clone());
|
||||
j += 1;
|
||||
}
|
||||
std::cmp::Ordering::Equal => {
|
||||
let caps = a[i].1.caps | b[j].1.caps;
|
||||
let origins = merge_origins(&a[i].1.origins, &b[j].1.origins);
|
||||
result.push((a[i].0, VarTaint { caps, origins }));
|
||||
i += 1;
|
||||
j += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remaining from either side
|
||||
while i < a.len() {
|
||||
result.push(a[i].clone());
|
||||
i += 1;
|
||||
}
|
||||
while j < b.len() {
|
||||
result.push(b[j].clone());
|
||||
j += 1;
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Merge two origin lists, deduplicating by node and bounding at MAX_ORIGINS_PER_VAR.
|
||||
fn merge_origins(
|
||||
a: &SmallVec<[TaintOrigin; 2]>,
|
||||
b: &SmallVec<[TaintOrigin; 2]>,
|
||||
) -> SmallVec<[TaintOrigin; 2]> {
|
||||
let mut merged = a.clone();
|
||||
for origin in b {
|
||||
if merged.len() >= MAX_ORIGINS_PER_VAR {
|
||||
break;
|
||||
}
|
||||
if !merged.iter().any(|o| o.node == origin.node) {
|
||||
merged.push(*origin);
|
||||
}
|
||||
}
|
||||
merged
|
||||
}
|
||||
|
||||
/// Check if a.vars ⊑ b.vars (per-key Cap subset + origins subset).
|
||||
#[allow(dead_code)] // called by Lattice::leq
|
||||
fn vars_leq(a: &[(SymbolId, VarTaint)], b: &[(SymbolId, VarTaint)]) -> bool {
|
||||
let (mut i, mut j) = (0, 0);
|
||||
|
||||
while i < a.len() {
|
||||
if j >= b.len() {
|
||||
return false; // a has keys not in b → not ⊑
|
||||
}
|
||||
match a[i].0.cmp(&b[j].0) {
|
||||
std::cmp::Ordering::Less => return false, // key in a but not b
|
||||
std::cmp::Ordering::Greater => {
|
||||
j += 1; // key only in b, skip
|
||||
}
|
||||
std::cmp::Ordering::Equal => {
|
||||
// Cap subset check
|
||||
if a[i].1.caps & b[j].1.caps != a[i].1.caps {
|
||||
return false;
|
||||
}
|
||||
// Origins subset check (by node)
|
||||
for orig in &a[i].1.origins {
|
||||
if !b[j].1.origins.iter().any(|o| o.node == orig.node) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
i += 1;
|
||||
j += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Merge-join predicate summaries with intersection semantics.
|
||||
fn merge_join_predicates(
|
||||
a: &[(SymbolId, PredicateSummary)],
|
||||
b: &[(SymbolId, PredicateSummary)],
|
||||
) -> SmallVec<[(SymbolId, PredicateSummary); 4]> {
|
||||
let mut result = SmallVec::new();
|
||||
let (mut i, mut j) = (0, 0);
|
||||
|
||||
while i < a.len() && j < b.len() {
|
||||
match a[i].0.cmp(&b[j].0) {
|
||||
std::cmp::Ordering::Less => {
|
||||
// Key only in a — intersection with empty = empty → drop
|
||||
i += 1;
|
||||
}
|
||||
std::cmp::Ordering::Greater => {
|
||||
j += 1;
|
||||
}
|
||||
std::cmp::Ordering::Equal => {
|
||||
let joined = a[i].1.join(b[j].1);
|
||||
if !joined.is_empty() {
|
||||
result.push((a[i].0, joined));
|
||||
}
|
||||
i += 1;
|
||||
j += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Keys only in one side → intersection with empty = drop
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Check if a.predicates ⊑ b.predicates.
|
||||
/// More precise (more known_true bits) = lower in the lattice.
|
||||
/// So a ⊑ b means a.known_true ⊇ b.known_true for each key.
|
||||
#[allow(dead_code)] // called by Lattice::leq
|
||||
fn predicates_leq(a: &[(SymbolId, PredicateSummary)], b: &[(SymbolId, PredicateSummary)]) -> bool {
|
||||
let (mut i, mut j) = (0, 0);
|
||||
|
||||
// For each key in b, a must have at least as many bits
|
||||
while j < b.len() {
|
||||
if i >= a.len() {
|
||||
// b has keys that a doesn't — a is missing info = not lower
|
||||
return false;
|
||||
}
|
||||
match a[i].0.cmp(&b[j].0) {
|
||||
std::cmp::Ordering::Less => {
|
||||
// a has extra keys (more info) — OK for leq
|
||||
i += 1;
|
||||
}
|
||||
std::cmp::Ordering::Greater => {
|
||||
// b has a key that a doesn't → a has fewer bits → not ⊑
|
||||
return false;
|
||||
}
|
||||
std::cmp::Ordering::Equal => {
|
||||
// a.known_true must be a superset of b.known_true
|
||||
if a[i].1.known_true & b[j].1.known_true != b[j].1.known_true {
|
||||
return false;
|
||||
}
|
||||
if a[i].1.known_false & b[j].1.known_false != b[j].1.known_false {
|
||||
return false;
|
||||
}
|
||||
i += 1;
|
||||
j += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn make_taint(sym: u32, caps: Cap) -> (SymbolId, VarTaint) {
|
||||
(
|
||||
SymbolId(sym),
|
||||
VarTaint {
|
||||
caps,
|
||||
origins: SmallVec::new(),
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
fn make_taint_with_origin(sym: u32, caps: Cap, node: usize) -> (SymbolId, VarTaint) {
|
||||
(
|
||||
SymbolId(sym),
|
||||
VarTaint {
|
||||
caps,
|
||||
origins: smallvec::smallvec![TaintOrigin {
|
||||
node: NodeIndex::new(node),
|
||||
source_kind: SourceKind::Unknown,
|
||||
}],
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
fn state_with_vars(vars: Vec<(SymbolId, VarTaint)>) -> TaintState {
|
||||
let mut s = TaintState::initial();
|
||||
s.vars = SmallVec::from_vec(vars);
|
||||
s
|
||||
}
|
||||
|
||||
// ── Lattice property tests ──────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn bot_identity() {
|
||||
let a = state_with_vars(vec![make_taint(0, Cap::ENV_VAR)]);
|
||||
assert_eq!(a.join(&TaintState::bot()), a);
|
||||
assert_eq!(TaintState::bot().join(&a), a);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn join_commutativity() {
|
||||
let a = state_with_vars(vec![make_taint(0, Cap::ENV_VAR)]);
|
||||
let b = state_with_vars(vec![make_taint(1, Cap::SHELL_ESCAPE)]);
|
||||
assert_eq!(a.join(&b), b.join(&a));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn join_associativity() {
|
||||
let a = state_with_vars(vec![make_taint(0, Cap::ENV_VAR)]);
|
||||
let b = state_with_vars(vec![make_taint(0, Cap::SHELL_ESCAPE)]);
|
||||
let c = state_with_vars(vec![make_taint(1, Cap::HTML_ESCAPE)]);
|
||||
assert_eq!(a.join(&b).join(&c), a.join(&b.join(&c)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn join_idempotency() {
|
||||
let a = state_with_vars(vec![make_taint(0, Cap::ENV_VAR | Cap::SHELL_ESCAPE)]);
|
||||
assert_eq!(a.join(&a), a);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn leq_reflexive() {
|
||||
let a = state_with_vars(vec![make_taint(0, Cap::ENV_VAR)]);
|
||||
assert!(a.leq(&a));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn leq_consistent_with_join() {
|
||||
let a = state_with_vars(vec![make_taint(0, Cap::ENV_VAR)]);
|
||||
let b = state_with_vars(vec![make_taint(0, Cap::ENV_VAR | Cap::SHELL_ESCAPE)]);
|
||||
assert!(a.leq(&b));
|
||||
assert_eq!(a.join(&b), b);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn join_merges_caps() {
|
||||
let a = state_with_vars(vec![make_taint(0, Cap::ENV_VAR)]);
|
||||
let b = state_with_vars(vec![make_taint(0, Cap::SHELL_ESCAPE)]);
|
||||
let joined = a.join(&b);
|
||||
assert_eq!(
|
||||
joined.get(SymbolId(0)).unwrap().caps,
|
||||
Cap::ENV_VAR | Cap::SHELL_ESCAPE
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn join_merges_origins() {
|
||||
let a = state_with_vars(vec![make_taint_with_origin(0, Cap::ENV_VAR, 1)]);
|
||||
let b = state_with_vars(vec![make_taint_with_origin(0, Cap::ENV_VAR, 2)]);
|
||||
let joined = a.join(&b);
|
||||
assert_eq!(joined.get(SymbolId(0)).unwrap().origins.len(), 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn validated_must_intersection() {
|
||||
let mut a = TaintState::initial();
|
||||
a.validated_must.insert(SymbolId(0));
|
||||
a.validated_must.insert(SymbolId(1));
|
||||
|
||||
let mut b = TaintState::initial();
|
||||
b.validated_must.insert(SymbolId(1));
|
||||
b.validated_must.insert(SymbolId(2));
|
||||
|
||||
let joined = a.join(&b);
|
||||
assert!(!joined.validated_must.contains(SymbolId(0)));
|
||||
assert!(joined.validated_must.contains(SymbolId(1)));
|
||||
assert!(!joined.validated_must.contains(SymbolId(2)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn validated_may_union() {
|
||||
let mut a = TaintState::initial();
|
||||
a.validated_may.insert(SymbolId(0));
|
||||
|
||||
let mut b = TaintState::initial();
|
||||
b.validated_may.insert(SymbolId(1));
|
||||
|
||||
let joined = a.join(&b);
|
||||
assert!(joined.validated_may.contains(SymbolId(0)));
|
||||
assert!(joined.validated_may.contains(SymbolId(1)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn predicate_contradiction() {
|
||||
let mut state = TaintState::initial();
|
||||
state.set_predicate(
|
||||
SymbolId(0),
|
||||
PredicateSummary {
|
||||
known_true: 1, // NullCheck true
|
||||
known_false: 1, // NullCheck false
|
||||
},
|
||||
);
|
||||
assert!(state.has_contradiction());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn predicate_no_contradiction() {
|
||||
let mut state = TaintState::initial();
|
||||
state.set_predicate(
|
||||
SymbolId(0),
|
||||
PredicateSummary {
|
||||
known_true: 1, // NullCheck true
|
||||
known_false: 2, // EmptyCheck false (different kind)
|
||||
},
|
||||
);
|
||||
assert!(!state.has_contradiction());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn predicate_join_intersection() {
|
||||
let mut a = TaintState::initial();
|
||||
a.set_predicate(
|
||||
SymbolId(0),
|
||||
PredicateSummary {
|
||||
known_true: 0b011, // NullCheck + EmptyCheck
|
||||
known_false: 0,
|
||||
},
|
||||
);
|
||||
|
||||
let mut b = TaintState::initial();
|
||||
b.set_predicate(
|
||||
SymbolId(0),
|
||||
PredicateSummary {
|
||||
known_true: 0b010, // EmptyCheck only
|
||||
known_false: 0,
|
||||
},
|
||||
);
|
||||
|
||||
let joined = a.join(&b);
|
||||
let pred = joined.get_predicate(SymbolId(0));
|
||||
assert_eq!(pred.known_true, 0b010); // only EmptyCheck on both paths
|
||||
}
|
||||
use crate::state::symbol::SymbolId;
|
||||
|
||||
// ── SmallBitSet tests ───────────────────────────────────────────────
|
||||
|
||||
|
|
@ -617,4 +188,38 @@ mod tests {
|
|||
assert!(!i.contains(SymbolId(1)));
|
||||
assert!(i.contains(SymbolId(2)));
|
||||
}
|
||||
|
||||
// ── PredicateSummary tests ──────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn predicate_contradiction() {
|
||||
let s = PredicateSummary {
|
||||
known_true: 1, // NullCheck true
|
||||
known_false: 1, // NullCheck false
|
||||
};
|
||||
assert!(s.has_contradiction());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn predicate_no_contradiction() {
|
||||
let s = PredicateSummary {
|
||||
known_true: 1, // NullCheck true
|
||||
known_false: 2, // EmptyCheck false (different kind)
|
||||
};
|
||||
assert!(!s.has_contradiction());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn predicate_join_intersection() {
|
||||
let a = PredicateSummary {
|
||||
known_true: 0b011, // NullCheck + EmptyCheck
|
||||
known_false: 0,
|
||||
};
|
||||
let b = PredicateSummary {
|
||||
known_true: 0b010, // EmptyCheck only
|
||||
known_false: 0,
|
||||
};
|
||||
let joined = a.join(b);
|
||||
assert_eq!(joined.known_true, 0b010); // only EmptyCheck on both paths
|
||||
}
|
||||
}
|
||||
|
|
|
|||
1561
src/taint/mod.rs
1561
src/taint/mod.rs
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
433
src/taint/ssa_transfer/events.rs
Normal file
433
src/taint/ssa_transfer/events.rs
Normal file
|
|
@ -0,0 +1,433 @@
|
|||
//! Taint event emission and conversion to [`crate::taint::Finding`].
|
||||
//!
|
||||
//! Extracted from the monolithic `ssa_transfer.rs`. Contains:
|
||||
//! * [`SsaTaintEvent`] — the raw event struct produced by the block-level
|
||||
//! worklist each time a tainted value reaches a sink.
|
||||
//! * [`ssa_events_to_findings`] — event → `Finding` conversion with the
|
||||
//! `primary_location` invariant and dedup.
|
||||
//! * Flow-path reconstruction helpers ([`reconstruct_flow_path`] and
|
||||
//! operand pickers).
|
||||
//! * Small post-hoc utilities ([`block_distance`],
|
||||
//! [`extract_sink_arg_positions`], [`compute_path_hash`]).
|
||||
|
||||
use crate::cfg::Cfg;
|
||||
use crate::labels::Cap;
|
||||
use crate::ssa::ir::{SsaBody, SsaOp, SsaValue};
|
||||
use crate::summary::SinkSite;
|
||||
use crate::taint::domain::TaintOrigin;
|
||||
use crate::taint::path_state::PredicateKind;
|
||||
use petgraph::graph::NodeIndex;
|
||||
use smallvec::SmallVec;
|
||||
use std::collections::{HashSet, VecDeque};
|
||||
|
||||
/// Event emitted when taint reaches a sink in SSA analysis.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct SsaTaintEvent {
|
||||
pub sink_node: NodeIndex,
|
||||
pub tainted_values: Vec<(SsaValue, Cap, SmallVec<[TaintOrigin; 2]>)>,
|
||||
pub sink_caps: Cap,
|
||||
pub all_validated: bool,
|
||||
pub guard_kind: Option<PredicateKind>,
|
||||
/// Whether any callee in this event's taint path was resolved via a
|
||||
/// function summary (SSA, local, or global) rather than direct label.
|
||||
pub uses_summary: bool,
|
||||
/// Primary (callee-internal) sink location for cross-file attribution.
|
||||
///
|
||||
/// Populated when this event was emitted via summary resolution and the
|
||||
/// callee summary carried a [`SinkSite`] whose `cap` intersects
|
||||
/// `sink_caps`. When multiple [`SinkSite`]s for the same `(param_idx,
|
||||
/// cap mask)` match, the emission site produces one event per
|
||||
/// [`SinkSite`] so each downstream [`crate::taint::Finding`] carries a
|
||||
/// single primary attribution — the multi-primary case collapses to
|
||||
/// multiple single-primary events.
|
||||
///
|
||||
/// `None` for:
|
||||
/// * intra-procedural sinks (`uses_summary == false`), where the
|
||||
/// caller's sink span already names the dangerous instruction;
|
||||
/// * summary-resolved sinks whose callee summary carried only cap-only
|
||||
/// [`SinkSite`]s (no source coordinates — e.g. pass-2 transient
|
||||
/// summaries or local `LocalFuncSummary`-only callees).
|
||||
pub primary_sink_site: Option<SinkSite>,
|
||||
}
|
||||
|
||||
pub(super) fn block_distance(ssa: &SsaBody, source_node: NodeIndex, sink_node: NodeIndex) -> u16 {
|
||||
let src_block = match ssa.cfg_node_map.get(&source_node) {
|
||||
Some(v) => ssa.def_of(*v).block,
|
||||
None => return 0,
|
||||
};
|
||||
let sink_block = match ssa.cfg_node_map.get(&sink_node) {
|
||||
Some(v) => ssa.def_of(*v).block,
|
||||
None => return 0,
|
||||
};
|
||||
if src_block == sink_block {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// BFS from src_block to sink_block
|
||||
let mut visited = HashSet::new();
|
||||
let mut queue = VecDeque::new();
|
||||
visited.insert(src_block);
|
||||
queue.push_back((src_block, 0u16));
|
||||
|
||||
while let Some((blk, dist)) = queue.pop_front() {
|
||||
for &succ in &ssa.block(blk).succs {
|
||||
if succ == sink_block {
|
||||
return (dist + 1).min(255);
|
||||
}
|
||||
if visited.insert(succ) && dist + 1 < 255 {
|
||||
queue.push_back((succ, dist + 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
0 // unreachable or not connected — conservative default
|
||||
}
|
||||
|
||||
// ── Flow Path Reconstruction ─────────────────────────────────────────────
|
||||
|
||||
/// Reconstruct the taint flow path from source to sink by walking backward
|
||||
/// through the SSA def-use chain.
|
||||
///
|
||||
/// Returns steps in source→sink order.
|
||||
pub(super) fn reconstruct_flow_path(
|
||||
tainted_val: SsaValue,
|
||||
origin: &crate::taint::domain::TaintOrigin,
|
||||
sink_node: NodeIndex,
|
||||
ssa: &SsaBody,
|
||||
cfg: &Cfg,
|
||||
) -> Vec<crate::taint::FlowStepRaw> {
|
||||
use crate::evidence::FlowStepKind;
|
||||
use crate::taint::FlowStepRaw;
|
||||
|
||||
const MAX_STEPS: usize = 64;
|
||||
|
||||
let mut steps = Vec::new();
|
||||
let mut visited = HashSet::new();
|
||||
|
||||
// 1. Add sink step
|
||||
steps.push(FlowStepRaw {
|
||||
cfg_node: sink_node,
|
||||
var_name: cfg
|
||||
.node_weight(sink_node)
|
||||
.and_then(|n| n.call.callee.clone()),
|
||||
op_kind: FlowStepKind::Sink,
|
||||
});
|
||||
|
||||
// 2. Walk backward from tainted_val
|
||||
let mut current = tainted_val;
|
||||
for _ in 0..MAX_STEPS {
|
||||
if !visited.insert(current) {
|
||||
break;
|
||||
}
|
||||
|
||||
let def = ssa.def_of(current);
|
||||
let block = ssa.block(def.block);
|
||||
|
||||
// Find the instruction for this value
|
||||
let inst = block
|
||||
.phis
|
||||
.iter()
|
||||
.chain(block.body.iter())
|
||||
.find(|i| i.value == current);
|
||||
|
||||
let inst = match inst {
|
||||
Some(i) => i,
|
||||
None => break,
|
||||
};
|
||||
|
||||
// Skip if same cfg_node as previous step (dedup consecutive same-line)
|
||||
if let Some(prev) = steps.last() {
|
||||
if prev.cfg_node == inst.cfg_node {
|
||||
// Still follow the chain, just don't add a duplicate step
|
||||
match &inst.op {
|
||||
SsaOp::Source | SsaOp::Param { .. } | SsaOp::SelfParam | SsaOp::CatchParam => {
|
||||
break;
|
||||
}
|
||||
SsaOp::Assign(uses) => {
|
||||
current = pick_tainted_operand(uses, origin, ssa);
|
||||
continue;
|
||||
}
|
||||
SsaOp::Call { args, receiver, .. } => {
|
||||
current = pick_tainted_operand_call(args, receiver, origin, ssa);
|
||||
continue;
|
||||
}
|
||||
SsaOp::Phi(operands) => {
|
||||
let vals: SmallVec<[SsaValue; 4]> =
|
||||
operands.iter().map(|(_, v)| *v).collect();
|
||||
current = pick_tainted_operand(&vals, origin, ssa);
|
||||
continue;
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match &inst.op {
|
||||
SsaOp::Source | SsaOp::Param { .. } | SsaOp::SelfParam | SsaOp::CatchParam => {
|
||||
steps.push(FlowStepRaw {
|
||||
cfg_node: inst.cfg_node,
|
||||
var_name: inst.var_name.clone(),
|
||||
op_kind: FlowStepKind::Source,
|
||||
});
|
||||
break;
|
||||
}
|
||||
SsaOp::Assign(uses) => {
|
||||
steps.push(FlowStepRaw {
|
||||
cfg_node: inst.cfg_node,
|
||||
var_name: inst.var_name.clone(),
|
||||
op_kind: FlowStepKind::Assignment,
|
||||
});
|
||||
if uses.is_empty() {
|
||||
break;
|
||||
}
|
||||
current = pick_tainted_operand(uses, origin, ssa);
|
||||
}
|
||||
SsaOp::Call { args, receiver, .. } => {
|
||||
steps.push(FlowStepRaw {
|
||||
cfg_node: inst.cfg_node,
|
||||
var_name: inst.var_name.clone(),
|
||||
op_kind: FlowStepKind::Call,
|
||||
});
|
||||
current = pick_tainted_operand_call(args, receiver, origin, ssa);
|
||||
}
|
||||
SsaOp::Phi(operands) => {
|
||||
steps.push(FlowStepRaw {
|
||||
cfg_node: inst.cfg_node,
|
||||
var_name: inst.var_name.clone(),
|
||||
op_kind: FlowStepKind::Phi,
|
||||
});
|
||||
let vals: SmallVec<[SsaValue; 4]> = operands.iter().map(|(_, v)| *v).collect();
|
||||
if vals.is_empty() {
|
||||
break;
|
||||
}
|
||||
current = pick_tainted_operand(&vals, origin, ssa);
|
||||
}
|
||||
SsaOp::Const(_) | SsaOp::Nop | SsaOp::Undef => break,
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Reverse: was built sink→source, need source→sink
|
||||
steps.reverse();
|
||||
steps
|
||||
}
|
||||
|
||||
/// Pick the operand whose definition is closest to the origin node (direct match preferred).
|
||||
fn pick_tainted_operand(
|
||||
operands: &[SsaValue],
|
||||
origin: &crate::taint::domain::TaintOrigin,
|
||||
ssa: &SsaBody,
|
||||
) -> SsaValue {
|
||||
// Prefer operand defined at the origin node
|
||||
for &op in operands {
|
||||
if ssa.def_of(op).cfg_node == origin.node {
|
||||
return op;
|
||||
}
|
||||
}
|
||||
// Fallback: pick first (heuristic)
|
||||
operands.first().copied().unwrap_or(SsaValue(0))
|
||||
}
|
||||
|
||||
/// Pick tainted operand for Call instructions (flatten args + receiver).
|
||||
fn pick_tainted_operand_call(
|
||||
args: &[SmallVec<[SsaValue; 2]>],
|
||||
receiver: &Option<SsaValue>,
|
||||
origin: &crate::taint::domain::TaintOrigin,
|
||||
ssa: &SsaBody,
|
||||
) -> SsaValue {
|
||||
let mut all_vals: SmallVec<[SsaValue; 8]> = SmallVec::new();
|
||||
for arg in args {
|
||||
all_vals.extend_from_slice(arg);
|
||||
}
|
||||
if let Some(r) = receiver {
|
||||
all_vals.push(*r);
|
||||
}
|
||||
pick_tainted_operand(&all_vals, origin, ssa)
|
||||
}
|
||||
|
||||
/// Convert SSA taint events to the standard Finding struct.
|
||||
///
|
||||
/// # Invariants enforced by debug_assert!
|
||||
///
|
||||
/// The `primary_location` field carries the primary sink-location
|
||||
/// attribution. One invariant must hold across every emitted Finding:
|
||||
///
|
||||
/// * A populated `primary_location` implies the attribution came from a
|
||||
/// [`SinkSite`] with resolved coordinates (`line != 0` AND `file_rel`
|
||||
/// non-empty). Cap-only sites are filtered to `None` here; they never
|
||||
/// reach downstream formatters claiming a `(0, 0)` origin.
|
||||
///
|
||||
/// Note: this invariant is intentionally independent of `uses_summary`.
|
||||
/// The taint-chain flag tracks summary-propagated *taint*, not summary-
|
||||
/// resolved *sinks* — a local source can reach a cross-file sink, so
|
||||
/// `primary_location.is_some()` does not imply `uses_summary == true`.
|
||||
pub fn ssa_events_to_findings(
|
||||
events: &[SsaTaintEvent],
|
||||
ssa: &SsaBody,
|
||||
cfg: &Cfg,
|
||||
) -> Vec<crate::taint::Finding> {
|
||||
type FindingDedupKey = (usize, usize, Option<(String, u32, u32)>);
|
||||
let mut findings = Vec::new();
|
||||
let mut seen: HashSet<FindingDedupKey> = HashSet::new();
|
||||
|
||||
for event in events {
|
||||
// Suppress findings where all tainted variables were validated
|
||||
// (passed through an allowlist, type-check, or validation branch).
|
||||
if event.all_validated {
|
||||
// Mirror the path-safety pathway: when the SSA engine has
|
||||
// already proved every tainted input to a privileged
|
||||
// FILE_IO sink passed through validation, publish the sink
|
||||
// span so the state-analysis pass suppresses
|
||||
// `state-unauthed-access` on the same span. Trust here
|
||||
// matches the trust the engine already extends when
|
||||
// dropping the taint flow finding. Scoped to FILE_IO sinks
|
||||
// because that is the only sink class state-unauthed-access
|
||||
// currently fires on; broadening would risk stretching
|
||||
// validator-name heuristics into unrelated finding classes.
|
||||
if event.sink_caps.intersects(Cap::FILE_IO) {
|
||||
let span = cfg[event.sink_node].ast.span;
|
||||
crate::taint::ssa_transfer::state::record_path_safe_suppressed_span(span);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
let primary_location = event.primary_sink_site.as_ref().and_then(|s| {
|
||||
// Only promote to a Finding.primary_location when the site has
|
||||
// resolved coordinates (cap-only sites at (0, 0) carry no
|
||||
// attribution and would just add noise).
|
||||
if s.line == 0 {
|
||||
None
|
||||
} else {
|
||||
Some(crate::taint::SinkLocation {
|
||||
file_rel: s.file_rel.clone(),
|
||||
line: s.line,
|
||||
col: s.col,
|
||||
snippet: s.snippet.clone(),
|
||||
})
|
||||
}
|
||||
});
|
||||
|
||||
// Data-integrity invariant: a populated primary_location must at least
|
||||
// carry resolved line coordinates. `file_rel` may legitimately be
|
||||
// empty — when the scan root is the caller file itself (single-file
|
||||
// scans), every namespace normalizes to `""` and the callee's site
|
||||
// inherits that empty path; consumers resolve it against the file
|
||||
// under analysis. Line==0 is the only filter-worthy invariant.
|
||||
debug_assert!(
|
||||
primary_location.as_ref().is_none_or(|l| l.line != 0),
|
||||
"primary_location must carry a resolved line coordinate",
|
||||
);
|
||||
|
||||
// Dedup key includes primary location so multi-site events that
|
||||
// share a single (source, sink) pair still produce distinct findings
|
||||
// — one per resolved callee-internal site.
|
||||
let loc_key = primary_location
|
||||
.as_ref()
|
||||
.map(|l| (l.file_rel.clone(), l.line, l.col));
|
||||
for (val, caps, origins) in &event.tainted_values {
|
||||
let cap_specificity = (*caps & event.sink_caps).bits().count_ones() as u8;
|
||||
for origin in origins {
|
||||
if seen.insert((
|
||||
origin.node.index(),
|
||||
event.sink_node.index(),
|
||||
loc_key.clone(),
|
||||
)) {
|
||||
let hop_count = block_distance(ssa, origin.node, event.sink_node);
|
||||
let flow_steps = reconstruct_flow_path(*val, origin, event.sink_node, ssa, cfg);
|
||||
let path_hash = compute_path_hash(&flow_steps);
|
||||
findings.push(crate::taint::Finding {
|
||||
body_id: crate::cfg::BodyId(0), // set by caller
|
||||
sink: event.sink_node,
|
||||
source: origin.node,
|
||||
path: vec![origin.node, event.sink_node],
|
||||
source_kind: origin.source_kind,
|
||||
path_validated: event.all_validated,
|
||||
guard_kind: event.guard_kind,
|
||||
hop_count,
|
||||
cap_specificity,
|
||||
uses_summary: event.uses_summary,
|
||||
flow_steps,
|
||||
symbolic: None,
|
||||
source_span: origin.source_span.map(|(start, _)| start),
|
||||
primary_location: primary_location.clone(),
|
||||
engine_notes: smallvec::SmallVec::new(),
|
||||
path_hash,
|
||||
finding_id: String::new(),
|
||||
alternative_finding_ids: smallvec::SmallVec::new(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
findings
|
||||
}
|
||||
|
||||
/// Compute a stable hash over the sequence of intermediate CFG nodes
|
||||
/// that a tainted value traversed from source to sink. Used as part of
|
||||
/// the dedup key so two flows that share `(body_id, sink, source)` but
|
||||
/// cross different intermediate variables are preserved as distinct
|
||||
/// findings rather than collapsed to one.
|
||||
///
|
||||
/// Hashes the `(cfg_node.index(), op_kind-tag, var_name)` tuple per
|
||||
/// step. `op_kind` is captured as a small integer tag so changes in
|
||||
/// enum encoding do not silently alter the hash; `var_name` is included
|
||||
/// because two flows may touch the same cfg_node via different phi
|
||||
/// operands (same node, different variable).
|
||||
fn compute_path_hash(steps: &[crate::taint::FlowStepRaw]) -> u64 {
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::hash::{Hash, Hasher};
|
||||
let mut hasher = DefaultHasher::new();
|
||||
for step in steps {
|
||||
step.cfg_node.index().hash(&mut hasher);
|
||||
// Encode FlowStepKind as a stable small integer. Using the
|
||||
// discriminant directly would tie us to enum ordering; an
|
||||
// explicit tag is more resilient to reordering.
|
||||
let kind_tag: u8 = match step.op_kind {
|
||||
crate::evidence::FlowStepKind::Source => 0,
|
||||
crate::evidence::FlowStepKind::Assignment => 1,
|
||||
crate::evidence::FlowStepKind::Call => 2,
|
||||
crate::evidence::FlowStepKind::Phi => 3,
|
||||
crate::evidence::FlowStepKind::Sink => 4,
|
||||
};
|
||||
kind_tag.hash(&mut hasher);
|
||||
step.var_name.hash(&mut hasher);
|
||||
}
|
||||
hasher.finish()
|
||||
}
|
||||
|
||||
/// Given an SSA taint event at a sink, find which argument positions of the
|
||||
/// sink call instruction were tainted.
|
||||
pub(super) fn extract_sink_arg_positions(event: &SsaTaintEvent, ssa: &SsaBody) -> Vec<usize> {
|
||||
let ssa_val = match ssa.cfg_node_map.get(&event.sink_node) {
|
||||
Some(v) => *v,
|
||||
None => return vec![],
|
||||
};
|
||||
|
||||
let def = ssa.def_of(ssa_val);
|
||||
let block = &ssa.blocks[def.block.0 as usize];
|
||||
|
||||
let inst = block
|
||||
.phis
|
||||
.iter()
|
||||
.chain(block.body.iter())
|
||||
.find(|i| i.value == ssa_val);
|
||||
|
||||
let inst = match inst {
|
||||
Some(i) => i,
|
||||
None => return vec![],
|
||||
};
|
||||
|
||||
if let SsaOp::Call { args, .. } = &inst.op {
|
||||
let tainted_vals: HashSet<SsaValue> =
|
||||
event.tainted_values.iter().map(|(v, _, _)| *v).collect();
|
||||
|
||||
let mut positions = Vec::new();
|
||||
for (i, arg_vals) in args.iter().enumerate() {
|
||||
if arg_vals.iter().any(|v| tainted_vals.contains(v)) {
|
||||
positions.push(i);
|
||||
}
|
||||
}
|
||||
positions
|
||||
} else {
|
||||
vec![]
|
||||
}
|
||||
}
|
||||
367
src/taint/ssa_transfer/inline.rs
Normal file
367
src/taint/ssa_transfer/inline.rs
Normal file
|
|
@ -0,0 +1,367 @@
|
|||
//! Context-sensitive inline analysis — cache, body, and attribution types.
|
||||
//!
|
||||
//! Extracted from the monolithic `ssa_transfer.rs`. Contains:
|
||||
//! * [`ArgTaintSig`] — compact per-arg cap signature used as a cache key.
|
||||
//! * [`InlineResult`] / [`CachedInlineShape`] / [`ReturnShape`] — the
|
||||
//! callsite-adapted and callsite-agnostic inline-analysis result types.
|
||||
//! * [`InlineCache`] — the shared cache map keyed by
|
||||
//! `(FuncKey, ArgTaintSig)`.
|
||||
//! * [`CrossFileNodeMeta`] / [`CalleeSsaBody`] — the serde-able bodies
|
||||
//! persisted to SQLite for cross-file context-sensitive analysis.
|
||||
//! * [`populate_node_meta`] / [`rebuild_body_graph`] — bookkeeping for
|
||||
//! cross-file body proxy CFGs.
|
||||
//!
|
||||
//! The implementation functions (`inline_analyse_callee`,
|
||||
//! `apply_cached_shape`, `extract_inline_return_taint`) remain in the
|
||||
//! parent `mod.rs` because they depend tightly on the block worklist, the
|
||||
//! `run_ssa_taint_full` entry point, and the callee-resolution pipeline.
|
||||
//!
|
||||
//! # Cache key scope and origin attribution
|
||||
//!
|
||||
//! The inline-analysis cache below ([`InlineCache`]) is keyed by
|
||||
//! `(FuncKey, ArgTaintSig)`, where [`ArgTaintSig`] encodes **per-arg
|
||||
//! capability bits only** — not the identity of the source
|
||||
//! [`crate::taint::domain::TaintOrigin`]s that produced those caps. The
|
||||
//! stored value ([`CachedInlineShape`]) captures **only the structural**
|
||||
//! shape of the callee's return taint: return caps, callee-internal
|
||||
//! origins (from `Source` ops inside the callee body), and per-parameter
|
||||
//! provenance flags that record which formal parameters contributed to
|
||||
//! the return. Caller-specific origin identity is *not* stored — it is
|
||||
//! re-attributed at cache-apply time from the current call site's
|
||||
//! argument taint.
|
||||
|
||||
use crate::labels::Cap;
|
||||
use crate::ssa::ir::{SsaBody, Terminator};
|
||||
use crate::summary::ssa_summary::PathFactReturnEntry;
|
||||
use crate::symbol::FuncKey;
|
||||
use crate::taint::domain::{TaintOrigin, VarTaint};
|
||||
use petgraph::graph::NodeIndex;
|
||||
use smallvec::SmallVec;
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Maximum SSA blocks in a callee body before skipping inline analysis.
|
||||
pub(super) const MAX_INLINE_BLOCKS: usize = 500;
|
||||
|
||||
/// Compact cache key: per-arg-position cap bits (sorted, non-empty only).
|
||||
///
|
||||
/// Two calls with identical `ArgTaintSig` produce identical inline results
|
||||
/// for soundness purposes (return caps, callee-internal sink activations).
|
||||
/// Origin identity is **not** part of the key — see the module-level note
|
||||
/// above on origin-attribution non-determinism.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub(crate) struct ArgTaintSig(pub(super) SmallVec<[(usize, u16); 4]>);
|
||||
|
||||
/// Call-site-adapted result of inline-analyzing a callee.
|
||||
///
|
||||
/// Constructed fresh per call site by `apply_cached_shape` from a stored
|
||||
/// [`CachedInlineShape`]; carries origins that point to the *current*
|
||||
/// caller's source chain, not to whichever caller first populated the
|
||||
/// cache entry.
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct InlineResult {
|
||||
/// Taint on the return value after inline analysis.
|
||||
pub(super) return_taint: Option<VarTaint>,
|
||||
/// PathFact on the return value after inline analysis.
|
||||
///
|
||||
/// Non-top when the callee's body provably narrows the
|
||||
/// [`crate::abstract_interp::PathFact`] of the value it returns (for
|
||||
/// example, a `sanitize_path(s) -> Option<String>` helper that
|
||||
/// early-returns on `s.contains("..")` / `s.starts_with('/')`). At
|
||||
/// apply time the caller sets its call-result SSA value's PathFact to
|
||||
/// this narrowed fact, so downstream FILE_IO sinks see the sanitised
|
||||
/// axis regardless of whether a named label-rule exists for the
|
||||
/// helper. Top when the callee produces no narrowing — matches
|
||||
/// pre-PathFact behaviour exactly.
|
||||
pub(super) return_path_fact: crate::abstract_interp::PathFact,
|
||||
/// Per-return-path decomposition of [`Self::return_path_fact`].
|
||||
///
|
||||
/// Non-empty when the callee has ≥2 distinct return blocks whose
|
||||
/// predicate gates differ. Match-arm-sensitive callers pick the
|
||||
/// entry whose `variant_inner_fact` matches the arm binding's
|
||||
/// variant; path-resolvable callers may refuse infeasible entries.
|
||||
/// Callers unable to distinguish paths still consult
|
||||
/// [`Self::return_path_fact`] (the join of all entries) and see
|
||||
/// pre-decomposition behaviour.
|
||||
#[allow(dead_code)]
|
||||
pub(super) return_path_facts: SmallVec<[PathFactReturnEntry; 2]>,
|
||||
}
|
||||
|
||||
/// Structural (callsite-agnostic) summary of an inline-analyzed callee.
|
||||
///
|
||||
/// Stored in [`InlineCache`] in place of a fully-attributed `InlineResult`.
|
||||
/// Origin-identity information that depends on the caller's argument chain
|
||||
/// is *not* kept here; instead, [`ReturnShape::param_provenance`]
|
||||
/// records which callee parameter positions contributed seed taint to the
|
||||
/// return, and the actual caller origins are re-unioned in at apply time.
|
||||
///
|
||||
/// `None` means "this callee produced no return taint for the given
|
||||
/// argument shape". A cached `None` is still a meaningful result — it
|
||||
/// short-circuits re-analysis on subsequent calls with matching caps.
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct CachedInlineShape(pub(super) Option<ReturnShape>);
|
||||
|
||||
/// Structural parts of a non-trivial inline-analysis result.
|
||||
///
|
||||
/// Split from the full [`VarTaint`] so that cached entries can be re-used
|
||||
/// across call sites with matching arg-cap signatures but differing source
|
||||
/// origins. See the module-level note above on origin attribution.
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct ReturnShape {
|
||||
/// Return value caps (cap bits only — structural).
|
||||
pub(super) caps: Cap,
|
||||
/// Origins produced **inside the callee body** (e.g. `Source` op fired
|
||||
/// in the callee). `node` is set to a placeholder; at apply time the
|
||||
/// caller remaps it to its own call-site NodeIndex. `source_span` is
|
||||
/// stable (from the callee CFG) and preserved as-is.
|
||||
pub(super) internal_origins: SmallVec<[TaintOrigin; 2]>,
|
||||
/// Bit i set = callee's `Param(i)` seed taint reached the return value.
|
||||
/// At apply time, caller's argument origins at matching positions are
|
||||
/// unioned into the applied `VarTaint`. Params beyond index 63 are
|
||||
/// dropped (matching `SmallBitSet` semantics); the capped case is rare
|
||||
/// and still yields cap-correct results.
|
||||
pub(super) param_provenance: u64,
|
||||
/// Whether the receiver (`SelfParam`) seed taint flowed to the return.
|
||||
pub(super) receiver_provenance: bool,
|
||||
/// Whether the applied `VarTaint` should be tagged `uses_summary`.
|
||||
pub(super) uses_summary: bool,
|
||||
/// PathFact of the return value observed from the callee's exit
|
||||
/// abstract state. Cache-safe because the callee is inline-analysed
|
||||
/// with [`crate::abstract_interp::PathFact::top`] Param seeds — the
|
||||
/// resulting fact describes the callee's intrinsic narrowing (e.g.
|
||||
/// the `Some` arm of a `sanitize(..) -> Option<String>` body
|
||||
/// proves `dotdot = No`) and does not depend on caller-side
|
||||
/// narrowing of the argument's PathFact. Top when the callee does
|
||||
/// not narrow.
|
||||
pub(super) return_path_fact: crate::abstract_interp::PathFact,
|
||||
/// Per-return-path [`PathFact`] decomposition of the return value.
|
||||
///
|
||||
/// Populated alongside [`Self::return_path_fact`] when the callee
|
||||
/// has ≥2 distinct return blocks with different predicate gates.
|
||||
/// Cache-safe for the same reason as `return_path_fact`: entries
|
||||
/// describe callee-intrinsic narrowing under Top-seeded Params.
|
||||
/// Empty when no per-path distinction was observed.
|
||||
pub(super) return_path_facts: SmallVec<[PathFactReturnEntry; 2]>,
|
||||
}
|
||||
|
||||
impl CachedInlineShape {
|
||||
/// Cap bits of the return value, or zero if this shape records "no
|
||||
/// return taint". Used by [`inline_cache_fingerprint`].
|
||||
fn return_caps_bits(&self) -> u16 {
|
||||
self.0.as_ref().map(|s| s.caps.bits()).unwrap_or(0)
|
||||
}
|
||||
}
|
||||
|
||||
/// Cache for context-sensitive inline analysis results.
|
||||
///
|
||||
/// Keyed by the callee's canonical [`FuncKey`] rather than a bare function
|
||||
/// name so that same-name definitions (e.g. two `process/1` methods on
|
||||
/// different classes in the same file) never share or overwrite each
|
||||
/// other's cache entries. Values are stored as [`CachedInlineShape`]; see
|
||||
/// the module-level note above for why origins are stripped from the
|
||||
/// cache value and re-attributed at apply time.
|
||||
pub(crate) type InlineCache = HashMap<(FuncKey, ArgTaintSig), CachedInlineShape>;
|
||||
|
||||
/// Drop every entry from an inline cache, marking the start of a new
|
||||
/// convergence epoch.
|
||||
///
|
||||
/// Cross-file SCC fixed-point iteration runs pass 2 repeatedly until the
|
||||
/// merged summaries stop changing. Between iterations the callee-summary
|
||||
/// inputs to inline analysis may have changed, so results cached under a
|
||||
/// stale snapshot must not leak into the next iteration — otherwise the
|
||||
/// engine could converge to a non-fixed-point (reporting a taint result
|
||||
/// that would not reproduce on a fresh run of the same file order).
|
||||
///
|
||||
/// The per-file inline cache is already reconstructed fresh at the top of
|
||||
/// each [`crate::taint::analyse_file`] call, so in the current code this
|
||||
/// call is effectively a no-op plumbing hook. Keeping the method (instead
|
||||
/// of relying on ambient re-construction) makes the lifecycle explicit for
|
||||
/// any future refactor that moves the cache up into the SCC orchestrator.
|
||||
#[allow(dead_code)] // semantic hook; used by tests and future shared-cache refactor
|
||||
pub(crate) fn inline_cache_clear_epoch(cache: &mut InlineCache) {
|
||||
cache.clear();
|
||||
}
|
||||
|
||||
/// Set-equal fingerprint of an inline cache, used by the SCC orchestrator
|
||||
/// to detect when cross-file inline analysis has reached a fixed point
|
||||
/// alongside summary convergence.
|
||||
///
|
||||
/// Returns a `HashMap` mapping each `(FuncKey, ArgTaintSig)` cache key to
|
||||
/// the return-value capability bits of its inline result. `HashMap`
|
||||
/// equality is set-equal (unordered), so two caches with the same entries
|
||||
/// compare equal regardless of insertion order.
|
||||
///
|
||||
/// Origins are intentionally omitted — they are non-deterministic across
|
||||
/// callers with identical caps (see the module-level note on origin
|
||||
/// attribution) and would cause the fingerprint to oscillate without
|
||||
/// reflecting a real precision change.
|
||||
#[allow(dead_code)] // observability hook; used by tests and future shared-cache refactor
|
||||
pub(crate) fn inline_cache_fingerprint(
|
||||
cache: &InlineCache,
|
||||
) -> HashMap<(FuncKey, ArgTaintSig), u16> {
|
||||
cache
|
||||
.iter()
|
||||
.map(|(k, v)| (k.clone(), v.return_caps_bits()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// CFG node metadata embedded in cross-file callee bodies.
|
||||
///
|
||||
/// ## Why a full [`crate::cfg::NodeInfo`] lives here
|
||||
///
|
||||
/// An earlier variant carried only the two fields the symex executor reads
|
||||
/// (`bin_op`, `labels`). That was sufficient for symex but not for the
|
||||
/// taint engine, which reads ~20 fields off `cfg[inst.cfg_node]` across
|
||||
/// `transfer_inst`, `collect_block_events`, `compute_succ_states`, and
|
||||
/// helpers (callee name, `arg_uses`, `arg_callees`, `call_ordinal`,
|
||||
/// `outer_callee`, `kwargs`, `arg_string_literals`, `ast.span`,
|
||||
/// `ast.enclosing_func`, `condition_*`, `all_args_literal`, `catch_param`,
|
||||
/// `parameterized_query`, `in_defer`, `cast_target_type`, `string_prefix`,
|
||||
/// `taint.uses`, `taint.defines`, `taint.extra_defines`,
|
||||
/// `taint.const_text`, …). Rather than shuttling each of those through a
|
||||
/// `CfgView` accessor at every callsite, we store a full serde-able
|
||||
/// [`crate::cfg::NodeInfo`] snapshot here so the indexed-scan path can
|
||||
/// rehydrate an equivalent `Cfg` on load (see [`rebuild_body_graph`]).
|
||||
/// Both scan paths then feed the same `&Cfg` into the taint engine, and
|
||||
/// cross-file inline fires regardless of whether the body came from pass
|
||||
/// 1 or from SQLite.
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub struct CrossFileNodeMeta {
|
||||
/// Full `NodeInfo` snapshot for this body-local NodeIndex.
|
||||
pub info: crate::cfg::NodeInfo,
|
||||
}
|
||||
|
||||
/// Pre-lowered and optimized SSA body for a function,
|
||||
/// ready for context-sensitive re-analysis with different argument taint.
|
||||
///
|
||||
/// For intra-file use, `node_meta` is empty and the original CFG is used.
|
||||
/// For cross-file persistence, `node_meta` carries the minimal CFG
|
||||
/// metadata needed by the symex executor.
|
||||
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
|
||||
pub struct CalleeSsaBody {
|
||||
pub ssa: SsaBody,
|
||||
pub opt: crate::ssa::OptimizeResult,
|
||||
pub param_count: usize,
|
||||
/// Per-NodeIndex CFG metadata for cross-file bodies.
|
||||
/// Empty for intra-file bodies (the original CFG is used instead).
|
||||
#[serde(default, skip_serializing_if = "std::collections::HashMap::is_empty")]
|
||||
pub node_meta: std::collections::HashMap<u32, CrossFileNodeMeta>,
|
||||
/// The body's own CFG graph. Populated for intra-file bodies so that
|
||||
/// inline analysis can reference the correct graph (per-body CFGs have
|
||||
/// body-local NodeIndex spaces). `None` for cross-file deserialized
|
||||
/// bodies.
|
||||
#[serde(skip)]
|
||||
pub body_graph: Option<crate::cfg::Cfg>,
|
||||
}
|
||||
|
||||
/// Populate `node_meta` from the original CFG for cross-file persistence.
|
||||
///
|
||||
/// Returns `true` if all referenced NodeIndex values were resolved
|
||||
/// successfully. Returns `false` if any node was out of bounds (body is
|
||||
/// ineligible for cross-file use).
|
||||
pub fn populate_node_meta(body: &mut CalleeSsaBody, cfg: &crate::cfg::Cfg) -> bool {
|
||||
// Collect every NodeIndex this body references, then snapshot each one's
|
||||
// NodeInfo into `node_meta`. Done in two passes so the inner loop can
|
||||
// mutate `body.node_meta` without borrow-checker conflicts on
|
||||
// `body.ssa.blocks`.
|
||||
//
|
||||
// `Terminator::Branch.cond` must be captured as well: it is consumed by
|
||||
// `compute_succ_states` via `cfg[*cond]`, so without it the synthesized
|
||||
// cross-file proxy CFG (`rebuild_body_graph`) ends up too small whenever
|
||||
// the callee body has any conditional branch whose `cond` index sits
|
||||
// past the maximum `inst.cfg_node` index — inline analysis then panics
|
||||
// with an out-of-bounds index.
|
||||
let mut referenced: Vec<NodeIndex> = Vec::new();
|
||||
for block in &body.ssa.blocks {
|
||||
for inst in block.phis.iter().chain(block.body.iter()) {
|
||||
referenced.push(inst.cfg_node);
|
||||
}
|
||||
if let Terminator::Branch { cond, .. } = &block.terminator {
|
||||
referenced.push(*cond);
|
||||
}
|
||||
}
|
||||
for node in referenced {
|
||||
let idx = node.index() as u32;
|
||||
if body.node_meta.contains_key(&idx) {
|
||||
continue;
|
||||
}
|
||||
if node.index() >= cfg.node_count() {
|
||||
return false;
|
||||
}
|
||||
let info = cfg[node].clone();
|
||||
body.node_meta.insert(idx, CrossFileNodeMeta { info });
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Synthesize a proxy [`crate::cfg::Cfg`] from `node_meta` so the taint
|
||||
/// engine can index `cfg[inst.cfg_node]` uniformly on the indexed-scan
|
||||
/// path.
|
||||
///
|
||||
/// When the callee body was loaded from SQLite, `body_graph` is `None`
|
||||
/// (it is `#[serde(skip)]`), but `node_meta` carries a full
|
||||
/// [`crate::cfg::NodeInfo`] for every referenced NodeIndex (see
|
||||
/// [`populate_node_meta`]). This helper rebuilds a petgraph `Cfg` with
|
||||
/// nodes at exactly the right NodeIndex positions so the taint engine's
|
||||
/// existing indexing works without change.
|
||||
///
|
||||
/// Returns `true` if a proxy graph was freshly installed. Idempotent:
|
||||
/// subsequent calls are cheap no-ops once `body_graph` is `Some`. No-op
|
||||
/// for intra-file bodies (which arrive with `body_graph` already set and
|
||||
/// `node_meta` empty).
|
||||
pub fn rebuild_body_graph(body: &mut CalleeSsaBody) -> bool {
|
||||
if body.body_graph.is_some() {
|
||||
return false;
|
||||
}
|
||||
if body.node_meta.is_empty() {
|
||||
return false;
|
||||
}
|
||||
// Determine the maximum NodeIndex referenced by the SSA so the
|
||||
// synthesized graph has an entry at every position the engine may
|
||||
// index. We fill any unreferenced intermediate indices with
|
||||
// `NodeInfo::default()`.
|
||||
//
|
||||
// Walks both instruction `cfg_node`s and `Terminator::Branch.cond` —
|
||||
// the latter is read by `compute_succ_states` via `cfg[*cond]`, so
|
||||
// missing it produces an OOB panic when a conditional branch's cond
|
||||
// node has a higher index than any `inst.cfg_node` in the body.
|
||||
let mut max_idx: u32 = 0;
|
||||
for block in &body.ssa.blocks {
|
||||
for inst in block.phis.iter().chain(block.body.iter()) {
|
||||
let idx = inst.cfg_node.index() as u32;
|
||||
if idx > max_idx {
|
||||
max_idx = idx;
|
||||
}
|
||||
}
|
||||
if let Terminator::Branch { cond, .. } = &block.terminator {
|
||||
let idx = cond.index() as u32;
|
||||
if idx > max_idx {
|
||||
max_idx = idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Also consider node_meta keys — they should be a subset of the
|
||||
// SSA-referenced indices, but be defensive.
|
||||
for &k in body.node_meta.keys() {
|
||||
if k > max_idx {
|
||||
max_idx = k;
|
||||
}
|
||||
}
|
||||
|
||||
use petgraph::graph::Graph;
|
||||
let mut graph: crate::cfg::Cfg = Graph::new();
|
||||
// petgraph allocates sequential NodeIndex values. Insert placeholders
|
||||
// up to and including max_idx.
|
||||
for i in 0..=max_idx {
|
||||
let info = body
|
||||
.node_meta
|
||||
.get(&i)
|
||||
.map(|m| m.info.clone())
|
||||
.unwrap_or_default();
|
||||
graph.add_node(info);
|
||||
}
|
||||
// Edges are not consulted by the taint engine during inline analysis
|
||||
// (control flow comes from `SsaBlock::preds`/`succs` and
|
||||
// `SsaBlock::terminator`), so we leave the graph edge-free.
|
||||
body.body_graph = Some(graph);
|
||||
true
|
||||
}
|
||||
7336
src/taint/ssa_transfer/mod.rs
Normal file
7336
src/taint/ssa_transfer/mod.rs
Normal file
File diff suppressed because it is too large
Load diff
758
src/taint/ssa_transfer/state.rs
Normal file
758
src/taint/ssa_transfer/state.rs
Normal file
|
|
@ -0,0 +1,758 @@
|
|||
//! Taint state, lattice, and per-body observability hooks extracted from
|
||||
//! the original monolithic `ssa_transfer.rs`.
|
||||
//!
|
||||
//! Contains:
|
||||
//! * [`SsaTaintState`] — the per-block lattice value with `values`,
|
||||
//! `validated_must`/`validated_may`, `predicates`, `heap`, `path_env`,
|
||||
//! `abstract_state`.
|
||||
//! * [`BindingKey`] / [`seed_lookup`] for cross-body taint seeding.
|
||||
//! * Observability globals and overrides for worklist iterations and
|
||||
//! origin truncation (`MAX_ORIGINS`, `WORKLIST_SAFETY_CAP`, etc.).
|
||||
//! * The merge-join helpers used by [`Lattice::join`] / [`Lattice::leq`].
|
||||
|
||||
use crate::abstract_interp::{self, AbstractState};
|
||||
use crate::cfg::BodyId;
|
||||
use crate::constraint;
|
||||
use crate::ssa::heap::HeapState;
|
||||
use crate::ssa::ir::SsaValue;
|
||||
use crate::state::lattice::Lattice;
|
||||
use crate::state::symbol::SymbolId;
|
||||
use crate::taint::domain::{PredicateSummary, SmallBitSet, TaintOrigin, VarTaint};
|
||||
use smallvec::SmallVec;
|
||||
use std::cell::RefCell;
|
||||
use std::collections::HashMap;
|
||||
|
||||
// NOTE: The per-SSA-value origin cap used to be a hardcoded
|
||||
// `MAX_ORIGINS: usize = 4`. It is now governed by the stable
|
||||
// `analysis.engine.max_origins` option (default `32`) — see
|
||||
// `crate::utils::analysis_options` and [`effective_max_origins`]. The
|
||||
// test-only override below still short-circuits the config read so
|
||||
// `engine_notes_tests.rs` can force a tiny cap to trigger truncation
|
||||
// on small fixtures.
|
||||
|
||||
/// Default safety cap on taint worklist iterations. Deliberately large so
|
||||
/// well-formed programs never hit it; the cap exists to bound adversarial
|
||||
/// inputs that would otherwise loop forever. Observable and override-able
|
||||
/// via [`set_worklist_cap_override`] / [`max_worklist_iterations`] for
|
||||
/// tests; production behaviour unchanged.
|
||||
pub(super) const WORKLIST_SAFETY_CAP: usize = 100_000;
|
||||
|
||||
static WORKLIST_CAP_OVERRIDE: std::sync::atomic::AtomicUsize =
|
||||
std::sync::atomic::AtomicUsize::new(0);
|
||||
/// Records the MAX iteration count observed across every
|
||||
/// `run_ssa_taint_full` call since the most recent reset. Cheaper and
|
||||
/// more useful for regression tests than the last-call value — a cap
|
||||
/// hit anywhere in the scan is remembered.
|
||||
pub(super) static MAX_WORKLIST_ITERATIONS: std::sync::atomic::AtomicUsize =
|
||||
std::sync::atomic::AtomicUsize::new(0);
|
||||
/// Counts how many times the worklist safety cap tripped since the
|
||||
/// most recent reset. Lets tests assert "the cap fired at least once"
|
||||
/// without depending on per-finding attribution, which can lose the
|
||||
/// signal when cap-hit analyses produce no findings.
|
||||
pub(super) static WORKLIST_CAP_HITS: std::sync::atomic::AtomicUsize =
|
||||
std::sync::atomic::AtomicUsize::new(0);
|
||||
|
||||
/// Test-only override for [`WORKLIST_SAFETY_CAP`]. `cap = 0` restores the
|
||||
/// default. Intended exclusively for the engine-notes regression tests
|
||||
/// that need to force a worklist cap-hit on tiny fixtures.
|
||||
#[doc(hidden)]
|
||||
pub fn set_worklist_cap_override(cap: usize) {
|
||||
WORKLIST_CAP_OVERRIDE.store(cap, std::sync::atomic::Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub(super) fn effective_worklist_cap() -> usize {
|
||||
let o = WORKLIST_CAP_OVERRIDE.load(std::sync::atomic::Ordering::Relaxed);
|
||||
if o == 0 { WORKLIST_SAFETY_CAP } else { o }
|
||||
}
|
||||
|
||||
/// Observability hook: records the max iteration count used by any
|
||||
/// `run_ssa_taint_full` call since the most recent reset.
|
||||
pub fn max_worklist_iterations() -> usize {
|
||||
MAX_WORKLIST_ITERATIONS.load(std::sync::atomic::Ordering::Relaxed)
|
||||
}
|
||||
|
||||
/// How many times the worklist cap has tripped since the most recent
|
||||
/// reset. Zero when the cap was never hit.
|
||||
pub fn worklist_cap_hit_count() -> usize {
|
||||
WORKLIST_CAP_HITS.load(std::sync::atomic::Ordering::Relaxed)
|
||||
}
|
||||
|
||||
/// Reset the worklist observability counters. Intended for tests that
|
||||
/// want a clean baseline before a scan.
|
||||
pub fn reset_worklist_observability() {
|
||||
MAX_WORKLIST_ITERATIONS.store(0, std::sync::atomic::Ordering::Relaxed);
|
||||
WORKLIST_CAP_HITS.store(0, std::sync::atomic::Ordering::Relaxed);
|
||||
}
|
||||
|
||||
/// Test-only override for the origin cap. `cap = 0` restores the
|
||||
/// runtime-configured default (see [`effective_max_origins`]). Used to
|
||||
/// force `OriginsTruncated` emission on small fixtures.
|
||||
static MAX_ORIGINS_OVERRIDE: std::sync::atomic::AtomicUsize =
|
||||
std::sync::atomic::AtomicUsize::new(0);
|
||||
/// Total number of origins dropped since the most recent reset — captured
|
||||
/// from `merge_origins` and the post-hoc saturation scan. Used by tests
|
||||
/// to detect truncation events that don't propagate to a finding (e.g.
|
||||
/// when the cap is so tight no taint flow survives to emit a sink event).
|
||||
pub(super) static ORIGINS_TRUNCATION_COUNT: std::sync::atomic::AtomicUsize =
|
||||
std::sync::atomic::AtomicUsize::new(0);
|
||||
|
||||
#[doc(hidden)]
|
||||
pub fn set_max_origins_override(cap: usize) {
|
||||
MAX_ORIGINS_OVERRIDE.store(cap, std::sync::atomic::Ordering::Relaxed);
|
||||
}
|
||||
|
||||
/// Resolve the live origin cap.
|
||||
///
|
||||
/// Precedence (highest first):
|
||||
/// 1. The test-only `MAX_ORIGINS_OVERRIDE` atomic (`set_max_origins_override`).
|
||||
/// 2. The runtime `analysis.engine.max_origins` option, which itself
|
||||
/// resolves through the installed runtime → `NYX_MAX_ORIGINS` →
|
||||
/// [`crate::utils::analysis_options::DEFAULT_MAX_ORIGINS`].
|
||||
///
|
||||
/// A result of `0` is never returned: the runtime path clamps to
|
||||
/// [`crate::utils::analysis_options::MIN_MAX_ORIGINS`] on ingest, so the
|
||||
/// engine always carries at least one origin slot.
|
||||
pub(super) fn effective_max_origins() -> usize {
|
||||
let o = MAX_ORIGINS_OVERRIDE.load(std::sync::atomic::Ordering::Relaxed);
|
||||
if o != 0 {
|
||||
return o;
|
||||
}
|
||||
crate::utils::analysis_options::current().max_origins as usize
|
||||
}
|
||||
|
||||
/// Observability: total origins dropped by the engine since the most
|
||||
/// recent `reset_origins_observability` call. Zero when no truncation
|
||||
/// happened. Monotone-increasing across calls.
|
||||
pub fn origins_truncation_count() -> usize {
|
||||
ORIGINS_TRUNCATION_COUNT.load(std::sync::atomic::Ordering::Relaxed)
|
||||
}
|
||||
|
||||
/// Reset the origins-truncation counter. Intended for tests.
|
||||
pub fn reset_origins_observability() {
|
||||
ORIGINS_TRUNCATION_COUNT.store(0, std::sync::atomic::Ordering::Relaxed);
|
||||
}
|
||||
|
||||
thread_local! {
|
||||
/// Per-body engine-note collector. Cleared at the start of each
|
||||
/// `analyse_body_with_seed` invocation and drained after
|
||||
/// `run_ssa_taint_full` returns — notes are then attached to every
|
||||
/// finding emitted from that body. Living as a thread-local avoids
|
||||
/// threading a `&RefCell` through the nearly-10-argument transfer
|
||||
/// struct; inline analysis recursion is intentionally allowed to
|
||||
/// bubble callee-side cap hits up into the caller's collector.
|
||||
static BODY_ENGINE_NOTES: RefCell<SmallVec<[crate::engine_notes::EngineNote; 2]>> =
|
||||
RefCell::new(SmallVec::new());
|
||||
|
||||
/// File-level set of CFG sink spans whose path-traversal taint flow
|
||||
/// was suppressed by an SSA-engine path-safety proof (PathFact
|
||||
/// `dotdot=No && absolute=No`). Populated by `is_path_safe_for_sink`
|
||||
/// and consumed by the state-analysis pass to suppress
|
||||
/// `state-unauthed-access` on the same sink — when the taint engine
|
||||
/// has already proved the user-controlled input cannot escape into a
|
||||
/// privileged location, the auth concern on that sink is reduced.
|
||||
/// Reset at start of `analyse_file`, drained before state analysis.
|
||||
static PATH_SAFE_SUPPRESSED_SPANS: RefCell<std::collections::HashSet<(usize, usize)>> =
|
||||
RefCell::new(std::collections::HashSet::new());
|
||||
}
|
||||
|
||||
/// Record an engine note for the body currently being analysed. Safe to
|
||||
/// call from anywhere under a `run_ssa_taint_full` call stack; duplicates
|
||||
/// against notes already present in the body collector are suppressed.
|
||||
pub(crate) fn record_engine_note(note: crate::engine_notes::EngineNote) {
|
||||
BODY_ENGINE_NOTES.with(|c| {
|
||||
crate::engine_notes::push_unique(&mut c.borrow_mut(), note);
|
||||
});
|
||||
}
|
||||
|
||||
/// Reset the per-body collector (called at start of each body analysis).
|
||||
pub(crate) fn reset_body_engine_notes() {
|
||||
BODY_ENGINE_NOTES.with(|c| c.borrow_mut().clear());
|
||||
}
|
||||
|
||||
/// Take the current collected notes, leaving the collector empty. Called
|
||||
/// after `run_ssa_taint_full` to attach collected notes to findings.
|
||||
pub(crate) fn take_body_engine_notes() -> SmallVec<[crate::engine_notes::EngineNote; 2]> {
|
||||
BODY_ENGINE_NOTES.with(|c| std::mem::take(&mut *c.borrow_mut()))
|
||||
}
|
||||
|
||||
/// Record a sink CFG-node span whose tainted input is proven path-safe by
|
||||
/// the SSA abstract domain (`PathFact::is_path_safe()`). Consumed by the
|
||||
/// state-analysis pass to suppress `state-unauthed-access` on the same
|
||||
/// span: once the taint engine has proved the input cannot reach a
|
||||
/// privileged location, the auth concern is structurally reduced.
|
||||
pub(crate) fn record_path_safe_suppressed_span(span: (usize, usize)) {
|
||||
PATH_SAFE_SUPPRESSED_SPANS.with(|c| {
|
||||
c.borrow_mut().insert(span);
|
||||
});
|
||||
}
|
||||
|
||||
/// Reset the file-level path-safe-suppressed sink-span set. Called at
|
||||
/// the start of `analyse_file` so each file scan starts with a clean
|
||||
/// slate.
|
||||
pub fn reset_path_safe_suppressed_spans() {
|
||||
PATH_SAFE_SUPPRESSED_SPANS.with(|c| c.borrow_mut().clear());
|
||||
}
|
||||
|
||||
/// Take the file-level path-safe-suppressed sink-span set, leaving it
|
||||
/// empty. Called by the analysis orchestrator after `analyse_file` and
|
||||
/// before `run_state_analysis` so the state pass can read which sinks
|
||||
/// the taint engine already proved safe.
|
||||
pub fn take_path_safe_suppressed_spans() -> std::collections::HashSet<(usize, usize)> {
|
||||
PATH_SAFE_SUPPRESSED_SPANS.with(|c| std::mem::take(&mut *c.borrow_mut()))
|
||||
}
|
||||
|
||||
/// Stable identity for a variable binding at body boundaries.
|
||||
///
|
||||
/// Translates between independent per-body `SymbolId` spaces.
|
||||
/// `SymbolId` remains body-local for intra-body analysis; `BindingKey`
|
||||
/// is used when taint crosses body boundaries via `global_seed`.
|
||||
///
|
||||
/// The `body_id` scopes the binding to a specific body. Same-named
|
||||
/// bindings across different bodies never alias. Callers that write
|
||||
/// into the seed map always specify the owning body's id; readers look
|
||||
/// up by the scope they know they want (typically their own
|
||||
/// `parent_body_id`, with a fallback to `BodyId(0)` for entries that
|
||||
/// the JS/TS two-level solve has re-keyed onto the top-level scope —
|
||||
/// see [`crate::taint::ssa_transfer::filter_seed_to_toplevel`]).
|
||||
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
|
||||
pub struct BindingKey {
|
||||
pub name: String,
|
||||
/// Owning body id.
|
||||
pub body_id: BodyId,
|
||||
}
|
||||
|
||||
impl BindingKey {
|
||||
pub fn new(name: impl Into<String>, body_id: BodyId) -> Self {
|
||||
Self {
|
||||
name: name.into(),
|
||||
body_id,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Look up a binding in a seed map.
|
||||
///
|
||||
/// Thin wrapper over [`HashMap::get`] retained for call-site readability
|
||||
/// — every seed entry is now exactly scoped to a single `(name,
|
||||
/// BodyId)`, so the lookup is O(1) with no fallback. Writers that want
|
||||
/// cross-scope reachability must explicitly re-key their entries (see
|
||||
/// [`crate::taint::ssa_transfer::filter_seed_to_toplevel`]).
|
||||
pub fn seed_lookup<'a>(
|
||||
seed: &'a HashMap<BindingKey, VarTaint>,
|
||||
key: &BindingKey,
|
||||
) -> Option<&'a VarTaint> {
|
||||
seed.get(key)
|
||||
}
|
||||
|
||||
// ── SSA Taint State ─────────────────────────────────────────────────────
|
||||
|
||||
/// Taint state keyed by SsaValue instead of SymbolId.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct SsaTaintState {
|
||||
/// Per-SSA-value taint, sorted by SsaValue for O(n) merge-join.
|
||||
pub values: SmallVec<[(SsaValue, VarTaint); 16]>,
|
||||
/// Variables validated on ALL paths (intersection on join). Keyed by SymbolId.
|
||||
pub validated_must: SmallBitSet,
|
||||
/// Variables validated on ANY path (union on join). Keyed by SymbolId.
|
||||
pub validated_may: SmallBitSet,
|
||||
/// Per-variable predicate summary (sorted by SymbolId, intersection on join).
|
||||
pub predicates: SmallVec<[(SymbolId, PredicateSummary); 4]>,
|
||||
/// Per-heap-object taint: container contents taint tracked through
|
||||
/// abstract heap identity. Separate from `values` so container taint
|
||||
/// persists independently of the SSA value referencing the container.
|
||||
pub heap: HeapState,
|
||||
/// Path constraint environment. `None` when constraint solving is
|
||||
/// disabled (`analysis.engine.constraint_solving = false`).
|
||||
pub path_env: Option<constraint::PathEnv>,
|
||||
/// Per-SSA-value abstract domain state. `None` when abstract
|
||||
/// interpretation is disabled (`analysis.engine.abstract_interpretation
|
||||
/// = false`).
|
||||
pub abstract_state: Option<AbstractState>,
|
||||
}
|
||||
|
||||
impl SsaTaintState {
|
||||
pub fn initial() -> Self {
|
||||
Self {
|
||||
values: SmallVec::new(),
|
||||
validated_must: SmallBitSet::empty(),
|
||||
validated_may: SmallBitSet::empty(),
|
||||
predicates: SmallVec::new(),
|
||||
heap: HeapState::empty(),
|
||||
path_env: if constraint::is_enabled() {
|
||||
Some(constraint::PathEnv::empty())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
abstract_state: if abstract_interp::is_enabled() {
|
||||
Some(AbstractState::empty())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if any variable has contradictory predicates or path constraints.
|
||||
pub fn has_contradiction(&self) -> bool {
|
||||
self.predicates.iter().any(|(_, s)| s.has_contradiction())
|
||||
|| self.path_env.as_ref().is_some_and(|e| e.is_unsat())
|
||||
}
|
||||
|
||||
pub fn get(&self, v: SsaValue) -> Option<&VarTaint> {
|
||||
self.values
|
||||
.binary_search_by_key(&v, |(id, _)| *id)
|
||||
.ok()
|
||||
.map(|idx| &self.values[idx].1)
|
||||
}
|
||||
|
||||
pub fn set(&mut self, v: SsaValue, taint: VarTaint) {
|
||||
match self.values.binary_search_by_key(&v, |(id, _)| *id) {
|
||||
Ok(idx) => self.values[idx].1 = taint,
|
||||
Err(idx) => self.values.insert(idx, (v, taint)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn remove(&mut self, v: SsaValue) {
|
||||
if let Ok(idx) = self.values.binary_search_by_key(&v, |(id, _)| *id) {
|
||||
self.values.remove(idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Lattice for SsaTaintState {
|
||||
fn bot() -> Self {
|
||||
Self::initial()
|
||||
}
|
||||
|
||||
fn join(&self, other: &Self) -> Self {
|
||||
let values = merge_join_ssa_vars(&self.values, &other.values);
|
||||
let validated_must = self.validated_must.intersection(other.validated_must);
|
||||
let validated_may = self.validated_may.union(other.validated_may);
|
||||
let predicates = merge_join_ssa_predicates(&self.predicates, &other.predicates);
|
||||
let heap = self.heap.join(&other.heap);
|
||||
let path_env = match (&self.path_env, &other.path_env) {
|
||||
(Some(a), Some(b)) => Some(a.join(b)),
|
||||
_ => None, // absent = Top, Top.join(x) = Top
|
||||
};
|
||||
let abstract_state = match (&self.abstract_state, &other.abstract_state) {
|
||||
(Some(a), Some(b)) => Some(a.join(b)),
|
||||
_ => None,
|
||||
};
|
||||
SsaTaintState {
|
||||
values,
|
||||
validated_must,
|
||||
validated_may,
|
||||
predicates,
|
||||
heap,
|
||||
path_env,
|
||||
abstract_state,
|
||||
}
|
||||
}
|
||||
|
||||
fn leq(&self, other: &Self) -> bool {
|
||||
if !ssa_vars_leq(&self.values, &other.values) {
|
||||
return false;
|
||||
}
|
||||
if !self.validated_must.is_superset_of(other.validated_must) {
|
||||
return false;
|
||||
}
|
||||
if !self.validated_may.is_subset_of(other.validated_may) {
|
||||
return false;
|
||||
}
|
||||
if !self.heap.leq(&other.heap) {
|
||||
return false;
|
||||
}
|
||||
// path_env: None (Top) ≥ everything; Some(a) ≤ None only if a is Top-equivalent
|
||||
match (&self.path_env, &other.path_env) {
|
||||
(None, Some(_)) => return false, // Top is NOT ≤ constrained
|
||||
(Some(_), None) => {} // constrained ≤ Top: ok
|
||||
(None, None) => {}
|
||||
(Some(a), Some(b)) => {
|
||||
// a ≤ b means a has at least as many constraints as b.
|
||||
// For the worklist to converge, we only need: if the
|
||||
// joined state didn't change, we stop. The PartialEq
|
||||
// check on the full SsaTaintState handles this.
|
||||
// For leq, we use a simple approximation: a ≤ b iff
|
||||
// a.fact_count() >= b.fact_count() (more facts = lower).
|
||||
// This is sound for convergence but approximate.
|
||||
if a.fact_count() < b.fact_count() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Abstract-state comparison
|
||||
match (&self.abstract_state, &other.abstract_state) {
|
||||
(None, Some(_)) => return false,
|
||||
(Some(a), Some(b)) if !a.leq(b) => return false,
|
||||
_ => {}
|
||||
}
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
/// Merge-join two sorted SSA var lists.
|
||||
pub(super) fn merge_join_ssa_vars(
|
||||
a: &[(SsaValue, VarTaint)],
|
||||
b: &[(SsaValue, VarTaint)],
|
||||
) -> SmallVec<[(SsaValue, VarTaint); 16]> {
|
||||
let mut result = SmallVec::with_capacity(a.len().max(b.len()));
|
||||
let (mut i, mut j) = (0, 0);
|
||||
|
||||
while i < a.len() && j < b.len() {
|
||||
match a[i].0.cmp(&b[j].0) {
|
||||
std::cmp::Ordering::Less => {
|
||||
result.push(a[i].clone());
|
||||
i += 1;
|
||||
}
|
||||
std::cmp::Ordering::Greater => {
|
||||
result.push(b[j].clone());
|
||||
j += 1;
|
||||
}
|
||||
std::cmp::Ordering::Equal => {
|
||||
let caps = a[i].1.caps | b[j].1.caps;
|
||||
let origins = merge_origins(&a[i].1.origins, &b[j].1.origins);
|
||||
let uses_summary = a[i].1.uses_summary || b[j].1.uses_summary;
|
||||
result.push((
|
||||
a[i].0,
|
||||
VarTaint {
|
||||
caps,
|
||||
origins,
|
||||
uses_summary,
|
||||
},
|
||||
));
|
||||
i += 1;
|
||||
j += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while i < a.len() {
|
||||
result.push(a[i].clone());
|
||||
i += 1;
|
||||
}
|
||||
while j < b.len() {
|
||||
result.push(b[j].clone());
|
||||
j += 1;
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Deterministic sort key for a [`TaintOrigin`].
|
||||
///
|
||||
/// Ordering is lexicographic over
|
||||
/// `(source_span_start, source_span_end, source_kind_tag, node_index)`.
|
||||
/// `source_span` is the most stable component across bodies — cross-body
|
||||
/// remapped origins carry the original byte span explicitly; intra-body
|
||||
/// origins default to `(0, 0)` and fall through to the secondary keys.
|
||||
///
|
||||
/// Using a total order lets [`push_origin_bounded`] and
|
||||
/// [`merge_origins`] decide *which* origin to drop when the cap is
|
||||
/// exceeded: they always drop the origin with the largest key, making
|
||||
/// the survivor set a deterministic function of the input set rather
|
||||
/// than of merge visitation order.
|
||||
fn origin_sort_key(o: &TaintOrigin) -> (usize, usize, u8, usize) {
|
||||
let (span_start, span_end) = o.source_span.unwrap_or((0, 0));
|
||||
let kind_tag: u8 = match o.source_kind {
|
||||
crate::labels::SourceKind::UserInput => 0,
|
||||
crate::labels::SourceKind::EnvironmentConfig => 1,
|
||||
crate::labels::SourceKind::FileSystem => 2,
|
||||
crate::labels::SourceKind::Database => 3,
|
||||
crate::labels::SourceKind::CaughtException => 4,
|
||||
crate::labels::SourceKind::Unknown => 5,
|
||||
};
|
||||
(span_start, span_end, kind_tag, o.node.index())
|
||||
}
|
||||
|
||||
/// Bounded, deterministic insertion of an origin into a sorted origin
|
||||
/// set. Returns `true` when `new` was admitted (or de-duplicated against
|
||||
/// an existing entry), `false` when the cap forced a drop. On drop,
|
||||
/// the origin with the *largest* sort key is evicted first — the caller
|
||||
/// sees a survivor set that depends only on the input multiset and
|
||||
/// [`effective_max_origins`], not on insertion order.
|
||||
///
|
||||
/// Records the engine note and increments [`ORIGINS_TRUNCATION_COUNT`]
|
||||
/// exactly once per physical drop. Calling sites that used to inline
|
||||
/// the "dedup + push if under cap" pattern should migrate here so
|
||||
/// truncation is globally consistent.
|
||||
pub(crate) fn push_origin_bounded(
|
||||
target: &mut SmallVec<[TaintOrigin; 2]>,
|
||||
new: TaintOrigin,
|
||||
) -> bool {
|
||||
// Identity check: same node counts as the same origin. We keep
|
||||
// node-only dedup to match [`ssa_vars_leq`], which compares origin
|
||||
// sets by node membership — widening dedup here without tightening
|
||||
// there would break the monotonicity invariant.
|
||||
if target.iter().any(|o| o.node == new.node) {
|
||||
return true;
|
||||
}
|
||||
|
||||
let cap = effective_max_origins();
|
||||
let new_key = origin_sort_key(&new);
|
||||
|
||||
if target.len() < cap {
|
||||
// Insert in sorted order so iteration is deterministic.
|
||||
let pos = target
|
||||
.iter()
|
||||
.position(|o| origin_sort_key(o) > new_key)
|
||||
.unwrap_or(target.len());
|
||||
target.insert(pos, new);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Cap reached: evict the worst (largest key) entry iff `new` is better.
|
||||
let worst_idx = target
|
||||
.iter()
|
||||
.enumerate()
|
||||
.max_by_key(|(_, o)| origin_sort_key(o))
|
||||
.map(|(i, _)| i)
|
||||
.expect("cap ≥ MIN_MAX_ORIGINS (1) means target is non-empty");
|
||||
let worst_key = origin_sort_key(&target[worst_idx]);
|
||||
|
||||
ORIGINS_TRUNCATION_COUNT.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||
record_engine_note(crate::engine_notes::EngineNote::OriginsTruncated { dropped: 1 });
|
||||
|
||||
if new_key < worst_key {
|
||||
target.remove(worst_idx);
|
||||
let pos = target
|
||||
.iter()
|
||||
.position(|o| origin_sort_key(o) > new_key)
|
||||
.unwrap_or(target.len());
|
||||
target.insert(pos, new);
|
||||
true
|
||||
} else {
|
||||
// `new` itself is the worst — drop it instead of the survivor.
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Merge two origin sets with deterministic truncation.
|
||||
///
|
||||
/// Equivalent to seeding the survivor list with `a` and folding each
|
||||
/// element of `b` through [`push_origin_bounded`]. The resulting list
|
||||
/// is sorted by [`origin_sort_key`] and bounded at
|
||||
/// [`effective_max_origins`].
|
||||
pub(super) fn merge_origins(
|
||||
a: &SmallVec<[TaintOrigin; 2]>,
|
||||
b: &SmallVec<[TaintOrigin; 2]>,
|
||||
) -> SmallVec<[TaintOrigin; 2]> {
|
||||
// Seed the result with `a` — but re-sort defensively in case the
|
||||
// caller constructed `a` through non-bounded paths. Historically
|
||||
// every write goes through `push_origin_bounded` (or `merge_origins`
|
||||
// itself), so this resort is a no-op on the steady state but costs
|
||||
// nothing at cap sizes ≤ 32.
|
||||
let mut merged: SmallVec<[TaintOrigin; 2]> = SmallVec::new();
|
||||
for o in a.iter().copied() {
|
||||
push_origin_bounded(&mut merged, o);
|
||||
}
|
||||
for o in b.iter().copied() {
|
||||
push_origin_bounded(&mut merged, o);
|
||||
}
|
||||
merged
|
||||
}
|
||||
|
||||
#[allow(dead_code)] // called by Lattice::leq
|
||||
fn ssa_vars_leq(a: &[(SsaValue, VarTaint)], b: &[(SsaValue, VarTaint)]) -> bool {
|
||||
let (mut i, mut j) = (0, 0);
|
||||
|
||||
while i < a.len() {
|
||||
if j >= b.len() {
|
||||
return false;
|
||||
}
|
||||
match a[i].0.cmp(&b[j].0) {
|
||||
std::cmp::Ordering::Less => return false,
|
||||
std::cmp::Ordering::Greater => {
|
||||
j += 1;
|
||||
}
|
||||
std::cmp::Ordering::Equal => {
|
||||
if a[i].1.caps & b[j].1.caps != a[i].1.caps {
|
||||
return false;
|
||||
}
|
||||
// uses_summary is monotone: a.uses_summary ≤ b.uses_summary
|
||||
if a[i].1.uses_summary && !b[j].1.uses_summary {
|
||||
return false;
|
||||
}
|
||||
for orig in &a[i].1.origins {
|
||||
if !b[j].1.origins.iter().any(|o| o.node == orig.node) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
i += 1;
|
||||
j += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Merge-join predicate summaries with intersection semantics.
|
||||
pub(super) fn merge_join_ssa_predicates(
|
||||
a: &[(SymbolId, PredicateSummary)],
|
||||
b: &[(SymbolId, PredicateSummary)],
|
||||
) -> SmallVec<[(SymbolId, PredicateSummary); 4]> {
|
||||
let mut result = SmallVec::new();
|
||||
let (mut i, mut j) = (0, 0);
|
||||
|
||||
while i < a.len() && j < b.len() {
|
||||
match a[i].0.cmp(&b[j].0) {
|
||||
std::cmp::Ordering::Less => {
|
||||
i += 1;
|
||||
}
|
||||
std::cmp::Ordering::Greater => {
|
||||
j += 1;
|
||||
}
|
||||
std::cmp::Ordering::Equal => {
|
||||
let joined = a[i].1.join(b[j].1);
|
||||
if !joined.is_empty() {
|
||||
result.push((a[i].0, joined));
|
||||
}
|
||||
i += 1;
|
||||
j += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod origin_cap_tests {
    //! Tests for the deterministic, config-driven origin cap. These
    //! cover the behavior at the `push_origin_bounded` / `merge_origins`
    //! boundary — the end-to-end engine-note signal is exercised in
    //! `tests/engine_notes_tests.rs`.

    use super::*;
    use crate::labels::SourceKind;
    use petgraph::graph::NodeIndex;
    use std::sync::{Mutex, MutexGuard};

    /// Serialises the tests in this module: they all mutate the
    /// process-global origin-cap override.
    static TEST_GUARD: Mutex<()> = Mutex::new(());

    /// RAII scope for a test that touches the global origin-cap override.
    ///
    /// Holds [`TEST_GUARD`] for the duration of the test and clears the
    /// override on drop. Because `Drop` also runs during unwinding, a
    /// failing assertion can no longer leave a non-zero override behind
    /// for whichever test runs next.
    struct OverrideScope {
        _serial: MutexGuard<'static, ()>,
    }

    impl OverrideScope {
        /// Take the module lock, recovering it if an earlier test
        /// panicked while holding it.
        fn enter() -> Self {
            Self {
                _serial: TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner()),
            }
        }
    }

    impl Drop for OverrideScope {
        fn drop(&mut self) {
            // Runs before the `_serial` field is dropped, so the override
            // is cleared while the lock is still held.
            set_max_origins_override(0);
        }
    }

    /// Build a `UserInput` origin at `node` whose span is
    /// `(span_start, span_start + 1)`.
    fn origin(node: usize, span_start: usize) -> TaintOrigin {
        TaintOrigin {
            node: NodeIndex::new(node),
            source_kind: SourceKind::UserInput,
            source_span: Some((span_start, span_start + 1)),
        }
    }

    #[test]
    fn push_origin_bounded_dedups_by_node() {
        let _scope = OverrideScope::enter();
        set_max_origins_override(4);

        let mut target: SmallVec<[TaintOrigin; 2]> = SmallVec::new();
        assert!(push_origin_bounded(&mut target, origin(1, 10)));
        assert!(push_origin_bounded(&mut target, origin(1, 99))); // same node, dedups
        assert_eq!(target.len(), 1, "duplicate node must not grow the set");
    }

    #[test]
    fn push_origin_bounded_is_order_independent() {
        // Core invariant: the survivor set is a function of the input
        // multiset and the cap, not of insertion order. Regression
        // guard against the pre-fix "keep first 4, drop rest" policy
        // which made the survivor set depend on merge-visitation order.
        let _scope = OverrideScope::enter();
        set_max_origins_override(3);

        let origins = [
            origin(1, 50),
            origin(2, 10), // smallest span
            origin(3, 30),
            origin(4, 70),
            origin(5, 90), // largest span
        ];

        let mut forward: SmallVec<[TaintOrigin; 2]> = SmallVec::new();
        for o in origins.iter() {
            push_origin_bounded(&mut forward, *o);
        }

        let mut reverse: SmallVec<[TaintOrigin; 2]> = SmallVec::new();
        for o in origins.iter().rev() {
            push_origin_bounded(&mut reverse, *o);
        }

        let forward_nodes: Vec<_> = forward.iter().map(|o| o.node.index()).collect();
        let reverse_nodes: Vec<_> = reverse.iter().map(|o| o.node.index()).collect();
        assert_eq!(
            forward_nodes, reverse_nodes,
            "survivor set must not depend on insertion order: forward {forward_nodes:?} \
             reverse {reverse_nodes:?}"
        );

        // Spot-check: the 3 smallest-span origins (nodes 2, 3, 1 by span
        // order) survive; the two largest (4, 5) are evicted.
        assert_eq!(forward_nodes, vec![2, 3, 1]);
    }

    #[test]
    fn push_origin_bounded_increments_truncation_counter() {
        let _scope = OverrideScope::enter();
        set_max_origins_override(2);
        reset_origins_observability();

        let mut target: SmallVec<[TaintOrigin; 2]> = SmallVec::new();
        push_origin_bounded(&mut target, origin(1, 10));
        push_origin_bounded(&mut target, origin(2, 20));
        // The set is now at cap=2 with spans 10 and 20. Each push below
        // arrives with a larger span (30, then 40) than the worst
        // survivor (20), so the newcomer is the one dropped — one
        // truncation event per push.
        push_origin_bounded(&mut target, origin(3, 30));
        push_origin_bounded(&mut target, origin(4, 40));

        assert_eq!(
            origins_truncation_count(),
            2,
            "expected 2 truncation events (3rd and 4th push at cap=2)"
        );

        reset_origins_observability();
    }

    #[test]
    fn merge_origins_is_symmetric() {
        // join(a, b) and join(b, a) must produce identical survivor
        // sets. The old implementation was asymmetric: it always kept
        // all of `a` and only added from `b` until cap, so which side
        // was passed as `a` determined the survivors at truncation.
        let _scope = OverrideScope::enter();
        set_max_origins_override(3);

        let a: SmallVec<[TaintOrigin; 2]> = [origin(1, 100), origin(2, 200)].into_iter().collect();
        let b: SmallVec<[TaintOrigin; 2]> = [origin(3, 10), origin(4, 50)].into_iter().collect();

        let ab = merge_origins(&a, &b);
        let ba = merge_origins(&b, &a);

        let ab_nodes: Vec<_> = ab.iter().map(|o| o.node.index()).collect();
        let ba_nodes: Vec<_> = ba.iter().map(|o| o.node.index()).collect();
        assert_eq!(
            ab_nodes, ba_nodes,
            "merge must be commutative under truncation: ab={ab_nodes:?} ba={ba_nodes:?}"
        );
    }

    #[test]
    fn effective_cap_reads_runtime_config_when_override_zero() {
        // Override takes priority; override=0 falls through to config.
        // `current()` returns the default (32) when no runtime is
        // installed — which is the state the rest of the test suite runs
        // under. Guard that the fallback path reaches 32.
        let _scope = OverrideScope::enter();
        set_max_origins_override(0);
        assert_eq!(
            effective_max_origins(),
            crate::utils::analysis_options::DEFAULT_MAX_ORIGINS as usize
        );
        set_max_origins_override(7);
        assert_eq!(effective_max_origins(), 7);
    }
}
|
||||
992
src/taint/ssa_transfer/summary_extract.rs
Normal file
992
src/taint/ssa_transfer/summary_extract.rs
Normal file
|
|
@ -0,0 +1,992 @@
|
|||
//! SSA function-summary and container-flow extraction.
|
||||
//!
|
||||
//! Extracted from the monolithic `ssa_transfer.rs`. Contains:
|
||||
//! * [`extract_ssa_func_summary`] — runs per-parameter taint probes and
|
||||
//! synthesises an [`crate::summary::ssa_summary::SsaFuncSummary`] with
|
||||
//! source caps, return transforms, per-path transforms, and sink site
|
||||
//! attribution.
|
||||
//! * [`extract_container_flow_summary`] — structural scan for
|
||||
//! `param_container_to_return` + `param_to_container_store` pairs.
|
||||
//! * Private helpers for predicate-hash summarisation, abstract-transfer
|
||||
//! derivation, callback source detection, and return-type inference.
|
||||
|
||||
use super::events::extract_sink_arg_positions;
|
||||
use super::state::{BindingKey, SsaTaintState};
|
||||
use super::{
|
||||
SsaTaintEvent, SsaTaintTransfer, detect_variant_inner_fact, run_ssa_taint_full, transfer_block,
|
||||
transfer_inst,
|
||||
};
|
||||
|
||||
use crate::cfg::{BodyId, Cfg, FuncSummaries};
|
||||
use crate::labels::{Cap, SourceKind};
|
||||
use crate::ssa::ir::{SsaBody, SsaOp, SsaValue, Terminator};
|
||||
use crate::summary::GlobalSummaries;
|
||||
use crate::symbol::Lang;
|
||||
use crate::taint::domain::{TaintOrigin, VarTaint};
|
||||
use petgraph::graph::NodeIndex;
|
||||
use smallvec::SmallVec;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
/// Upper bound on how many parameters are probed during summary
/// extraction: the probed parameter count is `param_count` clamped to
/// this value.
///
/// Functions with more params fall back to legacy `FuncSummary`.
const MAX_PROBE_PARAMS: usize = 8;
|
||||
|
||||
/// Extract a precise per-parameter `SsaFuncSummary` from an already-lowered SSA body.
|
||||
///
|
||||
/// For each parameter (up to [`MAX_PROBE_PARAMS`]), runs a taint probe by seeding
|
||||
/// that parameter with `Cap::all()` via `global_seed` and observing what caps
|
||||
/// survive to return positions and which sinks fire. A final probe with no params
|
||||
/// tainted detects intrinsic source caps.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn extract_ssa_func_summary(
|
||||
ssa: &SsaBody,
|
||||
cfg: &Cfg,
|
||||
local_summaries: &FuncSummaries,
|
||||
global_summaries: Option<&GlobalSummaries>,
|
||||
lang: Lang,
|
||||
namespace: &str,
|
||||
interner: &crate::state::symbol::SymbolInterner,
|
||||
param_count: usize,
|
||||
module_aliases: Option<&HashMap<SsaValue, SmallVec<[String; 2]>>>,
|
||||
locator: Option<&crate::summary::SinkSiteLocator<'_>>,
|
||||
formal_param_names: Option<&[String]>,
|
||||
) -> crate::summary::ssa_summary::SsaFuncSummary {
|
||||
use crate::summary::SinkSite;
|
||||
use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform};
|
||||
|
||||
let effective_params = param_count.min(MAX_PROBE_PARAMS);
|
||||
|
||||
// Collect (param_index, var_name, ssa_value) from the SSA body
|
||||
let mut param_info: Vec<(usize, String, SsaValue)> = Vec::new();
|
||||
for block in &ssa.blocks {
|
||||
for inst in block.phis.iter().chain(block.body.iter()) {
|
||||
if let SsaOp::Param { index } = &inst.op {
|
||||
if *index < effective_params {
|
||||
if let Some(name) = inst.var_name.as_ref() {
|
||||
param_info.push((*index, name.clone(), inst.value));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Identify return-reaching blocks
|
||||
let return_blocks: Vec<usize> = ssa
|
||||
.blocks
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, b)| matches!(b.terminator, Terminator::Return(_)))
|
||||
.map(|(i, _)| i)
|
||||
.collect();
|
||||
|
||||
// Collect all param SSA values to exclude from return cap collection.
|
||||
// Param values persist with their seeded taint throughout the function —
|
||||
// we only want caps on derived values (call results, assigns) at return.
|
||||
let all_param_values: std::collections::HashSet<SsaValue> =
|
||||
param_info.iter().map(|(_, _, v)| *v).collect();
|
||||
|
||||
// Per-return-block observation captured alongside the aggregate return
|
||||
// caps. Each entry records one return block's exit state — caps
|
||||
// contributed on that path, path-predicate hash, known_true/false bits,
|
||||
// and the return SSA value's abstract fact — so the per-param loop can
|
||||
// emit one [`ReturnPathTransform`] per distinct predicate gate.
|
||||
struct ReturnBlockObs {
|
||||
/// Caps at the return SSA value (or joined live values for
|
||||
/// implicit returns) on this block's exit.
|
||||
derived_caps: Cap,
|
||||
/// Caps collected from parameter values reaching this return
|
||||
/// (passthrough fallback).
|
||||
param_caps: Cap,
|
||||
/// Deterministic hash of the predicate gate at this return.
|
||||
/// `0` means "no predicate gate" — an unguarded return.
|
||||
predicate_hash: u64,
|
||||
/// `PredicateSummary::known_true` bits intersected across all
|
||||
/// tracked variables at this return. Encoded via
|
||||
/// [`crate::taint::domain::predicate_kind_bit`].
|
||||
known_true: u8,
|
||||
/// `PredicateSummary::known_false` bits at this return.
|
||||
known_false: u8,
|
||||
/// Abstract fact on the return SSA value at this return (None
|
||||
/// when Top or abstract interp disabled).
|
||||
abstract_value: Option<crate::abstract_interp::AbstractValue>,
|
||||
/// [`crate::abstract_interp::PathFact`] on the return SSA value
|
||||
/// at this block's exit. Top when abstract interp is disabled
|
||||
/// or no narrowing was proved on this path.
|
||||
path_fact: crate::abstract_interp::PathFact,
|
||||
/// Inner [`PathFact`] when the rv on this path is a one-arg
|
||||
/// variant constructor; [`None`] otherwise.
|
||||
variant_inner_fact: Option<crate::abstract_interp::PathFact>,
|
||||
}
|
||||
|
||||
// Helper: run a taint probe with a given global_seed and return
|
||||
// the aggregate return caps, sink events, joined return abstract,
|
||||
// and the per-return-block observation list used to derive
|
||||
// per-return-path transforms.
|
||||
let run_probe = |seed: HashMap<BindingKey, VarTaint>| -> (
|
||||
Cap,
|
||||
Vec<SsaTaintEvent>,
|
||||
Option<crate::abstract_interp::AbstractValue>,
|
||||
Vec<ReturnBlockObs>,
|
||||
) {
|
||||
let seed_ref = if seed.is_empty() { None } else { Some(&seed) };
|
||||
let transfer = SsaTaintTransfer {
|
||||
lang,
|
||||
namespace,
|
||||
interner,
|
||||
local_summaries,
|
||||
global_summaries,
|
||||
interop_edges: &[],
|
||||
owner_body_id: BodyId(0),
|
||||
parent_body_id: None,
|
||||
global_seed: seed_ref,
|
||||
param_seed: None,
|
||||
receiver_seed: None,
|
||||
const_values: None,
|
||||
type_facts: None,
|
||||
ssa_summaries: None,
|
||||
extra_labels: None,
|
||||
base_aliases: None,
|
||||
callee_bodies: None,
|
||||
inline_cache: None,
|
||||
context_depth: 0,
|
||||
callback_bindings: None,
|
||||
points_to: None,
|
||||
dynamic_pts: None,
|
||||
import_bindings: None,
|
||||
promisify_aliases: None,
|
||||
module_aliases,
|
||||
static_map: None,
|
||||
auto_seed_handler_params: false,
|
||||
cross_file_bodies: None,
|
||||
};
|
||||
|
||||
let (events, block_states) = run_ssa_taint_full(ssa, cfg, &transfer);
|
||||
|
||||
// Collect surviving caps at return blocks.
|
||||
// Separate param values from derived values: derived values give
|
||||
// more precise transforms (they reflect function-internal sanitization).
|
||||
// If only param values reach return → pure passthrough (Identity).
|
||||
let mut total_derived_caps = Cap::empty();
|
||||
let mut total_param_caps = Cap::empty();
|
||||
// Extract abstract value of the return SSA value.
|
||||
let mut return_abstract: Option<crate::abstract_interp::AbstractValue> = None;
|
||||
// Per-return-block observations for per-path transforms.
|
||||
let mut per_return: Vec<ReturnBlockObs> = Vec::with_capacity(return_blocks.len());
|
||||
for &bid in &return_blocks {
|
||||
if let Some(entry) = &block_states[bid] {
|
||||
let empty_induction = HashSet::new();
|
||||
let exit = transfer_block(
|
||||
&ssa.blocks[bid],
|
||||
cfg,
|
||||
ssa,
|
||||
&transfer,
|
||||
entry.clone(),
|
||||
&empty_induction,
|
||||
None,
|
||||
);
|
||||
|
||||
let ret_val = match &ssa.blocks[bid].terminator {
|
||||
Terminator::Return(rv) => rv.as_ref().copied(),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let mut block_derived_caps = Cap::empty();
|
||||
let mut block_param_caps = Cap::empty();
|
||||
|
||||
if let Some(rv) = ret_val {
|
||||
// Explicit return value: use only its taint for derived_caps.
|
||||
// If rv has no taint entry, this block contributes no derived caps.
|
||||
if let Some(taint) = exit.get(rv) {
|
||||
if all_param_values.contains(&rv) {
|
||||
block_param_caps |= taint.caps;
|
||||
} else {
|
||||
block_derived_caps |= taint.caps;
|
||||
}
|
||||
}
|
||||
// When rv is not a param value, also collect param taint as a
|
||||
// fallback. The SSA terminator's rv may point to the last body
|
||||
// instruction (e.g. push/append result) rather than the actual
|
||||
// return expression (the container parameter itself). This fires
|
||||
// both when rv is tainted (derived) and when rv is untainted
|
||||
// (the push result may have no taint but the param does).
|
||||
// Skip when rv IS a param (already handled above) or when rv is
|
||||
// a Const (provably untainted constant return).
|
||||
let rv_is_const = ssa.blocks[bid]
|
||||
.body
|
||||
.iter()
|
||||
.chain(ssa.blocks[bid].phis.iter())
|
||||
.any(|inst| inst.value == rv && matches!(inst.op, SsaOp::Const(_)));
|
||||
if !all_param_values.contains(&rv) && !rv_is_const {
|
||||
for (val, taint) in &exit.values {
|
||||
if all_param_values.contains(val) {
|
||||
block_param_caps |= taint.caps;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Return(None): implicit return — fall back to all live values.
|
||||
for (val, taint) in &exit.values {
|
||||
if all_param_values.contains(val) {
|
||||
block_param_caps |= taint.caps;
|
||||
} else {
|
||||
block_derived_caps |= taint.caps;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
total_derived_caps |= block_derived_caps;
|
||||
total_param_caps |= block_param_caps;
|
||||
|
||||
// Abstract return: use terminator's return value when available,
|
||||
// fall back to last instruction heuristic for Return(None).
|
||||
let mut block_abs: Option<crate::abstract_interp::AbstractValue> = None;
|
||||
let mut block_path_fact = crate::abstract_interp::PathFact::top();
|
||||
let mut block_variant_inner: Option<crate::abstract_interp::PathFact> = None;
|
||||
if let Some(ref abs) = exit.abstract_state {
|
||||
let abs_rv = ret_val.or_else(|| {
|
||||
ssa.blocks[bid]
|
||||
.body
|
||||
.last()
|
||||
.or_else(|| ssa.blocks[bid].phis.last())
|
||||
.map(|inst| inst.value)
|
||||
});
|
||||
if let Some(rv) = abs_rv {
|
||||
let av = abs.get(rv);
|
||||
block_path_fact = av.path.clone();
|
||||
if !av.is_top() {
|
||||
block_abs = Some(av.clone());
|
||||
return_abstract = Some(match return_abstract {
|
||||
None => av,
|
||||
Some(prev) => prev.join(&av),
|
||||
});
|
||||
}
|
||||
block_variant_inner = detect_variant_inner_fact(rv, ssa, &exit);
|
||||
}
|
||||
}
|
||||
|
||||
// Derive a predicate hash + known-true/false
|
||||
// intersection across tracked variables at this return.
|
||||
// The hash is stable across runs for a given predicate
|
||||
// shape so call sites can compare paths deterministically.
|
||||
let (predicate_hash, known_true, known_false) = summarise_return_predicates(&exit);
|
||||
per_return.push(ReturnBlockObs {
|
||||
derived_caps: block_derived_caps,
|
||||
param_caps: block_param_caps,
|
||||
predicate_hash,
|
||||
known_true,
|
||||
known_false,
|
||||
abstract_value: block_abs,
|
||||
path_fact: block_path_fact,
|
||||
variant_inner_fact: block_variant_inner,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Prefer derived caps; fall back to param caps for passthrough functions
|
||||
let return_caps = if !total_derived_caps.is_empty() {
|
||||
total_derived_caps
|
||||
} else {
|
||||
total_param_caps
|
||||
};
|
||||
|
||||
// Drop return_abstract if it joined to Top
|
||||
let return_abstract = return_abstract.filter(|v| !v.is_top());
|
||||
|
||||
(return_caps, events, return_abstract, per_return)
|
||||
};
|
||||
|
||||
// Probe with no params tainted → detect source_caps + return abstract.
|
||||
// Abstract values don't depend on taint seeding, so the baseline probe
|
||||
// captures the function's intrinsic abstract return value.
|
||||
let (baseline_return_caps, _baseline_events, return_abstract, baseline_obs) =
|
||||
run_probe(HashMap::new());
|
||||
let source_caps = baseline_return_caps;
|
||||
|
||||
// Per-return-path PathFact decomposition derived from the baseline
|
||||
// probe (no seeded taint). Abstract facts on the return rv are
|
||||
// independent of taint seeding — they describe the function's
|
||||
// intrinsic narrowing, so the baseline run captures them without
|
||||
// per-param noise.
|
||||
//
|
||||
// Emitted only when ≥2 return-block entries have distinct predicate
|
||||
// hashes *and* at least one entry carries non-Top signal (fact or
|
||||
// variant_inner_fact). A uniform all-Top list adds bytes without
|
||||
// helping any caller.
|
||||
let mut return_path_facts: SmallVec<[crate::summary::ssa_summary::PathFactReturnEntry; 2]> =
|
||||
SmallVec::new();
|
||||
if baseline_obs.len() >= 2 {
|
||||
let mut merged: SmallVec<[crate::summary::ssa_summary::PathFactReturnEntry; 2]> =
|
||||
SmallVec::new();
|
||||
for obs in &baseline_obs {
|
||||
let entry = crate::summary::ssa_summary::PathFactReturnEntry {
|
||||
predicate_hash: obs.predicate_hash,
|
||||
known_true: obs.known_true,
|
||||
known_false: obs.known_false,
|
||||
path_fact: obs.path_fact.clone(),
|
||||
variant_inner_fact: obs.variant_inner_fact.clone(),
|
||||
};
|
||||
crate::summary::ssa_summary::merge_path_fact_return_paths(&mut merged, &[entry]);
|
||||
}
|
||||
let distinct_hashes = merged
|
||||
.iter()
|
||||
.map(|e| e.predicate_hash)
|
||||
.collect::<std::collections::HashSet<_>>();
|
||||
let has_signal = merged
|
||||
.iter()
|
||||
.any(|e| !e.path_fact.is_top() || e.variant_inner_fact.is_some());
|
||||
if distinct_hashes.len() >= 2 && has_signal {
|
||||
return_path_facts = merged;
|
||||
}
|
||||
}
|
||||
|
||||
// Probe each param
|
||||
let mut param_to_return = Vec::new();
|
||||
let mut param_to_sink: Vec<(usize, SmallVec<[SinkSite; 1]>)> = Vec::new();
|
||||
let mut param_to_sink_param = Vec::new();
|
||||
// Per-param return-path decomposition. Populated only when the param
|
||||
// has ≥2 distinct return-block predicate hashes — a single-return-path
|
||||
// callee is already precise via `param_to_return`.
|
||||
let mut param_return_paths: Vec<(
|
||||
usize,
|
||||
SmallVec<[crate::summary::ssa_summary::ReturnPathTransform; 2]>,
|
||||
)> = Vec::new();
|
||||
|
||||
for &(idx, ref var_name, _ssa_val) in ¶m_info {
|
||||
let mut seed = HashMap::new();
|
||||
let origin = TaintOrigin {
|
||||
node: NodeIndex::new(0), // synthetic origin for probing
|
||||
source_kind: SourceKind::UserInput,
|
||||
source_span: None,
|
||||
};
|
||||
seed.insert(
|
||||
BindingKey::new(var_name.as_str(), BodyId(0)),
|
||||
VarTaint {
|
||||
caps: Cap::all(),
|
||||
origins: SmallVec::from_elem(origin, 1),
|
||||
uses_summary: false,
|
||||
},
|
||||
);
|
||||
|
||||
let (return_caps, events, _, per_return_obs) = run_probe(seed);
|
||||
|
||||
// Subtract baseline source_caps — we only want param-contributed caps
|
||||
let param_return_caps = return_caps & !source_caps;
|
||||
|
||||
if !param_return_caps.is_empty() {
|
||||
let stripped = Cap::all() & !param_return_caps;
|
||||
let transform = if stripped.is_empty() {
|
||||
TaintTransform::Identity
|
||||
} else {
|
||||
TaintTransform::StripBits(stripped)
|
||||
};
|
||||
param_to_return.push((idx, transform));
|
||||
}
|
||||
|
||||
// Derive per-return-path decomposition. For each
|
||||
// observed return block, derive a `ReturnPathTransform` mirroring
|
||||
// the aggregate logic (prefer derived caps, fall back to param
|
||||
// caps, strip baseline source caps). Only emit when ≥2 distinct
|
||||
// predicate hashes are present — a single-hash summary adds no
|
||||
// signal over the aggregate `param_to_return`.
|
||||
if per_return_obs.len() >= 2 {
|
||||
let mut per_path: SmallVec<[crate::summary::ssa_summary::ReturnPathTransform; 2]> =
|
||||
SmallVec::new();
|
||||
for obs in &per_return_obs {
|
||||
let block_return_caps = if !obs.derived_caps.is_empty() {
|
||||
obs.derived_caps
|
||||
} else {
|
||||
obs.param_caps
|
||||
};
|
||||
let block_contributed = block_return_caps & !source_caps;
|
||||
let transform_kind = if block_contributed.is_empty() {
|
||||
// No caps on this path — param does not reach return
|
||||
// under this predicate. A `StripBits(all)` records
|
||||
// "all bits cleared" so downstream join preserves the
|
||||
// disparity with other paths.
|
||||
TaintTransform::StripBits(Cap::all())
|
||||
} else {
|
||||
let stripped = Cap::all() & !block_contributed;
|
||||
if stripped.is_empty() {
|
||||
TaintTransform::Identity
|
||||
} else {
|
||||
TaintTransform::StripBits(stripped)
|
||||
}
|
||||
};
|
||||
crate::summary::ssa_summary::merge_return_paths(
|
||||
&mut per_path,
|
||||
&[crate::summary::ssa_summary::ReturnPathTransform {
|
||||
transform: transform_kind,
|
||||
path_predicate_hash: obs.predicate_hash,
|
||||
known_true: obs.known_true,
|
||||
known_false: obs.known_false,
|
||||
abstract_contribution: obs.abstract_value.clone(),
|
||||
}],
|
||||
);
|
||||
}
|
||||
// Only record when ≥2 distinct predicate gates survived
|
||||
// the dedup (a single-entry vector is no finer than the
|
||||
// aggregate `param_to_return` and wastes bytes on disk).
|
||||
let distinct_hashes = per_path
|
||||
.iter()
|
||||
.map(|e| e.path_predicate_hash)
|
||||
.collect::<std::collections::HashSet<_>>();
|
||||
if distinct_hashes.len() >= 2 {
|
||||
param_return_paths.push((idx, per_path));
|
||||
}
|
||||
}
|
||||
|
||||
// Collect sink caps + primary-location sites from events + per-arg-position detail
|
||||
let mut param_sites: SmallVec<[SinkSite; 1]> = SmallVec::new();
|
||||
for event in &events {
|
||||
for pos in extract_sink_arg_positions(event, ssa) {
|
||||
param_to_sink_param.push((idx, pos, event.sink_caps));
|
||||
}
|
||||
if event.sink_caps.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let site = match locator {
|
||||
Some(loc) => {
|
||||
loc.site_for_span(cfg[event.sink_node].classification_span(), event.sink_caps)
|
||||
}
|
||||
None => SinkSite::cap_only(event.sink_caps),
|
||||
};
|
||||
let key = site.dedup_key();
|
||||
if !param_sites.iter().any(|s| s.dedup_key() == key) {
|
||||
param_sites.push(site);
|
||||
}
|
||||
}
|
||||
if !param_sites.is_empty() {
|
||||
param_to_sink.push((idx, param_sites));
|
||||
}
|
||||
}
|
||||
|
||||
let (param_container_to_return, param_to_container_store) =
|
||||
extract_container_flow_summary(ssa, lang, effective_params);
|
||||
|
||||
// Parameter-granularity points-to summary.
|
||||
let points_to = crate::ssa::param_points_to::analyse_param_points_to(
|
||||
ssa,
|
||||
¶m_info,
|
||||
effective_params,
|
||||
formal_param_names,
|
||||
Some(lang),
|
||||
);
|
||||
|
||||
// Infer return type: scan return-reaching blocks for constructor calls.
|
||||
let return_type = infer_summary_return_type(ssa, lang);
|
||||
|
||||
// Detect source_to_callback: internal source taint flowing to calls of
|
||||
// parameter functions (e.g., `fn apply(f) { let x = source(); f(x); }`).
|
||||
// Re-runs the baseline probe internally to get accurate taint state.
|
||||
let source_to_callback = if !source_caps.is_empty() && !param_info.is_empty() {
|
||||
let baseline_transfer = SsaTaintTransfer {
|
||||
lang,
|
||||
namespace,
|
||||
interner,
|
||||
local_summaries,
|
||||
global_summaries,
|
||||
interop_edges: &[],
|
||||
owner_body_id: BodyId(0),
|
||||
parent_body_id: None,
|
||||
global_seed: None,
|
||||
param_seed: None,
|
||||
receiver_seed: None,
|
||||
const_values: None,
|
||||
type_facts: None,
|
||||
ssa_summaries: None,
|
||||
extra_labels: None,
|
||||
base_aliases: None,
|
||||
callee_bodies: None,
|
||||
inline_cache: None,
|
||||
context_depth: 0,
|
||||
callback_bindings: None,
|
||||
points_to: None,
|
||||
dynamic_pts: None,
|
||||
import_bindings: None,
|
||||
promisify_aliases: None,
|
||||
module_aliases: None,
|
||||
static_map: None,
|
||||
auto_seed_handler_params: false,
|
||||
cross_file_bodies: None,
|
||||
};
|
||||
detect_source_to_callback_from_states(
|
||||
ssa,
|
||||
cfg,
|
||||
source_caps,
|
||||
¶m_info,
|
||||
&baseline_transfer,
|
||||
)
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
// Per-parameter abstract-domain transfers.
|
||||
//
|
||||
// Derived structurally from the SSA body — no additional taint probes.
|
||||
// Three-step inference per parameter:
|
||||
// 1. Identity: return SSA value at every return block traces back to
|
||||
// this parameter (possibly through assigns / phi merges all feeding
|
||||
// from the same param).
|
||||
// 2. Callee-intrinsic bound: baseline `return_abstract` carries a
|
||||
// concrete fact (bounded interval or known prefix) that holds
|
||||
// regardless of caller input — record it once per parameter as
|
||||
// `Clamped` / `LiteralPrefix` so the caller sees the bound even
|
||||
// when it has no abstract info on its own argument.
|
||||
// 3. Top: default; the entry is omitted (empty transfer is meaningless).
|
||||
let abstract_transfer = derive_abstract_transfer(ssa, ¶m_info, return_abstract.as_ref());
|
||||
|
||||
SsaFuncSummary {
|
||||
param_to_return,
|
||||
param_to_sink,
|
||||
source_caps,
|
||||
param_to_sink_param,
|
||||
param_container_to_return,
|
||||
param_to_container_store,
|
||||
return_type,
|
||||
return_abstract,
|
||||
source_to_callback,
|
||||
receiver_to_return: None,
|
||||
receiver_to_sink: Cap::empty(),
|
||||
abstract_transfer,
|
||||
param_return_paths,
|
||||
return_path_facts,
|
||||
points_to,
|
||||
}
|
||||
}
|
||||
|
||||
/// Derive a deterministic predicate-hash + known-true/false intersection
|
||||
/// for a return-block exit state.
|
||||
///
|
||||
/// The hash combines the sorted `(SymbolId, known_true, known_false)` tuples
|
||||
/// from the state's `predicates` list with the validated_must bitmask. Two
|
||||
/// return blocks whose predicate gates are observationally identical produce
|
||||
/// the same hash; the intersection of known_true/false gives the bits that
|
||||
/// hold on every path into each return block.
|
||||
///
|
||||
/// Returns `(0, 0, 0)` for a Top state (no predicates tracked).
|
||||
pub(super) fn summarise_return_predicates(state: &SsaTaintState) -> (u64, u8, u8) {
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
if state.predicates.is_empty() && state.validated_must.is_empty() {
|
||||
return (0, 0, 0);
|
||||
}
|
||||
|
||||
let mut h = DefaultHasher::new();
|
||||
// Validated-must contributes deterministically via bits().
|
||||
state.validated_must.bits().hash(&mut h);
|
||||
// Sort by SymbolId (predicates list is already sorted by SsaTaintState
|
||||
// invariants, but hash-input stability matters here).
|
||||
let mut sorted: smallvec::SmallVec<[(u32, u8, u8); 4]> = state
|
||||
.predicates
|
||||
.iter()
|
||||
.map(|(id, s)| (id.0, s.known_true, s.known_false))
|
||||
.collect();
|
||||
sorted.sort_by_key(|(id, _, _)| *id);
|
||||
for (id, kt, kf) in &sorted {
|
||||
id.hash(&mut h);
|
||||
kt.hash(&mut h);
|
||||
kf.hash(&mut h);
|
||||
}
|
||||
let hash = h.finish();
|
||||
// Intersect known_true / known_false across all tracked variables:
|
||||
// the bits that hold for EVERY predicate-tracked var at this return.
|
||||
let known_true = sorted
|
||||
.iter()
|
||||
.map(|(_, kt, _)| *kt)
|
||||
.fold(u8::MAX, |a, b| a & b);
|
||||
let known_false = sorted
|
||||
.iter()
|
||||
.map(|(_, _, kf)| *kf)
|
||||
.fold(u8::MAX, |a, b| a & b);
|
||||
// Use `1` for the "no predicates but validated_must non-empty" case to
|
||||
// avoid colliding with the unguarded sentinel (0).
|
||||
let hash = if hash == 0 { 1 } else { hash };
|
||||
(hash, known_true, known_false)
|
||||
}
|
||||
|
||||
/// Derive per-parameter [`AbstractTransfer`] entries for a function's SSA
|
||||
/// body.
|
||||
///
|
||||
/// `return_abstract` is the callee's intrinsic baseline (from the no-seed
|
||||
/// probe). When present, it describes a fact that holds for the return
|
||||
/// regardless of parameter input — so it can be attached as a
|
||||
/// `Clamped` / `LiteralPrefix` transform to every parameter that flows to
|
||||
/// the return.
|
||||
///
|
||||
/// Identity detection is structural: walk the return values back through
|
||||
/// [`SsaOp::Assign`] / [`SsaOp::Phi`] chains (bounded) and check whether
|
||||
/// every leaf resolves to the same [`SsaOp::Param`]. The trace is cheap
|
||||
/// and can only produce `Identity` for passthrough callees — anything
|
||||
/// more complex degrades to the baseline fact or `Top`.
|
||||
fn derive_abstract_transfer(
|
||||
ssa: &SsaBody,
|
||||
param_info: &[(usize, String, SsaValue)],
|
||||
return_abstract: Option<&crate::abstract_interp::AbstractValue>,
|
||||
) -> Vec<(usize, crate::abstract_interp::AbstractTransfer)> {
|
||||
use crate::abstract_interp::{AbstractTransfer, IntervalTransfer, StringTransfer};
|
||||
|
||||
if param_info.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
// Build a lookup from SsaValue → defining op by scanning the body once.
|
||||
let mut defs: HashMap<SsaValue, &SsaOp> = HashMap::new();
|
||||
for block in &ssa.blocks {
|
||||
for inst in block.phis.iter().chain(block.body.iter()) {
|
||||
defs.insert(inst.value, &inst.op);
|
||||
}
|
||||
}
|
||||
|
||||
// Trace an SSA value backwards to the single source parameter index it
|
||||
// resolves to, if any. Returns `None` when the trace diverges, hits a
|
||||
// non-pass-through op, or exceeds the depth bound.
|
||||
fn trace_to_param(
|
||||
v: SsaValue,
|
||||
defs: &HashMap<SsaValue, &SsaOp>,
|
||||
depth: usize,
|
||||
) -> Option<usize> {
|
||||
const MAX_DEPTH: usize = 8;
|
||||
if depth > MAX_DEPTH {
|
||||
return None;
|
||||
}
|
||||
match defs.get(&v)? {
|
||||
SsaOp::Param { index } => Some(*index),
|
||||
SsaOp::Assign(ops) if ops.len() == 1 => trace_to_param(ops[0], defs, depth + 1),
|
||||
SsaOp::Phi(preds) => {
|
||||
let mut result: Option<usize> = None;
|
||||
for (_, pv) in preds {
|
||||
let p = trace_to_param(*pv, defs, depth + 1)?;
|
||||
match result {
|
||||
None => result = Some(p),
|
||||
Some(existing) if existing == p => {}
|
||||
Some(_) => return None,
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
// For every return block, trace its return value and record which
|
||||
// parameter (if any) it resolves to. If all return blocks agree on the
|
||||
// same parameter index, that parameter has `Identity`. If they disagree
|
||||
// (or some don't resolve), no parameter gets `Identity` and we fall
|
||||
// back to baseline-derived forms.
|
||||
let mut identity_param: Option<usize> = None;
|
||||
let mut identity_consistent = true;
|
||||
for block in &ssa.blocks {
|
||||
if let Terminator::Return(Some(rv)) = &block.terminator {
|
||||
let traced = trace_to_param(*rv, &defs, 0);
|
||||
match (identity_param, traced) {
|
||||
(None, Some(p)) => identity_param = Some(p),
|
||||
(Some(existing), Some(p)) if existing == p => {}
|
||||
_ => {
|
||||
identity_consistent = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Derive a baseline-invariant transform from `return_abstract`. This is
|
||||
// the "callee intrinsic" fact that always holds — each parameter that
|
||||
// flows to the return gets it attached as the conservative transfer.
|
||||
let baseline_invariant: Option<AbstractTransfer> = return_abstract.map(|av| {
|
||||
let interval = match (av.interval.lo, av.interval.hi) {
|
||||
(Some(lo), Some(hi)) if lo <= hi => IntervalTransfer::Clamped { lo, hi },
|
||||
_ => IntervalTransfer::Top,
|
||||
};
|
||||
let string = match &av.string.prefix {
|
||||
Some(p) if !p.is_empty() => StringTransfer::literal_prefix(p),
|
||||
_ => StringTransfer::Unknown,
|
||||
};
|
||||
AbstractTransfer { interval, string }
|
||||
});
|
||||
|
||||
let mut result: Vec<(usize, AbstractTransfer)> = Vec::new();
|
||||
|
||||
for (idx, _, _) in param_info {
|
||||
let mut transfer = AbstractTransfer::top();
|
||||
|
||||
if identity_consistent && identity_param == Some(*idx) {
|
||||
transfer.interval = IntervalTransfer::Identity;
|
||||
transfer.string = StringTransfer::Identity;
|
||||
} else if let Some(base) = baseline_invariant.as_ref() {
|
||||
// Baseline intrinsic bound applies to every parameter that could
|
||||
// reach the return. We conservatively attach it to all params
|
||||
// — at apply time the caller meets it with the real return
|
||||
// abstract (also from this same summary), so double-counting
|
||||
// would collapse to the tighter of the two.
|
||||
transfer = base.clone();
|
||||
}
|
||||
|
||||
if !transfer.is_top() {
|
||||
result.push((*idx, transfer));
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Detect callback patterns where internal source taint flows to a call of a
|
||||
/// parameter function. Re-runs the baseline probe internally to get accurate
|
||||
/// taint state at each instruction point.
|
||||
///
|
||||
/// Returns `(param_index_of_callee, source_caps)` pairs.
|
||||
fn detect_source_to_callback_from_states(
|
||||
ssa: &SsaBody,
|
||||
cfg: &Cfg,
|
||||
source_caps: Cap,
|
||||
param_info: &[(usize, String, SsaValue)],
|
||||
transfer: &SsaTaintTransfer,
|
||||
) -> Vec<(usize, Cap)> {
|
||||
use crate::ssa::ir::SsaOp;
|
||||
|
||||
// Map param var_name → param_index
|
||||
let param_name_to_index: HashMap<&str, usize> = param_info
|
||||
.iter()
|
||||
.map(|(idx, name, _)| (name.as_str(), *idx))
|
||||
.collect();
|
||||
|
||||
// Run taint analysis to get converged block states
|
||||
let (_events, block_states) = run_ssa_taint_full(ssa, cfg, transfer);
|
||||
|
||||
let mut result: Vec<(usize, Cap)> = vec![];
|
||||
for (bid, block) in ssa.blocks.iter().enumerate() {
|
||||
let Some(entry_state) = &block_states[bid] else {
|
||||
continue;
|
||||
};
|
||||
// Replay block transfer to get accurate taint state at each instruction
|
||||
let mut state = entry_state.clone();
|
||||
for inst in &block.body {
|
||||
// Apply transfer for this instruction to advance state
|
||||
transfer_inst(inst, cfg, ssa, transfer, &mut state);
|
||||
|
||||
// After transfer: check if this is a call to a param with tainted args
|
||||
if let SsaOp::Call { callee, args, .. } = &inst.op {
|
||||
if let Some(¶m_idx) = param_name_to_index.get(callee.as_str()) {
|
||||
let any_arg_tainted = args.iter().any(|arg_vals| {
|
||||
arg_vals
|
||||
.iter()
|
||||
.any(|v| state.get(*v).is_some_and(|t| !t.caps.is_empty()))
|
||||
});
|
||||
if any_arg_tainted && !result.iter().any(|(idx, _)| *idx == param_idx) {
|
||||
result.push((param_idx, source_caps));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Infer the return type of a function from its SSA body by checking whether
|
||||
/// return-reaching blocks produce values from known constructor/factory calls.
|
||||
fn infer_summary_return_type(
|
||||
ssa: &SsaBody,
|
||||
lang: Lang,
|
||||
) -> Option<crate::ssa::type_facts::TypeKind> {
|
||||
// Find blocks with Return terminators, then look at the last defined value
|
||||
// in those blocks — if it's a Call with a known constructor, that's our type.
|
||||
for block in &ssa.blocks {
|
||||
if !matches!(block.terminator, Terminator::Return(_)) {
|
||||
continue;
|
||||
}
|
||||
// Only inspect the very last instruction in the returning block.
|
||||
if let Some(inst) = block.body.last()
|
||||
&& let SsaOp::Call { callee, .. } = &inst.op
|
||||
&& let Some(ty) = crate::ssa::type_facts::constructor_type(lang, callee)
|
||||
{
|
||||
return Some(ty);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
// ── Inter-procedural container flow detection (structural SSA analysis) ──
|
||||
|
||||
/// Build a map from SsaValue to its defining instruction.
|
||||
fn build_inst_map(ssa: &SsaBody) -> HashMap<SsaValue, (SsaOp, Option<SsaValue>)> {
|
||||
let mut map = HashMap::new();
|
||||
for block in &ssa.blocks {
|
||||
for inst in block.phis.iter().chain(block.body.iter()) {
|
||||
// Store the op and optionally the receiver for calls
|
||||
map.insert(inst.value, (inst.op.clone(), None));
|
||||
}
|
||||
}
|
||||
map
|
||||
}
|
||||
|
||||
/// Trace an SSA value back through Assign/Phi chains to find if it originates
|
||||
/// from a `Param { index }`. Returns `Some(index)` if a param is found.
|
||||
/// Does NOT trace through Call, Const, Source, or other non-identity ops.
|
||||
fn trace_to_param(
|
||||
v: SsaValue,
|
||||
ssa: &SsaBody,
|
||||
inst_map: &HashMap<SsaValue, (SsaOp, Option<SsaValue>)>,
|
||||
visited: &mut HashSet<SsaValue>,
|
||||
) -> Option<usize> {
|
||||
if !visited.insert(v) {
|
||||
return None;
|
||||
}
|
||||
let (op, _) = inst_map.get(&v)?;
|
||||
match op {
|
||||
SsaOp::Param { index } => Some(*index),
|
||||
SsaOp::Assign(uses) => {
|
||||
for u in uses {
|
||||
if let Some(idx) = trace_to_param(*u, ssa, inst_map, visited) {
|
||||
return Some(idx);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
SsaOp::Phi(operands) => {
|
||||
for (_, op_val) in operands {
|
||||
if let Some(idx) = trace_to_param(*op_val, ssa, inst_map, visited) {
|
||||
return Some(idx);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
// Don't trace through Call (new identity), Const, Source, Nop, CatchParam
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Detect inter-procedural container flow patterns from SSA structure:
|
||||
/// - `param_container_to_return`: params whose container identity flows to return
|
||||
/// - `param_to_container_store`: (src_param, container_param) pairs where src taint
|
||||
/// is stored into container_param's contents
|
||||
pub(crate) fn extract_container_flow_summary(
|
||||
ssa: &SsaBody,
|
||||
lang: Lang,
|
||||
formal_param_count: usize,
|
||||
) -> (Vec<usize>, Vec<(usize, usize)>) {
|
||||
use crate::ssa::pointsto::{ContainerOp, classify_container_op};
|
||||
|
||||
let inst_map = build_inst_map(ssa);
|
||||
let mut container_to_return: HashSet<usize> = HashSet::new();
|
||||
let mut container_store: Vec<(usize, usize)> = Vec::new();
|
||||
|
||||
// 1. param_container_to_return: trace Assign/Phi ops in return blocks to params.
|
||||
//
|
||||
// `trace_to_param` will happily return any `SsaOp::Param { index }`, but
|
||||
// scoped lowering synthesises `Param` ops for external captures (module
|
||||
// imports, free identifiers) at indices beyond the formal parameter count.
|
||||
// Those must not enter the summary — the key's arity only covers formal
|
||||
// params, and an out-of-range index trips `ssa_summary_fits_arity`, forcing
|
||||
// the reconciliation probe to generate a synthetic disambiguator that no
|
||||
// caller will ever look up.
|
||||
for block in &ssa.blocks {
|
||||
if !matches!(block.terminator, Terminator::Return(_)) {
|
||||
continue;
|
||||
}
|
||||
for inst in block.phis.iter().chain(block.body.iter()) {
|
||||
match &inst.op {
|
||||
// Only trace identity-preserving ops (Assign, Phi).
|
||||
// Skip Param (would cause false positives in single-block functions),
|
||||
// Call (new identity), Const, Source, Nop, CatchParam.
|
||||
SsaOp::Assign(_) | SsaOp::Phi(_) => {
|
||||
if let Some(idx) =
|
||||
trace_to_param(inst.value, ssa, &inst_map, &mut HashSet::new())
|
||||
&& idx < formal_param_count
|
||||
{
|
||||
container_to_return.insert(idx);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 2. param_to_container_store: find container Store calls, trace args to params
|
||||
for block in &ssa.blocks {
|
||||
for inst in block.body.iter() {
|
||||
if let SsaOp::Call {
|
||||
callee,
|
||||
args,
|
||||
receiver,
|
||||
} = &inst.op
|
||||
{
|
||||
let op = match classify_container_op(callee, lang) {
|
||||
Some(ContainerOp::Store { value_args, .. }) => value_args,
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
// Resolve container SSA value. With the new call ABI, the
|
||||
// receiver is a separate channel and `args` contains only
|
||||
// positional arguments. For Go, container ops are plain
|
||||
// function calls (no receiver), so args[0] is the container.
|
||||
let container_val = if let Some(v) = *receiver {
|
||||
Some(v)
|
||||
} else if lang == Lang::Go {
|
||||
args.first().and_then(|a| a.first().copied())
|
||||
} else if let Some(dot_pos) = callee.rfind('.') {
|
||||
let receiver_name = &callee[..dot_pos];
|
||||
args.iter()
|
||||
.flat_map(|a| a.iter())
|
||||
.find(|&&v| {
|
||||
ssa.value_defs
|
||||
.get(v.0 as usize)
|
||||
.and_then(|d| d.var_name.as_deref())
|
||||
== Some(receiver_name)
|
||||
})
|
||||
.copied()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let container_val = match container_val {
|
||||
Some(v) => v,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
// Trace container to positional param (SelfParam → None, so
|
||||
// when the container is the receiver we skip — the caller
|
||||
// tracks that via `receiver_to_container_store` if needed).
|
||||
// Same arity filter as above: reject synthetic Param ops that
|
||||
// were injected for free captures.
|
||||
let container_param =
|
||||
match trace_to_param(container_val, ssa, &inst_map, &mut HashSet::new()) {
|
||||
Some(idx) if idx < formal_param_count => idx,
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
// Go container ops are plain function calls with the container
|
||||
// at args[0]; value args start at args[1]. Other languages
|
||||
// place the container on the receiver channel so args holds
|
||||
// only value args starting at index 0.
|
||||
let arg_offset = if lang == Lang::Go && receiver.is_none() {
|
||||
1usize
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
// Trace each value arg to param (same arity filter as above).
|
||||
for &va_idx in &op {
|
||||
let effective_idx = va_idx + arg_offset;
|
||||
if let Some(arg_vals) = args.get(effective_idx) {
|
||||
for &av in arg_vals {
|
||||
if let Some(src_param) =
|
||||
trace_to_param(av, ssa, &inst_map, &mut HashSet::new())
|
||||
&& src_param < formal_param_count
|
||||
&& src_param != container_param
|
||||
&& !container_store.contains(&(src_param, container_param))
|
||||
{
|
||||
container_store.push((src_param, container_param));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut ctr: Vec<usize> = container_to_return.into_iter().collect();
|
||||
ctr.sort();
|
||||
container_store.sort();
|
||||
(ctr, container_store)
|
||||
}
|
||||
1314
src/taint/ssa_transfer/tests.rs
Normal file
1314
src/taint/ssa_transfer/tests.rs
Normal file
File diff suppressed because it is too large
Load diff
3584
src/taint/tests.rs
3584
src/taint/tests.rs
File diff suppressed because it is too large
Load diff
|
|
@ -1,458 +0,0 @@
|
|||
use crate::callgraph::normalize_callee_name;
|
||||
use crate::cfg::{EdgeKind, FuncSummaries, NodeInfo, StmtKind};
|
||||
use crate::interop::InteropEdge;
|
||||
use crate::labels::{Cap, DataLabel};
|
||||
use crate::state::engine::Transfer;
|
||||
use crate::state::lattice::Lattice;
|
||||
use crate::state::symbol::{SymbolId, SymbolInterner};
|
||||
use crate::summary::{CalleeResolution, GlobalSummaries};
|
||||
use crate::symbol::Lang;
|
||||
use crate::taint::domain::{TaintOrigin, TaintState, VarTaint, predicate_kind_bit};
|
||||
use crate::taint::path_state::{PredicateKind, classify_condition};
|
||||
use petgraph::graph::NodeIndex;
|
||||
use smallvec::SmallVec;
|
||||
|
||||
/// Events emitted by the taint transfer function during Phase 2.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum TaintEvent {
|
||||
SinkReached {
|
||||
sink_node: NodeIndex,
|
||||
tainted_vars: Vec<(SymbolId, Cap, SmallVec<[TaintOrigin; 2]>)>,
|
||||
#[allow(dead_code)]
|
||||
sink_caps: Cap,
|
||||
all_validated: bool,
|
||||
guard_kind: Option<PredicateKind>,
|
||||
},
|
||||
}
|
||||
|
||||
/// Taint transfer function for forward dataflow analysis.
|
||||
pub struct TaintTransfer<'a> {
|
||||
pub lang: Lang,
|
||||
pub namespace: &'a str,
|
||||
pub interner: &'a SymbolInterner,
|
||||
pub local_summaries: &'a FuncSummaries,
|
||||
pub global_summaries: Option<&'a GlobalSummaries>,
|
||||
pub interop_edges: &'a [InteropEdge],
|
||||
/// For JS two-level solve: top-level taint state seeded into function solves.
|
||||
pub global_seed: Option<&'a TaintState>,
|
||||
/// Optional scope filter: if set, only process nodes whose enclosing_func matches.
|
||||
/// None = process all nodes. Some(None) = top-level only. Some(Some(name)) = function only.
|
||||
pub scope_filter: Option<Option<&'a str>>,
|
||||
}
|
||||
|
||||
impl Transfer<TaintState> for TaintTransfer<'_> {
|
||||
type Event = TaintEvent;
|
||||
|
||||
fn apply(
|
||||
&self,
|
||||
node: NodeIndex,
|
||||
info: &NodeInfo,
|
||||
edge: Option<EdgeKind>,
|
||||
mut state: TaintState,
|
||||
) -> (TaintState, Vec<TaintEvent>) {
|
||||
let mut events = Vec::new();
|
||||
|
||||
// Scope filter: skip nodes outside our scope (return state unchanged)
|
||||
if let Some(ref filter) = self.scope_filter {
|
||||
let node_func = info.enclosing_func.as_deref();
|
||||
if node_func != *filter {
|
||||
return (state, events);
|
||||
}
|
||||
}
|
||||
|
||||
let caller_func = info.enclosing_func.as_deref().unwrap_or("");
|
||||
|
||||
// ── Apply taint transfer ────────────────────────────────────────
|
||||
match info.label {
|
||||
Some(DataLabel::Source(bits)) => {
|
||||
self.apply_source(node, info, bits, &mut state);
|
||||
}
|
||||
Some(DataLabel::Sanitizer(bits)) => {
|
||||
self.apply_sanitizer(info, bits, &mut state);
|
||||
}
|
||||
_ if info.kind == StmtKind::Call => {
|
||||
self.apply_call(node, info, caller_func, &mut state);
|
||||
}
|
||||
_ => {
|
||||
self.apply_assignment(info, &mut state);
|
||||
}
|
||||
}
|
||||
|
||||
// ── If-node predicate handling (edge-aware) ─────────────────────
|
||||
if info.kind == StmtKind::If
|
||||
&& !info.condition_vars.is_empty()
|
||||
&& matches!(edge, Some(EdgeKind::True) | Some(EdgeKind::False))
|
||||
{
|
||||
let cond_text = info.condition_text.as_deref().unwrap_or("");
|
||||
let kind = classify_condition(cond_text);
|
||||
let polarity = matches!(edge, Some(EdgeKind::True)) ^ info.condition_negated;
|
||||
|
||||
// ValidationCall handling
|
||||
if kind == PredicateKind::ValidationCall && polarity {
|
||||
for var in &info.condition_vars {
|
||||
if let Some(sym) = self.interner.get(var) {
|
||||
state.validated_may.insert(sym);
|
||||
state.validated_must.insert(sym);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Predicate summary for whitelisted kinds (contradiction pruning)
|
||||
if let Some(bit_idx) = predicate_kind_bit(kind) {
|
||||
for var in &info.condition_vars {
|
||||
if let Some(sym) = self.interner.get(var) {
|
||||
let mut summary = state.get_predicate(sym);
|
||||
if polarity {
|
||||
summary.known_true |= 1 << bit_idx;
|
||||
} else {
|
||||
summary.known_false |= 1 << bit_idx;
|
||||
}
|
||||
state.set_predicate(sym, summary);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Contradiction pruning: if any variable has contradictory predicates,
|
||||
// this is an infeasible path → return bot (monotonically kills branch).
|
||||
if state.has_contradiction() {
|
||||
return (TaintState::bot(), events);
|
||||
}
|
||||
}
|
||||
|
||||
// ── Sink check ──────────────────────────────────────────────────
|
||||
let sink_caps = self.resolve_sink_caps(info, caller_func);
|
||||
if !sink_caps.is_empty() {
|
||||
let tainted_vars = self.collect_tainted_sink_vars(info, &state, sink_caps);
|
||||
if !tainted_vars.is_empty() {
|
||||
let all_validated = tainted_vars
|
||||
.iter()
|
||||
.all(|(sym, _, _)| state.validated_may.contains(*sym));
|
||||
|
||||
let guard_kind = if all_validated {
|
||||
Some(PredicateKind::ValidationCall)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
events.push(TaintEvent::SinkReached {
|
||||
sink_node: node,
|
||||
tainted_vars,
|
||||
sink_caps,
|
||||
all_validated,
|
||||
guard_kind,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
(state, events)
|
||||
}
|
||||
|
||||
fn iteration_budget(&self) -> usize {
|
||||
100_000
|
||||
}
|
||||
|
||||
fn on_budget_exceeded(&self) -> bool {
|
||||
tracing::warn!("taint analysis: worklist budget exceeded, returning partial results");
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl TaintTransfer<'_> {
|
||||
/// Apply a Source label: insert taint for the defined variable.
|
||||
fn apply_source(&self, node: NodeIndex, info: &NodeInfo, bits: Cap, state: &mut TaintState) {
|
||||
if let Some(ref v) = info.defines
|
||||
&& let Some(sym) = self.interner.get(v)
|
||||
{
|
||||
let callee = info.callee.as_deref().unwrap_or("");
|
||||
let source_kind = crate::labels::infer_source_kind(bits, callee);
|
||||
let origin = TaintOrigin { node, source_kind };
|
||||
|
||||
match state.get(sym) {
|
||||
Some(existing) => {
|
||||
let mut new_taint = existing.clone();
|
||||
new_taint.caps |= bits;
|
||||
if new_taint.origins.len() < 4
|
||||
&& !new_taint.origins.iter().any(|o| o.node == node)
|
||||
{
|
||||
new_taint.origins.push(origin);
|
||||
}
|
||||
state.set(sym, new_taint);
|
||||
}
|
||||
None => {
|
||||
state.set(
|
||||
sym,
|
||||
VarTaint {
|
||||
caps: bits,
|
||||
origins: SmallVec::from_elem(origin, 1),
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply a Sanitizer label: propagate input taint, then strip sanitizer bits.
|
||||
fn apply_sanitizer(&self, info: &NodeInfo, bits: Cap, state: &mut TaintState) {
|
||||
if let Some(ref v) = info.defines
|
||||
&& let Some(sym) = self.interner.get(v)
|
||||
{
|
||||
let (combined_caps, combined_origins) = self.collect_uses_taint(info, state);
|
||||
let new_caps = combined_caps & !bits;
|
||||
if new_caps.is_empty() {
|
||||
state.remove(sym);
|
||||
} else {
|
||||
state.set(
|
||||
sym,
|
||||
VarTaint {
|
||||
caps: new_caps,
|
||||
origins: combined_origins,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply a function call: resolve callee and compute return taint.
|
||||
fn apply_call(
|
||||
&self,
|
||||
node: NodeIndex,
|
||||
info: &NodeInfo,
|
||||
caller_func: &str,
|
||||
state: &mut TaintState,
|
||||
) {
|
||||
if let Some(ref callee) = info.callee
|
||||
&& let Some(resolved) = self.resolve_callee(callee, caller_func, info.call_ordinal)
|
||||
{
|
||||
let mut return_bits = Cap::empty();
|
||||
let mut return_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new();
|
||||
|
||||
// 1. Source behaviour
|
||||
if !resolved.source_caps.is_empty() {
|
||||
return_bits |= resolved.source_caps;
|
||||
let callee_str = info.callee.as_deref().unwrap_or("");
|
||||
let source_kind =
|
||||
crate::labels::infer_source_kind(resolved.source_caps, callee_str);
|
||||
let origin = TaintOrigin { node, source_kind };
|
||||
if !return_origins.iter().any(|o| o.node == node) {
|
||||
return_origins.push(origin);
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Propagation
|
||||
if resolved.propagates_taint {
|
||||
let (use_caps, use_origins) = self.collect_uses_taint(info, state);
|
||||
return_bits |= use_caps;
|
||||
for orig in &use_origins {
|
||||
if return_origins.len() < 4
|
||||
&& !return_origins.iter().any(|o| o.node == orig.node)
|
||||
{
|
||||
return_origins.push(*orig);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Sanitizer behaviour (applied last so it always wins)
|
||||
return_bits &= !resolved.sanitizer_caps;
|
||||
|
||||
// Write result
|
||||
if let Some(ref v) = info.defines
|
||||
&& let Some(sym) = self.interner.get(v)
|
||||
{
|
||||
if return_bits.is_empty() {
|
||||
state.remove(sym);
|
||||
} else {
|
||||
state.set(
|
||||
sym,
|
||||
VarTaint {
|
||||
caps: return_bits,
|
||||
origins: return_origins,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Unresolved call — fall through to default gen/kill
|
||||
self.apply_assignment(info, state);
|
||||
}
|
||||
|
||||
/// Default gen/kill: propagate taint through variable assignments.
|
||||
fn apply_assignment(&self, info: &NodeInfo, state: &mut TaintState) {
|
||||
if matches!(
|
||||
info.label,
|
||||
Some(DataLabel::Source(_)) | Some(DataLabel::Sanitizer(_))
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
if let Some(ref d) = info.defines
|
||||
&& let Some(sym) = self.interner.get(d)
|
||||
{
|
||||
let (combined_caps, combined_origins) = self.collect_uses_taint(info, state);
|
||||
if combined_caps.is_empty() {
|
||||
state.remove(sym);
|
||||
} else {
|
||||
state.set(
|
||||
sym,
|
||||
VarTaint {
|
||||
caps: combined_caps,
|
||||
origins: combined_origins,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Collect taint from all `uses` variables (union of caps + merge origins).
|
||||
fn collect_uses_taint(
|
||||
&self,
|
||||
info: &NodeInfo,
|
||||
state: &TaintState,
|
||||
) -> (Cap, SmallVec<[TaintOrigin; 2]>) {
|
||||
let mut combined_caps = Cap::empty();
|
||||
let mut combined_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new();
|
||||
|
||||
for u in &info.uses {
|
||||
let taint = self.lookup_var(u, state);
|
||||
if let Some(t) = taint {
|
||||
combined_caps |= t.caps;
|
||||
for orig in &t.origins {
|
||||
if combined_origins.len() < 4
|
||||
&& !combined_origins.iter().any(|o| o.node == orig.node)
|
||||
{
|
||||
combined_origins.push(*orig);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(combined_caps, combined_origins)
|
||||
}
|
||||
|
||||
/// Look up a variable's taint, falling back to global_seed for JS two-level solve.
|
||||
fn lookup_var<'a>(&'a self, name: &str, state: &'a TaintState) -> Option<&'a VarTaint> {
|
||||
if let Some(sym) = self.interner.get(name) {
|
||||
if let Some(taint) = state.get(sym) {
|
||||
return Some(taint);
|
||||
}
|
||||
// Fall back to global seed (JS two-level solve)
|
||||
if let Some(seed) = self.global_seed {
|
||||
return seed.get(sym);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Resolve sink caps from label or callee summary.
|
||||
fn resolve_sink_caps(&self, info: &NodeInfo, caller_func: &str) -> Cap {
|
||||
match info.label {
|
||||
Some(DataLabel::Sink(caps)) => caps,
|
||||
_ => info
|
||||
.callee
|
||||
.as_ref()
|
||||
.and_then(|c| self.resolve_callee(c, caller_func, info.call_ordinal))
|
||||
.filter(|r| !r.sink_caps.is_empty())
|
||||
.map(|r| r.sink_caps)
|
||||
.unwrap_or(Cap::empty()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Collect tainted variables at a sink node.
|
||||
fn collect_tainted_sink_vars(
|
||||
&self,
|
||||
info: &NodeInfo,
|
||||
state: &TaintState,
|
||||
sink_caps: Cap,
|
||||
) -> Vec<(SymbolId, Cap, SmallVec<[TaintOrigin; 2]>)> {
|
||||
let mut result = Vec::new();
|
||||
for u in &info.uses {
|
||||
if let Some(taint) = self.lookup_var(u, state)
|
||||
&& (taint.caps & sink_caps) != Cap::empty()
|
||||
&& let Some(sym) = self.interner.get(u)
|
||||
{
|
||||
result.push((sym, taint.caps, taint.origins.clone()));
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
/// Resolve a callee name to its summary (local → global → interop).
|
||||
fn resolve_callee(
|
||||
&self,
|
||||
callee: &str,
|
||||
caller_func: &str,
|
||||
call_ordinal: u32,
|
||||
) -> Option<ResolvedSummary> {
|
||||
let normalized = normalize_callee_name(callee);
|
||||
|
||||
// 1) Local (same-file)
|
||||
let local_matches: Vec<_> = self
|
||||
.local_summaries
|
||||
.iter()
|
||||
.filter(|(k, _)| {
|
||||
k.name == normalized && k.lang == self.lang && k.namespace == self.namespace
|
||||
})
|
||||
.collect();
|
||||
|
||||
if local_matches.len() == 1 {
|
||||
let (_, ls) = local_matches[0];
|
||||
return Some(ResolvedSummary {
|
||||
source_caps: ls.source_caps,
|
||||
sanitizer_caps: ls.sanitizer_caps,
|
||||
sink_caps: ls.sink_caps,
|
||||
propagates_taint: ls.propagates_taint,
|
||||
});
|
||||
}
|
||||
if local_matches.len() > 1 {
|
||||
return None;
|
||||
}
|
||||
|
||||
// 2) Global same-language
|
||||
if let Some(gs) = self.global_summaries {
|
||||
match gs.resolve_callee_key(normalized, self.lang, self.namespace, None) {
|
||||
CalleeResolution::Resolved(target_key) => {
|
||||
if let Some(fs) = gs.get(&target_key) {
|
||||
return Some(ResolvedSummary {
|
||||
source_caps: fs.source_caps(),
|
||||
sanitizer_caps: fs.sanitizer_caps(),
|
||||
sink_caps: fs.sink_caps(),
|
||||
propagates_taint: fs.propagates_taint,
|
||||
});
|
||||
}
|
||||
}
|
||||
CalleeResolution::NotFound | CalleeResolution::Ambiguous(_) => {}
|
||||
}
|
||||
}
|
||||
|
||||
// 3) Interop edges
|
||||
for edge in self.interop_edges {
|
||||
if edge.from.caller_lang == self.lang
|
||||
&& edge.from.caller_namespace == self.namespace
|
||||
&& edge.from.callee_symbol == callee
|
||||
&& (edge.from.caller_func.is_empty() || edge.from.caller_func == caller_func)
|
||||
&& (edge.from.ordinal == 0 || edge.from.ordinal == call_ordinal)
|
||||
&& let Some(gs) = self.global_summaries
|
||||
&& let Some(fs) = gs.get(&edge.to)
|
||||
{
|
||||
return Some(ResolvedSummary {
|
||||
source_caps: fs.source_caps(),
|
||||
sanitizer_caps: fs.sanitizer_caps(),
|
||||
sink_caps: fs.sink_caps(),
|
||||
propagates_taint: fs.propagates_taint,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolved summary for a callee.
|
||||
struct ResolvedSummary {
|
||||
source_caps: Cap,
|
||||
sanitizer_caps: Cap,
|
||||
sink_caps: Cap,
|
||||
propagates_taint: bool,
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue