mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-30 20:39:39 +02:00
Release/0.5.0 (#35)
* feat: Introduce function-scoped variable interning for state analysis with new tests and fixtures * feat: Add Phase 26 symbolic execution enhancements with bitwise operator support, abstract interpretation refinements, and new taint analysis tests * feat: Refine state analysis to handle factory-pattern resource returns with mixed-path tests and leak detection enhancements * feat: Add Phase 27 debug views with symbolic execution, abstract interpretation, SSA, and call graph viewers; integrate with debug layout and styles * feat: Add Phase 31 type-qualified symbolic resolution with receiver-based callee disambiguation and testing * feat: Extend symbolic execution with state iteration, enhanced debug views, and debounced input handling * feat: Add Phase 13 resource and auth pattern extensions with new tests and fixtures * feat: Introduce CFG debug graph renderer with compact mode, toolbar, and DAG layout integration * feat: Add Phase 28 encoding and decoding transform modeling with structural symex enhancements and new taint analysis tests * feat: Extend abstract interpretation with type facts and constant value tracking in debug views and server logic * feat: Add linear path handling and witness extraction to symbolic execution with Phase 28 transform mismatch detection * feat: Refine Go auth and sanitizer handling with enhanced rules, state updates, and benchmark improvements * feat: Enable auth-state analysis by default and update relevant tests in benchmark config * test: Update state_tests to reflect default enablement of auth-state analysis and add auth suppression test * docs: update CHANGELOG.md * feat: Introduce per-index taint tracking in `HeapState` with `HeapSlot`, overflow handling, and revised SSA transfers * feat: Introduce C/C++ language labels and refine heap state tracking in SSA transfers * feat: Implement per-index array slot tracking in symbolic heap with overflow collapse * feat: Add implicit definition handling for uninitialized declarations 
in SSA value allocation * feat: Refactor function parameters and constants for improved clarity and maintainability * refactor: Reorder module imports and improve formatting for consistency * refactor: Fix formatting erorrs * refactor: Fix clippy warnings * refactor: Fix fmt warnings (again) * chore: Update dependencies and improve feature configuration * Add comprehensive tests for undertested modules (#36) (COPILOT) * Add comprehensive tests for undertested modules Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/f3fc877e-f386-49ba-9793-fc93d3805083 * Add comprehensive tests for ext, project, walk, and errors modules Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/f3fc877e-f386-49ba-9793-fc93d3805083 --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> * chore: Update dependencies and improve feature configuration * fix: formatting errors in new tests * chore: Update license list in about.toml * chore: made functions input inline * chore: updated cfg graph to take up the full page * chore: add Prettier configuration and update code formatting * Add frontend test suite with Vitest (111 tests) (#37) * Add Vitest test suite for frontend - 111 tests across utils, components, hooks, and graph utilities Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/7cf0dba2-ecff-4740-ba4d-92717e74a0b7 * ci: add frontend test step to CI workflow Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/5bc0ac9f-0a32-4d03-9cb7-7a15aea53fca --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: elicpeter 
<54954007+elicpeter@users.noreply.github.com> * chore: simplify array initialization in test files for consistency * ran typecheck * feat: add AnalysisWorkspace component and integrate it into CfgViewerPage * feat: update routing in AppLayout and improve empty state message in ExplorerPage * feat: enhance scan progress tracking with additional metrics and stages * feat: update license information and add license check script * feat: implement cross-file symbolic execution with callee body persistence * feat: replace dagre graphs with Graphology + ELK + Sigma for more advanced call stack and cfg rendering * feat: ensure CFG function view is scoped to the selected function, preventing bleed into sibling functions * feat: enhance resource tracking with proxy method summaries and improve finding extraction * feat: add terminal function exit detection for accurate resource leak analysis * feat: add warnings for loops and functions without bodies to improve error recovery * feat: update lambda expression handling to ensure proper function classification and control flow * feat: remove bounded formatting/string ops and add JSON.parse sanitizer for improved data handling * feat: add inline return taint analysis and regression tests for improved security checks * feat: add engine version management and migration handling for database schema updates * feat: enhance first_call_ident to skip nested function bodies and add regression tests * feat: enhance callee name resolution with two-segment normalization and disambiguation * feat: add cross-file context flags and debug assertions for taint analysis * feat: refactor taint analysis structure to unify context handling and improve clarity * feat: enhance dead code elimination to preserve Sink, Source, and Sanitizer labels with new tests * docs: updated CHANGELOG.md * fmt: formatting fixes * fix: fixed frontend formatting and lint warnings * fix: optimized ci * fix: optimized ci * Add comprehensive multi-file test coverage to 
Nyx (#38) * Initial checklist for multi-file test suite expansion Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/e550cb88-9767-4442-94d4-101bf5bb0e23 Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> * Add 12 new multi-file test fixtures with TP/TN/near-miss coverage Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/e550cb88-9767-4442-94d4-101bf5bb0e23 Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> * deleted root repo * rebuilt to test for regressions --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> Co-authored-by: elipeter <elicpeter@gmail.com> * feat: enhance import alias resolution and taint tracking * feat: implement security hardening with CSRF protection and path validation * feat: add support for import alias bindings in Python, PHP, and Rust * feat: enhance CFG analysis modes and improve code readability * feat: add detection for parameterized SQL queries to enhance security * feat: add safe internal redirect handling and enhance session destroy validation * feat: implement security improvements by addressing vulnerabilities in execAsync, session management, and file downloads * feat: enhance taint detection by adding support for inline source member expressions in call arguments * feat: implement pre-emission of Source nodes for inline source member expressions in call arguments * feat: add support for Throw statement in control flow and error handling * feat: add debug and echo endpoints with potential information leakage * feat: implement internal redirect suppression and enhance taint detection * feat: implement module alias tracking for dynamic dispatch in JS/TS * feat: add authorization analysis module with Express support * feat: add authorization analysis module with Express support * feat: add tests for admin guard requirements and clean checks in authorization 
analysis * feat: integrate Koa and Fastify frameworks into authorization analysis * feat: add Flask and Django support to authorization analysis module * feat: add support for Rails and Sinatra frameworks in authorization analysis * feat: add support for Axum, ActixWeb, and Rocket frameworks in authorization analysis * feat: add support for ActixWeb, Axum, and Rocket frameworks in authorization analysis * feat: add support for Rails and Sinatra in authorization analysis * chore: add .DS_Store to .gitignore * refactor: simplify conditional checks and improve readability in multiple files * refactor: update usage of Option methods for improved clarity and consistency * refactor: improve code readability by simplifying conditional checks and formatting * refactor: improve code formatting and readability by simplifying conditional checks * refactor: simplify conditional checks and improve readability in multiple files * refactor: simplify conditional checks in axum.rs for improved readability * feat: add CodeQL analysis configuration for enhanced security scanning * test: add comprehensive tests for `src/output.rs` SARIF builder (#39) * chore: start test coverage improvement work Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/cd7ff398-134e-4728-a5e7-0353a0744423 Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> * test: add comprehensive tests for src/output.rs SARIF builder Agent-Logs-Url: https://github.com/elicpeter/nyx/sessions/cd7ff398-134e-4728-a5e7-0353a0744423 Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> * refactor: improve code formatting and readability in output.rs --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: elicpeter <54954007+elicpeter@users.noreply.github.com> Co-authored-by: elipeter <elicpeter@gmail.com> * refactor: improve code formatting and readability in output.rs * Potential fix for code scanning alert no. 
210: Uncontrolled data used in path expression Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> * Potential fix for code scanning alert no. 211: Uncontrolled data used in path expression Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> * refactor: enhance triage file path handling with improved error management and validation * refactor: updated func summaries for richer detail * refactor: update SSA summary extraction to use canonical FuncKey for distinct entries * refactor: enhance callee metadata structure to support arity, receiver, and qualifier for better overload resolution * refactor: add support for keyword arguments in function calls and enhance receiver extraction for method-style calls * refactor: implement new Flask routes for safe and unsafe shell command execution * refactor: separate receiver handling in SSA operations and enhance taint propagation * refactor: improve arity handling by using arg_uses for positional argument count and enhance witness scoring for tainted arguments * refactor: implement auth decorator extraction and classification for multiple languages * refactor: enhance Rust module path resolution and use map handling for cross-file disambiguation * refactor: introduce CalleeQuery struct for structured callee resolution and enhance resolver logic * refactor: implement same-file identity collision handling for `runTask` to ensure correct resolver behavior * refactor: standardize default struct initialization across multiple files * feat: add scripts for formatting checks and auto-fixes with test summaries * refactor: simplify character splitting and enhance namespace qualifier handling * refactor: improve documentation clarity and enhance code readability in resolver logic * refactor: replace default struct initialization with explicit field assignments for clarity * feat: enhance anonymous function naming by 
deriving context-based bindings * refactor: streamline match expressions for improved readability and performance * refactor: streamline match expressions for improved readability and performance * refactor: replace loop with while let for improved clarity and performance * feat: add SSA constant propagation support to analysis context for improved accuracy * feat: add SSA constant propagation support to analysis context for improved accuracy * feat: implement shell metacharacter validation and bounded-length checks in Rust analysis * feat: add static map analysis for command injection suppression and type safety * refactor: simplify match statements and reduce line breaks for improved readability * feat(summary): phase 1/5 SinkSite data model for primary sink-location attribution Introduce SinkSite (file_rel, line, col, snippet, cap) carrying the primary sink source-location through function summaries. Swap SsaFuncSummary.param_to_sink and FuncSummary.param_to_sink from a coarse Cap map to a deduped SmallVec<[SinkSite; 1]> per parameter, with a backward-compatible cap_sites() helper and serde defaults so pre-phase-1 on-disk rows continue to deserialise cleanly. Extraction: SinkSiteLocator bundles the tree/bytes/file_rel needed by extract_ssa_func_summary; ParsedFile::extract_ssa_artifacts wires the locator in for the persisted pass-1 path, while pass-2 intra-file transient summaries fall back to cap-only sites (behavior unchanged). Merge: GlobalSummaries::insert now unions sink sites with (file_rel, line, col, cap) dedup via shared union_param_sink_sites helper. Database: JSON-serialised summary columns carry the new shape automatically; no schema change needed. Phase 2 will consume SinkSite in build_taint_diag() to overwrite the caller-site Finding.line with the callee's sink line when resolved via summary. Phase 1 keeps behavior unchanged: scanning tests/benchmark/corpus/rust/cmdi/cmdi_indirect.rs still produces the same (wrong) line 10 finding. 
Adds round-trip tests covering SinkSite solo, SsaFuncSummary with sink sites, legacy-JSON default handling for both summary types, and merge dedup. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * feat(taint): phase 2/5 thread SinkSite into SsaTaintEvent and Finding Plumb Phase 1's SinkSite through the event pipeline into Findings, no output change yet. SsaTaintEvent gains `primary_sink_site: Option<SinkSite>`; when the main or callback sink-emission path has non-empty `param_to_sink_sites`, filter to sites whose `(line != 0) && (cap ∩ sink_caps != ∅)` and emit one event per distinct site — the multi-primary collapse keeps each downstream Finding single-primary. Resolution: ResolvedSummary and SinkInfo gain mirror `param_to_sink_sites` fields, populated from `SsaFuncSummary.param_to_sink` (SSA + callback paths) and `FuncSummary.param_to_sink` (global paths). Label, local-summary, and interop resolution paths leave the field empty — they only ever had cap-level info to begin with. Finding: new `primary_location: Option<SinkLocation>` with `file_rel/line/col`. `ssa_events_to_findings` maps `event.primary_sink_site` → `Finding.primary_location`, filtering cap-only sites (`line == 0`) to `None` so the (0,0) sentinel never leaks to formatters. Dedup key extended with the primary location so multi-site events aren't collapsed back together. Invariants (debug_assert!): * every SinkSite reaching emission has `line != 0 && cap ∩ sink_caps != ∅` — enforced by the pick_primary_sink_sites* filters; * every populated Finding.primary_location has `line != 0` AND non-empty `file_rel` — the cap-only → None translation upstream guarantees this. Deliberately independent of `uses_summary`: that flag tracks whether the *taint chain* used a summary, whereas primary attribution requires only that the *sink* itself was summary-resolved. 
A local source reaching a cross-file sink produces `uses_summary=false` alongside a populated primary_location — documented on Finding.primary_location, covered by `cross_file_sink_finding_carries_primary_location`. build_taint_diag, SARIF/JSON/explanation formatters, and the benchmark scorer remain untouched: finding.line still comes from `cfg_graph[finding.sink]`, so cmdi_indirect.rs still reports line 10 and the benchmark's rs-cmdi-003 row still shows FN in the LOC column. Tests: `cross_file_sink_finding_carries_primary_location` (proves plumbing via a synthetic FuncSummary carrying a SinkSite at 42:5) and `cross_file_sink_cap_only_site_leaves_primary_location_none` (regression guard against cap-only sites surfacing). All 1566 lib tests + integration tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * feat(output): phase 3/5 consume primary sink location in diag + SARIF When a finding's primary_location (populated in phase 2 from a callee summary's SinkSite) names the dangerous instruction inside a callee body, attribute the diagnostic line to that location instead of the caller's call site. The call site is demoted to a Call step in flow_steps, and a synthetic Sink step at the primary location is appended so analysts still see the full trace. Changes: - Add scan_root parameter to build_taint_diag so file_rel can be resolved back to an absolute path via a shared resolve_file_rel helper. Empty file_rel (single-file scans where namespace == "") resolves to the file under analysis. - Extend SinkLocation with snippet, carried from the upstream SinkSite so the formatter needs no second file read. - Relax the ssa_events_to_findings debug_assert to allow empty file_rel, which is valid when scan root equals the file itself. - SARIF: emit data-flow as codeFlows[0].threadFlows[0].locations[]; locations[0] already reflects the primary sink position via the updated diag line/col. 
Acceptance: scan on tests/benchmark/corpus/rust/cmdi/cmdi_indirect.rs now reports line 5 (Command::new) as the primary sink, with the call site at line 10 visible in flow_steps. Two expect.json fixtures updated (must_match line_range widened): - javascript/taint/context_sensitive_call: 12-14 -> 7-14 (line 8 is the real sink inside run()). - rust/cfg/closure_async: 10-10 -> 10-11 (line 11 is Command::new inside the closure). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * feat(bench): phase 4/5 validate primary sink attribution across corpus Extend the benchmark scorer and ground truth to lock in phase 3's primary-location behavior, and add fixtures that exercise the new capability end-to-end. Scorer (tests/benchmark_test.rs): - Add optional `expected_call_site_lines: Option<Vec<[usize; 2]>>` on Case. When present, score_location_level additionally requires at least one flow_step in the finding's evidence trace to fall within ±2 of the call-site range. When absent, the check is skipped — fully forward-compatible with existing fixtures. - Retain ±2 tolerance on expected_sink_lines (compared against the now-primary Diag.line post-phase-3). Ground truth edits: - rs-cmdi-cross-001: expected_sink_lines [8,8] -> [9,9]. Line 8 is the transform::wrap call site (a cross-file propagator, not a sink); line 9 is Command::new, the real sink. The ±2 tolerance happened to mask this stale attribution but it was semantically wrong — phase 4 is the right time to correct it. Also adds expected_call_site_lines [8,8] so the new field is exercised on an existing cross-file case. - rs-cmdi-003: adds expected_call_site_lines [10,10] (run_cmd call). This fixture's sink (Command::new inside run_cmd at line 5) was the motivating case for phases 1-3; adding the call-site assertion guards against regression to caller-line attribution. 
New fixtures: - rust/cmdi/cmdi_indirect_multisink.rs (rs-cmdi-009): helper run_both takes two tainted params and invokes two Command sinks on consecutive lines. Locks in that primary line lands inside the helper (lines 5-6), not at the caller (line 12). Notes document that SinkSite is currently one-per-callee so both findings today collapse onto the first sink; expected_sink_lines=[5,6] and expected_call_site_lines=[12,12] stay valid either way. - python/cmdi/cross_indirect_sink/{app.py,helper.py} (py-cmdi-cross- 004): sink os.system lives in helper.py (cross-file), caller in app.py reads env source and calls run_cmd. Verifies phase 3's cross-file primary attribution: Diag.path = helper.py, Diag.line = 5, with app.py:7 recorded in flow_steps as a Call step. Acceptance: - `cargo test --test benchmark_test -- --ignored --nocapture` passes. - rs-cmdi-003 is TP/TP/TP (the target flip FN->TP at LOC). All pre-existing TP/TP/TP fixtures remain TP/TP/TP; 2 new fixtures are TP/TP/TP. - Aggregate rule-level: TP=158 FP=10 FN=1 TN=97, P=0.940 R=0.994 F1=0.966 on the 266-case corpus (was TP=156 FP=10 FN=1 TN=97 on 264 pre-phase-4, delta is the +2 new cases both resolving TP). - Full `cargo test` green (1566 lib tests + all integration tests). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * feat(taint): phase 5/5 lock Finding.primary_location contract via regression test Add a regression test in src/taint/ssa_transfer.rs that wires up a synthetic SsaFuncSummary with a SinkSite at other.rs:42:10 and drives the three emission stages (pick_primary_sink_sites → emit_ssa_taint_events → ssa_events_to_findings) against a minimal caller SSA body. Asserts the resulting Finding.primary_location is exactly that triple. The existing integration tests in src/taint/tests.rs cover the coarse FuncSummary path end-to-end through analyse_file. 
This test locks in the lower-level SSA-side plumbing so a future refactor that silently drops the site between pick → emit → findings fails here rather than only at the benchmark layer. Also refreshes tests/benchmark/results/latest.json (timestamp only; rs-cmdi-003 remains TP/TP/TP and the aggregate P/R/F1 are unchanged from phase 4). Closes the primary sink-location attribution feature (phases 1-5/5): * Phase 1 — SinkSite data model on summaries. * Phase 2 — SinkSite threaded into SsaTaintEvent and Finding. * Phase 3 — diag + SARIF consume primary_location. * Phase 4 — benchmark validates primary_call_site_lines across corpus. * Phase 5 — regression test locks the event→finding contract. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * refactor: clean up formatting and improve readability in multiple files * refactor: simplify type definition for deduplication key in findings * test(harness): add must_not_match expectation for FP regression guards Extends ExpectedFinding with must_not_match field that asserts a diagnostic must NOT fire — presence is a hard failure. Non-consuming scan so it coexists with must_match entries on the same rule_id. Adds forbidden_violations accumulator and updates summary line. 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * feat(regression): update expectations to ensure must_not_match for various taint and resource leak rules * feat: implement auto-seeding for JS/TS handler parameters to enhance taint tracking * feat: update switch statement handling to improve control flow analysis * feat: implement promisify alias handling for JS/TS to enhance taint tracking * feat: enhance taint tracking by refining expectation handling and adding mode filtering * feat: refine SQL handling in stream processing and enhance auto-seeding for handler parameters * feat: update taint tracking rules to enforce full mode matching and improve flow analysis * feat: enhance Ruby subshell handling to improve taint tracking and flow analysis * feat: update xss_response expectations to refine taint flow analysis and enhance regression guarding * feat: refine framework detection and update expectation handling for Echo and Sinatra * feat: implement max_count for taint tracking expectations and deduplicate findings * feat: add strict_unexpected handling for taint-unsanitised-flow in expectation files * feat: enhance deduplication of taint-unsanitised-flow findings by collapsing based on line and severity * feat: add strict_unexpected handling for taint-unsanitised-flow in multiple expectation files * feat: add structural invariant checks for SSA bodies * feat: ensure deterministic phi emission order using BTreeSet * feat: enhance handling of terminators to ensure authoritative flow through successor edges * feat: enhance Goto terminator handling to ensure all successors are marked executable * feat: refactor code for improved readability and organization * feat: simplify predicate checks and enhance readability in SSA handling * feat: implement per-file parse timeout and enhance file size handling * feat: migrate analysis engine toggles from environment variables to configuration file * feat: remove unnecessary whitespace in 
hostile_input_tests.rs * feat: remove unnecessary whitespace in hostile_input_tests.rs * feat: update dependencies and enhance documentation on language maturity * feat: enhance security headers and improve request body limits * feat: implement sink capability bits for deduplication and enhance evidence tagging * feat: implement dynamic activation handling for gated sinks and enhance validation logic * feat: enhance configuration documentation and clarify inline analysis cache behavior * feat: implement panic recovery during analysis to continue scans past errors * feat: add expectations configuration for taint analysis and performance metrics * feat: enhance error handling and logging during file reading and mutex locking * feat: add cross-file body loading tests and plumbing for CF-1 phase * feat: implement cross-file k=1 context-sensitive inline taint analysis with new tests and fixtures * feat: implement indexed-scan parity in cross-file inline analysis with new dropdown and copy functionality * feat: enhance classification span handling in CFG and AST for improved source attribution * feat: add new Express routes for handling user input and telemetry data * feat: implement ternary expression handling in CFG with diamond structure for JS/TS * feat: implement Phase CF-3 abstract-domain transfer channels in summaries * feat: add support for string-prefix transfer in cross-file calls and update tests * docs: reduce RESULTS.md doc size * feat: implement Phase CF-4 per-return-path summary decomposition with tests * feat: update parameter handling in pass1 and refactor SsaFuncSummary initialization * feat: implement Phase CF-5 for cross-file SCC joint fixed-point convergence with new flags and tests * feat: implement Phase CF-6 with parameter-granularity points-to summaries and associated tests * refactor: update comments and documentation for clarity and consistency * style: format code for consistency and readability * refactor: simplify verdict handling and 
improve edge checking logic * refactor: optimize path and identifier collection by avoiding unnecessary cloning * chore: update Cargo.toml for Rust version 1.85 and add ignored files; modify CHANGELOG and README for clarity on state analysis defaults * refactor: update documentation and improve clarity in configuration files * refactor: update documentation and improve clarity in configuration files * feat: add JS/TS pass-2 convergence tests and expectations configuration * feat: add Phase 5 regression tests for inline cache origin attribution and update related logic * feat: implement Phase 7 deduplication and alternative path linking for taint findings * feat: implement structural DFS index for anonymous functions and update naming conventions * feat: add Phase 8 regression tests for container-element taint in JS and Python * feat: add engine-depth profiles and explain-engine option for CLI * feat: update expectations and add new README fixtures for multi-file scan regression * feat: implement Phase 11 callback-alias and factory patterns with regression tests * feat: implement Terminator::Switch for multi-way dispatch and add regression tests * feat: add real-CVE benchmark fixtures for CVE-2023-48022, CVE-2019-14939, and CVE-2023-26159 with corresponding patched variants * refactor: extract cfg and ssa_transfer to submodules * refactor: cargo fmt * refactor: remove unnecessary blank line in cfg_tests.rs * refactor: remove unnecessary planning file * chore: update Rust version to 1.88 and bump dependencies in Cargo files * feat: enhance triage UI with new layout and controls, update README for clarity * feat: enhance triage UI with new layout and controls, update README for clarity * chore: remove outdated section from README for version 0.5.0 * docs: improve clarity and consistency in README content * chore: add "GPL-3.0-or-later" to license options in about.toml * chore: update license handling in about.toml and check-licenses.mjs * style: format code for 
improved readability in TriagePage component * style: format code for improved readability in TriagePage component * chore: enhance license handling and improve body_id scoping in seed lookup * feat: introduce owner and parent body IDs for enhanced seed scoping * feat: implement direction-aware engine provenance with new CLI flag for strict CI gating * feat: add Undef SSA operation for improved control-flow handling * style: improve code formatting for consistency and readability in multiple files * feat: add 16-function chain SCC across multiple files for enhanced analysis * style: simplify code formatting for improved readability in multiple files * fix: update CapHitReason default implementation and improve README clarity * docs: enhance README with detailed explanations of taint analysis and limitations * docs: refine README for clarity and consistency in taint analysis section * style: improve code formatting for better readability in NewScanModal and scans * fix: update cargo-about command to use --offline for deterministic license generation * fix: update cargo-about command to use --offline for deterministic license generation * ci: add step to prime cargo registry cache for deterministic license generation * feat: add support for non-sink collections in authorization analysis * feat: enhance authorization checks with row-level ownership equality and binding tracking * feat: implement self-scoped user handling and enhance ownership checks * refactor: simplify assertions and formatting in authorization analysis tests * fix: normalize line endings in THIRDPARTY-LICENSES.html generation and update README with AI disclosure * docs: update AI disclosure section for clarity and conciseness * feat: add AI Contribution Policy and update contributing guidelines for AI assistance disclosure * feat: enhance authorization analysis with SSA-derived variable type classification * feat: implement auth_finding_to_diag function for enhanced security diagnostics * feat: add 
args_value_refs to CallSite struct for enhanced argument tracking * feat: add args_value_refs to CallSite struct for enhanced argument tracking * feat: add direction-aware engine provenance with LossDirection classification and new CLI flag * feat: simplify strip_cap_from_call_args call by removing unnecessary line breaks * feat: enhance error message handling in cli_validation_tests for better Windows compatibility * feat: optimize release profile settings in Cargo.toml and update CodeQL configuration * feat: enhance release build process with SBOM generation and SLSA provenance * feat: update actions/checkout and actions/setup-node to v6, enhance CLI options, and improve auth-check summaries * feat: introduce PathFact handling for path safety checks and rejection logic * feat: introduce PathFact handling for path safety checks and rejection logic * feat: update benchmark data and enhance path sanitization logic with new safety checks * feat: document AI assistance in frontend UI development and human review process * feat: add return path facts for enhanced path safety checks and update documentation * chore: update release date for version 0.5.0 in CHANGELOG.md * chore: clean up ci.yml by removing outdated comments and clarifying steps * feat: implement cross-language path sanitizers and validators for enhanced security * feat: enhance SSA value usage tracking by including block terminators and improve path safety checks * feat: enhance switch statement handling by adding per-case path constraints and support for exclusive cases * refactor: simplify conditional formatting and improve code readability in executor and lower modules * feat: add vulnerable examples for various languages demonstrating authentication and sanitization issues * feat: enhance actor context recognition for self-actor identifiers and add support for global non-sink receivers * feat: enhance actor context recognition for self-actor identifiers and add support for global non-sink receivers * 
feat: add transform classifiers for Java, Go, and Ruby with corresponding tests * refactor: clarify comments on reassign-to-constant idiom and sink behavior in guards.rs --------- Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com> Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
c4ce08b452
commit
41128177d2
2144 changed files with 201812 additions and 8927 deletions
1038
src/cfg/blocks.rs
Normal file
1038
src/cfg/blocks.rs
Normal file
File diff suppressed because it is too large
Load diff
2043
src/cfg/cfg_tests.rs
Normal file
2043
src/cfg/cfg_tests.rs
Normal file
File diff suppressed because it is too large
Load diff
505
src/cfg/conditions.rs
Normal file
505
src/cfg/conditions.rs
Normal file
|
|
@ -0,0 +1,505 @@
|
|||
use super::{
|
||||
AstMeta, Cfg, EdgeKind, MAX_COND_VARS, MAX_CONDITION_TEXT_LEN, NodeInfo, StmtKind,
|
||||
collect_idents, connect_all, detect_eq_with_const, detect_negation, has_call_descendant,
|
||||
member_expr_text, push_node, text_of,
|
||||
};
|
||||
use crate::labels::{DataLabel, LangAnalysisRules, classify};
|
||||
use petgraph::graph::NodeIndex;
|
||||
use smallvec::SmallVec;
|
||||
use tree_sitter::Node;
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Short-circuit boolean operator helpers
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/// Short-circuit boolean operator kind, as reported by `is_boolean_operator`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub(super) enum BoolOp {
    /// Conjunction: the `&&` / `and` operator tokens.
    And,
    /// Disjunction: the `||` / `or` operator tokens.
    Or,
}
|
||||
|
||||
/// Check if an AST node is a boolean operator (`&&`/`||`/`and`/`or`).
|
||||
pub(super) fn is_boolean_operator(node: Node) -> Option<BoolOp> {
|
||||
match node.kind() {
|
||||
"binary_expression" | "boolean_operator" | "binary" => {
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
match child.kind() {
|
||||
"&&" | "and" => return Some(BoolOp::And),
|
||||
"||" | "or" => return Some(BoolOp::Or),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Strip parenthesized_expression wrappers.
|
||||
pub(super) fn unwrap_parens(node: Node) -> Node {
|
||||
if node.kind() == "parenthesized_expression" {
|
||||
if let Some(inner) = node.named_child(0) {
|
||||
return unwrap_parens(inner);
|
||||
}
|
||||
}
|
||||
node
|
||||
}
|
||||
|
||||
/// Extract `left` and `right` operands from a binary boolean node.
|
||||
pub(super) fn get_boolean_operands<'a>(node: Node<'a>) -> Option<(Node<'a>, Node<'a>)> {
|
||||
// Field-based (all supported grammars)
|
||||
if let (Some(left), Some(right)) = (
|
||||
node.child_by_field_name("left"),
|
||||
node.child_by_field_name("right"),
|
||||
) {
|
||||
return Some((left, right));
|
||||
}
|
||||
// Positional fallback (safety net)
|
||||
let mut cursor = node.walk();
|
||||
let named: Vec<_> = node.named_children(&mut cursor).collect();
|
||||
if named.len() >= 2 {
|
||||
return Some((named[0], named[named.len() - 1]));
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Create a lightweight `StmtKind::If` node for a sub-condition in a boolean chain.
|
||||
pub(super) fn push_condition_node<'a>(
|
||||
g: &mut Cfg,
|
||||
cond_ast: Node<'a>,
|
||||
lang: &str,
|
||||
code: &'a [u8],
|
||||
enclosing_func: Option<&str>,
|
||||
) -> NodeIndex {
|
||||
// Pass cond_ast as both args — sub-conditions are never `unless` nodes
|
||||
let (inner, negated) = detect_negation(cond_ast, cond_ast, lang);
|
||||
let mut vars = Vec::new();
|
||||
collect_idents(inner, code, &mut vars);
|
||||
vars.sort();
|
||||
vars.dedup();
|
||||
vars.truncate(MAX_COND_VARS);
|
||||
let text = text_of(cond_ast, code).map(|t| {
|
||||
if t.len() > MAX_CONDITION_TEXT_LEN {
|
||||
t[..MAX_CONDITION_TEXT_LEN].to_string()
|
||||
} else {
|
||||
t
|
||||
}
|
||||
});
|
||||
let span = (cond_ast.start_byte(), cond_ast.end_byte());
|
||||
g.add_node(NodeInfo {
|
||||
kind: StmtKind::If,
|
||||
ast: AstMeta {
|
||||
span,
|
||||
enclosing_func: enclosing_func.map(|s| s.to_string()),
|
||||
},
|
||||
condition_text: text,
|
||||
condition_vars: vars,
|
||||
condition_negated: negated,
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
/// For a Rust `let <pattern> = match <scrutinee> { <arm> if <guard> => .., ... }`,
|
||||
/// find the first guarded `match_arm` and return the guard expression node plus
|
||||
/// the primary let-binding name. Returns `None` when the let-value is not a
|
||||
/// `match_expression` or no arm has a guard.
|
||||
///
|
||||
/// The guard lives on the tree-sitter `match_pattern` node as the field
|
||||
/// `condition` (present whenever the pattern is followed by `if <expr>`).
|
||||
pub(super) fn detect_rust_let_match_guard<'a>(
|
||||
ast: Node<'a>,
|
||||
code: &[u8],
|
||||
) -> Option<(Node<'a>, String)> {
|
||||
if ast.kind() != "let_declaration" {
|
||||
return None;
|
||||
}
|
||||
let value = ast.child_by_field_name("value")?;
|
||||
if value.kind() != "match_expression" {
|
||||
return None;
|
||||
}
|
||||
let body = value.child_by_field_name("body")?;
|
||||
|
||||
let mut cursor = body.walk();
|
||||
let guard = body.children(&mut cursor).find_map(|arm| {
|
||||
if !matches!(arm.kind(), "match_arm" | "last_match_arm") {
|
||||
return None;
|
||||
}
|
||||
let pattern = arm.child_by_field_name("pattern")?;
|
||||
pattern.child_by_field_name("condition")
|
||||
})?;
|
||||
|
||||
let pat = ast.child_by_field_name("pattern")?;
|
||||
let mut idents = Vec::new();
|
||||
collect_idents(pat, code, &mut idents);
|
||||
let name = idents.into_iter().next()?;
|
||||
|
||||
Some((guard, name))
|
||||
}
|
||||
|
||||
/// Synthesize a `StmtKind::If` CFG node carrying a Rust match-arm guard's
|
||||
/// condition text and vars. The let-binding name is added to `condition_vars`
|
||||
/// so `apply_branch_predicates` narrows validation to that specific variable
|
||||
/// — the variable that receives the arm's value and flows to downstream sinks.
|
||||
pub(super) fn emit_rust_match_guard_if<'a>(
|
||||
g: &mut Cfg,
|
||||
guard: Node<'a>,
|
||||
let_name: &str,
|
||||
code: &'a [u8],
|
||||
enclosing_func: Option<&str>,
|
||||
) -> NodeIndex {
|
||||
let mut vars = Vec::new();
|
||||
collect_idents(guard, code, &mut vars);
|
||||
vars.push(let_name.to_string());
|
||||
vars.sort();
|
||||
vars.dedup();
|
||||
vars.truncate(MAX_COND_VARS);
|
||||
let text = text_of(guard, code).map(|t| {
|
||||
if t.len() > MAX_CONDITION_TEXT_LEN {
|
||||
t[..MAX_CONDITION_TEXT_LEN].to_string()
|
||||
} else {
|
||||
t
|
||||
}
|
||||
});
|
||||
let span = (guard.start_byte(), guard.end_byte());
|
||||
g.add_node(NodeInfo {
|
||||
kind: StmtKind::If,
|
||||
ast: AstMeta {
|
||||
span,
|
||||
enclosing_func: enclosing_func.map(|s| s.to_string()),
|
||||
},
|
||||
condition_text: text,
|
||||
condition_vars: vars,
|
||||
condition_negated: false,
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
/// Decompose an assignment whose RHS is a ternary (`lhs = cond ? a : b`) into
|
||||
/// a proper diamond CFG: cond → {true_branch | false_branch} → join. Each
|
||||
/// branch defines `lhs_text` from its own operand's identifiers; a phi for
|
||||
/// `lhs_text` is then synthesised by SSA lowering at the join.
|
||||
///
|
||||
/// The condition's identifiers live on the If node's `condition_vars`, **not**
|
||||
/// on the branch `uses`. This is the whole point of the split — cond is control
|
||||
/// flow, branches are data flow.
|
||||
///
|
||||
/// Returns the exit frontier for downstream statement chaining (a single-element
|
||||
/// vec containing the join node).
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(super) fn build_ternary_diamond<'a>(
|
||||
lhs_text: String,
|
||||
lhs_labels: SmallVec<[DataLabel; 2]>,
|
||||
ternary_ast: Node<'a>,
|
||||
preds: &[NodeIndex],
|
||||
pred_edge: EdgeKind,
|
||||
g: &mut Cfg,
|
||||
lang: &str,
|
||||
code: &'a [u8],
|
||||
enclosing_func: Option<&str>,
|
||||
call_ordinal: &mut u32,
|
||||
analysis_rules: Option<&LangAnalysisRules>,
|
||||
) -> Vec<NodeIndex> {
|
||||
let (Some(cond_field), Some(cons_field), Some(alt_field)) = (
|
||||
ternary_ast.child_by_field_name("condition"),
|
||||
ternary_ast.child_by_field_name("consequence"),
|
||||
ternary_ast.child_by_field_name("alternative"),
|
||||
) else {
|
||||
// Grammar mismatch: caller will fall through to the non-split path.
|
||||
return preds.to_vec();
|
||||
};
|
||||
let cond_ast = unwrap_parens(cond_field);
|
||||
let cons_ast = unwrap_parens(cons_field);
|
||||
let alt_ast = unwrap_parens(alt_field);
|
||||
|
||||
// 1. Condition header. `push_condition_node` sets span/text/vars/negated
|
||||
// but leaves `is_eq_with_const` default; stamp it explicitly so the
|
||||
// taint engine's equality-narrowing fires for `x === 'literal' ? …`.
|
||||
let cond_if = push_condition_node(g, cond_ast, lang, code, enclosing_func);
|
||||
g[cond_if].is_eq_with_const = detect_eq_with_const(cond_ast, lang);
|
||||
connect_all(g, preds, cond_if, pred_edge);
|
||||
|
||||
// 2. Branches. Each branch produces its own exit frontier (≥ 1 node) —
|
||||
// a nested ternary recurses and returns its own join node.
|
||||
let true_exits = lower_ternary_branch(
|
||||
cons_ast,
|
||||
&[cond_if],
|
||||
EdgeKind::True,
|
||||
&lhs_text,
|
||||
&lhs_labels,
|
||||
g,
|
||||
lang,
|
||||
code,
|
||||
enclosing_func,
|
||||
call_ordinal,
|
||||
analysis_rules,
|
||||
);
|
||||
let false_exits = lower_ternary_branch(
|
||||
alt_ast,
|
||||
&[cond_if],
|
||||
EdgeKind::False,
|
||||
&lhs_text,
|
||||
&lhs_labels,
|
||||
g,
|
||||
lang,
|
||||
code,
|
||||
enclosing_func,
|
||||
call_ordinal,
|
||||
analysis_rules,
|
||||
);
|
||||
|
||||
// 3. Join: a zero-width Seq node placed at the ternary's end. Phi insertion
|
||||
// via Cytron will synthesise `lhs_text = phi(true_def, false_def)` here
|
||||
// because both branches define `lhs_text` and this is their dominance
|
||||
// frontier.
|
||||
let join_pos = ternary_ast.end_byte();
|
||||
let join = g.add_node(NodeInfo {
|
||||
kind: StmtKind::Seq,
|
||||
ast: AstMeta {
|
||||
span: (join_pos, join_pos),
|
||||
enclosing_func: enclosing_func.map(|s| s.to_string()),
|
||||
},
|
||||
..Default::default()
|
||||
});
|
||||
connect_all(g, &true_exits, join, EdgeKind::Seq);
|
||||
connect_all(g, &false_exits, join, EdgeKind::Seq);
|
||||
|
||||
vec![join]
|
||||
}
|
||||
|
||||
/// Emit the CFG shape for a single ternary branch. Three cases:
|
||||
///
|
||||
/// 1. Branch is itself a ternary → recurse via `build_ternary_diamond` so nested
|
||||
/// conditions also split cleanly (no `cond2` leakage into uses).
|
||||
/// 2. Branch contains a call → emit as `StmtKind::Call` via `push_node` so inner
|
||||
/// source/sanitizer/sink classification is preserved, then rewrite `defines`
|
||||
/// to the outer LHS and union in the LHS's sink labels.
|
||||
/// 3. Otherwise → emit as `StmtKind::Seq`, same override.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(super) fn lower_ternary_branch<'a>(
|
||||
branch_ast: Node<'a>,
|
||||
preds: &[NodeIndex],
|
||||
pred_edge: EdgeKind,
|
||||
lhs_text: &str,
|
||||
lhs_labels: &SmallVec<[DataLabel; 2]>,
|
||||
g: &mut Cfg,
|
||||
lang: &str,
|
||||
code: &'a [u8],
|
||||
enclosing_func: Option<&str>,
|
||||
call_ordinal: &mut u32,
|
||||
analysis_rules: Option<&LangAnalysisRules>,
|
||||
) -> Vec<NodeIndex> {
|
||||
// Case 1: nested ternary.
|
||||
if branch_ast.kind() == "ternary_expression" {
|
||||
return build_ternary_diamond(
|
||||
lhs_text.to_string(),
|
||||
lhs_labels.clone(),
|
||||
branch_ast,
|
||||
preds,
|
||||
pred_edge,
|
||||
g,
|
||||
lang,
|
||||
code,
|
||||
enclosing_func,
|
||||
call_ordinal,
|
||||
analysis_rules,
|
||||
);
|
||||
}
|
||||
|
||||
// Cases 2 and 3: leaf branch expression.
|
||||
let has_call = has_call_descendant(branch_ast, lang);
|
||||
let kind = if has_call {
|
||||
StmtKind::Call
|
||||
} else {
|
||||
StmtKind::Seq
|
||||
};
|
||||
let ord = if kind == StmtKind::Call {
|
||||
let o = *call_ordinal;
|
||||
*call_ordinal += 1;
|
||||
o
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
let node = push_node(
|
||||
g,
|
||||
kind,
|
||||
branch_ast,
|
||||
lang,
|
||||
code,
|
||||
enclosing_func,
|
||||
ord,
|
||||
analysis_rules,
|
||||
);
|
||||
|
||||
// The branch expression's own `defines` (if any — typically None for a
|
||||
// pure value expression) is replaced with the outer LHS so that both
|
||||
// branches agree on the target, driving phi insertion at the join.
|
||||
g[node].taint.defines = Some(lhs_text.to_string());
|
||||
for label in lhs_labels {
|
||||
if !g[node].taint.labels.contains(label) {
|
||||
g[node].taint.labels.push(*label);
|
||||
}
|
||||
}
|
||||
|
||||
connect_all(g, preds, node, pred_edge);
|
||||
vec![node]
|
||||
}
|
||||
|
||||
/// Extract `(lhs_ast, ternary_ast)` when `outer_ast` is an expression-statement
|
||||
/// or declaration whose single assignment/declarator's RHS is a ternary.
|
||||
/// Returns `None` for multi-declarator forms, for missing fields, and for
|
||||
/// any RHS that isn't a `ternary_expression` after `unwrap_parens`.
|
||||
pub(super) fn find_ternary_rhs_wrapper<'a>(outer_ast: Node<'a>) -> Option<(Node<'a>, Node<'a>)> {
|
||||
let mut cursor = outer_ast.walk();
|
||||
let mut declarator_count = 0usize;
|
||||
let mut found: Option<(Node<'a>, Node<'a>)> = None;
|
||||
|
||||
for child in outer_ast.children(&mut cursor) {
|
||||
match child.kind() {
|
||||
"variable_declarator" => {
|
||||
declarator_count += 1;
|
||||
if declarator_count > 1 {
|
||||
return None;
|
||||
}
|
||||
let (Some(name), Some(value)) = (
|
||||
child.child_by_field_name("name"),
|
||||
child.child_by_field_name("value"),
|
||||
) else {
|
||||
continue;
|
||||
};
|
||||
let rhs = unwrap_parens(value);
|
||||
if rhs.kind() == "ternary_expression" {
|
||||
found = Some((name, rhs));
|
||||
}
|
||||
}
|
||||
"assignment_expression" => {
|
||||
let (Some(left), Some(right)) = (
|
||||
child.child_by_field_name("left"),
|
||||
child.child_by_field_name("right"),
|
||||
) else {
|
||||
continue;
|
||||
};
|
||||
let rhs = unwrap_parens(right);
|
||||
if rhs.kind() == "ternary_expression" {
|
||||
return Some((left, rhs));
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
found
|
||||
}
|
||||
|
||||
/// Classify the LHS of a ternary-split assignment. Returns `(lhs_text, labels)`
|
||||
/// where `labels` are any sink labels that belong to the LHS itself (e.g.
|
||||
/// `innerHTML`, `document.cookie`). These are applied to **each branch** so
|
||||
/// the sink fires on whichever branch carries tainted data.
|
||||
pub(super) fn classify_ternary_lhs(
|
||||
lhs_ast: Node,
|
||||
lang: &str,
|
||||
code: &[u8],
|
||||
analysis_rules: Option<&LangAnalysisRules>,
|
||||
) -> (String, SmallVec<[DataLabel; 2]>) {
|
||||
let extra = analysis_rules.map(|r| r.extra_labels.as_slice());
|
||||
let mut labels: SmallVec<[DataLabel; 2]> = SmallVec::new();
|
||||
|
||||
// Prefer full member-expression path; fall back to raw text.
|
||||
let lhs_text = member_expr_text(lhs_ast, code)
|
||||
.or_else(|| text_of(lhs_ast, code))
|
||||
.unwrap_or_default();
|
||||
|
||||
// Try the full dotted path first (e.g. "document.cookie"), then fall back
|
||||
// to the property alone (e.g. "innerHTML") — mirrors the LHS classification
|
||||
// already performed in `push_node` for non-split assignments.
|
||||
if let Some(l) = classify(lang, &lhs_text, extra) {
|
||||
labels.push(l);
|
||||
}
|
||||
if labels.is_empty()
|
||||
&& let Some(prop) = lhs_ast.child_by_field_name("property")
|
||||
&& let Some(prop_text) = text_of(prop, code)
|
||||
&& let Some(l) = classify(lang, &prop_text, extra)
|
||||
{
|
||||
labels.push(l);
|
||||
}
|
||||
|
||||
(lhs_text, labels)
|
||||
}
|
||||
|
||||
/// Recursively decompose a boolean condition into a chain of `StmtKind::If` nodes
|
||||
/// with short-circuit edges.
|
||||
///
|
||||
/// Returns `(true_exits, false_exits)` — the sets of nodes from which True/False
|
||||
/// edges should connect to the then/else branches.
|
||||
pub(super) fn build_condition_chain<'a>(
|
||||
cond_ast: Node<'a>,
|
||||
preds: &[NodeIndex],
|
||||
pred_edge: EdgeKind,
|
||||
g: &mut Cfg,
|
||||
lang: &str,
|
||||
code: &'a [u8],
|
||||
enclosing_func: Option<&str>,
|
||||
) -> (Vec<NodeIndex>, Vec<NodeIndex>) {
|
||||
let inner = unwrap_parens(cond_ast);
|
||||
|
||||
match is_boolean_operator(inner) {
|
||||
Some(BoolOp::And) => {
|
||||
if let Some((left, right)) = get_boolean_operands(inner) {
|
||||
// Left operand with current preds
|
||||
let (left_true, left_false) =
|
||||
build_condition_chain(left, preds, pred_edge, g, lang, code, enclosing_func);
|
||||
// Right operand only evaluated when left is true
|
||||
let (right_true, right_false) = build_condition_chain(
|
||||
right,
|
||||
&left_true,
|
||||
EdgeKind::True,
|
||||
g,
|
||||
lang,
|
||||
code,
|
||||
enclosing_func,
|
||||
);
|
||||
// AND: true only when both true; false when either false
|
||||
let mut false_exits = left_false;
|
||||
false_exits.extend(right_false);
|
||||
(right_true, false_exits)
|
||||
} else {
|
||||
// Safety fallback: treat as leaf
|
||||
let node = push_condition_node(g, inner, lang, code, enclosing_func);
|
||||
connect_all(g, preds, node, pred_edge);
|
||||
(vec![node], vec![node])
|
||||
}
|
||||
}
|
||||
Some(BoolOp::Or) => {
|
||||
if let Some((left, right)) = get_boolean_operands(inner) {
|
||||
// Left operand with current preds
|
||||
let (left_true, left_false) =
|
||||
build_condition_chain(left, preds, pred_edge, g, lang, code, enclosing_func);
|
||||
// Right operand only evaluated when left is false
|
||||
let (right_true, right_false) = build_condition_chain(
|
||||
right,
|
||||
&left_false,
|
||||
EdgeKind::False,
|
||||
g,
|
||||
lang,
|
||||
code,
|
||||
enclosing_func,
|
||||
);
|
||||
// OR: true when either true; false only when both false
|
||||
let mut true_exits = left_true;
|
||||
true_exits.extend(right_true);
|
||||
(true_exits, right_false)
|
||||
} else {
|
||||
// Safety fallback: treat as leaf
|
||||
let node = push_condition_node(g, inner, lang, code, enclosing_func);
|
||||
connect_all(g, preds, node, pred_edge);
|
||||
(vec![node], vec![node])
|
||||
}
|
||||
}
|
||||
None => {
|
||||
// Leaf: single condition node
|
||||
let node = push_condition_node(g, inner, lang, code, enclosing_func);
|
||||
connect_all(g, preds, node, pred_edge);
|
||||
(vec![node], vec![node])
|
||||
}
|
||||
}
|
||||
}
|
||||
556
src/cfg/decorators.rs
Normal file
556
src/cfg/decorators.rs
Normal file
|
|
@ -0,0 +1,556 @@
|
|||
use super::text_of;
|
||||
use tree_sitter::Node;
|
||||
|
||||
/// Extract the leading identifier from a tree-sitter expression/call node.
|
||||
///
|
||||
/// Used by decorator extraction to reduce `login_required`, `permission_required(...)`,
|
||||
/// `flask_login.login_required`, `hasRole('ADMIN')` to their first identifier
|
||||
/// name — the matcher target.
|
||||
fn leading_ident_text(node: Node<'_>, code: &[u8]) -> Option<String> {
|
||||
let mut cur = node;
|
||||
loop {
|
||||
match cur.kind() {
|
||||
"identifier"
|
||||
| "type_identifier"
|
||||
| "property_identifier"
|
||||
| "scoped_identifier"
|
||||
| "name"
|
||||
| "constant"
|
||||
| "simple_identifier" => {
|
||||
return text_of(cur, code);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
// Peel wrappers: call → function, member/attribute → object or last segment
|
||||
if let Some(fn_field) = cur.child_by_field_name("function") {
|
||||
cur = fn_field;
|
||||
continue;
|
||||
}
|
||||
if let Some(name_field) = cur.child_by_field_name("name") {
|
||||
cur = name_field;
|
||||
continue;
|
||||
}
|
||||
if let Some(obj_field) = cur.child_by_field_name("object") {
|
||||
// For `flask_login.login_required`, we want the RIGHT side.
|
||||
if let Some(prop) = cur.child_by_field_name("property") {
|
||||
cur = prop;
|
||||
continue;
|
||||
}
|
||||
cur = obj_field;
|
||||
continue;
|
||||
}
|
||||
// Fallback: first non-trivia child.
|
||||
let mut walker = cur.walk();
|
||||
let next = cur
|
||||
.children(&mut walker)
|
||||
.find(|c| !matches!(c.kind(), "@" | "(" | ")" | "," | " " | "\n"));
|
||||
match next {
|
||||
Some(n) if n.id() != cur.id() => cur = n,
|
||||
_ => return text_of(cur, code),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Canonicalize a raw decorator / annotation / filter name for matching.
///
/// Steps, in order:
/// 1. trim surrounding whitespace, then leading `:` characters, then leading
///    `@` characters;
/// 2. keep only the text before the first `(`, space, tab or newline;
/// 3. drop trailing `!` characters, then trailing `?` characters;
/// 4. keep only the text after the last `.` or `:`, so `module.name` and
///    `a::b::c` reduce to their final segment;
/// 5. ASCII-lowercase the result.
fn normalize_decorator_name(raw: &str) -> String {
    let without_sigils = raw
        .trim()
        .trim_start_matches(':')
        .trim_start_matches('@');

    // If call syntax leaked through (e.g. `UseGuards(AuthGuard)`), only the
    // head is kept; arguments are handled by callers.
    let call_head = match without_sigils.find(|c: char| matches!(c, '(' | ' ' | '\t' | '\n')) {
        Some(cut) => &without_sigils[..cut],
        None => without_sigils,
    };

    let bare = call_head.trim_end_matches('!').trim_end_matches('?');

    // Both separators are single-byte, so `sep + 1` is a valid boundary.
    let last_segment = match bare.rfind(|c: char| c == '.' || c == ':') {
        Some(sep) => &bare[sep + 1..],
        None => bare,
    };

    last_segment.to_ascii_lowercase()
}
|
||||
|
||||
/// Collect decorator-argument identifiers for call-style decorators like
|
||||
/// NestJS `@UseGuards(AuthGuard, JwtGuard)` or Java `@PreAuthorize("hasRole('USER')")`.
|
||||
///
|
||||
/// For Java annotations with string-literal arguments, also splits out bare
|
||||
/// identifiers from inside the string so that `hasRole('ADMIN')` contributes
|
||||
/// `hasrole` and `admin` as additional matcher candidates.
|
||||
fn decorator_arg_names(decorator_ast: Node<'_>, code: &[u8]) -> Vec<String> {
|
||||
let mut out = Vec::new();
|
||||
let args = decorator_ast.child_by_field_name("arguments").or_else(|| {
|
||||
let mut w = decorator_ast.walk();
|
||||
decorator_ast
|
||||
.children(&mut w)
|
||||
.find(|c| matches!(c.kind(), "argument_list" | "arguments"))
|
||||
});
|
||||
let Some(args) = args else {
|
||||
return out;
|
||||
};
|
||||
let mut walker = args.walk();
|
||||
for arg in args.children(&mut walker) {
|
||||
match arg.kind() {
|
||||
"(" | ")" | "," => continue,
|
||||
"string" | "string_literal" | "interpreted_string_literal" => {
|
||||
if let Some(s) = text_of(arg, code) {
|
||||
for token in s.split(|c: char| !c.is_ascii_alphanumeric() && c != '_') {
|
||||
if !token.is_empty() {
|
||||
out.push(token.to_ascii_lowercase());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
if let Some(name) = leading_ident_text(arg, code) {
|
||||
out.push(name.to_ascii_lowercase());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Walk tree-sitter decorator/annotation/attribute children of a function AST
|
||||
/// node and return normalized names for auth-rule matching.
|
||||
///
|
||||
/// Grammar-specific notes:
|
||||
/// - **Python**: function is wrapped by `decorated_definition` whose siblings
|
||||
/// are `decorator` nodes containing an `identifier` or `call` expression.
|
||||
/// - **JS/TS**: decorators attach to `method_definition` children or appear
|
||||
/// as siblings inside `class_body`; stage-3 decorators use `decorator` nodes.
|
||||
/// `@UseGuards(AuthGuard)` — we include the call args too.
|
||||
/// - **Java**: annotations live in the `modifiers` child of `method_declaration`;
|
||||
/// kinds are `marker_annotation` / `annotation`.
|
||||
/// - **Rust**: `function_item` has `attribute_item` siblings (outer `#[..]`).
|
||||
/// - **PHP**: `method_declaration` has an `attribute_list` child with `attribute`
|
||||
/// grandchildren (`#[IsGranted(..)]`).
|
||||
/// - **C++**: `function_definition` preceded or prefixed by `attribute_declaration`
|
||||
/// / `attribute` (`[[authenticated]]`).
|
||||
/// - **Ruby**: not a per-function decorator. `before_action :authenticate_user!`
|
||||
/// at class body scope applies to every method in the class. `only:` /
|
||||
/// `except:` hash args scope the filter to the listed action names; the
|
||||
/// filter is only recorded for the current method when the scope matches.
|
||||
/// Conditional filters (`if:` / `unless:`) are not honored — those require
|
||||
/// predicate evaluation and are deferred.
|
||||
pub(super) fn extract_auth_decorators<'a>(
|
||||
func_node: Node<'a>,
|
||||
lang: &str,
|
||||
code: &'a [u8],
|
||||
) -> Vec<String> {
|
||||
let mut out = Vec::new();
|
||||
let mut push = |raw: &str| {
|
||||
let norm = normalize_decorator_name(raw);
|
||||
if !norm.is_empty() && !out.contains(&norm) {
|
||||
out.push(norm);
|
||||
}
|
||||
};
|
||||
|
||||
match lang {
|
||||
"python" => {
|
||||
if let Some(parent) = func_node.parent() {
|
||||
if parent.kind() == "decorated_definition" {
|
||||
let mut w = parent.walk();
|
||||
for ch in parent.children(&mut w) {
|
||||
if ch.kind() != "decorator" {
|
||||
continue;
|
||||
}
|
||||
// `decorator` → '@' + expression child.
|
||||
let mut dw = ch.walk();
|
||||
let expr = ch.children(&mut dw).find(|c| c.kind() != "@");
|
||||
let Some(expr) = expr else { continue };
|
||||
if let Some(name) = leading_ident_text(expr, code) {
|
||||
push(&name);
|
||||
}
|
||||
// Arguments (e.g. `permission_required('view_user')`).
|
||||
for arg in decorator_arg_names(expr, code) {
|
||||
push(&arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
"javascript" | "typescript" => {
|
||||
// Decorators may live as children of method_definition or as
|
||||
// preceding siblings inside a class_body.
|
||||
let mut seen = Vec::new();
|
||||
let mut w = func_node.walk();
|
||||
for ch in func_node.children(&mut w) {
|
||||
if ch.kind() == "decorator" {
|
||||
seen.push(ch);
|
||||
}
|
||||
}
|
||||
if let Some(parent) = func_node.parent() {
|
||||
if parent.kind() == "class_body" {
|
||||
let mut pw = parent.walk();
|
||||
for sib in parent.children(&mut pw) {
|
||||
if sib.id() == func_node.id() {
|
||||
break;
|
||||
}
|
||||
if sib.kind() == "decorator" {
|
||||
seen.push(sib);
|
||||
} else if sib.kind() != "decorator" && !seen.is_empty() {
|
||||
// Only the contiguous run of decorators immediately
|
||||
// before this method is relevant; reset if a non-
|
||||
// decorator node intervenes.
|
||||
if sib.end_byte() < func_node.start_byte() {
|
||||
seen.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for dec in seen {
|
||||
let mut dw = dec.walk();
|
||||
let expr = dec.children(&mut dw).find(|c| c.kind() != "@");
|
||||
let Some(expr) = expr else { continue };
|
||||
if let Some(name) = leading_ident_text(expr, code) {
|
||||
push(&name);
|
||||
}
|
||||
for arg in decorator_arg_names(expr, code) {
|
||||
push(&arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
"java" => {
|
||||
// method_declaration has a `modifiers` field listing annotations.
|
||||
let modifiers = func_node.child_by_field_name("modifiers").or_else(|| {
|
||||
let mut w = func_node.walk();
|
||||
func_node.children(&mut w).find(|c| c.kind() == "modifiers")
|
||||
});
|
||||
if let Some(modifiers) = modifiers {
|
||||
let mut w = modifiers.walk();
|
||||
for ch in modifiers.children(&mut w) {
|
||||
match ch.kind() {
|
||||
"marker_annotation" | "annotation" => {
|
||||
if let Some(name_node) = ch.child_by_field_name("name") {
|
||||
if let Some(t) = text_of(name_node, code) {
|
||||
push(&t);
|
||||
}
|
||||
} else if let Some(t) = leading_ident_text(ch, code) {
|
||||
push(&t);
|
||||
}
|
||||
for arg in decorator_arg_names(ch, code) {
|
||||
push(&arg);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
"rust" => {
|
||||
// In tree-sitter-rust, outer `#[..]` attributes may appear either
|
||||
// as children of `function_item` OR as preceding siblings inside
|
||||
// the parent container (grammar has varied by version).
|
||||
let mut harvest = |node: Node<'_>| {
|
||||
if node.kind() == "attribute_item" || node.kind() == "inner_attribute_item" {
|
||||
let mut aw = node.walk();
|
||||
for inner in node.children(&mut aw) {
|
||||
if inner.kind() == "attribute" {
|
||||
if let Some(name) = leading_ident_text(inner, code) {
|
||||
push(&name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
let mut w = func_node.walk();
|
||||
for ch in func_node.children(&mut w) {
|
||||
harvest(ch);
|
||||
}
|
||||
if let Some(parent) = func_node.parent() {
|
||||
let mut pw = parent.walk();
|
||||
let mut pending: Vec<Node<'_>> = Vec::new();
|
||||
for sib in parent.children(&mut pw) {
|
||||
if sib.id() == func_node.id() {
|
||||
for p in &pending {
|
||||
harvest(*p);
|
||||
}
|
||||
break;
|
||||
}
|
||||
if sib.kind() == "attribute_item" || sib.kind() == "inner_attribute_item" {
|
||||
pending.push(sib);
|
||||
} else {
|
||||
pending.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
"php" => {
|
||||
// `attribute_list` child of `method_declaration`.
|
||||
let mut w = func_node.walk();
|
||||
for ch in func_node.children(&mut w) {
|
||||
if ch.kind() == "attribute_list" {
|
||||
let mut aw = ch.walk();
|
||||
for attr_group in ch.children(&mut aw) {
|
||||
let mut gw = attr_group.walk();
|
||||
for attr in attr_group.children(&mut gw) {
|
||||
if attr.kind() == "attribute" {
|
||||
if let Some(name) = leading_ident_text(attr, code) {
|
||||
push(&name);
|
||||
}
|
||||
for arg in decorator_arg_names(attr, code) {
|
||||
push(&arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
"cpp" => {
|
||||
// C++ attributes `[[auth]]` appear as preceding siblings
|
||||
// (`attribute_declaration`) or as children of the function declarator.
|
||||
let mut harvest = |node: Node<'_>| {
|
||||
let mut w = node.walk();
|
||||
for ch in node.children(&mut w) {
|
||||
if ch.kind() == "attribute" {
|
||||
if let Some(name) = leading_ident_text(ch, code) {
|
||||
push(&name);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
let mut w = func_node.walk();
|
||||
for ch in func_node.children(&mut w) {
|
||||
if ch.kind() == "attribute_declaration" || ch.kind() == "attribute_specifier" {
|
||||
harvest(ch);
|
||||
}
|
||||
}
|
||||
if let Some(parent) = func_node.parent() {
|
||||
let mut pw = parent.walk();
|
||||
let mut pending: Vec<Node<'_>> = Vec::new();
|
||||
for sib in parent.children(&mut pw) {
|
||||
if sib.id() == func_node.id() {
|
||||
for p in &pending {
|
||||
harvest(*p);
|
||||
}
|
||||
break;
|
||||
}
|
||||
if sib.kind() == "attribute_declaration" {
|
||||
pending.push(sib);
|
||||
} else {
|
||||
pending.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
"ruby" => {
|
||||
// Walk up to enclosing class/module body and collect
|
||||
// `before_action :name` filter calls. Apply `only:` / `except:`
|
||||
// hash args by comparing against the current method name.
|
||||
let method_name = func_node
|
||||
.child_by_field_name("name")
|
||||
.and_then(|n| text_of(n, code))
|
||||
.map(|s| normalize_decorator_name(&s))
|
||||
.unwrap_or_default();
|
||||
let mut cursor = func_node.parent();
|
||||
while let Some(node) = cursor {
|
||||
match node.kind() {
|
||||
"class" | "module" => {
|
||||
// Body is the direct sibling/child sequence.
|
||||
let mut w = node.walk();
|
||||
for ch in node.children(&mut w) {
|
||||
match ch.kind() {
|
||||
"body_statement" | "block_body" => {
|
||||
let mut bw = ch.walk();
|
||||
for stmt in ch.children(&mut bw) {
|
||||
collect_ruby_before_action(
|
||||
stmt,
|
||||
code,
|
||||
&method_name,
|
||||
&mut out,
|
||||
);
|
||||
}
|
||||
}
|
||||
"call" | "method_call" | "identifier" | "command" => {
|
||||
collect_ruby_before_action(ch, code, &method_name, &mut out);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
cursor = node.parent();
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// If a Ruby statement is `before_action :name` (or `before_filter :name`),
|
||||
/// push the normalized filter name into `out` — honoring any `only:` / `except:`
|
||||
/// hash arguments against `method_name`.
|
||||
///
|
||||
/// Positional symbol args (`before_action :a, :b, only: [:x]`) all share the
|
||||
/// single trailing scope. Conditional filters (`if:` / `unless:`) are not
|
||||
/// honored here — those require predicate evaluation and are deferred.
|
||||
fn collect_ruby_before_action(
|
||||
node: Node<'_>,
|
||||
code: &[u8],
|
||||
method_name: &str,
|
||||
out: &mut Vec<String>,
|
||||
) {
|
||||
// The call may be wrapped in expression nodes; drill to a call-shaped node.
|
||||
let mut cur = node;
|
||||
loop {
|
||||
match cur.kind() {
|
||||
"call" | "method_call" | "command" => break,
|
||||
_ => {}
|
||||
}
|
||||
let mut w = cur.walk();
|
||||
let next = cur
|
||||
.children(&mut w)
|
||||
.find(|c| matches!(c.kind(), "call" | "method_call" | "command" | "identifier"));
|
||||
match next {
|
||||
Some(n) if n.id() != cur.id() => cur = n,
|
||||
_ => return,
|
||||
}
|
||||
}
|
||||
let head = cur
|
||||
.child_by_field_name("method")
|
||||
.or_else(|| cur.child_by_field_name("name"))
|
||||
.and_then(|n| text_of(n, code))
|
||||
.or_else(|| leading_ident_text(cur, code));
|
||||
let Some(head) = head else { return };
|
||||
let head_lc = head.to_ascii_lowercase();
|
||||
if !(head_lc == "before_action" || head_lc == "before_filter") {
|
||||
return;
|
||||
}
|
||||
let args = cur.child_by_field_name("arguments").or_else(|| {
|
||||
let mut w = cur.walk();
|
||||
cur.children(&mut w).find(|c| {
|
||||
matches!(
|
||||
c.kind(),
|
||||
"argument_list" | "arguments" | "command_argument_list"
|
||||
)
|
||||
})
|
||||
});
|
||||
let Some(args) = args else { return };
|
||||
|
||||
let mut positional: Vec<String> = Vec::new();
|
||||
let mut only_list: Vec<String> = Vec::new();
|
||||
let mut except_list: Vec<String> = Vec::new();
|
||||
let mut only_present = false;
|
||||
let mut except_present = false;
|
||||
|
||||
let mut w = args.walk();
|
||||
for arg in args.children(&mut w) {
|
||||
match arg.kind() {
|
||||
"simple_symbol" | "symbol" | "hash_key_symbol" | "identifier" => {
|
||||
if let Some(t) = text_of(arg, code) {
|
||||
let norm = normalize_decorator_name(&t);
|
||||
if !norm.is_empty() {
|
||||
positional.push(norm);
|
||||
}
|
||||
}
|
||||
}
|
||||
"pair" => {
|
||||
collect_ruby_filter_pair(
|
||||
arg,
|
||||
code,
|
||||
&mut only_list,
|
||||
&mut except_list,
|
||||
&mut only_present,
|
||||
&mut except_present,
|
||||
);
|
||||
}
|
||||
"hash" => {
|
||||
let mut hw = arg.walk();
|
||||
for pair_node in arg.children(&mut hw) {
|
||||
if pair_node.kind() == "pair" {
|
||||
collect_ruby_filter_pair(
|
||||
pair_node,
|
||||
code,
|
||||
&mut only_list,
|
||||
&mut except_list,
|
||||
&mut only_present,
|
||||
&mut except_present,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
// Scope check: apply filter to this method only when the scope matches.
|
||||
if except_present
|
||||
&& except_list
|
||||
.iter()
|
||||
.any(|n| n.eq_ignore_ascii_case(method_name))
|
||||
{
|
||||
return;
|
||||
}
|
||||
if only_present
|
||||
&& !only_list
|
||||
.iter()
|
||||
.any(|n| n.eq_ignore_ascii_case(method_name))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
for filter in positional {
|
||||
if !out.contains(&filter) {
|
||||
out.push(filter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a single `only:` / `except:` hash pair and append the symbol list into
|
||||
/// the corresponding out-vec. Sets the `*_present` flag when the key is seen,
|
||||
/// regardless of whether the value parses into any symbols — treating
|
||||
/// `only: []` as "no actions match" is safer than ignoring the scope.
|
||||
fn collect_ruby_filter_pair(
|
||||
pair_node: Node<'_>,
|
||||
code: &[u8],
|
||||
only_list: &mut Vec<String>,
|
||||
except_list: &mut Vec<String>,
|
||||
only_present: &mut bool,
|
||||
except_present: &mut bool,
|
||||
) {
|
||||
let key_node = pair_node.child_by_field_name("key");
|
||||
let Some(key_node) = key_node else { return };
|
||||
let Some(key_text) = text_of(key_node, code) else {
|
||||
return;
|
||||
};
|
||||
let key_norm = normalize_decorator_name(&key_text);
|
||||
let value_node = pair_node.child_by_field_name("value");
|
||||
match key_norm.as_str() {
|
||||
"only" => {
|
||||
*only_present = true;
|
||||
if let Some(v) = value_node {
|
||||
collect_ruby_symbol_list(v, code, only_list);
|
||||
}
|
||||
}
|
||||
"except" => {
|
||||
*except_present = true;
|
||||
if let Some(v) = value_node {
|
||||
collect_ruby_symbol_list(v, code, except_list);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Recursively collect symbol / identifier names from a `:x` or `[:x, :y]`
|
||||
/// value into `out`, using the tree-sitter AST (no text parsing).
|
||||
fn collect_ruby_symbol_list(node: Node<'_>, code: &[u8], out: &mut Vec<String>) {
|
||||
match node.kind() {
|
||||
"simple_symbol" | "symbol" | "hash_key_symbol" | "identifier" | "string" => {
|
||||
if let Some(t) = text_of(node, code) {
|
||||
let norm = normalize_decorator_name(&t);
|
||||
if !norm.is_empty() {
|
||||
out.push(norm);
|
||||
}
|
||||
}
|
||||
}
|
||||
"array" => {
|
||||
let mut w = node.walk();
|
||||
for ch in node.children(&mut w) {
|
||||
collect_ruby_symbol_list(ch, code, out);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
702
src/cfg/helpers.rs
Normal file
702
src/cfg/helpers.rs
Normal file
|
|
@ -0,0 +1,702 @@
|
|||
use super::anon_fn_name;
|
||||
use super::conditions::unwrap_parens;
|
||||
use crate::labels::{DataLabel, Kind, classify, lookup};
|
||||
use tree_sitter::Node;
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Utility helpers
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/// Return the text of a node.
|
||||
#[inline]
|
||||
pub(crate) fn text_of<'a>(n: Node<'a>, code: &'a [u8]) -> Option<String> {
|
||||
std::str::from_utf8(&code[n.start_byte()..n.end_byte()])
|
||||
.ok()
|
||||
.map(|s| s.to_string())
|
||||
}
|
||||
|
||||
/// Walk through chained calls / member accesses to find the root receiver.
|
||||
///
|
||||
/// For `Runtime.getRuntime().exec(cmd)`, the receiver of `exec` is the call
|
||||
/// `Runtime.getRuntime()`. This function drills through that to return
|
||||
/// `"Runtime"` — the outermost non-call object. This lets labels like
|
||||
/// `"Runtime.exec"` match correctly.
|
||||
pub(crate) fn root_receiver_text(n: Node, lang: &str, code: &[u8]) -> Option<String> {
|
||||
match lookup(lang, n.kind()) {
|
||||
// The receiver is itself a call — drill into ITS receiver.
|
||||
// e.g. for `Runtime.getRuntime()`, the object is `Runtime`.
|
||||
Kind::CallFn | Kind::CallMethod => {
|
||||
let inner = n
|
||||
.child_by_field_name("object")
|
||||
.or_else(|| n.child_by_field_name("receiver"))
|
||||
.or_else(|| n.child_by_field_name("function"));
|
||||
match inner {
|
||||
Some(child) => root_receiver_text(child, lang, code),
|
||||
None => text_of(n, code),
|
||||
}
|
||||
}
|
||||
_ => text_of(n, code),
|
||||
}
|
||||
}
|
||||
|
||||
/// Walk a member-expression / attribute chain down to its root identifier.
|
||||
///
|
||||
/// Unlike [`root_receiver_text`], which returns the raw text of a nested
|
||||
/// attribute (yielding `"request.args.get"` for the attribute node covering
|
||||
/// `request.args.get`), this drills through `object`/`value` fields until it
|
||||
/// hits a terminal identifier and returns just that leaf.
|
||||
///
|
||||
/// Used when JS/Python `obj.method(x)` is classified as `Kind::CallFn` with a
|
||||
/// dotted function child: we want the leftmost segment (`request` in
|
||||
/// `request.args.get("q")`) as the structured receiver for type-qualified
|
||||
/// resolution. Returns `None` when the chain does not resolve to a plain
|
||||
/// identifier (e.g. call expressions, subscripts, `this`/`self`, etc.).
|
||||
pub(crate) fn root_member_receiver(n: Node, code: &[u8]) -> Option<String> {
|
||||
let mut cur = n;
|
||||
// Bounded walk — tree-sitter can nest deeply but we only need a handful
|
||||
// of hops for real code.
|
||||
for _ in 0..16 {
|
||||
match cur.kind() {
|
||||
"identifier" | "variable_name" | "this" | "self" => {
|
||||
return text_of(cur, code);
|
||||
}
|
||||
"member_expression" | "attribute" => {
|
||||
cur = cur.child_by_field_name("object")?;
|
||||
}
|
||||
// Rust `x.y` is `field_expression` with a `value` field.
|
||||
"field_expression" => {
|
||||
cur = cur.child_by_field_name("value")?;
|
||||
}
|
||||
// Drill through nested calls / method chains to find the base
|
||||
// identifier. E.g. `Connection::open(p).unwrap().execute(...)` —
|
||||
// the receiver of `.execute` is the `.unwrap()` call whose
|
||||
// object is `Connection::open(p)`; we want the leftmost plain
|
||||
// identifier the chain resolves to (for SSA var_stacks lookup).
|
||||
"call_expression" => {
|
||||
cur = cur.child_by_field_name("function")?;
|
||||
}
|
||||
"method_call_expression" => {
|
||||
cur = cur
|
||||
.child_by_field_name("object")
|
||||
.or_else(|| cur.child_by_field_name("receiver"))?;
|
||||
}
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Decide whether `callee` names a factory whose result is cleaned up
/// automatically by language semantics (Rust ownership/`Drop`, C++ smart
/// pointers).
///
/// Per the original design note, a `true` result is meant to mark the acquire
/// node as a managed resource so that no `state-resource-leak` finding is
/// raised for it.
///
/// Matching is ASCII-case-insensitive. Everything from the first `<` onward
/// is ignored, so `make_unique<int>` is compared as `make_unique`. A pattern
/// matches when the callee equals it, or ends with it at a path boundary:
/// the character in front of the matched suffix must not be an identifier
/// character. `std::fs::File::open` and `boost::make_shared` therefore match,
/// while look-alikes such as `Profile::open`, `Sandbox::new` or
/// `fake_make_unique` do not.
///
/// Only `"cpp"` and `"rust"` have factory tables; every other `lang` yields
/// `false`.
pub(crate) fn is_raii_factory(lang: &str, callee: &str) -> bool {
    fn matches_any(callee: &str, patterns: &[&str]) -> bool {
        let lowered = callee.to_ascii_lowercase();
        // Strip C++ template arguments: make_unique<int> → make_unique
        let base = lowered.split('<').next().unwrap_or(&lowered);
        patterns.iter().any(|pattern| {
            match base.strip_suffix(*pattern) {
                // Exact match (empty prefix) or suffix preceded by a
                // separator such as `:` or `.`; never by part of a longer
                // identifier.
                Some(prefix) => match prefix.chars().next_back() {
                    Some(prev) => !(prev.is_alphanumeric() || prev == '_'),
                    None => true,
                },
                None => false,
            }
        })
    }

    match lang {
        "cpp" => {
            static CPP_RAII_FACTORIES: &[&str] = &[
                "make_unique",
                "make_shared",
                "std::make_unique",
                "std::make_shared",
            ];
            matches_any(callee, CPP_RAII_FACTORIES)
        }
        "rust" => {
            static RUST_RAII_CONSTRUCTORS: &[&str] = &[
                "file::open",
                "file::create",
                "box::new",
                "bufwriter::new",
                "bufreader::new",
                "tcplistener::bind",
                "tcpstream::connect",
                "udpsocket::bind",
                "mutex::new",
                "rwlock::new",
                "fs::file::open",
                "fs::file::create",
                "std::fs::file::open",
                "std::fs::file::create",
            ];
            matches_any(callee, RUST_RAII_CONSTRUCTORS)
        }
        _ => false,
    }
}
|
||||
|
||||
/// Fallback for constructor expressions whose grammar lacks field names.
|
||||
/// For example, PHP `object_creation_expression` has positional children
|
||||
/// `new name arguments` where `name` is a node kind (not a field).
|
||||
/// Returns the first child whose kind is `"name"` or `"type_identifier"`.
|
||||
pub(crate) fn find_constructor_type_child(n: Node) -> Option<Node> {
|
||||
let mut cursor = n.walk();
|
||||
n.children(&mut cursor)
|
||||
.find(|c| matches!(c.kind(), "name" | "type_identifier" | "qualified_name"))
|
||||
}
|
||||
|
||||
/// Return the callee identifier and byte span for the first call / method /
|
||||
/// macro inside `n`. Searches recursively through all descendants.
|
||||
///
|
||||
/// The span is the byte range of the call expression itself, so a caller that
|
||||
/// overrides `text` with the returned identifier can also record a
|
||||
/// `callee_span` pointing at the inner call (narrower than the enclosing
|
||||
/// statement) for accurate source-location reporting.
|
||||
pub(crate) fn first_call_ident_with_span<'a>(
|
||||
n: Node<'a>,
|
||||
lang: &str,
|
||||
code: &'a [u8],
|
||||
) -> Option<(String, (usize, usize))> {
|
||||
let mut cursor = n.walk();
|
||||
for c in n.children(&mut cursor) {
|
||||
match lookup(lang, c.kind()) {
|
||||
Kind::CallFn | Kind::CallMethod | Kind::CallMacro => {
|
||||
let span = (c.start_byte(), c.end_byte());
|
||||
// C++ new/delete: normalize callee before returning.
|
||||
if lang == "cpp" && c.kind() == "new_expression" {
|
||||
return Some(("new".to_string(), span));
|
||||
}
|
||||
if lang == "cpp" && c.kind() == "delete_expression" {
|
||||
return Some(("delete".to_string(), span));
|
||||
}
|
||||
// Ruby backtick subshell: no `function` field, normalise to
|
||||
// the synthetic callee so assignment-wrapped subshells classify.
|
||||
if lang == "ruby" && c.kind() == "subshell" {
|
||||
return Some(("subshell".to_string(), span));
|
||||
}
|
||||
let ident = match lookup(lang, c.kind()) {
|
||||
Kind::CallFn => c
|
||||
.child_by_field_name("function")
|
||||
.or_else(|| c.child_by_field_name("method"))
|
||||
.or_else(|| c.child_by_field_name("name"))
|
||||
.or_else(|| c.child_by_field_name("type"))
|
||||
.or_else(|| c.child_by_field_name("constructor"))
|
||||
// Fallback for constructors whose grammar lacks field names
|
||||
// (e.g. PHP `object_creation_expression` has positional children).
|
||||
.or_else(|| find_constructor_type_child(c))
|
||||
.and_then(|f| {
|
||||
let unwrapped = unwrap_parens(f);
|
||||
if lookup(lang, unwrapped.kind()) == Kind::Function {
|
||||
Some(anon_fn_name(unwrapped.start_byte()))
|
||||
} else {
|
||||
text_of(f, code)
|
||||
}
|
||||
}),
|
||||
Kind::CallMethod => {
|
||||
let func = c
|
||||
.child_by_field_name("method")
|
||||
.or_else(|| c.child_by_field_name("name"))
|
||||
.and_then(|f| text_of(f, code));
|
||||
let recv = c
|
||||
.child_by_field_name("object")
|
||||
.or_else(|| c.child_by_field_name("receiver"))
|
||||
.or_else(|| c.child_by_field_name("scope"))
|
||||
.and_then(|f| root_receiver_text(f, lang, code));
|
||||
match (recv, func) {
|
||||
(Some(r), Some(f)) => Some(format!("{r}.{f}")),
|
||||
(_, Some(f)) => Some(f.to_string()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
Kind::CallMacro => c
|
||||
.child_by_field_name("macro")
|
||||
.and_then(|f| text_of(f, code)),
|
||||
_ => None,
|
||||
};
|
||||
return ident.map(|s| (s, span));
|
||||
}
|
||||
Kind::Function => {
|
||||
// Do not descend into nested function/lambda bodies —
|
||||
// they are separate scopes and should not contribute
|
||||
// callee identifiers to the parent expression.
|
||||
continue;
|
||||
}
|
||||
_ => {
|
||||
// Recurse into children (handles nested declarators)
|
||||
if let Some(found) = first_call_ident_with_span(c, lang, code) {
|
||||
return Some(found);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Convenience wrapper around [`first_call_ident_with_span`] that discards
|
||||
/// the byte-span when only the callee identifier is needed (e.g. for
|
||||
/// Python-side label lookup that does not participate in span-narrowed
|
||||
/// location reporting).
|
||||
pub(crate) fn first_call_ident<'a>(n: Node<'a>, lang: &str, code: &'a [u8]) -> Option<String> {
|
||||
first_call_ident_with_span(n, lang, code).map(|(s, _)| s)
|
||||
}
|
||||
|
||||
/// Search recursively for any nested call whose identifier classifies as a label.
|
||||
/// Used for cases like `str(eval(expr))` where `str` doesn't match but `eval` does.
|
||||
///
|
||||
/// Returns `(callee_text, label, span)` where `span` is the byte range of the
|
||||
/// inner call node itself — used to populate `CallMeta.callee_span` so that
|
||||
/// display sites can report the actual call location rather than the enclosing
|
||||
/// statement's span.
|
||||
pub(crate) fn find_classifiable_inner_call<'a>(
|
||||
n: Node<'a>,
|
||||
lang: &str,
|
||||
code: &'a [u8],
|
||||
extra: Option<&[crate::labels::RuntimeLabelRule]>,
|
||||
) -> Option<(String, DataLabel, (usize, usize))> {
|
||||
let mut cursor = n.walk();
|
||||
for c in n.children(&mut cursor) {
|
||||
// Do not descend into Kind::Function nodes — they will be extracted
|
||||
// as separate BodyCfg entries and should not contribute inner callees
|
||||
// to the parent expression.
|
||||
if lookup(lang, c.kind()) == Kind::Function {
|
||||
continue;
|
||||
}
|
||||
match lookup(lang, c.kind()) {
|
||||
Kind::CallFn | Kind::CallMethod | Kind::CallMacro => {
|
||||
let ident = match lookup(lang, c.kind()) {
|
||||
Kind::CallFn => c
|
||||
.child_by_field_name("function")
|
||||
.or_else(|| c.child_by_field_name("method"))
|
||||
.or_else(|| c.child_by_field_name("name"))
|
||||
.or_else(|| c.child_by_field_name("type"))
|
||||
.and_then(|f| text_of(f, code)),
|
||||
Kind::CallMethod => {
|
||||
let func = c
|
||||
.child_by_field_name("method")
|
||||
.or_else(|| c.child_by_field_name("name"))
|
||||
.and_then(|f| text_of(f, code));
|
||||
let recv = c
|
||||
.child_by_field_name("object")
|
||||
.or_else(|| c.child_by_field_name("receiver"))
|
||||
.or_else(|| c.child_by_field_name("scope"))
|
||||
.and_then(|f| root_receiver_text(f, lang, code));
|
||||
match (recv, func) {
|
||||
(Some(r), Some(f)) => Some(format!("{r}.{f}")),
|
||||
(_, Some(f)) => Some(f),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
Kind::CallMacro => c
|
||||
.child_by_field_name("macro")
|
||||
.and_then(|f| text_of(f, code)),
|
||||
_ => None,
|
||||
};
|
||||
if let Some(ref id) = ident
|
||||
&& let Some(lbl) = classify(lang, id, extra)
|
||||
{
|
||||
return Some((id.clone(), lbl, (c.start_byte(), c.end_byte())));
|
||||
}
|
||||
// Recurse into arguments of this call
|
||||
if let Some(found) = find_classifiable_inner_call(c, lang, code, extra) {
|
||||
return Some(found);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
if let Some(found) = find_classifiable_inner_call(c, lang, code, extra) {
|
||||
return Some(found);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Build the dot-joined text of a member_expression / attribute / selector_expression.
|
||||
/// E.g. for `process.env.CMD` this returns `"process.env.CMD"`.
|
||||
/// Field paths are capped at 3 segments (2 dots) to bound state size.
|
||||
pub(crate) fn member_expr_text(n: Node, code: &[u8]) -> Option<String> {
|
||||
let path = member_expr_text_inner(n, code)?;
|
||||
// Depth limit: keep at most 3 segments (2 dots)
|
||||
let mut dots = 0;
|
||||
for (i, c) in path.char_indices() {
|
||||
if c == '.' {
|
||||
dots += 1;
|
||||
}
|
||||
if dots >= 3 {
|
||||
return Some(path[..i].to_string());
|
||||
}
|
||||
}
|
||||
Some(path)
|
||||
}
|
||||
|
||||
pub(crate) fn member_expr_text_inner(n: Node, code: &[u8]) -> Option<String> {
|
||||
match n.kind() {
|
||||
"member_expression" | "attribute" | "selector_expression" => {
|
||||
let obj = n
|
||||
.child_by_field_name("object")
|
||||
.or_else(|| n.child_by_field_name("value"))
|
||||
.and_then(|o| member_expr_text_inner(o, code))
|
||||
.or_else(|| {
|
||||
n.child_by_field_name("object")
|
||||
.or_else(|| n.child_by_field_name("value"))
|
||||
.and_then(|o| text_of(o, code))
|
||||
});
|
||||
let prop = n
|
||||
.child_by_field_name("property")
|
||||
.or_else(|| n.child_by_field_name("attribute"))
|
||||
.or_else(|| n.child_by_field_name("field"))
|
||||
.and_then(|p| text_of(p, code));
|
||||
match (obj, prop) {
|
||||
(Some(o), Some(p)) => Some(format!("{o}.{p}")),
|
||||
(_, Some(p)) => Some(p),
|
||||
(Some(o), _) => Some(o),
|
||||
_ => text_of(n, code),
|
||||
}
|
||||
}
|
||||
_ => text_of(n, code),
|
||||
}
|
||||
}
|
||||
|
||||
/// Recursively search `n` for a member expression whose text classifies as a label.
|
||||
pub(crate) fn first_member_label(
|
||||
n: Node,
|
||||
lang: &str,
|
||||
code: &[u8],
|
||||
extra_labels: Option<&[crate::labels::RuntimeLabelRule]>,
|
||||
) -> Option<DataLabel> {
|
||||
match n.kind() {
|
||||
"member_expression" | "attribute" | "selector_expression" => {
|
||||
if let Some(full) = member_expr_text(n, code) {
|
||||
// Try the full text first, then progressively strip the last segment
|
||||
// to match rules like "process.env" from "process.env.CMD".
|
||||
let mut candidate = full.as_str();
|
||||
loop {
|
||||
if let Some(lbl) = classify(lang, candidate, extra_labels) {
|
||||
return Some(lbl);
|
||||
}
|
||||
match candidate.rsplit_once('.') {
|
||||
Some((prefix, _)) => candidate = prefix,
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// PHP/Python/Ruby subscript access: `$_GET['cmd']`, `os.environ['KEY']`, `params[:cmd]`
|
||||
// Try to classify the object (before the `[`) as a source.
|
||||
"subscript_expression" | "subscript" | "element_reference" => {
|
||||
if let Some(obj) = n
|
||||
.child_by_field_name("object")
|
||||
.or_else(|| n.child_by_field_name("value"))
|
||||
.or_else(|| n.child(0))
|
||||
{
|
||||
if let Some(txt) = text_of(obj, code)
|
||||
&& let Some(lbl) = classify(lang, &txt, extra_labels)
|
||||
{
|
||||
return Some(lbl);
|
||||
}
|
||||
// Recurse into the object for nested member accesses
|
||||
if let Some(lbl) = first_member_label(obj, lang, code, extra_labels) {
|
||||
return Some(lbl);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
let mut cursor = n.walk();
|
||||
for child in n.children(&mut cursor) {
|
||||
if let Some(lbl) = first_member_label(child, lang, code, extra_labels) {
|
||||
return Some(lbl);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Return the text of the first member expression found in `n`.
|
||||
pub(crate) fn first_member_text(n: Node, code: &[u8]) -> Option<String> {
|
||||
match n.kind() {
|
||||
"member_expression" | "attribute" | "selector_expression" => member_expr_text(n, code),
|
||||
"subscript_expression" | "subscript" | "element_reference" => n
|
||||
.child_by_field_name("object")
|
||||
.or_else(|| n.child_by_field_name("value"))
|
||||
.or_else(|| n.child(0))
|
||||
.and_then(|obj| text_of(obj, code)),
|
||||
_ => {
|
||||
let mut cursor = n.walk();
|
||||
for child in n.children(&mut cursor) {
|
||||
if let Some(t) = first_member_text(child, code) {
|
||||
return Some(t);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Check whether any descendant of `n` is a call expression.
|
||||
/// Collect function-expression nodes nested inside a call's arguments.
|
||||
///
|
||||
/// This finds anonymous functions / arrow functions / closures that are
|
||||
/// passed as arguments to a call and should be analysed as separate
|
||||
/// function scopes. Only direct function-argument children are collected
|
||||
/// (not functions nested inside other functions — those get handled when
|
||||
/// the outer function is recursed into).
|
||||
pub(crate) fn collect_nested_function_nodes<'a>(n: Node<'a>, lang: &str) -> Vec<Node<'a>> {
|
||||
let mut funcs = Vec::new();
|
||||
collect_nested_functions_rec(n, lang, &mut funcs, false);
|
||||
funcs
|
||||
}
|
||||
|
||||
pub(crate) fn collect_nested_functions_rec<'a>(
|
||||
n: Node<'a>,
|
||||
lang: &str,
|
||||
out: &mut Vec<Node<'a>>,
|
||||
inside_function: bool,
|
||||
) {
|
||||
let kind = lookup(lang, n.kind());
|
||||
// Only treat as a function if it's a real function node (has children),
|
||||
// not a keyword token like `function` in JS which shares the same kind name.
|
||||
if kind == Kind::Function && n.child_count() > 0 {
|
||||
if inside_function {
|
||||
// Don't recurse into nested functions of nested functions
|
||||
return;
|
||||
}
|
||||
out.push(n);
|
||||
return;
|
||||
}
|
||||
let mut cursor = n.walk();
|
||||
for c in n.children(&mut cursor) {
|
||||
collect_nested_functions_rec(c, lang, out, inside_function);
|
||||
}
|
||||
}
|
||||
|
||||
/// Derive a binding name for an anonymous function literal from its syntactic
|
||||
/// context. Returns `None` when no unambiguous binding exists (e.g. function
|
||||
/// passed directly as a call argument, nested in a destructuring pattern, or
|
||||
/// stored into a subscript expression).
|
||||
///
|
||||
/// Supported shapes (across JS/TS, Python, Ruby, Go, PHP, Rust):
|
||||
/// * `var|let|const h = <fn>` → `"h"`
|
||||
/// * `h := <fn>` → `"h"` (Go short-var)
|
||||
/// * `h = <fn>` → `"h"` (reassignment)
|
||||
/// * `obj.prop = <fn>` / `obj::prop` → `"prop"` (bind via rightmost member)
|
||||
///
|
||||
/// Parenthesised wrappers (`var h = (function(){})`) are transparently
|
||||
/// skipped. The disambig start-byte on the generated FuncKey prevents
|
||||
/// shadowed same-name bindings from colliding.
|
||||
pub(crate) fn derive_anon_fn_name_from_context<'a>(
|
||||
func_node: Node<'a>,
|
||||
lang: &str,
|
||||
code: &'a [u8],
|
||||
) -> Option<String> {
|
||||
// Walk up past parenthesized wrappers so `var h = (fn)` works.
|
||||
let mut cur = func_node.parent()?;
|
||||
while cur.kind() == "parenthesized_expression" {
|
||||
cur = cur.parent()?;
|
||||
}
|
||||
let parent = cur;
|
||||
|
||||
let lhs_ident_text = |lhs: Node<'a>| -> Option<String> {
|
||||
let lhs = unwrap_parens(lhs);
|
||||
match lhs.kind() {
|
||||
"identifier" | "variable_name" | "simple_identifier" => text_of(lhs, code),
|
||||
// `obj.prop = <fn>` → "prop" (JS/TS/Python/PHP/Ruby/Go)
|
||||
"member_expression"
|
||||
| "attribute"
|
||||
| "field_expression"
|
||||
| "selector_expression"
|
||||
| "scoped_identifier" => lhs
|
||||
.child_by_field_name("property")
|
||||
.or_else(|| lhs.child_by_field_name("field"))
|
||||
.or_else(|| lhs.child_by_field_name("name"))
|
||||
.and_then(|n| text_of(n, code)),
|
||||
_ => None,
|
||||
}
|
||||
};
|
||||
|
||||
match parent.kind() {
|
||||
// JS/TS: `var h = fn`, Java/Rust: `let h = fn`, C++: `auto h = fn`,
|
||||
// PHP: `$h = fn` also lands here when the parent is `variable_declarator`.
|
||||
"variable_declarator" | "init_declarator" | "let_declaration" => parent
|
||||
.child_by_field_name("name")
|
||||
.or_else(|| parent.child_by_field_name("pattern"))
|
||||
.and_then(|n| match n.kind() {
|
||||
"identifier" | "variable_name" | "simple_identifier" => text_of(n, code),
|
||||
_ => None, // destructuring / tuple patterns are ambiguous
|
||||
}),
|
||||
|
||||
// JS/TS: `h = fn`, `obj.prop = fn`
|
||||
// Ruby `assignment` / C `assignment_expression`
|
||||
"assignment_expression" | "assignment" => {
|
||||
parent.child_by_field_name("left").and_then(lhs_ident_text)
|
||||
}
|
||||
|
||||
// Go: `h := fn` (short_var_declaration). The left child is an
|
||||
// expression_list with one identifier.
|
||||
"short_var_declaration" => {
|
||||
let left = parent.child_by_field_name("left")?;
|
||||
let mut cur = left.walk();
|
||||
left.children(&mut cur).find_map(|c| {
|
||||
(c.kind() == "identifier")
|
||||
.then(|| text_of(c, code))
|
||||
.flatten()
|
||||
})
|
||||
}
|
||||
|
||||
// Go: `var h = fn` → var_spec with names field.
|
||||
"var_spec" | "const_spec" => {
|
||||
let names = parent.child_by_field_name("name")?;
|
||||
let mut cur = names.walk();
|
||||
names.children(&mut cur).find_map(|c| {
|
||||
(c.kind() == "identifier")
|
||||
.then(|| text_of(c, code))
|
||||
.flatten()
|
||||
})
|
||||
}
|
||||
|
||||
// Python: `h = lambda: ...` parents as `assignment`, handled above.
|
||||
// Python `default_parameter` assigning `def foo(x=lambda: 0)` — ambiguous, skip.
|
||||
_ => {
|
||||
// Some grammars wrap the RHS in an `expression`, `expression_list`,
|
||||
// or similar node between the binding site and the function literal.
|
||||
// Do one more hop to catch these without blowing past meaningful
|
||||
// scopes (e.g. enclosing function body / block).
|
||||
let grand = parent.parent()?;
|
||||
match grand.kind() {
|
||||
"variable_declarator" | "init_declarator" => grand
|
||||
.child_by_field_name("name")
|
||||
.and_then(|n| match n.kind() {
|
||||
"identifier" | "variable_name" | "simple_identifier" => text_of(n, code),
|
||||
_ => None,
|
||||
}),
|
||||
"assignment_expression" | "assignment" => {
|
||||
grand.child_by_field_name("left").and_then(lhs_ident_text)
|
||||
}
|
||||
// Go: `run := func(){...}` → func_literal's parent is
|
||||
// `expression_list`, grandparent is `short_var_declaration`.
|
||||
"short_var_declaration" => {
|
||||
let left = grand.child_by_field_name("left")?;
|
||||
let mut cur = left.walk();
|
||||
left.children(&mut cur).find_map(|c| {
|
||||
(c.kind() == "identifier")
|
||||
.then(|| text_of(c, code))
|
||||
.flatten()
|
||||
})
|
||||
}
|
||||
// Go: `var run = func(){...}` wraps through var_spec via
|
||||
// expression_list in older grammar versions.
|
||||
"var_spec" | "const_spec" => {
|
||||
let names = grand.child_by_field_name("name")?;
|
||||
let mut cur = names.walk();
|
||||
names.children(&mut cur).find_map(|c| {
|
||||
(c.kind() == "identifier")
|
||||
.then(|| text_of(c, code))
|
||||
.flatten()
|
||||
})
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
.and_then(|name| {
|
||||
// Guard against degenerate names that would collide with label rules
|
||||
// or produce unstable summary keys. Lang-specific leaf only.
|
||||
if name.is_empty()
|
||||
|| name.contains(|c: char| !(c.is_alphanumeric() || c == '_' || c == '$'))
|
||||
{
|
||||
None
|
||||
} else {
|
||||
// Silence unused-binding warning if lang matching never fires.
|
||||
let _ = lang;
|
||||
Some(name)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn has_call_descendant(n: Node, lang: &str) -> bool {
|
||||
let mut cursor = n.walk();
|
||||
for c in n.children(&mut cursor) {
|
||||
match lookup(lang, c.kind()) {
|
||||
Kind::CallFn | Kind::CallMethod | Kind::CallMacro => return true,
|
||||
_ => {
|
||||
if has_call_descendant(c, lang) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Recursively collect identifiers AND full dotted member-expression paths.
|
||||
///
|
||||
/// For `member_expression` / `attribute` / `selector_expression` / `field_expression`
|
||||
/// nodes the full dotted path (via `member_expr_text`) is pushed into `paths`,
|
||||
/// and the individual leaf identifiers are pushed into `idents` as a fallback.
|
||||
/// Plain identifiers go only into `idents`.
|
||||
pub(crate) fn collect_idents_with_paths(
|
||||
n: Node,
|
||||
code: &[u8],
|
||||
idents: &mut Vec<String>,
|
||||
paths: &mut Vec<String>,
|
||||
) {
|
||||
match n.kind() {
|
||||
"member_expression" | "attribute" | "selector_expression" | "field_expression" => {
|
||||
if let Some(path) = member_expr_text(n, code) {
|
||||
paths.push(path);
|
||||
}
|
||||
// Also collect individual idents as fallback
|
||||
collect_idents(n, code, idents);
|
||||
}
|
||||
"identifier"
|
||||
| "field_identifier"
|
||||
| "property_identifier"
|
||||
| "shorthand_property_identifier_pattern" => {
|
||||
if let Some(txt) = text_of(n, code) {
|
||||
idents.push(txt);
|
||||
}
|
||||
}
|
||||
"variable_name" => {
|
||||
if let Some(txt) = text_of(n, code) {
|
||||
idents.push(txt.trim_start_matches('$').to_string());
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
let mut c = n.walk();
|
||||
for ch in n.children(&mut c) {
|
||||
collect_idents_with_paths(ch, code, idents, paths);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Recursively collect every identifier that occurs inside `n`.
|
||||
///
|
||||
/// Recognises `identifier` (most languages), `variable_name` (PHP),
|
||||
/// `field_identifier` (Go), `property_identifier` (JS/TS), and
|
||||
/// `shorthand_property_identifier_pattern` (JS/TS destructuring).
|
||||
pub(crate) fn collect_idents(n: Node, code: &[u8], out: &mut Vec<String>) {
|
||||
match n.kind() {
|
||||
"identifier"
|
||||
| "field_identifier"
|
||||
| "property_identifier"
|
||||
| "shorthand_property_identifier_pattern"
|
||||
// PHP `name`: leaf node carrying the bare identifier text for
|
||||
// function/method names and similar grammar slots. Without this
|
||||
// arm `function_definition` → `name` extraction returns empty
|
||||
// for PHP, demoting every named function to `<anon#N>` and
|
||||
// breaking cross-function summary lookup at the call site.
|
||||
| "name" => {
|
||||
if let Some(txt) = text_of(n, code) {
|
||||
out.push(txt);
|
||||
}
|
||||
}
|
||||
// PHP: $x is `variable_name` → `$` + `name`. Use the whole text minus `$`.
|
||||
"variable_name" => {
|
||||
if let Some(txt) = text_of(n, code) {
|
||||
out.push(txt.trim_start_matches('$').to_string());
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
let mut c = n.walk();
|
||||
for ch in n.children(&mut c) {
|
||||
collect_idents(ch, code, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
365
src/cfg/imports.rs
Normal file
365
src/cfg/imports.rs
Normal file
|
|
@ -0,0 +1,365 @@
|
|||
use super::{
|
||||
ImportBinding, ImportBindings, PromisifyAlias, PromisifyAliases, member_expr_text, text_of,
|
||||
};
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Import binding extraction
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/// Walk the top-level AST nodes and collect import alias bindings:
|
||||
///
|
||||
/// - ES6: `import { A as B } from 'mod'` → B → ImportBinding { original: A, module: mod }
|
||||
/// - CommonJS: `const { A: B } = require('mod')` → B → ImportBinding { original: A, module: mod }
|
||||
///
|
||||
/// Only aliased (renamed) bindings are recorded — same-name imports (e.g.
|
||||
/// `import { exec }`) are already resolvable by their original name.
|
||||
pub(super) fn extract_import_bindings(tree: &Tree, code: &[u8]) -> ImportBindings {
|
||||
let mut bindings = ImportBindings::new();
|
||||
let root = tree.root_node();
|
||||
let mut cursor = root.walk();
|
||||
|
||||
for child in root.children(&mut cursor) {
|
||||
match child.kind() {
|
||||
// ES6: import { A as B } from 'mod'
|
||||
"import_statement" => {
|
||||
let source_str = child
|
||||
.child_by_field_name("source")
|
||||
.and_then(|s| text_of(s, code))
|
||||
.map(|s| s.trim_matches(|c| c == '\'' || c == '"').to_string());
|
||||
|
||||
let mut c1 = child.walk();
|
||||
for clause_child in child.children(&mut c1) {
|
||||
if clause_child.kind() != "import_clause" {
|
||||
continue;
|
||||
}
|
||||
let mut c2 = clause_child.walk();
|
||||
for part in clause_child.children(&mut c2) {
|
||||
if part.kind() != "named_imports" {
|
||||
continue;
|
||||
}
|
||||
let mut c3 = part.walk();
|
||||
for spec in part.children(&mut c3) {
|
||||
if spec.kind() != "import_specifier" {
|
||||
continue;
|
||||
}
|
||||
let original = spec
|
||||
.child_by_field_name("name")
|
||||
.and_then(|n| text_of(n, code));
|
||||
let alias = spec
|
||||
.child_by_field_name("alias")
|
||||
.and_then(|a| text_of(a, code));
|
||||
if let (Some(orig), Some(al)) = (original, alias) {
|
||||
if orig != al {
|
||||
bindings.insert(
|
||||
al,
|
||||
ImportBinding {
|
||||
original: orig,
|
||||
module_path: source_str.clone(),
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// CommonJS: const { A: B } = require('mod')
|
||||
"lexical_declaration" | "variable_declaration" => {
|
||||
let mut c1 = child.walk();
|
||||
for decl in child.children(&mut c1) {
|
||||
if decl.kind() != "variable_declarator" {
|
||||
continue;
|
||||
}
|
||||
let (pattern, value) = match (
|
||||
decl.child_by_field_name("name"),
|
||||
decl.child_by_field_name("value"),
|
||||
) {
|
||||
(Some(p), Some(v)) => (p, v),
|
||||
_ => continue,
|
||||
};
|
||||
if pattern.kind() != "object_pattern" {
|
||||
continue;
|
||||
}
|
||||
let module_path = extract_require_module(value, code);
|
||||
if module_path.is_none() {
|
||||
continue;
|
||||
}
|
||||
let mut c2 = pattern.walk();
|
||||
for pair in pattern.children(&mut c2) {
|
||||
if pair.kind() != "pair_pattern" {
|
||||
continue;
|
||||
}
|
||||
let key = pair
|
||||
.child_by_field_name("key")
|
||||
.and_then(|n| text_of(n, code));
|
||||
let val = pair
|
||||
.child_by_field_name("value")
|
||||
.and_then(|n| text_of(n, code));
|
||||
if let (Some(orig), Some(al)) = (key, val) {
|
||||
if orig != al {
|
||||
bindings.insert(
|
||||
al,
|
||||
ImportBinding {
|
||||
original: orig,
|
||||
module_path: module_path.clone(),
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Python: from module import A as B
|
||||
"import_from_statement" => {
|
||||
// Extract module path from the module_name field.
|
||||
let module_path = child
|
||||
.child_by_field_name("module_name")
|
||||
.and_then(|m| text_of(m, code));
|
||||
|
||||
let mut c1 = child.walk();
|
||||
for part in child.children(&mut c1) {
|
||||
if part.kind() != "aliased_import" {
|
||||
continue;
|
||||
}
|
||||
let original = part
|
||||
.child_by_field_name("name")
|
||||
.and_then(|n| text_of(n, code));
|
||||
let alias = part
|
||||
.child_by_field_name("alias")
|
||||
.and_then(|a| text_of(a, code));
|
||||
if let (Some(orig), Some(al)) = (original, alias) {
|
||||
if orig != al {
|
||||
bindings.insert(
|
||||
al,
|
||||
ImportBinding {
|
||||
original: orig,
|
||||
module_path: module_path.clone(),
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// PHP: use Namespace\ClassName as Alias;
|
||||
"namespace_use_declaration" => {
|
||||
let mut c1 = child.walk();
|
||||
for clause in child.children(&mut c1) {
|
||||
if clause.kind() != "namespace_use_clause" {
|
||||
continue;
|
||||
}
|
||||
// The alias is accessed via the "alias" field (a `name` node).
|
||||
// The qualified name has no field — find it by kind.
|
||||
let alias_node = clause.child_by_field_name("alias");
|
||||
let mut c2 = clause.walk();
|
||||
let qname_node = clause
|
||||
.children(&mut c2)
|
||||
.find(|n| n.kind() == "qualified_name" || n.kind() == "name");
|
||||
if let (Some(qn), Some(alias_n)) = (qname_node, alias_node) {
|
||||
let full_path = text_of(qn, code);
|
||||
let alias = text_of(alias_n, code);
|
||||
if let (Some(path_str), Some(al)) = (full_path, alias) {
|
||||
// Extract the last segment as the original name.
|
||||
let orig = path_str
|
||||
.rsplit('\\')
|
||||
.next()
|
||||
.unwrap_or(&path_str)
|
||||
.to_string();
|
||||
if orig != al {
|
||||
bindings.insert(
|
||||
al,
|
||||
ImportBinding {
|
||||
original: orig,
|
||||
module_path: Some(path_str),
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Rust: use crate::module::func as alias;
|
||||
"use_declaration" => {
|
||||
// Walk all descendants looking for use_as_clause nodes
|
||||
// (may be nested inside use_list / scoped_use_list).
|
||||
let mut stack = vec![child];
|
||||
while let Some(node) = stack.pop() {
|
||||
if node.kind() == "use_as_clause" {
|
||||
let path_node = node.child_by_field_name("path");
|
||||
let alias_node = node.child_by_field_name("alias");
|
||||
if let (Some(p), Some(a)) = (path_node, alias_node) {
|
||||
let path_text = text_of(p, code);
|
||||
let alias_text = text_of(a, code);
|
||||
if let (Some(path_str), Some(al)) = (path_text, alias_text) {
|
||||
// Extract the last segment of the path as the original name.
|
||||
let orig = path_str
|
||||
.rsplit("::")
|
||||
.next()
|
||||
.unwrap_or(&path_str)
|
||||
.to_string();
|
||||
if orig != al {
|
||||
bindings.insert(
|
||||
al,
|
||||
ImportBinding {
|
||||
original: orig,
|
||||
module_path: Some(path_str),
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let mut c1 = node.walk();
|
||||
for ch in node.children(&mut c1) {
|
||||
stack.push(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
bindings
|
||||
}
|
||||
|
||||
/// Walk the AST and collect promisify-alias bindings for JS/TS.
|
||||
///
|
||||
/// Recognises declarations of the forms:
|
||||
/// - `const alias = util.promisify(wrapped)`
|
||||
/// - `const alias = promisify(wrapped)` (when `promisify` was destructured
|
||||
/// from `util`, matched structurally without tracking the import)
|
||||
///
|
||||
/// The `wrapped` callee is stored as its canonical textual form (e.g.
|
||||
/// `child_process.exec`). Only single-argument calls are captured; wrappers
|
||||
/// that rename more than the first argument are skipped conservatively.
|
||||
///
|
||||
/// The walk recurses through function bodies so aliases declared inside a
|
||||
/// handler are still recorded (they are file-local bindings regardless).
|
||||
pub(super) fn extract_promisify_aliases(tree: &Tree, code: &[u8]) -> PromisifyAliases {
|
||||
let mut aliases = PromisifyAliases::new();
|
||||
let mut stack = vec![tree.root_node()];
|
||||
while let Some(node) = stack.pop() {
|
||||
match node.kind() {
|
||||
"lexical_declaration" | "variable_declaration" => {
|
||||
let mut c = node.walk();
|
||||
for decl in node.children(&mut c) {
|
||||
if decl.kind() != "variable_declarator" {
|
||||
continue;
|
||||
}
|
||||
let (name_node, value_node) = match (
|
||||
decl.child_by_field_name("name"),
|
||||
decl.child_by_field_name("value"),
|
||||
) {
|
||||
(Some(n), Some(v)) => (n, v),
|
||||
_ => continue,
|
||||
};
|
||||
if name_node.kind() != "identifier" {
|
||||
continue;
|
||||
}
|
||||
let alias_name = match text_of(name_node, code) {
|
||||
Some(s) => s,
|
||||
None => continue,
|
||||
};
|
||||
if let Some(wrapped) = extract_promisify_wrapped(value_node, code) {
|
||||
aliases.insert(alias_name, PromisifyAlias { wrapped });
|
||||
}
|
||||
}
|
||||
}
|
||||
"assignment_expression" => {
|
||||
let (Some(lhs), Some(rhs)) = (
|
||||
node.child_by_field_name("left"),
|
||||
node.child_by_field_name("right"),
|
||||
) else {
|
||||
continue;
|
||||
};
|
||||
if lhs.kind() != "identifier" {
|
||||
continue;
|
||||
}
|
||||
let alias_name = match text_of(lhs, code) {
|
||||
Some(s) => s,
|
||||
None => continue,
|
||||
};
|
||||
if let Some(wrapped) = extract_promisify_wrapped(rhs, code) {
|
||||
aliases.insert(alias_name, PromisifyAlias { wrapped });
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
let mut c = node.walk();
|
||||
for child in node.children(&mut c) {
|
||||
stack.push(child);
|
||||
}
|
||||
}
|
||||
aliases
|
||||
}
|
||||
|
||||
/// If `value` is a call expression of the shape `util.promisify(X)` or
|
||||
/// `promisify(X)`, return the textual representation of `X` (`child_process.exec`,
|
||||
/// `fs.readFile`, `foo`). Otherwise `None`.
|
||||
fn extract_promisify_wrapped(value: Node, code: &[u8]) -> Option<String> {
|
||||
if value.kind() != "call_expression" {
|
||||
return None;
|
||||
}
|
||||
let func = value.child_by_field_name("function")?;
|
||||
let func_text = match func.kind() {
|
||||
"identifier" => text_of(func, code)?,
|
||||
"member_expression" => member_expr_text(func, code)?,
|
||||
_ => return None,
|
||||
};
|
||||
let matches = matches!(func_text.as_str(), "util.promisify" | "promisify");
|
||||
if !matches {
|
||||
return None;
|
||||
}
|
||||
let args = value.child_by_field_name("arguments")?;
|
||||
let mut cursor = args.walk();
|
||||
let mut wrapped: Option<String> = None;
|
||||
let mut arg_count = 0;
|
||||
for arg in args.children(&mut cursor) {
|
||||
if arg.is_extra() {
|
||||
continue;
|
||||
}
|
||||
match arg.kind() {
|
||||
"," | "(" | ")" => continue,
|
||||
_ => {}
|
||||
}
|
||||
arg_count += 1;
|
||||
if arg_count == 1 {
|
||||
wrapped = match arg.kind() {
|
||||
"identifier" => text_of(arg, code),
|
||||
"member_expression" => member_expr_text(arg, code),
|
||||
_ => None,
|
||||
};
|
||||
}
|
||||
}
|
||||
if arg_count != 1 {
|
||||
return None;
|
||||
}
|
||||
wrapped
|
||||
}
|
||||
|
||||
/// Extract the module path from a `require('...')` call expression.
|
||||
fn extract_require_module(node: Node, code: &[u8]) -> Option<String> {
|
||||
if node.kind() != "call_expression" {
|
||||
return None;
|
||||
}
|
||||
let func = node.child_by_field_name("function")?;
|
||||
let func_text = text_of(func, code)?;
|
||||
if func_text != "require" {
|
||||
return None;
|
||||
}
|
||||
let args = node.child_by_field_name("arguments")?;
|
||||
let mut cursor = args.walk();
|
||||
for arg in args.children(&mut cursor) {
|
||||
if arg.kind() == "string" || arg.kind() == "template_string" {
|
||||
return text_of(arg, code).map(|s| {
|
||||
s.trim_matches(|c| c == '\'' || c == '"' || c == '`')
|
||||
.to_string()
|
||||
});
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// === PUBLIC ENTRY POINT =================================================
|
||||
// -------------------------------------------------------------------------
|
||||
1180
src/cfg/literals.rs
Normal file
1180
src/cfg/literals.rs
Normal file
File diff suppressed because it is too large
Load diff
3672
src/cfg/mod.rs
Normal file
3672
src/cfg/mod.rs
Normal file
File diff suppressed because it is too large
Load diff
409
src/cfg/params.rs
Normal file
409
src/cfg/params.rs
Normal file
|
|
@ -0,0 +1,409 @@
|
|||
use super::{
|
||||
AstMeta, Cfg, EdgeKind, NodeInfo, StmtKind, TaintMeta, collect_idents, connect_all,
|
||||
is_anon_fn_name, text_of,
|
||||
};
|
||||
use crate::labels::{DataLabel, LangAnalysisRules, classify, param_config};
|
||||
use petgraph::graph::NodeIndex;
|
||||
use smallvec::smallvec;
|
||||
use tree_sitter::Node;
|
||||
|
||||
/// Extract parameter names from a function AST node.
|
||||
///
|
||||
/// Uses the language's `ParamConfig` to find the parameter list field
|
||||
/// and extract identifiers from each parameter child.
|
||||
pub(super) fn extract_param_names<'a>(
|
||||
func_node: Node<'a>,
|
||||
lang: &str,
|
||||
code: &'a [u8],
|
||||
) -> Vec<String> {
|
||||
let cfg = param_config(lang);
|
||||
let mut names = Vec::new();
|
||||
// Try the params_field directly on the function node first.
|
||||
// For C/C++, the parameter list is nested inside the declarator
|
||||
// (function_definition > declarator:function_declarator > parameters:parameter_list),
|
||||
// so fall back to looking one level deeper via the "declarator" field.
|
||||
let params = func_node.child_by_field_name(cfg.params_field).or_else(|| {
|
||||
func_node
|
||||
.child_by_field_name("declarator")
|
||||
.and_then(|d| d.child_by_field_name(cfg.params_field))
|
||||
});
|
||||
let Some(params) = params else {
|
||||
return names;
|
||||
};
|
||||
let mut cursor = params.walk();
|
||||
for child in params.children(&mut cursor) {
|
||||
// Self/this parameter (e.g. Rust's `self_parameter`)
|
||||
if cfg.self_param_kinds.contains(&child.kind()) {
|
||||
names.push("self".into());
|
||||
continue;
|
||||
}
|
||||
|
||||
// Regular parameter
|
||||
if cfg.param_node_kinds.contains(&child.kind()) {
|
||||
// Try each ident field in order
|
||||
let mut found = false;
|
||||
for &field in cfg.ident_fields {
|
||||
if let Some(node) = child.child_by_field_name(field) {
|
||||
let mut tmp = Vec::new();
|
||||
collect_idents(node, code, &mut tmp);
|
||||
let candidate = if lang == "rust" {
|
||||
tmp.into_iter().last()
|
||||
} else {
|
||||
tmp.into_iter().next()
|
||||
};
|
||||
if let Some(name) = candidate {
|
||||
names.push(name);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Fallback: if the param node itself is an identifier (e.g. JS/Python)
|
||||
if !found
|
||||
&& child.kind() == "identifier"
|
||||
&& let Some(txt) = text_of(child, code)
|
||||
{
|
||||
names.push(txt);
|
||||
found = true;
|
||||
}
|
||||
// Fallback for C/C++: look for nested declarator → identifier
|
||||
if !found && child.kind() == "parameter_declaration" {
|
||||
let mut tmp = Vec::new();
|
||||
collect_idents(child, code, &mut tmp);
|
||||
if let Some(last) = tmp.pop() {
|
||||
names.push(last);
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
// Generic fallback for typed/default parameter wrappers (e.g.
|
||||
// Python `typed_parameter`, `default_parameter`,
|
||||
// `typed_default_parameter`): the wrapper node has no `name`
|
||||
// field but contains the identifier as a child. Pick the
|
||||
// *first* identifier — that is the parameter name; subsequent
|
||||
// identifiers are part of the type annotation or default
|
||||
// expression.
|
||||
if !found {
|
||||
let mut tmp = Vec::new();
|
||||
collect_idents(child, code, &mut tmp);
|
||||
if let Some(first) = tmp.into_iter().next() {
|
||||
names.push(first);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Bare identifier children — e.g. Rust untyped closure params `|cmd|`
|
||||
// where the child is an `identifier` node, not a `parameter` wrapper.
|
||||
if child.kind() == "identifier" {
|
||||
if let Some(txt) = text_of(child, code) {
|
||||
names.push(txt);
|
||||
}
|
||||
}
|
||||
}
|
||||
names
|
||||
}
|
||||
|
||||
/// Walk up from a function definition node and build a container path.
|
||||
///
|
||||
/// Records the names of enclosing classes / impls / modules / namespaces /
|
||||
/// structs — and, for anonymous / nested functions, the name of an enclosing
|
||||
/// named function — joined with `::`. Also returns a `FuncKind` guess
|
||||
/// reflecting the structural role.
|
||||
///
|
||||
/// Returns `(container, kind)`.
|
||||
pub(super) fn compute_container_and_kind(
|
||||
func_node: Node<'_>,
|
||||
ast_kind: &str,
|
||||
fn_name: &str,
|
||||
code: &[u8],
|
||||
) -> (String, crate::symbol::FuncKind) {
|
||||
use crate::symbol::FuncKind;
|
||||
|
||||
// Lambda / arrow / anonymous function ⇒ Closure regardless of context.
|
||||
let mut kind = if ast_kind == "lambda_expression"
|
||||
|| ast_kind == "arrow_function"
|
||||
|| ast_kind == "function_expression"
|
||||
|| ast_kind == "anonymous_function"
|
||||
|| ast_kind == "closure_expression"
|
||||
|| is_anon_fn_name(fn_name)
|
||||
{
|
||||
FuncKind::Closure
|
||||
} else {
|
||||
FuncKind::Function
|
||||
};
|
||||
|
||||
let mut segments: Vec<String> = Vec::new();
|
||||
let mut inside_class = false;
|
||||
let mut cursor = func_node.parent();
|
||||
|
||||
while let Some(parent) = cursor {
|
||||
let pk = parent.kind();
|
||||
|
||||
// Class / struct / impl / interface / namespace / module containers.
|
||||
let container_name_field: Option<&str> = match pk {
|
||||
// JS / TS / Python / Ruby / PHP / Java / Kotlin / C++ classes
|
||||
"class_declaration"
|
||||
| "class_definition"
|
||||
| "class_specifier"
|
||||
| "class"
|
||||
| "interface_declaration"
|
||||
| "interface_body"
|
||||
| "enum_declaration"
|
||||
| "trait_item"
|
||||
| "trait_declaration"
|
||||
| "enum_item"
|
||||
| "struct_specifier"
|
||||
| "struct_item" => Some("name"),
|
||||
// Rust impl blocks — pick the type name, not the trait name.
|
||||
"impl_item" => Some("type"),
|
||||
// Go / C++ / PHP namespaces and modules.
|
||||
"namespace_definition" | "namespace_declaration" | "module_declaration" | "module" => {
|
||||
Some("name")
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
if let Some(field) = container_name_field {
|
||||
if let Some(name_node) = parent.child_by_field_name(field) {
|
||||
if let Some(text) = text_of(name_node, code) {
|
||||
segments.push(text);
|
||||
inside_class |= matches!(
|
||||
pk,
|
||||
"class_declaration"
|
||||
| "class_definition"
|
||||
| "class_specifier"
|
||||
| "class"
|
||||
| "interface_declaration"
|
||||
| "interface_body"
|
||||
| "trait_item"
|
||||
| "trait_declaration"
|
||||
| "impl_item"
|
||||
| "struct_item"
|
||||
| "struct_specifier"
|
||||
);
|
||||
}
|
||||
}
|
||||
} else if pk == "function_declaration"
|
||||
|| pk == "function_definition"
|
||||
|| pk == "method_declaration"
|
||||
|| pk == "method_definition"
|
||||
|| pk == "function_item"
|
||||
|| pk == "arrow_function"
|
||||
|| pk == "lambda_expression"
|
||||
|| pk == "function_expression"
|
||||
{
|
||||
// Nested definition — record the outer function's name and
|
||||
// classify self as Closure even if we got a real name.
|
||||
if let Some(name_node) = parent.child_by_field_name("name") {
|
||||
if let Some(text) = text_of(name_node, code) {
|
||||
segments.push(text);
|
||||
}
|
||||
}
|
||||
if !matches!(kind, FuncKind::Closure) {
|
||||
kind = FuncKind::Closure;
|
||||
}
|
||||
}
|
||||
|
||||
cursor = parent.parent();
|
||||
}
|
||||
|
||||
// Upgrade to Method/Constructor when inside a class-like container.
|
||||
if inside_class && matches!(kind, FuncKind::Function) {
|
||||
kind = if fn_name == "__init__"
|
||||
|| fn_name == "constructor"
|
||||
|| fn_name == "initialize"
|
||||
|| fn_name == "new"
|
||||
{
|
||||
FuncKind::Constructor
|
||||
} else {
|
||||
FuncKind::Method
|
||||
};
|
||||
}
|
||||
|
||||
segments.reverse();
|
||||
let container = segments.join("::");
|
||||
(container, kind)
|
||||
}
|
||||
|
||||
pub(super) fn rust_param_binding_name(param_text: &str) -> Option<String> {
|
||||
let before_colon = param_text.split(':').next().unwrap_or(param_text).trim();
|
||||
let tokens: Vec<&str> = before_colon
|
||||
.split(|ch: char| !(ch.is_ascii_alphanumeric() || ch == '_'))
|
||||
.filter(|token| !token.is_empty() && !matches!(*token, "mut" | "ref"))
|
||||
.collect();
|
||||
tokens.last().map(|token| (*token).to_string())
|
||||
}
|
||||
|
||||
pub(super) fn rust_param_type_text(param: Node<'_>, code: &[u8]) -> Option<String> {
|
||||
param
|
||||
.child_by_field_name("type")
|
||||
.and_then(|node| text_of(node, code))
|
||||
.or_else(|| {
|
||||
text_of(param, code).and_then(|text| {
|
||||
text.split_once(':')
|
||||
.map(|(_, ty)| ty.trim().to_string())
|
||||
.filter(|ty| !ty.is_empty())
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
pub(super) fn rust_route_attribute_bindings(func_node: Node<'_>, code: &[u8]) -> Vec<String> {
|
||||
let Some(text) = text_of(func_node, code) else {
|
||||
return Vec::new();
|
||||
};
|
||||
let mut bindings = Vec::new();
|
||||
|
||||
for line in text
|
||||
.lines()
|
||||
.map(str::trim)
|
||||
.take_while(|line| line.starts_with("#["))
|
||||
{
|
||||
if !(line.starts_with("#[get")
|
||||
|| line.starts_with("#[post")
|
||||
|| line.starts_with("#[put")
|
||||
|| line.starts_with("#[delete")
|
||||
|| line.starts_with("#[patch"))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut chars = line.chars().peekable();
|
||||
while let Some(ch) = chars.next() {
|
||||
if ch == '<' {
|
||||
let mut token = String::new();
|
||||
while let Some(&next) = chars.peek() {
|
||||
chars.next();
|
||||
if next == '>' {
|
||||
break;
|
||||
}
|
||||
token.push(next);
|
||||
}
|
||||
let token = token.trim();
|
||||
if !token.is_empty() {
|
||||
bindings.push(token.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bindings
|
||||
}
|
||||
|
||||
pub(super) fn rust_framework_param_sources<'a>(
|
||||
func_node: Node<'a>,
|
||||
code: &'a [u8],
|
||||
analysis_rules: Option<&crate::labels::LangAnalysisRules>,
|
||||
) -> Vec<(String, crate::labels::Cap, (usize, usize))> {
|
||||
let Some(analysis_rules) = analysis_rules else {
|
||||
return Vec::new();
|
||||
};
|
||||
let extra = analysis_rules.extra_labels.as_slice();
|
||||
if extra.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let cfg = param_config("rust");
|
||||
let params = func_node.child_by_field_name(cfg.params_field);
|
||||
let Some(params) = params else {
|
||||
return Vec::new();
|
||||
};
|
||||
|
||||
let rocket_route_bindings = if analysis_rules
|
||||
.frameworks
|
||||
.contains(&crate::utils::project::DetectedFramework::Rocket)
|
||||
{
|
||||
rust_route_attribute_bindings(func_node, code)
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
let mut sources = Vec::new();
|
||||
let mut cursor = params.walk();
|
||||
for child in params.children(&mut cursor) {
|
||||
if cfg.self_param_kinds.contains(&child.kind()) || child.kind() != "parameter" {
|
||||
continue;
|
||||
}
|
||||
|
||||
let Some(param_text) = text_of(child, code) else {
|
||||
continue;
|
||||
};
|
||||
let Some(binding) = rust_param_binding_name(¶m_text) else {
|
||||
continue;
|
||||
};
|
||||
let span = (child.start_byte(), child.end_byte());
|
||||
|
||||
let type_caps = rust_param_type_text(child, code).and_then(|type_text| {
|
||||
match classify("rust", &type_text, Some(extra)) {
|
||||
Some(DataLabel::Source(caps)) => Some(caps),
|
||||
_ => None,
|
||||
}
|
||||
});
|
||||
let route_caps = rocket_route_bindings
|
||||
.iter()
|
||||
.any(|name| name == &binding)
|
||||
.then_some(crate::labels::Cap::all());
|
||||
|
||||
let Some(caps) = type_caps.or(route_caps) else {
|
||||
continue;
|
||||
};
|
||||
if !sources
|
||||
.iter()
|
||||
.any(|(name, _, existing_span)| name == &binding && existing_span == &span)
|
||||
{
|
||||
sources.push((binding, caps, span));
|
||||
}
|
||||
}
|
||||
|
||||
sources
|
||||
}
|
||||
|
||||
pub(super) fn inject_framework_param_sources(
|
||||
func_node: Node<'_>,
|
||||
code: &[u8],
|
||||
analysis_rules: Option<&crate::labels::LangAnalysisRules>,
|
||||
graph: &mut Cfg,
|
||||
entry: NodeIndex,
|
||||
enclosing_func: Option<&str>,
|
||||
) -> Vec<NodeIndex> {
|
||||
let sources = rust_framework_param_sources(func_node, code, analysis_rules);
|
||||
if sources.is_empty() {
|
||||
return vec![entry];
|
||||
}
|
||||
|
||||
let mut preds = vec![entry];
|
||||
for (binding, caps, span) in sources {
|
||||
let idx = graph.add_node(NodeInfo {
|
||||
kind: StmtKind::Seq,
|
||||
taint: TaintMeta {
|
||||
labels: smallvec![DataLabel::Source(caps)],
|
||||
defines: Some(binding),
|
||||
..Default::default()
|
||||
},
|
||||
ast: AstMeta {
|
||||
span,
|
||||
enclosing_func: enclosing_func.map(|s| s.to_string()),
|
||||
},
|
||||
..Default::default()
|
||||
});
|
||||
connect_all(graph, &preds, idx, EdgeKind::Seq);
|
||||
preds = vec![idx];
|
||||
}
|
||||
|
||||
preds
|
||||
}
|
||||
|
||||
/// Check if a callee name matches any configured terminator.
|
||||
pub(super) fn is_configured_terminator(
|
||||
callee: &str,
|
||||
analysis_rules: Option<&LangAnalysisRules>,
|
||||
) -> bool {
|
||||
if let Some(rules) = analysis_rules {
|
||||
let callee_lower = callee.to_ascii_lowercase();
|
||||
rules
|
||||
.terminators
|
||||
.iter()
|
||||
.any(|t| callee_lower == t.to_ascii_lowercase())
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue