mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-06 19:35:13 +02:00
docs: Enhance module documentation across various files for clarity a… (#62)
* docs: Enhance module documentation across various files for clarity and completeness * fix: Remove unnecessary blank line in build.rs for cleaner code * docs: Update documentation to improve clarity and consistency in code comments
This commit is contained in:
parent
40995e45e7
commit
1f2bfe76c1
44 changed files with 721 additions and 366 deletions
16
.github/workflows/ci.yml
vendored
16
.github/workflows/ci.yml
vendored
|
|
@ -153,6 +153,22 @@ jobs:
|
|||
exit 1
|
||||
fi
|
||||
|
||||
rustdoc:
|
||||
name: rustdoc
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- uses: actions-rust-lang/setup-rust-toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
cache: true
|
||||
|
||||
- name: Check rustdoc links
|
||||
env:
|
||||
RUSTDOCFLAGS: "-D warnings"
|
||||
run: cargo doc --workspace --no-deps --all-features
|
||||
|
||||
rust-beta-build:
|
||||
name: rust-beta-build
|
||||
runs-on: ubuntu-latest
|
||||
|
|
|
|||
291
build.rs
291
build.rs
|
|
@ -1,9 +1,7 @@
|
|||
use std::path::{Path, PathBuf};
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
|
||||
fn main() {
|
||||
render_docs_for_rustdoc();
|
||||
|
||||
// Only relevant when the serve feature is active
|
||||
if std::env::var("CARGO_FEATURE_SERVE").is_err() {
|
||||
return;
|
||||
|
|
@ -58,293 +56,6 @@ fn main() {
|
|||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Rustdoc / docs.rs: render docs/*.md into $OUT_DIR with relative .md links
|
||||
// rewritten to absolute github.com/elicpeter/nyx URLs so they resolve when the
|
||||
// markdown is embedded in rustdoc via #![doc = include_str!(...)].
|
||||
//
|
||||
// Source of truth stays in docs/. Files that don't exist (published-crate
|
||||
// builds where docs/ wasn't packaged) fall back to a one-line stub so rustdoc
|
||||
// still compiles.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Absolute GitHub URL of the repository's `docs/` directory on `master`.
/// Used as the prefix for every rewritten `.md` link and for the fallback
/// stub emitted when a source document cannot be read.
const GH_DOCS_BASE: &str = "https://github.com/elicpeter/nyx/blob/master/docs";
|
||||
|
||||
/// One markdown document to render for rustdoc: where it lives under
/// `docs/` and what its rendered copy is called in `$OUT_DIR`.
struct DocSpec {
    /// Source path relative to `docs/`, for example `"how-it-works.md"` or
    /// `"detectors/taint.md"`.
    src: &'static str,
    /// File name of the rendered copy written into `$OUT_DIR`.
    out: &'static str,
}
|
||||
|
||||
const DOC_SPECS: &[DocSpec] = &[
|
||||
DocSpec {
|
||||
src: "how-it-works.md",
|
||||
out: "lib_intro.md",
|
||||
},
|
||||
DocSpec {
|
||||
src: "detectors/taint.md",
|
||||
out: "taint.md",
|
||||
},
|
||||
DocSpec {
|
||||
src: "detectors/cfg.md",
|
||||
out: "cfg_analysis.md",
|
||||
},
|
||||
DocSpec {
|
||||
src: "detectors/state.md",
|
||||
out: "state.md",
|
||||
},
|
||||
DocSpec {
|
||||
src: "detectors/patterns.md",
|
||||
out: "patterns.md",
|
||||
},
|
||||
DocSpec {
|
||||
src: "auth.md",
|
||||
out: "auth_analysis.md",
|
||||
},
|
||||
];
|
||||
|
||||
fn render_docs_for_rustdoc() {
|
||||
let Ok(out_dir) = std::env::var("OUT_DIR") else {
|
||||
return;
|
||||
};
|
||||
let out_dir = PathBuf::from(out_dir);
|
||||
let docs_dir = Path::new("docs");
|
||||
|
||||
for spec in DOC_SPECS {
|
||||
let src_path = docs_dir.join(spec.src);
|
||||
println!("cargo:rerun-if-changed=docs/{}", spec.src);
|
||||
let out_path = out_dir.join(spec.out);
|
||||
let rendered = match std::fs::read_to_string(&src_path) {
|
||||
Ok(raw) => rewrite_doc_links(&raw, spec.src),
|
||||
Err(_) => format!(
|
||||
"See [`{base}/{src}`]({base}/{src}).\n",
|
||||
base = GH_DOCS_BASE,
|
||||
src = spec.src,
|
||||
),
|
||||
};
|
||||
if let Err(e) = std::fs::write(&out_path, rendered) {
|
||||
println!(
|
||||
"cargo:warning=failed to write rendered doc {}: {}",
|
||||
out_path.display(),
|
||||
e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Render markdown for embedding in rustdoc.
|
||||
///
|
||||
/// 1. Rewrites relative `.md` links to absolute github.com URLs:
|
||||
/// - inline links: `](path.md)` and `](path.md#anchor)`
|
||||
/// - reference defs: `[id]: path.md`
|
||||
/// 2. Labels unmarked fenced code blocks as `text` so rustdoc does not try
|
||||
/// to compile them as Rust (and choke on Unicode like `→`).
|
||||
/// 3. Annotates `rust` fences with `,ignore` so rustdoc doesn't try to
|
||||
/// compile or run prose-level snippets as doctests. GitHub still
|
||||
/// highlights them as Rust because it keys off the first token.
|
||||
///
|
||||
/// Skips link rewriting inside code fences. Skips link rewriting for URLs
|
||||
/// that are already absolute (have a scheme), pure anchors (`#section`),
|
||||
/// or non-`.md` paths.
|
||||
fn rewrite_doc_links(content: &str, source_rel: &str) -> String {
|
||||
let source_dir = Path::new(source_rel)
|
||||
.parent()
|
||||
.map(|p| p.to_string_lossy().into_owned())
|
||||
.unwrap_or_default();
|
||||
|
||||
let mut out = String::with_capacity(content.len() + 256);
|
||||
let mut in_fence = false;
|
||||
|
||||
for line in content.split_inclusive('\n') {
|
||||
let body = line.strip_suffix('\n').unwrap_or(line);
|
||||
let trimmed = body.trim_start();
|
||||
if trimmed.starts_with("```") {
|
||||
let lang = trimmed.trim_start_matches('`').trim();
|
||||
if in_fence {
|
||||
in_fence = false;
|
||||
out.push_str(line);
|
||||
} else {
|
||||
in_fence = true;
|
||||
let indent_len = body.len() - trimmed.len();
|
||||
if lang.is_empty() {
|
||||
out.push_str(&body[..indent_len]);
|
||||
out.push_str("```text");
|
||||
if line.ends_with('\n') {
|
||||
out.push('\n');
|
||||
}
|
||||
} else if is_rust_fence_needing_ignore(lang) {
|
||||
out.push_str(&body[..indent_len]);
|
||||
out.push_str("```rust,ignore");
|
||||
if line.ends_with('\n') {
|
||||
out.push('\n');
|
||||
}
|
||||
} else {
|
||||
out.push_str(line);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if in_fence {
|
||||
out.push_str(line);
|
||||
} else {
|
||||
rewrite_links_in_line(body, &source_dir, &mut out);
|
||||
if line.ends_with('\n') {
|
||||
out.push('\n');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
fn rewrite_links_in_line(line: &str, source_dir: &str, out: &mut String) {
|
||||
let bytes = line.as_bytes();
|
||||
let mut i = 0;
|
||||
while i < bytes.len() {
|
||||
// Inline link: `](URL)`, markdown URLs do not contain a raw `)`.
|
||||
if i + 1 < bytes.len() && bytes[i] == b']' && bytes[i + 1] == b'(' {
|
||||
out.push_str("](");
|
||||
i += 2;
|
||||
let url_start = i;
|
||||
while i < bytes.len() && bytes[i] != b')' {
|
||||
i += 1;
|
||||
}
|
||||
let url = &line[url_start..i];
|
||||
out.push_str(&maybe_rewrite_url(url, source_dir));
|
||||
}
|
||||
// Reference def: `]: URL`.
|
||||
else if i + 2 < bytes.len()
|
||||
&& bytes[i] == b']'
|
||||
&& bytes[i + 1] == b':'
|
||||
&& bytes[i + 2] == b' '
|
||||
{
|
||||
out.push_str("]: ");
|
||||
i += 3;
|
||||
let url_start = i;
|
||||
while i < bytes.len() && bytes[i] != b' ' {
|
||||
i += 1;
|
||||
}
|
||||
let url = &line[url_start..i];
|
||||
out.push_str(&maybe_rewrite_url(url, source_dir));
|
||||
} else {
|
||||
// `]` (0x5D) is ASCII; UTF-8 continuation bytes are 0x80-0xBF
|
||||
// and start bytes are 0xC0+, so byte-level scanning of `]` is
|
||||
// safe. For non-ASCII bytes, copy the full codepoint at once.
|
||||
let b = bytes[i];
|
||||
if b < 0x80 {
|
||||
out.push(b as char);
|
||||
i += 1;
|
||||
} else {
|
||||
let len = utf8_seq_len(b);
|
||||
let end = (i + len).min(bytes.len());
|
||||
out.push_str(&line[i..end]);
|
||||
i = end;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Decide whether a fence info string names a Rust block that rustdoc would
/// try to compile or run.
///
/// Returns `true` when the first comma-separated tag is `rust`
/// (case-insensitive) and none of the remaining tags is `ignore`, `no_run`,
/// `compile_fail` or `should_panic`. Such fences are rewritten to
/// `rust,ignore` because the snippets in `docs/` are illustrative rather
/// than standalone-compilable.
fn is_rust_fence_needing_ignore(lang: &str) -> bool {
    const OPT_OUT_TAGS: [&str; 4] = ["ignore", "no_run", "compile_fail", "should_panic"];

    let mut tags = lang.split(',').map(str::trim);

    let names_rust = matches!(tags.next(), Some(first) if first.eq_ignore_ascii_case("rust"));
    if !names_rust {
        return false;
    }

    // Any explicit opt-out tag means the fence is left as it is.
    !tags.any(|tag| OPT_OUT_TAGS.iter().any(|opt| tag.eq_ignore_ascii_case(opt)))
}
|
||||
|
||||
/// Length in bytes of the UTF-8 sequence introduced by `lead`.
///
/// ASCII bytes and stray continuation bytes (anything below `0xC0`) are
/// reported as length 1 so a scanning caller always makes progress.
fn utf8_seq_len(lead: u8) -> usize {
    match lead {
        0x00..=0xBF => 1,
        0xC0..=0xDF => 2,
        0xE0..=0xEF => 3,
        _ => 4,
    }
}
|
||||
|
||||
fn maybe_rewrite_url(url: &str, source_dir: &str) -> String {
|
||||
if url.is_empty() {
|
||||
return url.to_string();
|
||||
}
|
||||
// Already absolute (scheme://, mailto:, ssh://, etc.), leave alone.
|
||||
if has_scheme(url) {
|
||||
return url.to_string();
|
||||
}
|
||||
// Pure anchor, leave alone.
|
||||
if url.starts_with('#') {
|
||||
return url.to_string();
|
||||
}
|
||||
// Split off optional anchor.
|
||||
let (path, anchor) = match url.find('#') {
|
||||
Some(p) => (&url[..p], &url[p..]),
|
||||
None => (url, ""),
|
||||
};
|
||||
// Only rewrite if the path looks like a markdown file.
|
||||
if !path.ends_with(".md") {
|
||||
return url.to_string();
|
||||
}
|
||||
// Resolve relative to source_dir.
|
||||
let combined = if source_dir.is_empty() {
|
||||
path.to_string()
|
||||
} else {
|
||||
format!("{}/{}", source_dir, path)
|
||||
};
|
||||
let normalised = normalise_path(&combined);
|
||||
format!("{}/{}{}", GH_DOCS_BASE, normalised, anchor)
|
||||
}
|
||||
|
||||
/// Report whether `url` begins with a URI scheme followed by `:`.
///
/// Follows RFC 3986: `scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )`.
/// The text before the first `:` must be non-empty, start with an ASCII
/// letter, and contain only the characters allowed by that grammar.
fn has_scheme(url: &str) -> bool {
    let Some((scheme, _)) = url.split_once(':') else {
        return false;
    };

    let mut chars = scheme.chars();
    matches!(chars.next(), Some(first) if first.is_ascii_alphabetic())
        && chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'))
}
|
||||
|
||||
/// Collapse `.`, empty, and `..` segments of a `/`-separated relative path.
///
/// - Empty segments (from `//` or a leading/trailing `/`) and `.` are
///   dropped.
/// - `..` removes the preceding ordinary segment.
/// - A `..` with nothing left to remove is kept, so a path that climbs
///   above its starting directory stays distinguishable:
///   `../README.md` remains `../README.md` instead of collapsing to
///   `README.md`, which would name a different file.
///
/// Returns the remaining segments joined with `/`, or an empty string when
/// none remain.
fn normalise_path(path: &str) -> String {
    let mut stack: Vec<&str> = Vec::new();
    for seg in path.split('/') {
        match seg {
            "" | "." => {}
            ".." => {
                // Only an ordinary segment can be cancelled; an earlier
                // unmatched `..` must stay in place.
                let can_pop = matches!(stack.last(), Some(&prev) if prev != "..");
                if can_pop {
                    stack.pop();
                } else {
                    stack.push("..");
                }
            }
            other => stack.push(other),
        }
    }
    stack.join("/")
}
|
||||
|
||||
fn emit_placeholder_and_warn(dist_dir: &Path) {
|
||||
// Create minimal placeholder files so compilation succeeds
|
||||
std::fs::create_dir_all(dist_dir).ok();
|
||||
|
|
|
|||
|
|
@ -214,7 +214,7 @@ impl PathFact {
|
|||
/// Accepts either of two structural invariants:
|
||||
///
|
||||
/// * `dotdot = No && absolute = No` — the relative-and-`..`-free
|
||||
/// shape recognised by [`is_path_safe`]. Cannot escape to an
|
||||
/// shape recognised by `is_path_safe`. Cannot escape to an
|
||||
/// attacker-controlled absolute location.
|
||||
/// * `dotdot = No && prefix_lock.is_some()` — a canonicalised path
|
||||
/// (typically `File.expand_path` / `realpath` / `fs::canonicalize`)
|
||||
|
|
@ -866,7 +866,7 @@ pub fn is_structural_variant_ctor_for_lang(lang: crate::symbol::Lang, callee: &s
|
|||
/// [`crate::ssa::type_facts::peel_identity_suffix`]. Other languages do
|
||||
/// not (yet) have an equivalent grammar-driven recogniser; the rejection
|
||||
/// arm in their fixtures returns either an empty string literal (handled
|
||||
/// by [`SsaOp::Const`] seeding) or `None`/`null`/`nil` (handled by the
|
||||
/// by `SsaOp::Const` seeding) or `None`/`null`/`nil` (handled by the
|
||||
/// non-data-return skip).
|
||||
pub fn is_zero_arg_allocator_for_lang(lang: crate::symbol::Lang, _callee: &str) -> bool {
|
||||
// Currently a no-op for non-Rust languages: rejection-arm constructors
|
||||
|
|
|
|||
27
src/ast.rs
27
src/ast.rs
|
|
@ -1,3 +1,24 @@
|
|||
//! Tree-sitter parsing and two-pass analysis for all supported languages.
|
||||
//!
|
||||
//! The core type is `ParsedSource`, a thin wrapper around a parsed tree-sitter
|
||||
//! tree that carries the source bytes and language. Parsing reuses a thread-local
|
||||
//! [`tree_sitter::Parser`] so each worker thread keeps one live parser instance.
|
||||
//!
|
||||
//! ## Two-pass pipeline
|
||||
//!
|
||||
//! **Pass 1** (`extract_summaries_from_file`): builds the CFG, lowers to SSA,
|
||||
//! and extracts a [`crate::summary::FuncSummary`] per function. Summaries
|
||||
//! describe boundary behaviour: which arguments flow to sinks, which sources
|
||||
//! the function reads, what taint it strips, and what it returns.
|
||||
//!
|
||||
//! **Pass 2** (`run_rules_on_file`): reanalyses each file with the merged
|
||||
//! [`crate::summary::GlobalSummaries`] from pass 1. The taint engine runs a
|
||||
//! forward dataflow worklist over SSA, resolving cross-file calls via summaries.
|
||||
//!
|
||||
//! Parse timeouts are tracked per-thread via [`take_last_parse_timeout_ms`]
|
||||
//! so callers can surface the event as an informational diagnostic instead
|
||||
//! of silently skipping the file.
|
||||
|
||||
#![allow(clippy::only_used_in_recursion, clippy::type_complexity)]
|
||||
|
||||
use crate::auth_analysis;
|
||||
|
|
@ -39,7 +60,7 @@ thread_local! {
|
|||
}
|
||||
|
||||
/// Consume and return the most recent parse-timeout event on this thread
|
||||
/// (set by [`ParsedSource::try_new`]). Used to lift the event into a
|
||||
/// (set by `ParsedSource::try_new`). Used to lift the event into a
|
||||
/// synthetic [`Diag`] carrying an [`crate::engine_notes::EngineNote::ParseTimeout`].
|
||||
pub fn take_last_parse_timeout_ms() -> Option<u64> {
|
||||
LAST_PARSE_TIMEOUT_MS.with(|c| c.take())
|
||||
|
|
@ -647,7 +668,7 @@ fn build_taint_diag(
|
|||
}
|
||||
|
||||
/// Resolve a file extension to a language slug (e.g. `"rust"`,
|
||||
/// `"javascript"`). Public façade over [`lang_for_path`] for callers
|
||||
/// `"javascript"`). Public façade over `lang_for_path` for callers
|
||||
/// that only need the slug, used by the debug API to look up
|
||||
/// per-language rule enablement without re-parsing the file.
|
||||
pub fn lang_slug_for_path(path: &Path) -> Option<&'static str> {
|
||||
|
|
@ -3985,7 +4006,7 @@ pub struct FusedResult {
|
|||
///
|
||||
/// When `global_summaries` is `None`, the taint engine runs with local
|
||||
/// context only (equivalent to pass 1 + partial pass 2). A second call
|
||||
/// to [`run_taint_only`] can refine findings with the full cross-file view
|
||||
/// to `run_taint_only` can refine findings with the full cross-file view
|
||||
/// without re-parsing or re-building the CFG.
|
||||
pub fn analyse_file_fused(
|
||||
bytes: &[u8],
|
||||
|
|
|
|||
|
|
@ -2793,7 +2793,7 @@ fn function_params(node: Node<'_>, bytes: &[u8]) -> Vec<String> {
|
|||
params
|
||||
}
|
||||
|
||||
/// Variant of [`function_params`] that always includes id-like typed
|
||||
/// Variant of `function_params` that always includes id-like typed
|
||||
/// Python params (`dag_id: str`, `dag_run_id: str`). Used by
|
||||
/// `attach_route_handler` to populate `unit.params` for RouteHandler
|
||||
/// units so middleware-injected auth checks (FastAPI
|
||||
|
|
@ -2802,7 +2802,7 @@ fn function_params(node: Node<'_>, bytes: &[u8]) -> Vec<String> {
|
|||
/// the id-shaped ones that are *the* primary user-controlled data on
|
||||
/// REST routes.
|
||||
///
|
||||
/// The id-like filter in [`collect_param_names`] exists to keep
|
||||
/// The id-like filter in `collect_param_names` exists to keep
|
||||
/// internal helper signatures (`def f(release_id: int, project:
|
||||
/// Project)`) from passing `unit_has_user_input_evidence`'s param
|
||||
/// heuristic, which would over-fire `missing_ownership_check`. Route
|
||||
|
|
|
|||
|
|
@ -1,4 +1,60 @@
|
|||
#![doc = include_str!(concat!(env!("OUT_DIR"), "/auth_analysis.md"))]
|
||||
//! Missing authorization and ownership checks (Rust-primary).
|
||||
//!
|
||||
//! Detects request handlers that reach a privileged operation taking a scoped
|
||||
//! identifier (`*_id`, row reference, scoped resource) without a preceding
|
||||
//! ownership or membership check.
|
||||
//!
|
||||
//! Other languages have rule scaffolding (`py.auth.*`, `js.auth.*`,
|
||||
//! `rb.auth.*`, `go.auth.*`, `java.auth.*`) but only Rust has benchmark
|
||||
//! corpus coverage and validated precision. Treat non-Rust findings as preview.
|
||||
//!
|
||||
//! # Rule IDs
|
||||
//!
|
||||
//! | Rule ID | Variant |
|
||||
//! |---------|---------|
|
||||
//! | `rs.auth.missing_ownership_check` | Standalone structural analyser (default on) |
|
||||
//! | `rs.auth.missing_ownership_check.taint` | SSA/taint variant via `Cap::UNAUTHORIZED_ID` (default off) |
|
||||
//!
|
||||
//! Enable the taint variant via `scanner.enable_auth_as_taint = true` in
|
||||
//! `nyx.conf`. Run both together when enabled; if both fire for the same site,
|
||||
//! treat them as the same finding.
|
||||
//!
|
||||
//! # What counts as authorization
|
||||
//!
|
||||
//! The analyser accepts any of:
|
||||
//! - A call to a recognised authorization helper (`check_ownership`,
|
||||
//! `has_permission`, `require_*_member`, etc.; configurable per project).
|
||||
//! - An ownership-equality check on a row reference
|
||||
//! (`if owner_id != user.id { return 403 }`).
|
||||
//! - A self-actor reference from a typed extractor param (`Extension<Session>`,
|
||||
//! `CurrentUser`, etc.) combined with `user.id` / `user.user_id` use.
|
||||
//! - A typed policy-guard wrapper (`GuardedData<ActionPolicy<X>, _>`);
|
||||
//! configured via `policy_guard_names`.
|
||||
//! - A SQL query joining through an ACL table or filtering by `user_id`
|
||||
//! predicate (detected without a SQL parser via [`sql_semantics`]).
|
||||
//! - A helper-summary lift: a called function whose body contains a
|
||||
//! `require_*_member` call (fixed-point up to 4 iterations).
|
||||
//!
|
||||
//! # Sink classification
|
||||
//!
|
||||
//! | Class | Examples | Treatment |
|
||||
//! |-------|---------|-----------|
|
||||
//! | `InMemoryLocal` | `map.insert`, `vec.push` on local | Never a sink |
|
||||
//! | `RealtimePublish` | `realtime.publish_to_group` | Sink unless channel scope is ownership-checked |
|
||||
//! | `OutboundNetwork` | `http.post`, `reqwest::Client::post` | Sink unless sanitizer is on the path |
|
||||
//! | `CacheCrossTenant` | `redis.set` with scoped keys | Sink unless tenant is checked |
|
||||
//! | `DbMutation` | `db.insert`, `repo.save` with scoped IDs | Sink unless ownership is established |
|
||||
//! | `DbCrossTenantRead` | `db.query` returning tenant-scoped rows | Sink unless ACL-join or tenant predicate is present |
|
||||
//!
|
||||
//! # Submodules
|
||||
//!
|
||||
//! - [`checks`]: ownership-check recognition, actor-context extraction,
|
||||
//! row-field variable tracking
|
||||
//! - [`config`]: per-language auth rule defaults and config merging
|
||||
//! - [`extract`]: handler detection, scoped-ID extraction, summary lifting
|
||||
//! - [`model`]: `AnalysisUnit`, `AuthCheck`, `SensitiveOperation`, `SinkClass`
|
||||
//! - [`sql_semantics`]: ACL-join and `user_id`-predicate detection without a
|
||||
//! SQL parser
|
||||
|
||||
pub mod checks;
|
||||
pub mod config;
|
||||
|
|
|
|||
|
|
@ -253,7 +253,7 @@ pub struct AnalysisUnit {
|
|||
/// Function parameter names whose static type maps to a
|
||||
/// payload-incompatible scalar ([`crate::ssa::type_facts::TypeKind::Int`]
|
||||
/// or [`crate::ssa::type_facts::TypeKind::Bool`]). Populated
|
||||
/// per-file by [`super::apply_typed_bounded_params`] using the
|
||||
/// per-file by `apply_typed_bounded_params` using the
|
||||
/// SSA-derived `VarTypes` map. Consulted by
|
||||
/// `is_typed_bounded_subject` so parameters like Spring `Long
|
||||
/// userId`, Axum `Path<i64>`, or FastAPI `user_id: int` are not
|
||||
|
|
@ -265,7 +265,7 @@ pub struct AnalysisUnit {
|
|||
/// declared type is a payload-incompatible scalar. Map key is the
|
||||
/// parameter name (e.g. `dto`), value is the list of field names
|
||||
/// (e.g. `["age", "count"]`). Populated by
|
||||
/// [`super::apply_typed_bounded_params`] only when the parameter
|
||||
/// `apply_typed_bounded_params` only when the parameter
|
||||
/// itself was recognised as a typed extractor, bare parameters
|
||||
/// with no framework gate never lift their fields.
|
||||
pub typed_bounded_dto_fields: HashMap<String, Vec<String>>,
|
||||
|
|
|
|||
|
|
@ -1,3 +1,15 @@
|
|||
//! Whole-program call graph built from pass-1 function summaries.
|
||||
//!
|
||||
//! Nodes are [`FuncKey`]s (one per function definition across all files).
|
||||
//! Edges represent call-site relationships resolved after pass 1 completes.
|
||||
//! Unresolved and ambiguous callees are tracked separately so they can be
|
||||
//! surfaced in diagnostics without blocking analysis.
|
||||
//!
|
||||
//! [`CallGraphAnalysis`] computes SCCs and topological order. The scanner
|
||||
//! uses topo order in pass 2 so callees are analysed before their callers,
|
||||
//! and iterates over SCC groups to a fixed point for mutually recursive
|
||||
//! functions.
|
||||
|
||||
use crate::interop::InteropEdge;
|
||||
use crate::rust_resolve::RustUseMap;
|
||||
use crate::summary::{CalleeQuery, CalleeResolution, GlobalSummaries};
|
||||
|
|
@ -55,7 +67,7 @@ pub struct CallGraph {
|
|||
pub struct CallGraphAnalysis {
|
||||
/// Strongly connected components.
|
||||
pub sccs: Vec<Vec<NodeIndex>>,
|
||||
/// Maps each `NodeIndex` to its SCC index in [`sccs`].
|
||||
/// Maps each `NodeIndex` to its SCC index in `sccs`.
|
||||
#[allow(dead_code)] // used for future topo-ordered taint propagation
|
||||
pub node_to_scc: HashMap<NodeIndex, usize>,
|
||||
/// SCC indices in **callee-first** (leaves-first) order.
|
||||
|
|
@ -160,7 +172,7 @@ pub(crate) fn callee_container_hint(raw: &str) -> &str {
|
|||
/// Per-language `(container, method_name)` → candidate [`FuncKey`] index.
|
||||
///
|
||||
/// Built once per call-graph construction over every merged
|
||||
/// [`FuncSummary`]. Used by edge insertion to restrict an indirect method
|
||||
/// [`crate::summary::FuncSummary`]. Used by edge insertion to restrict an indirect method
|
||||
/// call (`receiver.method(...)`) to only those targets whose defining
|
||||
/// container matches the receiver's static type. Without a container
|
||||
/// hint the index falls back to the bare-name list, matching today's
|
||||
|
|
@ -272,7 +284,7 @@ impl ClassMethodIndex {
|
|||
///
|
||||
/// Covers Java `extends`/`implements`, Rust `impl Trait for Type`, TS
|
||||
/// `extends`/`implements`, Python `class X(Base)`, plus PHP/Ruby/C++
|
||||
/// (see [`crate::cfg::hierarchy`]). Go's structural interfaces are
|
||||
/// (see `crate::cfg::hierarchy`). Go's structural interfaces are
|
||||
/// intentionally omitted, name-only resolution is used instead.
|
||||
///
|
||||
/// Container names are bare (no namespace), so cross-namespace aliases
|
||||
|
|
@ -804,7 +816,7 @@ pub fn analyse(cg: &CallGraph) -> CallGraphAnalysis {
|
|||
/// such SCC has nodes in more than one file (`cross_file`).
|
||||
///
|
||||
/// `has_mutual_recursion` triggers the SCC fixed-point loop in
|
||||
/// [`crate::commands::scan::run_topo_batches`]. `cross_file` is a tighter
|
||||
/// `run_topo_batches`. `cross_file` is a tighter
|
||||
/// signal used by joint fixed-point convergence: it implies the
|
||||
/// recursion involves at least one cross-file call edge, so the inline
|
||||
/// cache and per-iteration findings need joint convergence, not just
|
||||
|
|
|
|||
|
|
@ -1,3 +1,17 @@
|
|||
//! Intra-procedural control-flow graph construction.
|
||||
//!
|
||||
//! Walks tree-sitter ASTs for all ten supported languages and builds a
|
||||
//! [`Cfg`] (a petgraph `DiGraph<NodeInfo, EdgeKind>`) per function.
|
||||
//! [`NodeInfo`] carries the statement kind, label classification, callee
|
||||
//! name, taint and gate metadata. [`EdgeKind`] distinguishes normal flow,
|
||||
//! true/false branches, and exception edges.
|
||||
//!
|
||||
//! `build_cfg` is the main entry point: given a parsed tree and language,
|
||||
//! it produces a [`FileCfg`] (one [`Cfg`] per function in the file) along
|
||||
//! with a [`FuncSummaries`] map for pass-1 summary extraction.
|
||||
//! `export_summaries` converts in-graph [`LocalFuncSummary`] values to
|
||||
//! the serializable [`crate::summary::FuncSummary`] form.
|
||||
|
||||
#![allow(
|
||||
clippy::collapsible_if,
|
||||
clippy::let_and_return,
|
||||
|
|
@ -65,7 +79,7 @@ use params::{
|
|||
is_configured_terminator,
|
||||
};
|
||||
|
||||
/// Test-only re-export of [`extract_param_meta`] so the external
|
||||
/// Test-only re-export of `extract_param_meta` so the external
|
||||
/// `tests/typed_extractors_audit.rs` harness can drive the per-param
|
||||
/// classifier directly without spinning up the full scan pipeline.
|
||||
/// Projects away the destructured-siblings third tuple slot so the
|
||||
|
|
@ -675,7 +689,7 @@ pub struct FileCfg {
|
|||
/// per-file class / trait / interface hierarchy edges.
|
||||
/// Each entry is `(sub_container, super_container)` after
|
||||
/// language-specific normalisation. See
|
||||
/// [`crate::cfg::hierarchy`] for the per-language extraction
|
||||
/// `crate::cfg::hierarchy` for the per-language extraction
|
||||
/// rules and [`crate::callgraph::TypeHierarchyIndex`] for the
|
||||
/// downstream consumer. Empty for languages without an
|
||||
/// extractor (Go, C) and for files with no inheritance / impl
|
||||
|
|
|
|||
|
|
@ -1,4 +1,50 @@
|
|||
#![doc = include_str!(concat!(env!("OUT_DIR"), "/cfg_analysis.md"))]
|
||||
//! CFG structural analysis: dominator-based checks over intra-procedural CFGs.
|
||||
//!
|
||||
//! Checks structural properties that the taint engine cannot: whether sinks are
|
||||
//! guarded by sanitizers or validators, whether web handlers reach privileged
|
||||
//! sinks without an auth call, whether resources are released on all exit paths,
|
||||
//! and whether error paths terminate before reaching dangerous code.
|
||||
//!
|
||||
//! A guard dominates a sink when the guard must execute before the sink on
|
||||
//! every path from function entry.
|
||||
//!
|
||||
//! # Rule IDs
|
||||
//!
|
||||
//! | Rule ID | Severity | What it checks |
|
||||
//! |---------|----------|----------------|
|
||||
//! | `cfg-unguarded-sink` | High/Medium | Sink reachable from entry without a matching guard |
|
||||
//! | `cfg-auth-gap` | High | Web handler reaches privileged sink with no auth call |
|
||||
//! | `cfg-unreachable-sink` | Medium | Sink in dead code |
|
||||
//! | `cfg-unreachable-sanitizer` | Low | Sanitizer in dead code (may have been silently disabled) |
|
||||
//! | `cfg-unreachable-source` | Low | Source in dead code |
|
||||
//! | `cfg-error-fallthrough` | High/Medium | Error path does not terminate before a dangerous call |
|
||||
//! | `cfg-resource-leak` | Medium | Resource acquired but not released on all exit paths |
|
||||
//! | `cfg-lock-not-released` | Medium | Lock acquired but not released on all exit paths |
|
||||
//!
|
||||
//! # Recognised guards
|
||||
//!
|
||||
//! `validate*`, `sanitize*`, `check_*`, `verify_*`, `assert_*`,
|
||||
//! `shell_escape`, `html_escape`, `url_encode`, `which`.
|
||||
//!
|
||||
//! # Recognised auth names
|
||||
//!
|
||||
//! `is_authenticated`, `require_auth`, `check_permission`, `authorize`,
|
||||
//! `authenticate`, `require_login`, `check_auth`, `verify_token`,
|
||||
//! `validate_token` (cross-language), plus `isAuthenticated`,
|
||||
//! `checkPermission`, `hasAuthority`, `hasRole` (Java) and
|
||||
//! `middleware.auth`, `auth.required` (Go).
|
||||
//!
|
||||
//! Custom guards and auth functions can be added as `sanitizer` rules
|
||||
//! with `cap = "all"` in `nyx.conf`.
|
||||
//!
|
||||
//! # Submodules
|
||||
//!
|
||||
//! - [`auth`]: auth-gap detection, handler classification
|
||||
//! - [`dominators`]: dominator tree computation over CFG nodes
|
||||
//! - [`error_handling`]: error-fallthrough detection
|
||||
//! - [`guards`]: guard recognition and dominator queries
|
||||
//! - [`resources`]: resource-leak and lock-not-released detection
|
||||
//! - [`rules`]: finding construction and rule ID assignment
|
||||
|
||||
pub mod auth;
|
||||
pub mod dominators;
|
||||
|
|
|
|||
10
src/cli.rs
10
src/cli.rs
|
|
@ -1,3 +1,11 @@
|
|||
//! Command-line interface definition via clap.
|
||||
//!
|
||||
//! Defines [`Cli`] (the top-level parser) and the [`Commands`] enum of
|
||||
//! subcommands. Helpers on [`Commands`] answer routing questions the binary
|
||||
//! needs without pattern-matching on specific arms: [`Commands::effective_format`],
|
||||
//! [`Commands::is_structured_output`], [`Commands::is_serve`], and
|
||||
//! [`Commands::is_informational`].
|
||||
|
||||
use clap::{Parser, Subcommand, ValueEnum};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
|
|
@ -250,7 +258,7 @@ pub enum Commands {
|
|||
#[arg(long, help_heading = "Output")]
|
||||
no_rank: bool,
|
||||
|
||||
/// Show inline-suppressed findings (dimmed, tagged [SUPPRESSED])
|
||||
/// Show inline-suppressed findings (dimmed, tagged \[SUPPRESSED\])
|
||||
#[arg(long, help_heading = "Output")]
|
||||
show_suppressed: bool,
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,11 @@
|
|||
//! Subcommand handlers and top-level dispatch.
|
||||
//!
|
||||
//! [`handle_command`] is the single entry point from `main`. It installs
|
||||
//! analysis engine options from the resolved config, then routes to the
|
||||
//! appropriate subcommand module (scan, clean, config, index, list, serve).
|
||||
//! CLI flags that override config values are applied per-arm before the
|
||||
//! handler runs.
|
||||
|
||||
pub mod clean;
|
||||
pub mod config;
|
||||
pub mod index;
|
||||
|
|
|
|||
|
|
@ -117,10 +117,20 @@ fn fail_if_persist_errors(stage: &str, errors: Arc<Mutex<Vec<String>>>) -> NyxRe
|
|||
|
||||
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
||||
pub struct Diag {
|
||||
/// Project-relative path of the file containing the finding.
|
||||
pub path: String,
|
||||
/// 1-based line number of the sink location.
|
||||
pub line: usize,
|
||||
/// 0-based column offset of the sink location.
|
||||
pub col: usize,
|
||||
/// Finding severity (Critical / High / Medium / Low / Info).
|
||||
pub severity: Severity,
|
||||
/// Rule identifier, e.g. `taint-unsanitised-flow`, `cfg-auth-gap`,
|
||||
/// `rs.auth.missing_ownership_check`. Taint findings append a
|
||||
/// source-location suffix (`"taint-unsanitised-flow (source 12:3)"`)
|
||||
/// so sibling paths with the same sink have distinct IDs for
|
||||
/// deduplication; [`crate::evidence::Evidence::sink_caps`] disambiguates
|
||||
/// findings at the same `(path, line, col)` that reach different sinks.
|
||||
pub id: String,
|
||||
/// High-level finding category (Security, Reliability, Quality).
|
||||
pub category: FindingCategory,
|
||||
|
|
@ -871,7 +881,7 @@ static LAST_TOPO_NONRECURSIVE_REFINEMENTS: AtomicUsize = AtomicUsize::new(0);
|
|||
|
||||
/// Returns the cumulative count of non-recursive batch refinements
|
||||
/// (summary + ssa-summary + body + auth inserts) persisted to
|
||||
/// `global_summaries` during the most recent [`run_topo_batches`] call.
|
||||
/// `global_summaries` during the most recent `run_topo_batches` call.
|
||||
/// Reset to zero at the start of each invocation.
|
||||
pub fn last_topo_nonrecursive_refinements() -> usize {
|
||||
LAST_TOPO_NONRECURSIVE_REFINEMENTS.load(Ordering::Relaxed)
|
||||
|
|
|
|||
|
|
@ -322,7 +322,7 @@ impl BoolState {
|
|||
pub struct ValueFact {
|
||||
/// Exact known constant (Eq constraint). `None` = unconstrained.
|
||||
pub exact: Option<ConstValue>,
|
||||
/// Excluded constant values (Neq constraints). Bounded by [`MAX_NEQ`].
|
||||
/// Excluded constant values (Neq constraints). Bounded by `MAX_NEQ`.
|
||||
pub excluded: SmallVec<[ConstValue; 4]>,
|
||||
/// Inclusive lower bound (`None` = −∞).
|
||||
pub lo: Option<i64>,
|
||||
|
|
|
|||
|
|
@ -204,7 +204,7 @@ pub fn lower_condition(
|
|||
/// Called during SSA lowering when the full [`SsaBody`] is not yet available.
|
||||
/// Resolves variables via `var_stacks[name].last()` (the current reaching
|
||||
/// definition) instead of scanning `value_defs`. Does not use `const_values`
|
||||
/// (unavailable at lowering time); constants are seeded into [`PathEnv`]
|
||||
/// (unavailable at lowering time); constants are seeded into [`crate::constraint::PathEnv`]
|
||||
/// separately via `seed_from_optimization`.
|
||||
pub fn lower_condition_with_stacks(
|
||||
cond_info: &NodeInfo,
|
||||
|
|
|
|||
|
|
@ -200,7 +200,7 @@ fn apply_value_const(env: &mut PathEnv, v: crate::ssa::ir::SsaValue, op: CompOp,
|
|||
/// Resolution order:
|
||||
/// 1. Cross-language primitive aliases (case-insensitive)
|
||||
/// 2. Java/Ruby/Go class and framework names (case-sensitive)
|
||||
/// 3. Java type hierarchy fallback (case-sensitive, via [`TypeHierarchy`])
|
||||
/// 3. Java type hierarchy fallback (case-sensitive, via [`crate::ssa::type_facts::TypeHierarchy`])
|
||||
pub fn parse_type_name(name: &str) -> Option<TypeKind> {
|
||||
use crate::ssa::type_facts::TypeHierarchy;
|
||||
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ pub enum ConvergenceEvent {
|
|||
/// Per-batch record for the SCC fix-point loop.
|
||||
///
|
||||
/// Populated once per batch entry in
|
||||
/// [`crate::commands::scan::run_topo_batches`] that hits the
|
||||
/// `run_topo_batches` that hits the
|
||||
/// `has_mutual_recursion` branch.
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct SccBatchRecord {
|
||||
|
|
|
|||
|
|
@ -1,3 +1,14 @@
|
|||
//! SQLite connection pool and schema for the incremental index.
|
||||
//!
|
||||
//! The index stores file content hashes, per-file scan results, and function
|
||||
//! summaries so subsequent scans can skip files whose content has not changed.
|
||||
//! The pool is backed by [`r2d2`] with WAL journaling, `synchronous=NORMAL`,
|
||||
//! and memory-mapped I/O tuned for large codebases.
|
||||
//!
|
||||
//! Tables: `files`, `issues`, `function_summaries`, `ssa_function_summaries`.
|
||||
//! SSA-specific persistence lives in [`crate::summary::ssa_summary`]; routines
|
||||
//! here cover function summaries and file-level hash bookkeeping.
|
||||
|
||||
pub mod index {
|
||||
#![allow(clippy::too_many_arguments, clippy::type_complexity)]
|
||||
|
||||
|
|
@ -615,7 +626,7 @@ pub mod index {
|
|||
})
|
||||
}
|
||||
|
||||
/// Like [`should_scan`] but accepts a pre-computed hash to avoid
|
||||
/// Like `should_scan` but accepts a pre-computed hash to avoid
|
||||
/// redundant file reads.
|
||||
pub fn should_scan_with_hash(&self, path: &Path, hash: &[u8]) -> NyxResult<bool> {
|
||||
let row: Option<Vec<u8>> = self
|
||||
|
|
@ -673,7 +684,7 @@ pub mod index {
|
|||
/// (`file_id, rule_id, line, col`) to defend against upstream bugs
|
||||
/// that produce same-keyed diagnostics with differing severity or
|
||||
/// cosmetic fields. The first-seen row wins; upstream
|
||||
/// [`crate::ast::ParsedSource::finalize_diags`] sorts so that high
|
||||
/// `ParsedSource::finalize_diags` sorts so that high
|
||||
/// severity comes first, and this fallback preserves that ordering.
|
||||
pub fn replace_issues<'a>(
|
||||
&mut self,
|
||||
|
|
|
|||
|
|
@ -1,3 +1,12 @@
|
|||
//! Error types used throughout the scanner.
|
||||
//!
|
||||
//! [`NyxError`] wraps I/O, TOML parse, SQLite, tree-sitter, and connection-pool
|
||||
//! errors into a single enum. [`NyxResult<T>`] is the standard return type alias.
|
||||
//!
|
||||
//! [`ConfigError`] and [`ConfigErrorKind`] carry structured config-validation
|
||||
//! diagnostics (section, field, message, kind) so callers can format them
|
||||
//! consistently without ad-hoc string matching.
|
||||
|
||||
use serde::Serialize;
|
||||
use serde::de::StdError;
|
||||
use std::fmt;
|
||||
|
|
|
|||
|
|
@ -60,10 +60,15 @@ impl FromStr for Confidence {
|
|||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum FlowStepKind {
|
||||
/// A source read: user input, environment variable, network data, etc.
|
||||
Source,
|
||||
/// A local assignment propagating taint from one variable to another.
|
||||
Assignment,
|
||||
/// A function call through which taint flows (via argument or return value).
|
||||
Call,
|
||||
/// An SSA phi node merging tainted values from multiple predecessors.
|
||||
Phi,
|
||||
/// The dangerous sink where tainted data is consumed.
|
||||
Sink,
|
||||
}
|
||||
|
||||
|
|
@ -82,19 +87,29 @@ impl fmt::Display for FlowStepKind {
|
|||
/// A single step in a taint flow path (display-ready).
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct FlowStep {
|
||||
/// 1-based position of this step in the flow (source = 1, sink = N).
|
||||
pub step: u32,
|
||||
pub kind: FlowStepKind,
|
||||
/// Project-relative file path where this step occurs.
|
||||
pub file: String,
|
||||
/// 1-based line number of the operation.
|
||||
pub line: u32,
|
||||
/// 0-based column offset of the operation.
|
||||
pub col: u32,
|
||||
/// Source code snippet at this location, if available.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub snippet: Option<String>,
|
||||
/// SSA variable name carrying taint at this step.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub variable: Option<String>,
|
||||
/// For [`FlowStepKind::Call`] steps, the name of the function called.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub callee: Option<String>,
|
||||
/// Name of the enclosing function at this step.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub function: Option<String>,
|
||||
/// True when this step crosses a file boundary, resolved via a cross-file
|
||||
/// summary rather than direct SSA flow.
|
||||
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
|
||||
pub is_cross_file: bool,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,13 @@
|
|||
//! Explicit cross-language call-graph bridge edges.
|
||||
//!
|
||||
//! Without an [`InteropEdge`], the call graph resolver never attempts
|
||||
//! cross-language resolution. This prevents false positives from functions
|
||||
//! in different languages that happen to share a name.
|
||||
//!
|
||||
//! An [`InteropEdge`] maps a [`CallSiteKey`] (caller language, file, function,
|
||||
//! callee symbol, call ordinal) to a [`FuncKey`] in another language. Ordinal
|
||||
//! `0` acts as a wildcard matching any call of that name from the given caller.
|
||||
|
||||
use crate::symbol::{FuncKey, Lang};
|
||||
|
||||
/// Identifies a specific call site within a caller function.
|
||||
|
|
|
|||
|
|
@ -1,3 +1,16 @@
|
|||
//! Per-language source, sanitizer, and sink rule registries.
|
||||
//!
|
||||
//! The central type is [`DataLabel`], which pairs a [`Cap`] bitflag set with
|
||||
//! a role (Source, Sanitizer, Sink). [`LabelRule`] maps AST text patterns to
|
||||
//! labels. [`classify`] and [`classify_all`] look up a callee name against
|
||||
//! the active language's rule table; [`classify_gated_sink`] handles
|
||||
//! argument-role-aware sinks where one argument controls whether the call is
|
||||
//! dangerous at all.
|
||||
//!
|
||||
//! Rules for each language live in per-language submodules (`rust`, `java`,
|
||||
//! `go`, `python`, `php`, `ruby`, `javascript`, `typescript`, `c`, `cpp`).
|
||||
//! The [`Cap`] bitflag type is defined here and shared with the taint engine.
|
||||
|
||||
mod c;
|
||||
mod cpp;
|
||||
mod go;
|
||||
|
|
@ -125,19 +138,58 @@ pub struct SinkGate {
|
|||
}
|
||||
|
||||
bitflags! {
|
||||
/// Security capability bits for sources, sanitizers, and sinks.
|
||||
///
|
||||
/// Each bit represents a security-relevant property. The meaning depends on
|
||||
/// which role the [`Cap`] value is attached to:
|
||||
///
|
||||
/// - **Source**: which attack classes this tainted value can potentially
|
||||
/// trigger. Sources usually carry [`Cap::all()`] so they match any sink.
|
||||
/// [`ENV_VAR`](Cap::ENV_VAR) is an exception — it marks origin rather
|
||||
/// than reach.
|
||||
/// - **Sanitizer**: which attack classes this function strips. A sanitizer
|
||||
/// labelled with [`HTML_ESCAPE`](Cap::HTML_ESCAPE) clears the XSS-relevant
|
||||
/// bits from tainted values that flow through it.
|
||||
/// - **Sink**: which capability bits must be present on the incoming tainted
|
||||
/// value for a finding to fire. A SQL sink requires [`SQL_QUERY`](Cap::SQL_QUERY).
|
||||
///
|
||||
/// In practice: a finding fires when a tainted value reaches a sink and
|
||||
/// `(value_caps & sink_caps) != 0`.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub struct Cap: u16 {
|
||||
/// Taint that originated from an environment variable read.
|
||||
/// Used as a source-origin marker for env-injection rules.
|
||||
const ENV_VAR = 0b0000_0000_0000_0001; // bit 0
|
||||
/// Sanitizer: the value has passed through HTML entity escaping.
|
||||
/// Strips XSS risk from values that reach HTML output sinks.
|
||||
const HTML_ESCAPE = 0b0000_0000_0000_0010; // bit 1
|
||||
/// Sanitizer: the value has been shell-argument escaped.
|
||||
/// Strips command-injection risk before shell sinks.
|
||||
const SHELL_ESCAPE = 0b0000_0000_0000_0100; // bit 2
|
||||
/// Sanitizer: the value has been percent-encoded for use in a URL.
|
||||
const URL_ENCODE = 0b0000_0000_0000_1000; // bit 3
|
||||
/// Sanitizer: the value was parsed through a structured JSON decoder
|
||||
/// (as opposed to `eval`-based or regex parsing).
|
||||
const JSON_PARSE = 0b0000_0000_0001_0000; // bit 4
|
||||
/// Sink: file system read or write operation (path traversal, arbitrary
|
||||
/// file read/write).
|
||||
const FILE_IO = 0b0000_0000_0010_0000; // bit 5
|
||||
/// Sink: format string injection (e.g. `printf`-family, `String.format`).
|
||||
const FMT_STRING = 0b0000_0000_0100_0000; // bit 6
|
||||
/// Sink: SQL query construction. Fires for string-concatenated queries
|
||||
/// and parameterized-query builders where the query text itself is tainted.
|
||||
const SQL_QUERY = 0b0000_0000_1000_0000; // bit 7
|
||||
/// Sink: unsafe object deserialization (Java `ObjectInputStream`,
|
||||
/// Python `pickle`, Ruby `Marshal`, PHP `unserialize`, etc.).
|
||||
const DESERIALIZE = 0b0000_0001_0000_0000; // bit 8
|
||||
/// Sink: server-side request forgery. Fires when attacker-controlled
|
||||
/// data reaches the destination URL of an outbound HTTP request.
|
||||
const SSRF = 0b0000_0010_0000_0000; // bit 9
|
||||
/// Sink: code or command execution (shell injection, `eval`, `exec`,
|
||||
/// dynamic `require`/`import`, template injection).
|
||||
const CODE_EXEC = 0b0000_0100_0000_0000; // bit 10
|
||||
/// Sink: cryptographic operation with a tainted algorithm name or seed
|
||||
/// (weak-crypto / predictable-randomness patterns).
|
||||
const CRYPTO = 0b0000_1000_0000_0000; // bit 11
|
||||
/// Request-bound, caller-supplied identifier that has not yet been
|
||||
/// validated against an ownership/membership check. Used as the
|
||||
|
|
@ -747,7 +799,7 @@ fn phase_c_auth_rules_for_lang(lang_slug: &str) -> Vec<RuntimeLabelRule> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Public re-export used by [`crate::ast::ParsedFile::from_source`] to
|
||||
/// Public re-export used by `ParsedFile::from_source` to
|
||||
/// augment per-file rule sets when imports reveal frameworks that the
|
||||
/// manifest-level detector missed.
|
||||
pub fn framework_rules_for_lang_pub(
|
||||
|
|
@ -1207,7 +1259,7 @@ pub fn classify_gated_sink(
|
|||
out
|
||||
}
|
||||
|
||||
/// Public wrapper for [`normalize_chained_call`] so callers outside the module
|
||||
/// Public wrapper for `normalize_chained_call` so callers outside the module
|
||||
/// can share the same normalization used by the label classifier.
|
||||
pub fn normalize_chained_call_for_classify(text: &str) -> String {
|
||||
normalize_chained_call(text)
|
||||
|
|
|
|||
111
src/lib.rs
111
src/lib.rs
|
|
@ -1,14 +1,92 @@
|
|||
//! Multi-language static vulnerability scanner. Tree-sitter parsing, petgraph
|
||||
//! CFGs, SSA-based dataflow, and cross-file taint analysis with a
|
||||
//! capability-based sanitizer system. Supports Rust, C, C++, Java, Go, PHP,
|
||||
//! Python, Ruby, TypeScript, and JavaScript.
|
||||
//! Multi-language static vulnerability scanner.
|
||||
//!
|
||||
//! The handbook below is embedded verbatim from
|
||||
//! [`docs/how-it-works.md`](https://github.com/elicpeter/nyx/blob/master/docs/how-it-works.md).
|
||||
//! Tree-sitter parsing, petgraph CFGs, SSA-based dataflow, and cross-file
|
||||
//! taint analysis with a capability-based sanitizer system. Supports Rust,
|
||||
//! C, C++, Java, Go, PHP, Python, Ruby, TypeScript, and JavaScript.
|
||||
//!
|
||||
//! This crate is both the `nyx` binary and a library for programmatic
|
||||
//! scanning. Most internal modules are public for testing and downstream
|
||||
//! tooling, but the stable contract is [`scan_no_index`] plus the types
|
||||
//! it returns.
|
||||
//!
|
||||
//! For a description of how the analysis pipeline works, see the
|
||||
//! [how-it-works handbook](https://github.com/elicpeter/nyx/blob/master/docs/how-it-works.md).
|
||||
//! Per-detector documentation lives on the [`taint`], [`cfg_analysis`],
|
||||
//! [`state`], [`patterns`], and [`auth_analysis`] modules. The primary
|
||||
//! library entry point for tests and embedders is [`scan_no_index`].
|
||||
#![doc = include_str!(concat!(env!("OUT_DIR"), "/lib_intro.md"))]
|
||||
//! [`state`], [`patterns`], and [`auth_analysis`] module pages.
|
||||
//!
|
||||
//! # Entry points
|
||||
//!
|
||||
//! [`scan_no_index`] runs a full two-pass scan over a directory tree and
|
||||
//! returns a flat list of [`commands::scan::Diag`] values. It does not
|
||||
//! touch a SQLite index; every file is analysed from disk on each call.
|
||||
//!
|
||||
//! ```no_run
|
||||
//! use nyx_scanner::{scan_no_index, utils::Config};
|
||||
//! use std::path::Path;
|
||||
//!
|
||||
//! let config = Config::default();
|
||||
//! let findings = scan_no_index(Path::new("/path/to/project"), &config).unwrap();
|
||||
//! for diag in &findings {
|
||||
//! println!("{} at {}:{}", diag.id, diag.path, diag.line);
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! For incremental rescanning backed by a SQLite index, use
|
||||
//! [`commands::scan::scan_with_index_parallel`] directly.
|
||||
//!
|
||||
//! # Key types
|
||||
//!
|
||||
//! | Type | Purpose |
|
||||
//! |------|---------|
|
||||
//! | [`utils::config::Config`] | Top-level scanner config (load from `nyx.conf` or construct in code) |
|
||||
//! | [`commands::scan::Diag`] | A single finding: location, severity, rule ID, structured evidence |
|
||||
//! | [`evidence::Evidence`] | Source/sink spans, flow steps, sanitizer annotations, engine notes |
|
||||
//! | [`evidence::Confidence`] | Low / Medium / High confidence tag |
|
||||
//! | [`labels::Cap`] | Bitflag capability set describing what a taint flow can reach |
|
||||
//! | [`symbol::Lang`] | Supported language enum |
|
||||
//! | [`symbol::FuncKey`] | Canonical cross-file function identity |
|
||||
//!
|
||||
//! # Reading findings
|
||||
//!
|
||||
//! Each [`commands::scan::Diag`] carries:
|
||||
//!
|
||||
//! - `path`, `line`, `col` — source location of the sink
|
||||
//! - `id` — rule identifier (e.g. `taint-unsanitised-flow`, `cfg-auth-gap`)
|
||||
//! - `severity` — Critical / High / Medium / Low / Info
|
||||
//! - `confidence` — Low / Medium / High; capped at Medium when an engine
|
||||
//! budget was hit
|
||||
//! - `rank_score` — deterministic attack-surface score for truncation ordering
|
||||
//! - `evidence` — optional [`evidence::Evidence`] with source/sink spans,
|
||||
//! flow steps, and [`engine_notes::EngineNote`] values describing precision loss
|
||||
//!
|
||||
//! Engine notes communicate when a bound was hit. A finding carrying
|
||||
//! `EngineNote::OriginsTruncated` or `EngineNote::SccBudgetExhausted` is
|
||||
//! still real, but the engine had less information than it would have had
|
||||
//! without the cap.
|
||||
//!
|
||||
//! # Module map
|
||||
//!
|
||||
//! | Module | Role |
|
||||
//! |--------|------|
|
||||
//! | [`ast`] | Tree-sitter parsing and two-pass analysis dispatch |
|
||||
//! | [`mod@cfg`] | CFG construction from ASTs |
|
||||
//! | [`ssa`] | SSA lowering and optimization passes |
|
||||
//! | [`taint`] | Forward SSA taint analysis |
|
||||
//! | [`cfg_analysis`] | Structural CFG checks (auth gaps, resource leaks, error paths) |
|
||||
//! | [`state`] | Resource lifecycle and state-machine analysis |
|
||||
//! | [`patterns`] | Pattern-based AST checks |
|
||||
//! | [`auth_analysis`] | Missing authorization / ownership checks |
|
||||
//! | [`callgraph`] | Whole-program call graph and SCC analysis |
|
||||
//! | [`summary`] | Per-function summaries for cross-file resolution |
|
||||
//! | [`labels`] | Source, sanitizer, and sink rule registries per language |
|
||||
//! | [`symex`] | Symbolic execution for witness generation and path feasibility |
|
||||
//! | [`abstract_interp`] | Interval and string bounds propagation for sink suppression |
|
||||
//! | [`constraint`] | Path constraint solving and infeasible-path pruning |
|
||||
//! | [`evidence`] | Finding provenance and confidence types |
|
||||
//! | [`suppress`] | Inline `nyx:ignore` directive handling |
|
||||
//! | [`output`] | JSON and SARIF serialization |
|
||||
//! | [`database`] | SQLite index pool and schema |
|
||||
//! | [`walk`] | Filesystem traversal with batched delivery |
|
||||
|
||||
pub mod abstract_interp;
|
||||
pub mod ast;
|
||||
|
|
@ -48,8 +126,19 @@ use errors::NyxResult;
|
|||
use std::path::Path;
|
||||
use utils::config::Config;
|
||||
|
||||
/// Run a two-pass scan without index (filesystem only).
|
||||
/// This is the primary entry point for integration tests.
|
||||
/// Run a two-pass scan over `root` without an incremental index.
|
||||
///
|
||||
/// Every file under `root` is analysed from disk on each call; no SQLite
|
||||
/// state is read or written. The walker respects `.gitignore` files when
|
||||
/// `cfg.scanner.read_vcsignore` is true (the default), skips hidden files
|
||||
/// and symlinks unless the config enables them, and excludes the directories
|
||||
/// and extensions listed in `cfg.scanner.excluded_*`.
|
||||
///
|
||||
/// Returns one [`commands::scan::Diag`] per finding. The list is unsorted;
|
||||
/// call [`rank::rank_diags`] if you need findings ordered by exploitability.
|
||||
///
|
||||
/// For indexed / incremental rescanning use
|
||||
/// [`commands::scan::scan_with_index_parallel`] instead.
|
||||
pub fn scan_no_index(root: &Path, cfg: &Config) -> NyxResult<Vec<commands::scan::Diag>> {
|
||||
commands::scan::scan_filesystem(root, cfg, false)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,12 @@
|
|||
//! Finding serialization and output routing.
|
||||
//!
|
||||
//! Serializes [`crate::commands::scan::Diag`] values to console, JSON, or
|
||||
//! SARIF based on the requested format. `PATTERN_DESCRIPTIONS` is a
|
||||
//! lazily-built map from pattern ID to human-readable description, populated
|
||||
//! from all language registries on first access. `sarif_base_id` normalizes
|
||||
//! source-location-suffixed finding IDs (like `"taint-unsanitised-flow (source 12:3)"`)
|
||||
//! to the canonical SARIF rule ID form.
|
||||
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::patterns::{self, Severity};
|
||||
use once_cell::sync::Lazy;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,52 @@
|
|||
#![doc = include_str!(concat!(env!("OUT_DIR"), "/patterns.md"))]
|
||||
//! AST pattern matching: tree-sitter queries over dangerous structural shapes.
|
||||
//!
|
||||
//! Patterns match constructs based on syntax alone, with no dataflow or CFG.
|
||||
//! A match means the construct is present; it is not proof that it is
|
||||
//! reachable or exploitable. Patterns run in every analysis mode and are the
|
||||
//! only active detector in `--mode ast`.
|
||||
//!
|
||||
//! # Rule ID format
|
||||
//!
|
||||
//! ```text
|
||||
//! <lang>.<category>.<name>
|
||||
//! ```
|
||||
//!
|
||||
//! Examples: `js.code_exec.eval`, `py.deser.pickle_loads`, `c.memory.gets`,
|
||||
//! `java.sqli.execute_concat`.
|
||||
//!
|
||||
//! # Tiers
|
||||
//!
|
||||
//! - **Tier A**: structural presence alone is high-signal. `gets`, `eval`,
|
||||
//! `pickle.loads`, `mem::transmute`. No guard needed.
|
||||
//! - **Tier B**: pattern includes a tree-sitter heuristic guard.
|
||||
//! `java.sqli.execute_concat` fires only when `executeQuery` receives a
|
||||
//! `binary_expression` (concatenation), not a literal or parameterized call.
|
||||
//!
|
||||
//! # Categories
|
||||
//!
|
||||
//! | Category | Examples |
|
||||
//! |----------|---------|
|
||||
//! | `CommandExec` | `system`, `os.system`, `Runtime.exec`, backticks |
|
||||
//! | `CodeExec` | `eval`, `Function`, PHP `assert("string")`, `class_eval` |
|
||||
//! | `Deserialization` | `pickle.loads`, `yaml.load`, `Marshal.load`, `readObject` |
|
||||
//! | `SqlInjection` | `executeQuery` with concatenated argument (Tier B) |
|
||||
//! | `PathTraversal` | PHP `include $var` |
|
||||
//! | `Xss` | `innerHTML`, `document.write`, `insertAdjacentHTML` |
|
||||
//! | `Crypto` | `md5`, `sha1`, `Math.random` for security use |
|
||||
//! | `Secrets` | Hardcoded API keys (Go, JS, TS) |
|
||||
//! | `InsecureTransport` | `InsecureSkipVerify`, `fetch("http://...")` |
|
||||
//! | `Reflection` | `Class.forName`, `Method.invoke`, `constantize` |
|
||||
//! | `MemorySafety` | `transmute`, `unsafe`, `gets`, `strcpy`, `sprintf` |
|
||||
//! | `Prototype` | `__proto__` assignment, `Object.prototype.*` |
|
||||
//! | `Config` | CORS dynamic origin, `rejectUnauthorized: false` |
|
||||
//! | `CodeQuality` | `unwrap`, `panic!`, `as any` |
|
||||
//!
|
||||
//! # Pattern loading
|
||||
//!
|
||||
//! Each language submodule exports a `patterns()` function returning
|
||||
//! `&'static [Pattern]`. [`load`] dispatches to the correct submodule by
|
||||
//! language slug. [`Pattern`] carries the rule ID, severity, confidence,
|
||||
//! category, and the tree-sitter query string.
|
||||
|
||||
pub mod c;
|
||||
pub mod cpp;
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ fn is_container_read_callee(callee: &str) -> bool {
|
|||
)
|
||||
}
|
||||
|
||||
/// Container-write callees, mirror of [`is_container_read_callee`].
|
||||
/// Container-write callees, mirror of `is_container_read_callee`.
|
||||
pub fn is_container_write_callee(callee: &str) -> bool {
|
||||
let bare = match callee.rsplit_once('.') {
|
||||
Some((_, m)) => m,
|
||||
|
|
@ -66,7 +66,7 @@ pub fn is_container_write_callee(callee: &str) -> bool {
|
|||
)
|
||||
}
|
||||
|
||||
/// Public re-export of [`is_container_read_callee`] for the taint engine.
|
||||
/// Public re-export of `is_container_read_callee` for the taint engine.
|
||||
pub fn is_container_read_callee_pub(callee: &str) -> bool {
|
||||
is_container_read_callee(callee)
|
||||
}
|
||||
|
|
@ -92,7 +92,7 @@ pub fn is_container_read_callee_pub(callee: &str) -> bool {
|
|||
///
|
||||
/// Receiver (`SelfParam`) reads/writes are recorded under the
|
||||
/// [`u32::MAX`] sentinel parameter index, mirroring the convention in
|
||||
/// [`crate::summary::ssa_summary::SsaFuncSummary::receiver_to_*`].
|
||||
/// the `SsaFuncSummary::receiver_to_*` fields.
|
||||
///
|
||||
/// The container-element sentinel field [`FieldId::ELEM`] is recorded
|
||||
/// under the special name `"<elem>"` so callers can recognise the
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@
|
|||
//! - PointsToSet is bounded to `analysis.engine.max_pointsto` entries
|
||||
//! (default 32, widening on overflow, see [`effective_max_pointsto`]).
|
||||
//! Overflow drops emit an [`crate::engine_notes::EngineNote::PointsToTruncated`]
|
||||
//! note and increment [`POINTSTO_TRUNCATION_COUNT`] so operators can
|
||||
//! note and increment `POINTSTO_TRUNCATION_COUNT` so operators can
|
||||
//! tell when the cap is firing on their corpus.
|
||||
//! - HeapState tracks per-(heap-object, slot) taint (monotone lattice)
|
||||
//! - HeapSlot::Index(u64) for constant-index container access (proven by const propagation)
|
||||
|
|
@ -168,7 +168,7 @@ impl PointsToSet {
|
|||
///
|
||||
/// Truncates to [`effective_max_pointsto`]; any heap-object member
|
||||
/// that would be admitted after the cap is reached is dropped and
|
||||
/// counted via [`record_pointsto_truncation`]. Truncation is
|
||||
/// counted via `record_pointsto_truncation`. Truncation is
|
||||
/// deterministic: the merge proceeds in sorted order, so survivors
|
||||
/// are always the smallest `HeapObjectId`s across the two inputs.
|
||||
pub fn union(&self, other: &Self) -> Self {
|
||||
|
|
@ -230,7 +230,7 @@ impl PointsToSet {
|
|||
///
|
||||
/// When the set is already at [`effective_max_pointsto`], the new id
|
||||
/// is dropped and the drop is counted via
|
||||
/// [`record_pointsto_truncation`].
|
||||
/// `record_pointsto_truncation`.
|
||||
pub fn insert(&mut self, id: HeapObjectId) {
|
||||
match self.ids.binary_search(&id) {
|
||||
Ok(_) => {} // already present
|
||||
|
|
|
|||
|
|
@ -1,3 +1,21 @@
|
|||
//! SSA IR, lowering, and optimization passes.
|
||||
//!
|
||||
//! The pipeline converts a CFG into a pruned SSA body consumed by the taint
|
||||
//! analysis engine. [`lower_to_ssa`] inserts phi nodes via Cytron's algorithm
|
||||
//! and renames variables along the dominator tree. [`optimize_ssa`] runs
|
||||
//! constant propagation, branch pruning, copy propagation, DCE, and type
|
||||
//! fact analysis in sequence.
|
||||
//!
|
||||
//! Key submodules:
|
||||
//! - [`ir`]: core types (`SsaValue`, `SsaOp`, `SsaInst`, `SsaBlock`, `SsaBody`)
|
||||
//! - [`lower`]: CFG-to-SSA lowering with Cytron phi insertion and dominator-tree rename
|
||||
//! - [`const_prop`]: sparse conditional constant propagation with branch pruning
|
||||
//! - [`copy_prop`]: copy and alias propagation
|
||||
//! - [`dce`]: dead definition elimination
|
||||
//! - [`type_facts`]: per-value type inference (`TypeKind`, `TypeFactResult`)
|
||||
//! - [`heap`]: abstract heap for container element abstractions
|
||||
//! - [`alias`]: base-variable alias groups from copy propagation
|
||||
|
||||
#[allow(dead_code)] // IR types, fields used by Display impl, tests, and downstream analyses
|
||||
pub mod alias;
|
||||
pub mod const_prop;
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@
|
|||
//!
|
||||
//! The analysis is **flow-insensitive** and **bounded**: it does not
|
||||
//! reason about path feasibility, and it stops adding edges once the
|
||||
//! summary's [`MAX_ALIAS_EDGES`] cap is reached, the overflow flag is
|
||||
//! summary's `MAX_ALIAS_EDGES` cap is reached; the overflow flag is
|
||||
//! the conservative fallback that callers honour.
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
|
@ -239,7 +239,7 @@ fn returns_fresh_allocation(
|
|||
/// `formal_param_count` bounds the parameter indices written to the
|
||||
/// summary: scoped lowering synthesises `Param` ops for module-level
|
||||
/// captures at indices beyond the formal arity, and those must not leak
|
||||
/// into the summary (they would trip [`crate::summary::ssa_summary_fits_arity`]).
|
||||
/// into the summary (they would trip `ssa_summary_fits_arity`).
|
||||
pub fn analyse_param_points_to(
|
||||
ssa: &SsaBody,
|
||||
param_info: &[(usize, String, SsaValue)],
|
||||
|
|
|
|||
|
|
@ -1,4 +1,55 @@
|
|||
#![doc = include_str!(concat!(env!("OUT_DIR"), "/state.md"))]
|
||||
//! State-model analysis: resource lifecycle and authentication state tracking.
|
||||
//!
|
||||
//! Runs a per-function state machine over the CFG to detect use-after-close,
|
||||
//! double-close, resource leaks, and unauthenticated access to privileged
|
||||
//! operations.
|
||||
//!
|
||||
//! Enabled by default. Disable via `scanner.enable_state_analysis = false`.
|
||||
//! Runs in `--mode full` and `--mode taint`; skipped in AST-only mode.
|
||||
//!
|
||||
//! # Rule IDs
|
||||
//!
|
||||
//! | Rule ID | Severity | What it detects |
|
||||
//! |---------|----------|-----------------|
|
||||
//! | `state-use-after-close` | High | Operation on a resource after it was closed |
|
||||
//! | `state-double-close` | Medium | Resource closed twice |
|
||||
//! | `state-resource-leak` | Medium | Resource opened and never closed on any path |
|
||||
//! | `state-resource-leak-possible` | Low | Resource closed on some paths but not others |
|
||||
//! | `state-unauthed-access` | High | Web handler reaches privileged sink without an auth call |
|
||||
//!
|
||||
//! # Managed-resource suppression
|
||||
//!
|
||||
//! Language-specific cleanup patterns suppress leak findings automatically:
|
||||
//!
|
||||
//! | Pattern | Languages |
|
||||
//! |---------|-----------|
|
||||
//! | RAII / Drop | Rust (all leak findings suppressed except `alloc`/`dealloc`) |
|
||||
//! | Smart pointers (`make_unique`, `make_shared`) | C++ |
|
||||
//! | `defer f.Close()` | Go |
|
||||
//! | `with open(f) as f:` | Python |
|
||||
//! | try-with-resources | Java |
|
||||
//!
|
||||
//! # Tracked acquire/release pairs
|
||||
//!
|
||||
//! C/C++: `fopen`/`fclose`, `open`/`close`, `socket`/`close`,
|
||||
//! `malloc`/`free`, `pthread_mutex_lock`/`pthread_mutex_unlock`,
|
||||
//! `new`/`delete`.
|
||||
//!
|
||||
//! Rust: `File::open`/`close`, `TcpStream::connect`/`shutdown`,
|
||||
//! mutex `lock`/`read`/`write`/`drop`.
|
||||
//!
|
||||
//! Java: stream/connection/socket constructors / `close`, `getConnection`/`close`.
|
||||
//!
|
||||
//! Go, Python, JavaScript, Ruby, PHP follow language-idiomatic equivalents.
|
||||
//!
|
||||
//! # Submodules
|
||||
//!
|
||||
//! - [`domain`]: state lattice (`ResourceState`, `AuthState`, `StateCell`)
|
||||
//! - [`engine`]: generic forward transfer engine (`Transfer` trait, `run_forward`)
|
||||
//! - [`facts`]: per-node state fact extraction
|
||||
//! - [`lattice`]: lattice join/meet for state values
|
||||
//! - [`symbol`]: resource symbol normalisation
|
||||
//! - [`transfer`]: `DefaultTransfer` — the concrete resource-lifecycle transfer function
|
||||
|
||||
pub mod domain;
|
||||
pub mod engine;
|
||||
|
|
|
|||
|
|
@ -1,3 +1,20 @@
|
|||
//! Per-function summaries for cross-file taint analysis.
|
||||
//!
|
||||
//! [`FuncSummary`] describes a function's boundary behaviour: which parameters
|
||||
//! flow to sinks, which sources it reads, whether it propagates taint from
|
||||
//! arguments to its return value, and what capabilities it strips. Summaries
|
||||
//! are serialized to SQLite in pass 1 and merged into [`GlobalSummaries`]
|
||||
//! before pass 2 begins.
|
||||
//!
|
||||
//! [`crate::summary::ssa_summary::SsaFuncSummary`] is a richer summary
|
||||
//! derived from the SSA taint engine and takes precedence over [`FuncSummary`]
|
||||
//! during call resolution. `GlobalSummaries::ssa_by_key` stores SSA summaries
|
||||
//! keyed by [`FuncKey`]; `GlobalSummaries::by_name` holds the fallback
|
||||
//! name-keyed map for cases where an exact key is not found.
|
||||
//!
|
||||
//! Same-name collisions across files are merged conservatively: capabilities
|
||||
//! are unioned and booleans are OR-ed so no true positive is silently dropped.
|
||||
|
||||
pub mod points_to;
|
||||
pub mod ssa_summary;
|
||||
|
||||
|
|
@ -669,7 +686,7 @@ impl GlobalSummaries {
|
|||
/// drop one of the two summaries entirely.
|
||||
///
|
||||
/// We therefore inspect the existing entry first. If the new summary
|
||||
/// is not [`summaries_compatible`] with it, we mint a synthetic
|
||||
/// is not `summaries_compatible` with it, we mint a synthetic
|
||||
/// disambig (top bit set to stay disjoint from byte-offset disambigs)
|
||||
/// and retry the insert under the fresh key so *both* functions are
|
||||
/// preserved.
|
||||
|
|
@ -1065,7 +1082,7 @@ impl GlobalSummaries {
|
|||
|
||||
/// Snapshot the SSA summaries for convergence detection.
|
||||
///
|
||||
/// Used alongside [`snapshot_caps`] in the SCC fixed-point loop so that
|
||||
/// Used alongside [`Self::snapshot_caps`] in the SCC fixed-point loop so that
|
||||
/// SSA-only refinements (e.g. a `StripBits` transform appearing after a
|
||||
/// cross-file sanitizer is resolved) are not invisible to convergence.
|
||||
pub fn snapshot_ssa(&self) -> &HashMap<FuncKey, SsaFuncSummary> {
|
||||
|
|
@ -1090,7 +1107,7 @@ impl GlobalSummaries {
|
|||
/// 2. Otherwise, for each wildcard prefix in scope, try
|
||||
/// `(wildcard_prefix, name)` in the module index. If across all
|
||||
/// wildcards exactly one arity-filtered candidate appears → resolved.
|
||||
/// 3. Otherwise fall through to [`resolve_callee_key_with_container`]
|
||||
/// 3. Otherwise fall through to [`Self::resolve_callee_key_with_container`]
|
||||
/// with no `container_hint`, meaning only the existing namespace /
|
||||
/// arity disambiguation applies.
|
||||
///
|
||||
|
|
@ -1168,9 +1185,9 @@ impl GlobalSummaries {
|
|||
|
||||
/// Resolve a bare (already-normalized) callee name to a [`FuncKey`].
|
||||
///
|
||||
/// Thin wrapper around [`resolve_callee`] that constructs a minimal
|
||||
/// Thin wrapper around [`Self::resolve_callee`] that constructs a minimal
|
||||
/// [`CalleeQuery`] with no qualified hints. Kept for call sites that
|
||||
/// only hold a string callee and an arity; prefer [`resolve_callee`]
|
||||
/// only hold a string callee and an arity; prefer [`Self::resolve_callee`]
|
||||
/// whenever receiver / qualifier / container information is available.
|
||||
pub fn resolve_callee_key(
|
||||
&self,
|
||||
|
|
@ -1197,7 +1214,7 @@ impl GlobalSummaries {
|
|||
/// unchanged. `container_hint` is interpreted as a syntactic
|
||||
/// container qualifier (not an authoritative receiver type), so a
|
||||
/// miss is allowed to fall through to leaf-name lookup. New
|
||||
/// callers should route through [`resolve_callee`] and classify
|
||||
/// callers should route through [`Self::resolve_callee`] and classify
|
||||
/// their hint as `receiver_type` vs `namespace_qualifier` vs
|
||||
/// `receiver_var` so the resolver can apply the correct policy.
|
||||
pub fn resolve_callee_key_with_container(
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@
|
|||
//! Mutation is observable to the caller through its argument for `j`.
|
||||
//! * `Source(Param(i)) → Target(Return)`, the return value aliases
|
||||
//! parameter `i`'s heap identity. Adds heap-level precision on top of
|
||||
//! the coarser [`TaintTransform::Identity`] view already carried in
|
||||
//! the coarser [`crate::summary::ssa_summary::TaintTransform::Identity`] view already carried in
|
||||
//! [`crate::summary::ssa_summary::SsaFuncSummary::param_to_return`].
|
||||
//!
|
||||
//! `MustAlias` is intentionally omitted, the ROI on
|
||||
|
|
@ -105,7 +105,7 @@ pub const MAX_ALIAS_EDGES: usize = 8;
|
|||
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct PointsToSummary {
|
||||
/// Bounded edge list, deduped by `(source, target, kind)`. The
|
||||
/// [`serde(default)`] attribute lets summaries pre-dating points-to
|
||||
/// `#[serde(default)]` attribute lets summaries pre-dating points-to
|
||||
/// tracking deserialise cleanly (no edges).
|
||||
#[serde(default, skip_serializing_if = "SmallVec::is_empty")]
|
||||
pub edges: SmallVec<[AliasEdge; 4]>,
|
||||
|
|
@ -193,7 +193,7 @@ impl PointsToSummary {
|
|||
}
|
||||
|
||||
/// Parameter indices referenced by any edge in this summary. Used by
|
||||
/// [`crate::summary::ssa_summary_fits_arity`] to confirm the summary
|
||||
/// `ssa_summary_fits_arity` to confirm the summary
|
||||
/// does not reference a parameter beyond the key's declared arity
|
||||
/// (which would indicate a synthetic-param mis-attribution in
|
||||
/// extraction).
|
||||
|
|
|
|||
|
|
@ -165,7 +165,7 @@ pub struct SsaFuncSummary {
|
|||
/// [`crate::cfg::CallMeta::gate_filters`] carries more than one entry
|
||||
/// (e.g. `fetch` is both an `SSRF` gate on the URL arg and a
|
||||
/// `DATA_EXFIL` gate on the body arg), the multi-gate dispatch in
|
||||
/// [`super::super::collect_block_events`] cap-narrows the event's
|
||||
/// `collect_block_events` cap-narrows the event's
|
||||
/// `sink_caps` to the specific gate's `label_caps`. Each
|
||||
/// `(param_idx, label_caps)` entry records that this function's
|
||||
/// parameter `param_idx` flowed into a gated sink whose narrowed
|
||||
|
|
@ -195,7 +195,7 @@ pub struct SsaFuncSummary {
|
|||
/// (e.g., function returns the same container it received as input).
|
||||
///
|
||||
/// Populated by
|
||||
/// [`crate::taint::ssa_transfer::summary_extract::extract_container_flow_summary`]
|
||||
/// `extract_container_flow_summary`
|
||||
/// and applied at cross-file call sites to propagate the caller's
|
||||
/// points-to set for that argument onto the call's return SSA value.
|
||||
#[serde(default)]
|
||||
|
|
@ -205,7 +205,7 @@ pub struct SsaFuncSummary {
|
|||
/// (e.g., `fn storeInto(value, arr) { arr.push(value); }` → `[(0, 1)]`).
|
||||
///
|
||||
/// Populated by
|
||||
/// [`crate::taint::ssa_transfer::summary_extract::extract_container_flow_summary`]
|
||||
/// `extract_container_flow_summary`
|
||||
/// and applied at cross-file call sites by writing the caller's taint on
|
||||
/// the `src_param` argument into the heap objects pointed to by the
|
||||
/// `container_param` argument.
|
||||
|
|
@ -254,7 +254,7 @@ pub struct SsaFuncSummary {
|
|||
/// Per-parameter return-path decomposition.
|
||||
///
|
||||
/// When non-empty, supplies finer-grained per-path data than
|
||||
/// [`Self::param_to_return`]. Each parameter maps to up to
|
||||
/// `param_to_return`. Each parameter maps to up to
|
||||
/// [`MAX_RETURN_PATHS`] [`ReturnPathTransform`] entries, one per
|
||||
/// distinct path-predicate gate. Callers consult their own predicate
|
||||
/// state at the call site and apply only entries whose predicate is
|
||||
|
|
@ -262,7 +262,7 @@ pub struct SsaFuncSummary {
|
|||
/// set into the effective call-site transform.
|
||||
///
|
||||
/// Empty when the callee has a single return path, the aggregate
|
||||
/// [`param_to_return`] is already precise, or when extraction
|
||||
/// `param_to_return` is already precise, or when extraction
|
||||
/// could not derive per-return state (e.g. early-exit probes).
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub param_return_paths: Vec<(usize, SmallVec<[ReturnPathTransform; 2]>)>,
|
||||
|
|
@ -338,7 +338,7 @@ pub struct SsaFuncSummary {
|
|||
/// control would not reach the post-call instruction.
|
||||
///
|
||||
/// Populated by
|
||||
/// [`crate::taint::ssa_transfer::summary_extract::extract_ssa_func_summary`]
|
||||
/// `extract_ssa_func_summary`
|
||||
/// when a per-parameter probe shows the parameter's `var_name` in
|
||||
/// `validated_must` at every return block of the helper. Empty
|
||||
/// (the default) for helpers that do not validate any parameter.
|
||||
|
|
|
|||
|
|
@ -1,3 +1,15 @@
|
|||
//! Core language and function identity types.
|
||||
//!
|
||||
//! [`Lang`] is the 10-language enum (Rust, C, C++, Java, Go, PHP, Python,
|
||||
//! Ruby, TypeScript, JavaScript). [`FuncKey`] is the canonical cross-file
|
||||
//! function identity: name, arity, language, container (class/struct/module),
|
||||
//! and an optional disambiguator for overloaded functions.
|
||||
//!
|
||||
//! [`FuncKey`] is the node type in the call graph and the lookup key in
|
||||
//! [`crate::summary::GlobalSummaries`]. [`FuncKind`] distinguishes constructors,
|
||||
//! methods, closures, and free functions so callers can apply language-specific
|
||||
//! resolution heuristics.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt;
|
||||
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ pub struct FieldAccessRecord {
|
|||
/// Bounded symbolic heap tracking field-level symbolic values and taint.
|
||||
///
|
||||
/// Cloned at fork points during multi-path exploration. Bounded
|
||||
/// by [`MAX_HEAP_ENTRIES`] total entries and [`MAX_FIELDS_PER_OBJECT`] per
|
||||
/// by `MAX_HEAP_ENTRIES` total entries and `MAX_FIELDS_PER_OBJECT` per
|
||||
/// object to prevent blowup on object-heavy code.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct SymbolicHeap {
|
||||
|
|
@ -126,8 +126,8 @@ impl SymbolicHeap {
|
|||
|
||||
/// Store a symbolic value into a heap field.
|
||||
///
|
||||
/// Bounded: silently drops the store if [`MAX_HEAP_ENTRIES`] or
|
||||
/// [`MAX_FIELDS_PER_OBJECT`] would be exceeded. `Index(*)` entries are
|
||||
/// Bounded: silently drops the store if `MAX_HEAP_ENTRIES` or
|
||||
/// `MAX_FIELDS_PER_OBJECT` would be exceeded. `Index(*)` entries are
|
||||
/// bounded by [`MAX_TRACKED_INDICES`] per object; overflow collapses all
|
||||
/// indexed entries into `Elements`.
|
||||
pub fn store(&mut self, key: HeapKey, value: SymbolicValue, tainted: bool) {
|
||||
|
|
|
|||
|
|
@ -149,7 +149,7 @@ pub struct BackwardsCtx<'a> {
|
|||
/// Language tag for source-kind heuristics (e.g. `os.getenv` hints).
|
||||
pub lang: Lang,
|
||||
/// Whole-program summaries: used to discover cross-file bodies and
|
||||
/// [`SsaFuncSummary`] metadata at call instructions.
|
||||
/// [`crate::summary::ssa_summary::SsaFuncSummary`] metadata at call instructions.
|
||||
pub global_summaries: Option<&'a GlobalSummaries>,
|
||||
/// Pre-lowered intra-file callee bodies keyed by [`FuncKey`]. Shared
|
||||
/// with the forward path so we do not lower functions twice.
|
||||
|
|
|
|||
|
|
@ -1,5 +1,78 @@
|
|||
//! Forward SSA taint analysis: the primary vulnerability detection engine.
|
||||
//!
|
||||
//! Tracks untrusted data from **sources** (where it enters the program) through
|
||||
//! assignments and calls to **sinks** (where it is used dangerously). A finding
|
||||
//! fires when the flow reaches a sink without passing a matching **sanitizer**.
|
||||
//!
|
||||
//! The engine is a monotone forward dataflow over a finite lattice with
|
||||
//! guaranteed termination. It is flow-sensitive within a function and
|
||||
//! interprocedural across files via persisted [`crate::summary::FuncSummary`]
|
||||
//! and [`crate::summary::ssa_summary::SsaFuncSummary`] values.
|
||||
//!
|
||||
//! # Rule ID
|
||||
//!
|
||||
//! ```text
|
||||
//! taint-unsanitised-flow (source <line>:<col>)
|
||||
//! taint-data-exfiltration (source <line>:<col>)
|
||||
//! ```
|
||||
//!
|
||||
//! The source location is part of the ID so sibling paths to the same sink
|
||||
//! get distinct IDs. Suppressions can target either the base ID or the full
|
||||
//! string.
|
||||
//!
|
||||
//! # Capabilities
|
||||
//!
|
||||
//! Sources, sanitizers, and sinks are linked by [`crate::labels::Cap`] bits.
|
||||
//! A sanitizer only clears the cap it declares; a sink only fires when the
|
||||
//! remaining taint still carries its required cap.
|
||||
//!
|
||||
//! | Cap | Typical source | Typical sanitizer | Typical sink |
|
||||
//! |-----|----------------|-------------------|--------------|
|
||||
//! | `env_var` | `env::var`, `getenv`, `process.env` | | |
|
||||
//! | `html_escape` | | `html.escape`, `DOMPurify.sanitize` | `innerHTML`, `document.write` |
|
||||
//! | `shell_escape` | | `shlex.quote`, `shell_escape::escape` | `system`, `Command::new` |
|
||||
//! | `url_encode` | | `encodeURIComponent` | HTTP client URL arg |
|
||||
//! | `file_io` | | `realpath`, `filepath.Clean` | `open`, `fs::read_to_string` |
|
||||
//! | `sql_query` | | parameterized query binders | `cursor.execute`, `db.query` |
|
||||
//! | `deserialize` | | | `pickle.loads`, `Marshal.load` |
|
||||
//! | `ssrf` | | URL-prefix locks | `fetch` URL arg, outbound HTTP |
|
||||
//! | `code_exec` | | | `eval`, `exec`, `system` |
|
||||
//! | `crypto` | | | weak-algorithm constructors |
|
||||
//! | `data_exfil` | cookies, headers, env, db rows (Sensitive tier) | | `fetch` body/json/headers |
|
||||
//!
|
||||
//! Sources typically carry `Cap::all()` so they match any sink.
|
||||
//!
|
||||
//! # Source sensitivity
|
||||
//!
|
||||
//! Each source carries a [`crate::labels::SourceKind`] and a derived tier:
|
||||
//!
|
||||
//! - `Plain` — direct attacker input (`UserInput`): request bodies, query
|
||||
//! strings, argv, stdin.
|
||||
//! - `Sensitive` — operator-bound state: cookies, headers, env, files, DB rows,
|
||||
//! caught exceptions.
|
||||
//!
|
||||
//! `Cap::DATA_EXFIL` only fires on `Sensitive`-tier sources. Plain user input
|
||||
//! flowing into an outbound request body is suppressed — the canonical false
|
||||
//! positive for API gateways that proxy `req.body`.
|
||||
//!
|
||||
//! # Confidence signals
|
||||
//!
|
||||
//! Higher confidence: source and sink both present in evidence, `source_kind:
|
||||
//! user_input`, `path_validated: false`, symbolic witness produced.
|
||||
//!
|
||||
//! Lower confidence: path-validated taint, source is a database read or
|
||||
//! internal file, engine note `ForwardBailed` / `PathWidened`.
|
||||
//!
|
||||
//! # Submodules
|
||||
//!
|
||||
//! - [`domain`]: taint lattice types (`VarTaint`, `TaintOrigin`, `SmallBitSet`,
|
||||
//! `PredicateSummary`)
|
||||
//! - [`ssa_transfer`]: SSA taint transfer functions and the forward worklist
|
||||
//! (`SsaTaintState`, `SsaTaintTransfer`, `run_ssa_taint`)
|
||||
//! - [`path_state`]: predicate classification for branch-sensitive propagation
|
||||
//! - [`backwards`]: demand-driven backwards walk from sinks (off by default)
|
||||
|
||||
#![allow(clippy::collapsible_if, clippy::too_many_arguments)]
|
||||
#![doc = include_str!(concat!(env!("OUT_DIR"), "/taint.md"))]
|
||||
|
||||
pub mod backwards;
|
||||
pub mod domain;
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ pub enum PredicateKind {
|
|||
ShellMetaValidated,
|
||||
/// Bounded-length rejection: `x.len() > N` / `x.length < N` with N >= 2.
|
||||
///
|
||||
/// Commonly paired with [`ShellMetaValidated`] in OR-chain rejection
|
||||
/// Commonly paired with `ShellMetaValidated` in OR-chain rejection
|
||||
/// idioms (`if x.len() > MAX || x.contains(";") { reject }`). Counts as
|
||||
/// a dominator guard for `cfg-unguarded-sink` purposes, but intentionally
|
||||
/// does **not** mark variables as validated; the rejection direction is
|
||||
|
|
|
|||
|
|
@ -71,14 +71,14 @@ pub struct SsaTaintTransfer<'a> {
|
|||
/// The [`BodyId`] of the body currently being analysed. Used as the
|
||||
/// owning scope when writing seed entries that leave this body
|
||||
/// (e.g. [`extract_ssa_exit_state`]) and as the identity recorded on
|
||||
/// engine notes. Defaults to [`BodyId(0)`] (top-level) for inline
|
||||
/// engine notes. Defaults to `BodyId(0)` (top-level) for inline
|
||||
/// probes and unit tests that analyse a single synthetic body.
|
||||
pub owner_body_id: BodyId,
|
||||
/// The [`BodyId`] of this body's lexical parent, if any. Drives the
|
||||
/// `Param`-op reader's lookup into [`Self::global_seed`]: we read
|
||||
/// from the parent's scope first (the seed entries produced by
|
||||
/// [`extract_ssa_exit_state`] on the parent body), then fall back to
|
||||
/// [`BodyId(0)`] to pick up JS/TS two-level re-keyed entries (see
|
||||
/// `BodyId(0)` to pick up JS/TS two-level re-keyed entries (see
|
||||
/// [`filter_seed_to_toplevel`]). `None` for the top-level body and
|
||||
/// for probes with no surrounding scope.
|
||||
pub parent_body_id: Option<BodyId>,
|
||||
|
|
@ -176,7 +176,7 @@ pub struct SsaTaintTransfer<'a> {
|
|||
/// to detect handler-style flows that have no registered caller.
|
||||
pub auto_seed_handler_params: bool,
|
||||
/// Cross-file callee bodies sourced from
|
||||
/// [`GlobalSummaries::bodies_iter`]. Populated in pass 2 to enable
|
||||
/// [`GlobalSummaries`]. Populated in pass 2 to enable
|
||||
/// context-sensitive inline re-analysis across file boundaries the
|
||||
/// same way `callee_bodies` enables it intra-file. `None` preserves
|
||||
/// non-cross-file behaviour for unit tests and non-cross-file
|
||||
|
|
|
|||
|
|
@ -366,7 +366,7 @@ pub struct SsaTaintState {
|
|||
/// = false`).
|
||||
pub abstract_state: Option<AbstractState>,
|
||||
/// Per-heap-field taint cells, keyed by
|
||||
/// `(parent_loc, field)`. Sorted by [`FieldTaintKey`] for O(n)
|
||||
/// `(parent_loc, field)`. Sorted by `FieldTaintKey` for O(n)
|
||||
/// merge-join. Populated only when the body's
|
||||
/// [`crate::pointer::PointsToFacts`] is available
|
||||
/// (`NYX_POINTER_ANALYSIS=1`); empty otherwise so the lattice join
|
||||
|
|
@ -375,7 +375,7 @@ pub struct SsaTaintState {
|
|||
/// them. Cross-call propagation lands during lowering via the
|
||||
/// field-granularity `PointsToSummary`.
|
||||
///
|
||||
/// Cell shape: [`FieldCell`] carries `taint` plus
|
||||
/// Cell shape: `FieldCell` carries `taint` plus
|
||||
/// `validated_must` / `validated_may` flags so validation flows
|
||||
/// through abstract field / element identity.
|
||||
pub field_taint: SmallVec<[(FieldTaintKey, FieldCell); 4]>,
|
||||
|
|
@ -405,7 +405,7 @@ impl SsaTaintState {
|
|||
|
||||
/// Read the field cell at `key`. Returns `None`
|
||||
/// when no cell has been recorded (caller should treat as
|
||||
/// untainted). O(log n) on the sorted [`field_taint`] list.
|
||||
/// untainted). O(log n) on the sorted `field_taint` list.
|
||||
pub fn get_field(&self, key: FieldTaintKey) -> Option<&FieldCell> {
|
||||
self.field_taint
|
||||
.binary_search_by_key(&key, |(k, _)| *k)
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ const MAX_PROBE_PARAMS: usize = 8;
|
|||
|
||||
/// Extract a precise per-parameter `SsaFuncSummary` from an already-lowered SSA body.
|
||||
///
|
||||
/// For each parameter (up to [`MAX_PROBE_PARAMS`]), runs a taint probe by seeding
|
||||
/// For each parameter (up to `MAX_PROBE_PARAMS`), runs a taint probe by seeding
|
||||
/// that parameter with `Cap::all()` via `global_seed` and observing what caps
|
||||
/// survive to return positions and which sinks fire. A final probe with no params
|
||||
/// tainted detects intrinsic source caps.
|
||||
|
|
|
|||
|
|
@ -713,6 +713,22 @@ fn builtin_profile(name: &str) -> Option<ScanProfile> {
|
|||
})
|
||||
}
|
||||
|
||||
/// Top-level scanner configuration.
|
||||
///
|
||||
/// Loaded from `nyx.conf` (TOML) via [`Config::load`], or constructed in
|
||||
/// code for embedded use. [`Config::default`] gives conservative defaults:
|
||||
/// no symlink following, no hidden files, gitignore respected, 10 s parse
|
||||
/// timeout, all analysis passes on.
|
||||
///
|
||||
/// Config sections mirror `nyx.conf` sections:
|
||||
/// - [`scanner`](Config::scanner): what files to scan, which analysis passes
|
||||
/// to enable, severity floor
|
||||
/// - [`output`](Config::output): format, ranking, LOW-finding budgets
|
||||
/// - [`analysis`](Config::analysis): per-language rules, engine-pass toggles
|
||||
/// - [`performance`](Config::performance): thread count, depth limit, batch
|
||||
/// size
|
||||
/// - [`database`](Config::database): incremental index settings
|
||||
/// - [`detectors`](Config::detectors): per-detector sensitivity knobs
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(default)]
|
||||
#[derive(Default)]
|
||||
|
|
|
|||
|
|
@ -1,3 +1,16 @@
|
|||
//! Shared utilities and configuration.
|
||||
//!
|
||||
//! Re-exports [`Config`], [`AnalysisOptions`], and [`DetectorOptions`] from
|
||||
//! their submodules. [`Config`] is loaded from `nyx.conf` and passed through
|
||||
//! the top-level call stack. [`AnalysisOptions`] is installed once per process
|
||||
//! via an `OnceLock` and read back via [`analysis_options::current`] from deep
|
||||
//! inside the analysis pipeline without threading it through every call frame.
|
||||
//!
|
||||
//! Other submodules: `path` (root-relative path utilities and traversal guards),
|
||||
//! `project` (framework detection, project metadata), `query_cache` (cached
|
||||
//! tree-sitter query compilation), `snippet` (source snippet extraction for
|
||||
//! finding locations).
|
||||
|
||||
pub mod analysis_options;
|
||||
pub mod config;
|
||||
pub mod detector_options;
|
||||
|
|
|
|||
10
src/walk.rs
10
src/walk.rs
|
|
@ -1,3 +1,13 @@
|
|||
//! Filesystem walker with batched path delivery.
|
||||
//!
|
||||
//! Builds an [`ignore`]-crate [`WalkBuilder`] from the config (respecting
|
||||
//! `.gitignore`, excluded directories, and excluded extensions), then delivers
|
||||
//! discovered paths to the analysis pipeline in batches over a crossbeam channel.
|
||||
//! Batching amortizes channel overhead for large trees.
|
||||
//!
|
||||
//! All paths are checked via [`crate::utils::path::path_stays_within_root`]
|
||||
//! before entering a batch, preventing traversal outside the scan root.
|
||||
|
||||
use crate::utils::Config;
|
||||
use crate::utils::path::path_stays_within_root;
|
||||
use crossbeam_channel::{Receiver, Sender, bounded};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue