docs: Enhance module documentation across various files for clarity and completeness (#62)

* docs: Enhance module documentation across various files for clarity and completeness

* fix: Remove unnecessary blank line in build.rs for cleaner code

* docs: Update documentation to improve clarity and consistency in code comments
This commit is contained in:
Eli Peter 2026-05-02 17:46:45 -04:00 committed by GitHub
parent 40995e45e7
commit 1f2bfe76c1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
44 changed files with 721 additions and 366 deletions

View file

@ -1,3 +1,20 @@
//! Per-function summaries for cross-file taint analysis.
//!
//! [`FuncSummary`] describes a function's boundary behaviour: which parameters
//! flow to sinks, which sources it reads, whether it propagates taint from
//! arguments to its return value, and what capabilities it strips. Summaries
//! are serialized to SQLite in pass 1 and merged into [`GlobalSummaries`]
//! before pass 2 begins.
//!
//! [`crate::summary::ssa_summary::SsaFuncSummary`] is a richer summary
//! derived from the SSA taint engine and takes precedence over [`FuncSummary`]
//! during call resolution. `GlobalSummaries::ssa_by_key` stores SSA summaries
//! keyed by [`FuncKey`]; `GlobalSummaries::by_name` holds the fallback
//! name-keyed map for cases where an exact key is not found.
//!
//! Same-name collisions across files are merged conservatively: capabilities
//! are unioned and booleans are OR-ed so no true positive is silently dropped.
pub mod points_to;
pub mod ssa_summary;
@ -669,7 +686,7 @@ impl GlobalSummaries {
/// drop one of the two summaries entirely.
///
/// We therefore inspect the existing entry first. If the new summary
/// is not [`summaries_compatible`] with it, we mint a synthetic
/// is not `summaries_compatible` with it, we mint a synthetic
/// disambig (top bit set to stay disjoint from byte-offset disambigs)
/// and retry the insert under the fresh key so *both* functions are
/// preserved.
@ -1065,7 +1082,7 @@ impl GlobalSummaries {
/// Snapshot the SSA summaries for convergence detection.
///
/// Used alongside [`snapshot_caps`] in the SCC fixed-point loop so that
/// Used alongside [`Self::snapshot_caps`] in the SCC fixed-point loop so that
/// SSA-only refinements (e.g. a `StripBits` transform appearing after a
/// cross-file sanitizer is resolved) are not invisible to convergence.
pub fn snapshot_ssa(&self) -> &HashMap<FuncKey, SsaFuncSummary> {
@ -1090,7 +1107,7 @@ impl GlobalSummaries {
/// 2. Otherwise, for each wildcard prefix in scope, try
/// `(wildcard_prefix, name)` in the module index. If across all
/// wildcards exactly one arity-filtered candidate appears → resolved.
/// 3. Otherwise fall through to [`resolve_callee_key_with_container`]
/// 3. Otherwise fall through to [`Self::resolve_callee_key_with_container`]
/// with no `container_hint`, meaning only the existing namespace /
/// arity disambiguation applies.
///
@ -1168,9 +1185,9 @@ impl GlobalSummaries {
/// Resolve a bare (already-normalized) callee name to a [`FuncKey`].
///
/// Thin wrapper around [`resolve_callee`] that constructs a minimal
/// Thin wrapper around [`Self::resolve_callee`] that constructs a minimal
/// [`CalleeQuery`] with no qualified hints. Kept for call sites that
/// only hold a string callee and an arity; prefer [`resolve_callee`]
/// only hold a string callee and an arity; prefer [`Self::resolve_callee`]
/// whenever receiver / qualifier / container information is available.
pub fn resolve_callee_key(
&self,
@ -1197,7 +1214,7 @@ impl GlobalSummaries {
/// unchanged. `container_hint` is interpreted as a syntactic
/// container qualifier (not an authoritative receiver type), so a
/// miss is allowed to fall through to leaf-name lookup. New
/// callers should route through [`resolve_callee`] and classify
/// callers should route through [`Self::resolve_callee`] and classify
/// their hint as `receiver_type` vs `namespace_qualifier` vs
/// `receiver_var` so the resolver can apply the correct policy.
pub fn resolve_callee_key_with_container(

View file

@ -22,7 +22,7 @@
//! Mutation is observable to the caller through its argument for `j`.
//! * `Source(Param(i)) → Target(Return)`, the return value aliases
//! parameter `i`'s heap identity. Adds heap-level precision on top of
//! the coarser [`TaintTransform::Identity`] view already carried in
//! the coarser [`crate::summary::ssa_summary::TaintTransform::Identity`] view already carried in
//! [`crate::summary::ssa_summary::SsaFuncSummary::param_to_return`].
//!
//! `MustAlias` is intentionally omitted: the ROI on
@ -105,7 +105,7 @@ pub const MAX_ALIAS_EDGES: usize = 8;
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct PointsToSummary {
/// Bounded edge list, deduped by `(source, target, kind)`. The
/// [`serde(default)`] attribute lets summaries pre-dating points-to
/// `#[serde(default)]` attribute lets summaries pre-dating points-to
/// tracking deserialise cleanly (no edges).
#[serde(default, skip_serializing_if = "SmallVec::is_empty")]
pub edges: SmallVec<[AliasEdge; 4]>,
@ -193,7 +193,7 @@ impl PointsToSummary {
}
/// Parameter indices referenced by any edge in this summary. Used by
/// [`crate::summary::ssa_summary_fits_arity`] to confirm the summary
/// `ssa_summary_fits_arity` to confirm the summary
/// does not reference a parameter beyond the key's declared arity
/// (which would indicate a synthetic-param mis-attribution in
/// extraction).

View file

@ -165,7 +165,7 @@ pub struct SsaFuncSummary {
/// [`crate::cfg::CallMeta::gate_filters`] carries more than one entry
/// (e.g. `fetch` is both an `SSRF` gate on the URL arg and a
/// `DATA_EXFIL` gate on the body arg), the multi-gate dispatch in
/// [`super::super::collect_block_events`] cap-narrows the event's
/// `collect_block_events` cap-narrows the event's
/// `sink_caps` to the specific gate's `label_caps`. Each
/// `(param_idx, label_caps)` entry records that this function's
/// parameter `param_idx` flowed into a gated sink whose narrowed
@ -195,7 +195,7 @@ pub struct SsaFuncSummary {
/// (e.g., function returns the same container it received as input).
///
/// Populated by
/// [`crate::taint::ssa_transfer::summary_extract::extract_container_flow_summary`]
/// `extract_container_flow_summary`
/// and applied at cross-file call sites to propagate the caller's
/// points-to set for that argument onto the call's return SSA value.
#[serde(default)]
@ -205,7 +205,7 @@ pub struct SsaFuncSummary {
/// (e.g., `fn storeInto(value, arr) { arr.push(value); }` → `[(0, 1)]`).
///
/// Populated by
/// [`crate::taint::ssa_transfer::summary_extract::extract_container_flow_summary`]
/// `extract_container_flow_summary`
/// and applied at cross-file call sites by writing the caller's taint on
/// the `src_param` argument into the heap objects pointed to by the
/// `container_param` argument.
@ -254,7 +254,7 @@ pub struct SsaFuncSummary {
/// Per-parameter return-path decomposition.
///
/// When non-empty, supplies finer-grained per-path data than
/// [`Self::param_to_return`]. Each parameter maps to up to
/// `param_to_return`. Each parameter maps to up to
/// [`MAX_RETURN_PATHS`] [`ReturnPathTransform`] entries, one per
/// distinct path-predicate gate. Callers consult their own predicate
/// state at the call site and apply only entries whose predicate is
@ -262,7 +262,7 @@ pub struct SsaFuncSummary {
/// set into the effective call-site transform.
///
/// Empty when the callee has a single return path, when the aggregate
/// [`param_to_return`] is already precise, or when extraction
/// `param_to_return` is already precise, or when extraction
/// could not derive per-return state (e.g. early-exit probes).
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub param_return_paths: Vec<(usize, SmallVec<[ReturnPathTransform; 2]>)>,
@ -338,7 +338,7 @@ pub struct SsaFuncSummary {
/// control would not reach the post-call instruction.
///
/// Populated by
/// [`crate::taint::ssa_transfer::summary_extract::extract_ssa_func_summary`]
/// `extract_ssa_func_summary`
/// when a per-parameter probe shows the parameter's `var_name` in
/// `validated_must` at every return block of the helper. Empty
/// (the default) for helpers that do not validate any parameter.