Python fp and docs updates (#58)

* refactor: Update comments for clarity and add expectations.json files for performance metrics

* feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks

* feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks

* refactor: Simplify code formatting for better readability in multiple files

* refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration

* feat: Update Java and Python patterns to include new security rules

* refactor: Improve comment clarity and consistency across multiple Rust files

* refactor: Simplify code formatting for improved readability in integration tests and module files

* refactor: Improve comment formatting and enhance clarity in assertions across multiple files
This commit is contained in:
Eli Peter 2026-04-29 19:53:34 -04:00 committed by GitHub
parent 4db0805de6
commit a438886217
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
291 changed files with 9485 additions and 3851 deletions

View file

@ -13,19 +13,16 @@ use petgraph::graph::NodeIndex;
/// callee isn't a clean dotted member chain (parens, brackets, `::`,
/// arrow operators, whitespace, or other complex tokens disqualify it).
///
/// Phase 3 of the field-projections rollout: this is the textual mirror
/// of `try_lower_field_proj_chain` in `src/ssa/lower.rs`. The state
/// engine doesn't yet read SSA bodies (would require threading SSA
/// through the lattice run), so the same parse rules are duplicated
/// here. Both helpers share the contract: a success here implies a
/// FieldProj chain at SSA level (or a direct receiver for the 1-dot
/// case).
/// Textual mirror of `try_lower_field_proj_chain` in
/// `src/ssa/lower.rs`. The state engine doesn't read SSA bodies, so
/// the parse rules are duplicated. A success here implies a FieldProj
/// chain at SSA level (or a direct receiver for the 1-dot case).
///
/// **Returns** `Some(("c", "Close"))` for `"c.Close"` (1 dot the
/// **Returns** `Some(("c", "Close"))` for `"c.Close"` (1 dot, the
/// receiver is a bare ident); `Some(("c.mu", "Lock"))` for
/// `"c.mu.Lock"` (2 dots receiver is a 1-element chain);
/// `"c.mu.Lock"` (2 dots, receiver is a 1-element chain);
/// `Some(("c.writer.header", "set"))` for `"c.writer.header.set"`
/// (3 dots receiver is a 2-element chain). Returns `None` for any
/// (3 dots, receiver is a 2-element chain). Returns `None` for any
/// callee shape we can't safely decompose textually.
fn try_chain_decompose(callee: &str) -> Option<(&str, &str)> {
for ch in callee.chars() {
@ -42,7 +39,7 @@ fn try_chain_decompose(callee: &str) -> Option<(&str, &str)> {
return None;
}
// Reject if any segment in the receiver is empty (leading dot,
// double dots) same discipline as the SSA-side helper.
// double dots), same discipline as the SSA-side helper.
if receiver_text.split('.').any(str::is_empty) {
return None;
}
@ -50,7 +47,7 @@ fn try_chain_decompose(callee: &str) -> Option<(&str, &str)> {
}
/// Events emitted during transfer for illegal state transitions.
/// These are NOT lattice values they become findings in `facts.rs`.
/// These are NOT lattice values; they become findings in `facts.rs`.
#[derive(Debug, Clone)]
pub struct TransferEvent {
pub kind: TransferEventKind,
@ -159,7 +156,7 @@ pub struct ResourceMethodSummary {
pub method_name: String,
/// Whether this method acquires or releases a resource.
pub effect: ResourceEffect,
/// `parent_body_id` of the declaring method groups methods by class.
/// `parent_body_id` of the declaring method, groups methods by class.
pub class_group: crate::cfg::BodyId,
/// Span of the actual resource operation (e.g., fs.openSync at line 7).
pub original_span: (usize, usize),
@ -171,7 +168,7 @@ pub struct DefaultTransfer<'a> {
pub interner: &'a SymbolInterner,
/// Resource method summaries for cross-body proxy resolution.
pub resource_method_summaries: &'a [ResourceMethodSummary],
/// Optional per-body field-only points-to hints names that resolve
/// Optional per-body field-only points-to hints, names that resolve
/// to a value whose entire abstract heap identity is one or more
/// [`crate::pointer::AbsLoc::Field`] locations (e.g. `m := c.mu`).
///
@ -225,21 +222,12 @@ impl DefaultTransfer<'_> {
.get_scoped(info.ast.enclosing_func.as_deref(), name)
}
/// Pointer-Phase 2 hook. Returns `true` when the call has been
/// fully handled as a field-aliased receiver proxy and the rest of
/// `apply_call` should bail.
///
/// Activates only on single-dot calls (`<recv>.<method>`) whose
/// receiver name is recorded with [`crate::pointer::PtrProxyHint::FieldOnly`]
/// in the per-body hint map AND for which a matching
/// [`ResourceMethodSummary`] exists. The acquire/release effect
/// is recorded against `state.chain_proxies` keyed by the receiver
/// name — chain_proxies is a tracking-only lattice today, so leak
/// detection (which only inspects `state.resource`) is suppressed
/// for the alias. Strict-additive: when no hint map is supplied,
/// when the receiver isn't `FieldOnly`, or when no method summary
/// matches, the function returns `false` and the legacy branches
/// run unchanged.
/// Returns `true` when the call was fully handled as a
/// field-aliased receiver proxy and the rest of `apply_call`
/// should bail. Activates on single-dot calls whose receiver is
/// `FieldOnly` in the hint map and that match a
/// [`ResourceMethodSummary`]. The acquire/release effect is
/// recorded against `state.chain_proxies` keyed by receiver name.
fn try_apply_field_alias_proxy(
&self,
info: &NodeInfo,
@ -308,13 +296,13 @@ impl DefaultTransfer<'_> {
None => return,
};
// ── Pointer-Phase 2: field-aliased receiver fast-path ───────────
// ── field-aliased receiver fast-path ───────────
// When the receiver name resolves through points-to to a value
// whose abstract heap identity is purely `Field(_, _)` (e.g.
// `m := c.mu` followed by `m.Lock()`), the receiver is a
// sub-object alias rather than a standalone resource handle.
// Routing the entire call into `chain_proxies` here *before*
// the SymbolId-based direct-acquire/release/proxy branches
// Routing the entire call into `chain_proxies` here, *before*
// the SymbolId-based direct-acquire/release/proxy branches,
// suppresses the FP class where the local `m` would otherwise
// be flagged as a leakable resource at function exit.
//
@ -385,16 +373,16 @@ impl DefaultTransfer<'_> {
// When no direct resource pair matched, check if the callee is a
// method wrapper for a known resource operation.
//
// Phase 3 (field-projections rollout, 2026-04-25): the previous
// The previous
// single-dot band-aid (`callee.matches('.').count() == 1 &&
// !callee.contains('(')`) silently dropped chained receivers
// because the original textual extractor took the chain root as
// receiver collapsing `c.writer.header().set` to `c` and
// receiver, collapsing `c.writer.header().set` to `c` and
// marking `c` as proxy-acquired (the gin/context.go FP class).
//
// The band-aid is now deleted. Chained-receiver method calls
// are routed to a *separate* state map (`chain_proxies`) keyed by
// the joined receiver chain text so `c.mu.Lock()` acquires
// the joined receiver chain text, so `c.mu.Lock()` acquires
// `c.mu` (a chain-receiver entity), not `c`. The chain receiver
// is independent of the chain root: leaks/double-closes are
// tracked per chain, never propagated up to the root.
@ -443,7 +431,7 @@ impl DefaultTransfer<'_> {
} else if !direct_acquire && !direct_release {
// Single-dot receiver (`<recv>.<method>`): existing
// SymbolId-based path. Gated on direct_acquire/release
// because it shares state with the direct paths above
// because it shares state with the direct paths above;
// running both would double-transition. Honour the
// explicit `info.call.receiver` when it's the same bare
// ident, otherwise fall back to the parsed receiver text.
@ -544,7 +532,7 @@ impl DefaultTransfer<'_> {
}
fn apply_if(&self, info: &NodeInfo, edge: Option<EdgeKind>, state: &mut ProductState) {
// Determine the "positive edge" the edge where the underlying
// Determine the "positive edge", the edge where the underlying
// (de-negated) condition evaluates to true.
//
// For `if (is_authenticated(req))`: positive = True edge
@ -558,8 +546,8 @@ impl DefaultTransfer<'_> {
// Resource null-check: `if (f)` or `if (!f)` where f is a tracked
// resource currently in OPEN state. The "var is falsy" edge means
// the acquisition returned null/zero no resource was actually
// produced so subsequent close requirements do not apply on that
// the acquisition returned null/zero, no resource was actually
// produced, so subsequent close requirements do not apply on that
// path. Clearing OPEN suppresses the spurious may-leak finding for
// the canonical NULL-safe close idiom in C / C++ / similar:
//
@ -572,7 +560,7 @@ impl DefaultTransfer<'_> {
//
// Heuristic conditions:
// * condition is a single-variable truth check (no comparisons,
// no calls `condition_vars.len() == 1` and the trimmed text
// no calls, `condition_vars.len() == 1` and the trimmed text
// equals that variable name).
// * the var has OPEN in its lifecycle bitset.
// * the edge represents "var is falsy" (= !is_positive_edge).
@ -595,7 +583,7 @@ impl DefaultTransfer<'_> {
if let Some(ref cond) = info.condition_text {
let cond_lower = cond.to_ascii_lowercase();
// Strip leading negation operator for pattern matching
// Strip leading negation operator for pattern matching;
// the edge selection above already encodes the semantics.
let cond_inner = if info.condition_negated {
cond_lower.trim_start_matches('!').trim_start()
@ -691,7 +679,7 @@ fn is_guard_like(callee: &str) -> bool {
}
/// True iff the condition is a single-variable truth check (no comparison,
/// no method call, no boolean composition) the bare `if (f)` or `if (!f)`
/// no method call, no boolean composition), the bare `if (f)` or `if (!f)`
/// shape used as a NULL-safe gate around resource access.
///
/// Conservative: requires `condition_vars` to have exactly one entry, and
@ -1093,7 +1081,7 @@ mod tests {
let mut state = ProductState::initial();
state.resource.set(sym_f, ResourceLifecycle::OPEN);
// `if (!f)` condition_negated=true, true-edge means f is null
// `if (!f)`, condition_negated=true, true-edge means f is null
let info = NodeInfo {
kind: StmtKind::If,
condition_text: Some("!f".into()),
@ -1232,7 +1220,7 @@ mod tests {
#[test]
fn auth_token_underscore_camel_boundary_cases() {
// Underscore-joined identifiers are single tokens must not match interior.
// Underscore-joined identifiers are single tokens, must not match interior.
assert!(!condition_contains_auth_token(
"req.user_is_authenticated_flag",
"is_authenticated"
@ -1259,12 +1247,12 @@ mod tests {
"xmiddleware.auth()",
"middleware.auth"
));
// Right boundary violation "middleware.authz" extends past "middleware.auth".
// Right boundary violation, "middleware.authz" extends past "middleware.auth".
assert!(!condition_contains_auth_token(
"middleware.authz()",
"middleware.auth"
));
// "middleware.auth.check" matcher ends at '.', which is non-ident → matches.
// "middleware.auth.check", matcher ends at '.', which is non-ident → matches.
assert!(condition_contains_auth_token(
"middleware.auth.check()",
"middleware.auth"
@ -1332,7 +1320,7 @@ mod tests {
#[test]
fn auth_token_boolean_composition() {
// Compound conditions each token should be individually matchable.
// Compound conditions, each token should be individually matchable.
assert!(condition_contains_auth_token(
"is_authenticated && is_admin",
"is_authenticated"
@ -1352,7 +1340,7 @@ mod tests {
}
// ─────────────────────────────────────────────────────────────────
// Phase 3: chain-receiver decomposition + chain_proxies tracking
// chain-receiver decomposition + chain_proxies tracking
// ─────────────────────────────────────────────────────────────────
//
// These tests pin the contract that:
@ -1360,12 +1348,12 @@ mod tests {
// method, bailing on complex tokens.
// 2. The proxy-method routing in `apply_call` records chained
// receivers in `state.chain_proxies` (keyed by joined chain
// text) independent from the chain root's `SymbolId`-based
// text), independent from the chain root's `SymbolId`-based
// `state.receiver_class_group` entries.
// 3. Single-dot callees still flow through the existing SymbolId
// path (regression guard).
// 4. The deleted single-dot band-aid no longer suppresses chain
// cases `c.mu.Lock()` now fires the chain-proxies path
// cases, `c.mu.Lock()` now fires the chain-proxies path
// instead of being silently dropped.
#[test]
@ -1407,7 +1395,7 @@ mod tests {
// the simple `<ident>.<ident>...` shape; helper must bail to
// preserve the conservative behaviour the band-aid established.
for s in [
"Foo::bar::baz", // Rust path `::` rules it out
"Foo::bar::baz", // Rust path, `::` rules it out
"ptr->field.f", // C arrow operator
"obj.f().g", // intermediate call
"vec[0].field", // index expression
@ -1431,7 +1419,7 @@ mod tests {
#[test]
fn chain_proxy_acquire_records_chain_text_not_root() {
// Phase 3 key behaviour: a chained-receiver acquire (`c.mu.Lock()`)
// Key behaviour: a chained-receiver acquire (`c.mu.Lock()`)
// records `c.mu` in `state.chain_proxies` and DOES NOT touch the
// SymbolId-keyed `receiver_class_group` for the chain root `c`.
let mut interner = SymbolInterner::new();
@ -1481,7 +1469,7 @@ mod tests {
assert_eq!(entry.class_group, crate::cfg::BodyId(7));
assert_eq!(entry.acquire_span, (10, 20));
// Root `c` is NOT marked in receiver_class_group the gin/context FP
// Root `c` is NOT marked in receiver_class_group, the gin/context FP
// the band-aid was guarding against can no longer reappear.
assert!(
state.receiver_class_group.is_empty(),
@ -1564,7 +1552,7 @@ mod tests {
#[test]
fn chain_proxy_distinct_chains_dont_collide() {
// `c.mu.Lock()` and `c.other.Lock()` are independent chain
// receivers each gets its own entry in chain_proxies.
// receivers, each gets its own entry in chain_proxies.
let interner = SymbolInterner::new();
let class_group = crate::cfg::BodyId(3);
@ -1610,7 +1598,7 @@ mod tests {
#[test]
fn single_dot_proxy_acquire_uses_symbol_id_path() {
// REGRESSION: single-dot callees keep the existing SymbolId-based
// path `f.acquireMine()` records against
// path, `f.acquireMine()` records against
// `receiver_class_group[sym_f]`, NOT `chain_proxies["f"]`. This
// preserves all existing 1-dot proxy semantics (leak detection,
// finding attribution).
@ -1716,7 +1704,7 @@ mod tests {
fn chain_proxy_lattice_join_unions_keys() {
// Sanity check: the lattice join unions chain_proxies keys.
// Branch A: `c.mu` OPEN. Branch B: `c.other` OPEN. Join must
// contain both this is the dataflow-correctness invariant
// contain both, this is the dataflow-correctness invariant
// for chain tracking across branches.
use crate::state::lattice::Lattice;
let mut a = ProductState::initial();
@ -1745,7 +1733,7 @@ mod tests {
#[test]
fn chain_proxy_lattice_join_merges_lifecycle() {
// Same chain key on two branches the lifecycle is OR-joined
// Same chain key on two branches, the lifecycle is OR-joined
// (OPEN | CLOSED). Mirrors the `ResourceLifecycle::join`
// bitflag-or semantics already used for SymbolId-based tracking.
use crate::state::lattice::Lattice;
@ -1775,7 +1763,7 @@ mod tests {
}
// ─────────────────────────────────────────────────────────────────
// Pointer-analysis Phase 2: PtrProxyHint::FieldOnly routes
// Pointer-analysis: PtrProxyHint::FieldOnly routes
// single-dot proxy-acquire to chain_proxies, suppressing the
// SymbolId path that would otherwise mark the field-aliased local
// as a leakable resource.
@ -1783,7 +1771,7 @@ mod tests {
#[test]
fn field_only_hint_routes_single_dot_acquire_to_chain_proxies() {
// Models `m := c.mu; m.Lock()` `m`'s pt set is `{Field(SelfParam, mu)}`,
// Models `m := c.mu; m.Lock()`, `m`'s pt set is `{Field(SelfParam, mu)}`,
// so PtrProxyHint::FieldOnly applies. The acquire must record
// `m` in chain_proxies, NOT in receiver_class_group, so the
// leak detector does not later flag `m` as an OPEN-at-exit
@ -1845,7 +1833,7 @@ mod tests {
#[test]
fn field_only_hint_release_transitions_chain_entry_to_closed() {
// Acquire + Release pair on the field-aliased local both route
// through chain_proxies the entry transitions OPEN → CLOSED
// through chain_proxies, the entry transitions OPEN → CLOSED
// exactly as the existing chain-receiver path does.
let mut interner = SymbolInterner::new();
let _sym_m = interner.intern_scoped(None, "m");
@ -1909,7 +1897,7 @@ mod tests {
#[test]
fn no_hint_falls_through_to_existing_symbol_id_path() {
// REGRESSION: when `ptr_proxy_hints` is `None`, the single-dot
// proxy-acquire branch behaves exactly as today the SymbolId
// proxy-acquire branch behaves exactly as today, the SymbolId
// path fires, `chain_proxies` stays empty. Strict-additive
// contract: pointer analysis disabled ⇒ no behavioural change.
let mut interner = SymbolInterner::new();
@ -1951,7 +1939,7 @@ mod tests {
fn empty_hint_map_does_not_redirect() {
// REGRESSION: an empty hint map means "every name resolves to
// PtrProxyHint::Other". The single-dot branch must fall
// through to the SymbolId path not silently route to
// through to the SymbolId path, not silently route to
// chain_proxies because the map happened to be empty.
let mut interner = SymbolInterner::new();
let sym_f = interner.intern_scoped(None, "f");