mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-21 20:18:06 +02:00
Python FP and docs updates (#58)
* refactor: Update comments for clarity and add expectations.json files for performance metrics * feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks * feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks * refactor: Simplify code formatting for better readability in multiple files * refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration * feat: Update Java and Python patterns to include new security rules * refactor: Improve comment clarity and consistency across multiple Rust files * refactor: Simplify code formatting for improved readability in integration tests and module files * refactor: Improve comment formatting and enhance clarity in assertions across multiple files
This commit is contained in:
parent
4db0805de6
commit
a438886217
291 changed files with 9485 additions and 3851 deletions
|
|
@ -168,8 +168,8 @@ impl Lattice for AuthDomainState {
|
|||
/// (e.g. `"c.mu"`, `"c.writer.header"`) so distinct field projections
|
||||
/// of the same chain root are tracked independently.
|
||||
///
|
||||
/// Chain-keyed proxy state is the Phase 3 replacement for the single-dot
|
||||
/// band-aid that conservatively dropped chain receivers entirely — chain
|
||||
/// Chain-keyed proxy state is the replacement for the single-dot
|
||||
/// band-aid that conservatively dropped chain receivers entirely; chain
|
||||
/// receivers are now first-class, semantically distinct from their root.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct ChainProxyState {
|
||||
|
|
@ -192,18 +192,12 @@ pub struct ProductState {
|
|||
/// operation (e.g., fs.openSync at line 7) rather than the proxy call.
|
||||
pub proxy_acquire_spans: HashMap<SymbolId, (usize, usize)>,
|
||||
/// Per-chain-receiver proxy tracking, keyed by joined chain text
|
||||
/// (`"c.mu"`, `"c.writer.header"`). Each chain receiver has its own
|
||||
/// lifecycle, class group, and acquire span — independent of both the
|
||||
/// chain root and any other chain. Phase 3 of the field-projections
|
||||
/// rollout introduces this map; consumers that previously used
|
||||
/// [`receiver_class_group`] for chain receivers (via the deleted
|
||||
/// single-dot band-aid) now route through here for 2+ dot callees.
|
||||
/// (`"c.mu"`, `"c.writer.header"`). Each chain receiver has its own
|
||||
/// lifecycle, class group, and acquire span, independent of both
|
||||
/// the chain root and any other chain.
|
||||
///
|
||||
/// Phase 3 ships chain_proxies in tracking-only mode: chain receivers
|
||||
/// that remain OPEN at exit are NOT promoted to leak findings (so the
|
||||
/// addition is strictly behaviour-preserving against the existing
|
||||
/// benchmark). Phase 4 / a follow-up adds chain-rooted leak findings
|
||||
/// once the receiver-class detection is broad enough to avoid new FPs.
|
||||
/// Tracking-only: chain receivers that remain OPEN at exit are NOT
|
||||
/// promoted to leak findings.
|
||||
pub chain_proxies: HashMap<String, ChainProxyState>,
|
||||
}
|
||||
|
||||
|
|
@ -386,7 +380,7 @@ mod tests {
|
|||
// the laws also need to hold on the *actual* impls used by the
|
||||
// engine. A change to ResourceLifecycle's bitset semantics or to
|
||||
// AuthLevel's ordering could quietly break commutativity /
|
||||
// associativity / idempotence — these tests pin those properties.
|
||||
// associativity / idempotence, these tests pin those properties.
|
||||
|
||||
#[test]
|
||||
fn resource_lifecycle_join_laws() {
|
||||
|
|
@ -424,7 +418,7 @@ mod tests {
|
|||
|
||||
/// `AuthLevel` satisfies idempotence, commutativity, and associativity
|
||||
/// of `join` (which is `min` of the privilege ordering). It does NOT
|
||||
/// satisfy the `Lattice` trait's bot-identity law — see the explicit
|
||||
/// satisfy the `Lattice` trait's bot-identity law, see the explicit
|
||||
/// `auth_level_bot_is_absorbing_not_identity` test below for a
|
||||
/// rationale and a regression guard.
|
||||
#[test]
|
||||
|
|
@ -459,14 +453,14 @@ mod tests {
|
|||
/// * therefore `Admin.join(Unauthed) == Unauthed`, not `Admin`
|
||||
///
|
||||
/// In other words, `Unauthed` is the *absorbing* element of the join,
|
||||
/// not the identity — the algebraic dual of what the trait expects.
|
||||
/// not the identity, the algebraic dual of what the trait expects.
|
||||
///
|
||||
/// This is intentional for security: if any incoming path is unauthed,
|
||||
/// the merged state must be unauthed (the conservative baseline). The
|
||||
/// trait contract violation matters only if the dataflow engine ever
|
||||
/// joins `bot()` with a non-bot reachable state from a different path
|
||||
/// (e.g. for an unreachable predecessor); in the current engine such
|
||||
/// nodes are skipped, so the violation is observably benign — but
|
||||
/// nodes are skipped, so the violation is observably benign, but
|
||||
/// documenting it here prevents an accidental "fix" that flips
|
||||
/// `bot()` to `Admin` and silently elevates auth across all merges.
|
||||
#[test]
|
||||
|
|
@ -506,7 +500,7 @@ mod tests {
|
|||
|
||||
/// `AuthDomainState::join` keeps a variable as `validated` only if
|
||||
/// it was validated on *every* incoming path. A variable validated
|
||||
/// on one branch but not the other must be dropped — otherwise an
|
||||
/// on one branch but not the other must be dropped, otherwise an
|
||||
/// auth bypass on one path silently authorises sinks on the merge
|
||||
/// path.
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -89,7 +89,7 @@ pub fn run_forward<S: Lattice, T: Transfer<S>>(
|
|||
converged = false;
|
||||
break;
|
||||
}
|
||||
// Budget exceeded but transfer requested continuation — mark non-converged
|
||||
// Budget exceeded but transfer requested continuation, mark non-converged
|
||||
converged = false;
|
||||
}
|
||||
|
||||
|
|
@ -100,7 +100,7 @@ pub fn run_forward<S: Lattice, T: Transfer<S>>(
|
|||
|
||||
let edges: Vec<_> = cfg.edges(node).map(|e| (*e.weight(), e.target())).collect();
|
||||
|
||||
// No outgoing edges — nothing to propagate (exit/dead end).
|
||||
// No outgoing edges, nothing to propagate (exit/dead end).
|
||||
if edges.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -159,7 +159,7 @@ pub fn run_forward<S: Lattice, T: Transfer<S>>(
|
|||
let edges: Vec<_> = cfg.edges(node).map(|e| (*e.weight(), e.target())).collect();
|
||||
|
||||
if edges.is_empty() {
|
||||
// Exit / dead end — apply transfer for event collection.
|
||||
// Exit / dead end, apply transfer for event collection.
|
||||
let info = &cfg[node];
|
||||
let (_out_state, new_events) = transfer.apply(node, info, None, node_state);
|
||||
events.extend(new_events);
|
||||
|
|
@ -487,7 +487,7 @@ mod tests {
|
|||
assert!(in_wl.insert(n1));
|
||||
wl.push_back(n1);
|
||||
|
||||
// Duplicate n0 — should not insert
|
||||
// Duplicate n0, should not insert
|
||||
assert!(!in_wl.insert(n0));
|
||||
// wl still has only 2 entries
|
||||
assert_eq!(wl.len(), 2);
|
||||
|
|
@ -597,7 +597,7 @@ mod tests {
|
|||
}
|
||||
|
||||
/// Self-loop on a single node: `entry → A → A → … → exit`. The
|
||||
/// worklist must not livelock — once A's state is stable, the
|
||||
/// worklist must not livelock, once A's state is stable, the
|
||||
/// back-edge stops re-enqueueing it.
|
||||
#[test]
|
||||
fn self_loop_terminates() {
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ fn sanitize_desc(s: &str) -> String {
|
|||
crate::fmt::normalize_snippet(s)
|
||||
}
|
||||
|
||||
/// Returns true if `idx` is the terminal exit of a function body — the
|
||||
/// Returns true if `idx` is the terminal exit of a function body, the
|
||||
/// convergence node where all execution paths join before leaving the function.
|
||||
///
|
||||
/// **Invariant:** Only terminal exits carry the complete merged lifecycle state
|
||||
|
|
@ -143,7 +143,7 @@ pub fn extract_findings(
|
|||
for (idx, info) in cfg.node_references() {
|
||||
// File-level Exit (program termination, no enclosing function).
|
||||
let is_file_exit = info.kind == StmtKind::Exit && info.ast.enclosing_func.is_none();
|
||||
// Terminal function exit — the convergence node where all paths join.
|
||||
// Terminal function exit, the convergence node where all paths join.
|
||||
// Return nodes are intermediate and carry only path-specific state;
|
||||
// only the terminal exit carries the complete merged lifecycle.
|
||||
let is_func_terminal = is_terminal_function_exit(idx, info, cfg);
|
||||
|
|
@ -167,7 +167,7 @@ pub fn extract_findings(
|
|||
let acquire_node = find_acquire_node(cfg, sym, interner, scope);
|
||||
|
||||
// At the file-level Exit, skip variables whose acquire site is
|
||||
// inside a function — those are already handled by the per-
|
||||
// inside a function, those are already handled by the per-
|
||||
// function exit checks above. Without this, the file-level Exit
|
||||
// would duplicate leak findings with a misleading acquire span
|
||||
// (the first global match instead of the correct function-local one).
|
||||
|
|
@ -296,7 +296,7 @@ pub fn extract_findings(
|
|||
// **Language gate**: this heuristic is JS/TS-specific. Other
|
||||
// languages (Go, Java, C, C++, Python, Rust, Ruby, PHP) use
|
||||
// explicit error returns / try-catch with deterministic control
|
||||
// flow — an intervening call does NOT silently bypass a release.
|
||||
// flow, an intervening call does NOT silently bypass a release.
|
||||
// Firing this on Go gave the gin/context.go FP where any method
|
||||
// calling another method (`c.Set`, `c.Get`) was flagged as a
|
||||
// possible leak on the receiver. Skip the section but continue
|
||||
|
|
@ -374,7 +374,7 @@ pub fn extract_findings(
|
|||
// (PathFact `dotdot=No && absolute=No`). A web handler
|
||||
// reading a sanitised user-controlled path is not the
|
||||
// same shape as a handler reading any user-controlled
|
||||
// path — the auth concern reduces once the data cannot
|
||||
// path, the auth concern reduces once the data cannot
|
||||
// escape into a privileged location. Note this is per
|
||||
// CFG-node span, so co-located unrelated sinks are
|
||||
// unaffected.
|
||||
|
|
@ -455,7 +455,7 @@ fn is_web_entrypoint_simple(
|
|||
) -> bool {
|
||||
let name_lower = func_name.to_ascii_lowercase();
|
||||
|
||||
// Skip bare "main" — it's typically a CLI entry
|
||||
// Skip bare "main", it's typically a CLI entry
|
||||
if name_lower == "main" {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -695,7 +695,7 @@ mod tests {
|
|||
fn per_body_factory_returned_resource_no_finding() {
|
||||
// Per-body graph: Entry → fopen(f) → return f → Exit
|
||||
// All nodes have enclosing_func=Some("factory").
|
||||
// The resource is returned — no leak finding expected.
|
||||
// The resource is returned, no leak finding expected.
|
||||
let func = "factory";
|
||||
let mut cfg: Cfg = Graph::new();
|
||||
let entry = cfg.add_node(make_func_node(StmtKind::Entry, func));
|
||||
|
|
@ -764,7 +764,7 @@ mod tests {
|
|||
fn per_body_non_returned_resource_leaks() {
|
||||
// Per-body graph: Entry → fopen(f) → return (no uses) → Exit
|
||||
// All nodes have enclosing_func=Some("leaker").
|
||||
// Resource is NOT returned — exactly one state-resource-leak expected.
|
||||
// Resource is NOT returned, exactly one state-resource-leak expected.
|
||||
let func = "leaker";
|
||||
let mut cfg: Cfg = Graph::new();
|
||||
let entry = cfg.add_node(make_func_node(StmtKind::Entry, func));
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
#![doc = include_str!(concat!(env!("OUT_DIR"), "/state.md"))]
|
||||
|
||||
pub mod domain;
|
||||
pub mod engine;
|
||||
pub mod facts;
|
||||
|
|
@ -27,7 +29,7 @@ pub fn classify_auth_decorators(lang: Lang, decorators: &[String]) -> AuthLevel
|
|||
let mut level = AuthLevel::Unauthed;
|
||||
for dec in decorators {
|
||||
let d = dec.to_ascii_lowercase();
|
||||
// Admin patterns — match the same static list used by the call-site
|
||||
// Admin patterns, match the same static list used by the call-site
|
||||
// transfer so decorators and runtime checks agree on privilege.
|
||||
if d.contains("admin") || d.contains("hasrole") || d.contains("superuser") {
|
||||
return AuthLevel::Admin;
|
||||
|
|
@ -73,7 +75,7 @@ pub fn run_state_analysis(
|
|||
// PointsToFacts. When present, the proxy-acquire transfer suppresses
|
||||
// SymbolId attribution on field-aliased receivers (`m := c.mu;
|
||||
// m.Lock()`) and routes them through `chain_proxies` instead. Pass
|
||||
// `None` to disable — strict-additive.
|
||||
// `None` to disable, strict-additive.
|
||||
ptr_proxy_hints: Option<&std::collections::HashMap<String, crate::pointer::PtrProxyHint>>,
|
||||
) -> Vec<StateFinding> {
|
||||
let _span = tracing::debug_span!("run_state_analysis").entered();
|
||||
|
|
@ -119,7 +121,7 @@ pub fn run_state_analysis(
|
|||
|
||||
/// Build resource method summaries by pre-scanning all method bodies for known
|
||||
/// resource acquire/release operations. Only creates summaries for methods whose
|
||||
/// bodies actually contain matching operations — never infers from names alone.
|
||||
/// bodies actually contain matching operations, never infers from names alone.
|
||||
pub fn build_resource_method_summaries(
|
||||
bodies: &[crate::cfg::BodyCfg],
|
||||
lang: Lang,
|
||||
|
|
@ -140,7 +142,7 @@ pub fn build_resource_method_summaries(
|
|||
};
|
||||
|
||||
for (_, info) in body.graph.node_references() {
|
||||
// Check both Call and Seq (Assignment) nodes — resource operations
|
||||
// Check both Call and Seq (Assignment) nodes, resource operations
|
||||
// can appear as RHS of assignments (e.g., `this.fd = fs.openSync(...)`).
|
||||
if !matches!(
|
||||
info.kind,
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ pub struct SymbolId(pub(crate) u32);
|
|||
|
||||
/// Function-scope discriminator for symbol interning.
|
||||
///
|
||||
/// This provides **function-level isolation only** — not full lexical/block
|
||||
/// This provides **function-level isolation only**, not full lexical/block
|
||||
/// scope modeling. Variables in different functions with the same name get
|
||||
/// distinct [`SymbolId`]s. Top-level / module-scope code uses `scope: None`.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||
|
|
@ -21,8 +21,8 @@ struct ScopedKey {
|
|||
///
|
||||
/// Built once from CFG node `defines`/`uses`, reused throughout analysis.
|
||||
/// Two construction modes:
|
||||
/// - [`from_cfg`](Self::from_cfg): flat (unscoped) interning — used by taint/SSA pipeline
|
||||
/// - [`from_cfg_scoped`](Self::from_cfg_scoped): function-scoped interning — used by state analysis
|
||||
/// - [`from_cfg`](Self::from_cfg): flat (unscoped) interning, used by taint/SSA pipeline
|
||||
/// - [`from_cfg_scoped`](Self::from_cfg_scoped): function-scoped interning, used by state analysis
|
||||
#[derive(Default)]
|
||||
pub struct SymbolInterner {
|
||||
to_id: HashMap<ScopedKey, SymbolId>,
|
||||
|
|
@ -43,7 +43,7 @@ impl SymbolInterner {
|
|||
/// scoped key.
|
||||
pub fn intern_scoped(&mut self, scope: Option<&str>, name: &str) -> SymbolId {
|
||||
// Member expressions (e.g. `this.fd`, `self.conn`) are shared class/
|
||||
// instance state — keep them in the global (None) scope so that
|
||||
// instance state, keep them in the global (None) scope so that
|
||||
// `open()` and `close()` methods can track the same resource symbol.
|
||||
// Only plain local variables get function-scoped isolation.
|
||||
let effective_scope = if name.contains('.') { None } else { scope };
|
||||
|
|
@ -70,7 +70,7 @@ impl SymbolInterner {
|
|||
self.to_id.get(&key).copied()
|
||||
}
|
||||
|
||||
/// Intern a name (unscoped — equivalent to `intern_scoped(None, name)`).
|
||||
/// Intern a name (unscoped, equivalent to `intern_scoped(None, name)`).
|
||||
///
|
||||
/// Used by the taint/SSA pipeline and unit tests that don't need
|
||||
/// function-scope isolation.
|
||||
|
|
@ -78,7 +78,7 @@ impl SymbolInterner {
|
|||
self.intern_scoped(None, name)
|
||||
}
|
||||
|
||||
/// Look up a name without interning it (unscoped — equivalent to
|
||||
/// Look up a name without interning it (unscoped, equivalent to
|
||||
/// `get_scoped(None, name)`).
|
||||
pub fn get(&self, name: &str) -> Option<SymbolId> {
|
||||
self.get_scoped(None, name)
|
||||
|
|
|
|||
|
|
@ -13,19 +13,16 @@ use petgraph::graph::NodeIndex;
|
|||
/// callee isn't a clean dotted member chain (parens, brackets, `::`,
|
||||
/// arrow operators, whitespace, or other complex tokens disqualify it).
|
||||
///
|
||||
/// Phase 3 of the field-projections rollout: this is the textual mirror
|
||||
/// of `try_lower_field_proj_chain` in `src/ssa/lower.rs`. The state
|
||||
/// engine doesn't yet read SSA bodies (would require threading SSA
|
||||
/// through the lattice run), so the same parse rules are duplicated
|
||||
/// here. Both helpers share the contract: a success here implies a
|
||||
/// FieldProj chain at SSA level (or a direct receiver for the 1-dot
|
||||
/// case).
|
||||
/// Textual mirror of `try_lower_field_proj_chain` in
|
||||
/// `src/ssa/lower.rs`. The state engine doesn't read SSA bodies, so
|
||||
/// the parse rules are duplicated. A success here implies a FieldProj
|
||||
/// chain at SSA level (or a direct receiver for the 1-dot case).
|
||||
///
|
||||
/// **Returns** `Some(("c", "Close"))` for `"c.Close"` (1 dot — the
|
||||
/// **Returns** `Some(("c", "Close"))` for `"c.Close"` (1 dot, the
|
||||
/// receiver is a bare ident); `Some(("c.mu", "Lock"))` for
|
||||
/// `"c.mu.Lock"` (2 dots — receiver is a 1-element chain);
|
||||
/// `"c.mu.Lock"` (2 dots, receiver is a 1-element chain);
|
||||
/// `Some(("c.writer.header", "set"))` for `"c.writer.header.set"`
|
||||
/// (3 dots — receiver is a 2-element chain). Returns `None` for any
|
||||
/// (3 dots, receiver is a 2-element chain). Returns `None` for any
|
||||
/// callee shape we can't safely decompose textually.
|
||||
fn try_chain_decompose(callee: &str) -> Option<(&str, &str)> {
|
||||
for ch in callee.chars() {
|
||||
|
|
@ -42,7 +39,7 @@ fn try_chain_decompose(callee: &str) -> Option<(&str, &str)> {
|
|||
return None;
|
||||
}
|
||||
// Reject if any segment in the receiver is empty (leading dot,
|
||||
// double dots) — same discipline as the SSA-side helper.
|
||||
// double dots), same discipline as the SSA-side helper.
|
||||
if receiver_text.split('.').any(str::is_empty) {
|
||||
return None;
|
||||
}
|
||||
|
|
@ -50,7 +47,7 @@ fn try_chain_decompose(callee: &str) -> Option<(&str, &str)> {
|
|||
}
|
||||
|
||||
/// Events emitted during transfer for illegal state transitions.
|
||||
/// These are NOT lattice values — they become findings in `facts.rs`.
|
||||
/// These are NOT lattice values, they become findings in `facts.rs`.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TransferEvent {
|
||||
pub kind: TransferEventKind,
|
||||
|
|
@ -159,7 +156,7 @@ pub struct ResourceMethodSummary {
|
|||
pub method_name: String,
|
||||
/// Whether this method acquires or releases a resource.
|
||||
pub effect: ResourceEffect,
|
||||
/// `parent_body_id` of the declaring method — groups methods by class.
|
||||
/// `parent_body_id` of the declaring method, groups methods by class.
|
||||
pub class_group: crate::cfg::BodyId,
|
||||
/// Span of the actual resource operation (e.g., fs.openSync at line 7).
|
||||
pub original_span: (usize, usize),
|
||||
|
|
@ -171,7 +168,7 @@ pub struct DefaultTransfer<'a> {
|
|||
pub interner: &'a SymbolInterner,
|
||||
/// Resource method summaries for cross-body proxy resolution.
|
||||
pub resource_method_summaries: &'a [ResourceMethodSummary],
|
||||
/// Optional per-body field-only points-to hints — names that resolve
|
||||
/// Optional per-body field-only points-to hints, names that resolve
|
||||
/// to a value whose entire abstract heap identity is one or more
|
||||
/// [`crate::pointer::AbsLoc::Field`] locations (e.g. `m := c.mu`).
|
||||
///
|
||||
|
|
@ -225,21 +222,12 @@ impl DefaultTransfer<'_> {
|
|||
.get_scoped(info.ast.enclosing_func.as_deref(), name)
|
||||
}
|
||||
|
||||
/// Pointer-Phase 2 hook. Returns `true` when the call has been
|
||||
/// fully handled as a field-aliased receiver proxy and the rest of
|
||||
/// `apply_call` should bail.
|
||||
///
|
||||
/// Activates only on single-dot calls (`<recv>.<method>`) whose
|
||||
/// receiver name is recorded with [`crate::pointer::PtrProxyHint::FieldOnly`]
|
||||
/// in the per-body hint map AND for which a matching
|
||||
/// [`ResourceMethodSummary`] exists. The acquire/release effect
|
||||
/// is recorded against `state.chain_proxies` keyed by the receiver
|
||||
/// name — chain_proxies is a tracking-only lattice today, so leak
|
||||
/// detection (which only inspects `state.resource`) is suppressed
|
||||
/// for the alias. Strict-additive: when no hint map is supplied,
|
||||
/// when the receiver isn't `FieldOnly`, or when no method summary
|
||||
/// matches, the function returns `false` and the legacy branches
|
||||
/// run unchanged.
|
||||
/// Returns `true` when the call was fully handled as a
|
||||
/// field-aliased receiver proxy and the rest of `apply_call`
|
||||
/// should bail. Activates on single-dot calls whose receiver is
|
||||
/// `FieldOnly` in the hint map and that match a
|
||||
/// [`ResourceMethodSummary`]. The acquire/release effect is
|
||||
/// recorded against `state.chain_proxies` keyed by receiver name.
|
||||
fn try_apply_field_alias_proxy(
|
||||
&self,
|
||||
info: &NodeInfo,
|
||||
|
|
@ -308,13 +296,13 @@ impl DefaultTransfer<'_> {
|
|||
None => return,
|
||||
};
|
||||
|
||||
// ── Pointer-Phase 2: field-aliased receiver fast-path ───────────
|
||||
// ── field-aliased receiver fast-path ───────────
|
||||
// When the receiver name resolves through points-to to a value
|
||||
// whose abstract heap identity is purely `Field(_, _)` (e.g.
|
||||
// `m := c.mu` followed by `m.Lock()`), the receiver is a
|
||||
// sub-object alias rather than a standalone resource handle.
|
||||
// Routing the entire call into `chain_proxies` here — *before*
|
||||
// the SymbolId-based direct-acquire/release/proxy branches —
|
||||
// Routing the entire call into `chain_proxies` here, *before*
|
||||
// the SymbolId-based direct-acquire/release/proxy branches,
|
||||
// suppresses the FP class where the local `m` would otherwise
|
||||
// be flagged as a leakable resource at function exit.
|
||||
//
|
||||
|
|
@ -385,16 +373,16 @@ impl DefaultTransfer<'_> {
|
|||
// When no direct resource pair matched, check if the callee is a
|
||||
// method wrapper for a known resource operation.
|
||||
//
|
||||
// Phase 3 (field-projections rollout, 2026-04-25): the previous
|
||||
// The previous
|
||||
// single-dot band-aid (`callee.matches('.').count() == 1 &&
|
||||
// !callee.contains('(')`) silently dropped chained receivers
|
||||
// because the original textual extractor took the chain root as
|
||||
// receiver — collapsing `c.writer.header().set` to `c` and
|
||||
// receiver, collapsing `c.writer.header().set` to `c` and
|
||||
// marking `c` as proxy-acquired (the gin/context.go FP class).
|
||||
//
|
||||
// The band-aid is now deleted. Chained-receiver method calls
|
||||
// are routed to a *separate* state map (`chain_proxies`) keyed by
|
||||
// the joined receiver chain text — so `c.mu.Lock()` acquires
|
||||
// the joined receiver chain text, so `c.mu.Lock()` acquires
|
||||
// `c.mu` (a chain-receiver entity), not `c`. The chain receiver
|
||||
// is independent of the chain root: leaks/double-closes are
|
||||
// tracked per chain, never propagated up to the root.
|
||||
|
|
@ -443,7 +431,7 @@ impl DefaultTransfer<'_> {
|
|||
} else if !direct_acquire && !direct_release {
|
||||
// Single-dot receiver (`<recv>.<method>`): existing
|
||||
// SymbolId-based path. Gated on direct_acquire/release
|
||||
// because it shares state with the direct paths above —
|
||||
// because it shares state with the direct paths above;
|
||||
// running both would double-transition. Honour the
|
||||
// explicit `info.call.receiver` when it's the same bare
|
||||
// ident, otherwise fall back to the parsed receiver text.
|
||||
|
|
@ -544,7 +532,7 @@ impl DefaultTransfer<'_> {
|
|||
}
|
||||
|
||||
fn apply_if(&self, info: &NodeInfo, edge: Option<EdgeKind>, state: &mut ProductState) {
|
||||
// Determine the "positive edge" — the edge where the underlying
|
||||
// Determine the "positive edge", the edge where the underlying
|
||||
// (de-negated) condition evaluates to true.
|
||||
//
|
||||
// For `if (is_authenticated(req))`: positive = True edge
|
||||
|
|
@ -558,8 +546,8 @@ impl DefaultTransfer<'_> {
|
|||
|
||||
// Resource null-check: `if (f)` or `if (!f)` where f is a tracked
|
||||
// resource currently in OPEN state. The "var is falsy" edge means
|
||||
// the acquisition returned null/zero — no resource was actually
|
||||
// produced — so subsequent close requirements do not apply on that
|
||||
// the acquisition returned null/zero, no resource was actually
|
||||
// produced, so subsequent close requirements do not apply on that
|
||||
// path. Clearing OPEN suppresses the spurious may-leak finding for
|
||||
// the canonical NULL-safe close idiom in C / C++ / similar:
|
||||
//
|
||||
|
|
@ -572,7 +560,7 @@ impl DefaultTransfer<'_> {
|
|||
//
|
||||
// Heuristic conditions:
|
||||
// * condition is a single-variable truth check (no comparisons,
|
||||
// no calls — `condition_vars.len() == 1` and the trimmed text
|
||||
// no calls, `condition_vars.len() == 1` and the trimmed text
|
||||
// equals that variable name).
|
||||
// * the var has OPEN in its lifecycle bitset.
|
||||
// * the edge represents "var is falsy" (= !is_positive_edge).
|
||||
|
|
@ -595,7 +583,7 @@ impl DefaultTransfer<'_> {
|
|||
|
||||
if let Some(ref cond) = info.condition_text {
|
||||
let cond_lower = cond.to_ascii_lowercase();
|
||||
// Strip leading negation operator for pattern matching —
|
||||
// Strip leading negation operator for pattern matching;
|
||||
// the edge selection above already encodes the semantics.
|
||||
let cond_inner = if info.condition_negated {
|
||||
cond_lower.trim_start_matches('!').trim_start()
|
||||
|
|
@ -691,7 +679,7 @@ fn is_guard_like(callee: &str) -> bool {
|
|||
}
|
||||
|
||||
/// True iff the condition is a single-variable truth check (no comparison,
|
||||
/// no method call, no boolean composition) — the bare `if (f)` or `if (!f)`
|
||||
/// no method call, no boolean composition), the bare `if (f)` or `if (!f)`
|
||||
/// shape used as a NULL-safe gate around resource access.
|
||||
///
|
||||
/// Conservative: requires `condition_vars` to have exactly one entry, and
|
||||
|
|
@ -1093,7 +1081,7 @@ mod tests {
|
|||
let mut state = ProductState::initial();
|
||||
state.resource.set(sym_f, ResourceLifecycle::OPEN);
|
||||
|
||||
// `if (!f)` — condition_negated=true, true-edge means f is null
|
||||
// `if (!f)`, condition_negated=true, true-edge means f is null
|
||||
let info = NodeInfo {
|
||||
kind: StmtKind::If,
|
||||
condition_text: Some("!f".into()),
|
||||
|
|
@ -1232,7 +1220,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn auth_token_underscore_camel_boundary_cases() {
|
||||
// Underscore-joined identifiers are single tokens — must not match interior.
|
||||
// Underscore-joined identifiers are single tokens, must not match interior.
|
||||
assert!(!condition_contains_auth_token(
|
||||
"req.user_is_authenticated_flag",
|
||||
"is_authenticated"
|
||||
|
|
@ -1259,12 +1247,12 @@ mod tests {
|
|||
"xmiddleware.auth()",
|
||||
"middleware.auth"
|
||||
));
|
||||
// Right boundary violation — "middleware.authz" extends past "middleware.auth".
|
||||
// Right boundary violation, "middleware.authz" extends past "middleware.auth".
|
||||
assert!(!condition_contains_auth_token(
|
||||
"middleware.authz()",
|
||||
"middleware.auth"
|
||||
));
|
||||
// "middleware.auth.check" — matcher ends at '.', which is non-ident → matches.
|
||||
// "middleware.auth.check", matcher ends at '.', which is non-ident → matches.
|
||||
assert!(condition_contains_auth_token(
|
||||
"middleware.auth.check()",
|
||||
"middleware.auth"
|
||||
|
|
@ -1332,7 +1320,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn auth_token_boolean_composition() {
|
||||
// Compound conditions — each token should be individually matchable.
|
||||
// Compound conditions, each token should be individually matchable.
|
||||
assert!(condition_contains_auth_token(
|
||||
"is_authenticated && is_admin",
|
||||
"is_authenticated"
|
||||
|
|
@ -1352,7 +1340,7 @@ mod tests {
|
|||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// Phase 3: chain-receiver decomposition + chain_proxies tracking
|
||||
// chain-receiver decomposition + chain_proxies tracking
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
//
|
||||
// These tests pin the contract that:
|
||||
|
|
@ -1360,12 +1348,12 @@ mod tests {
|
|||
// method, bailing on complex tokens.
|
||||
// 2. The proxy-method routing in `apply_call` records chained
|
||||
// receivers in `state.chain_proxies` (keyed by joined chain
|
||||
// text) — independent from the chain root's `SymbolId`-based
|
||||
// text), independent from the chain root's `SymbolId`-based
|
||||
// `state.receiver_class_group` entries.
|
||||
// 3. Single-dot callees still flow through the existing SymbolId
|
||||
// path (regression guard).
|
||||
// 4. The deleted single-dot band-aid no longer suppresses chain
|
||||
// cases — `c.mu.Lock()` now fires the chain-proxies path
|
||||
// cases, `c.mu.Lock()` now fires the chain-proxies path
|
||||
// instead of being silently dropped.
|
||||
|
||||
#[test]
|
||||
|
|
@ -1407,7 +1395,7 @@ mod tests {
|
|||
// the simple `<ident>.<ident>...` shape; helper must bail to
|
||||
// preserve the conservative behaviour the band-aid established.
|
||||
for s in [
|
||||
"Foo::bar::baz", // Rust path — `::` rules it out
|
||||
"Foo::bar::baz", // Rust path, `::` rules it out
|
||||
"ptr->field.f", // C arrow operator
|
||||
"obj.f().g", // intermediate call
|
||||
"vec[0].field", // index expression
|
||||
|
|
@ -1431,7 +1419,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn chain_proxy_acquire_records_chain_text_not_root() {
|
||||
// Phase 3 key behaviour: a chained-receiver acquire (`c.mu.Lock()`)
|
||||
// Key behaviour: a chained-receiver acquire (`c.mu.Lock()`)
|
||||
// records `c.mu` in `state.chain_proxies` and DOES NOT touch the
|
||||
// SymbolId-keyed `receiver_class_group` for the chain root `c`.
|
||||
let mut interner = SymbolInterner::new();
|
||||
|
|
@ -1481,7 +1469,7 @@ mod tests {
|
|||
assert_eq!(entry.class_group, crate::cfg::BodyId(7));
|
||||
assert_eq!(entry.acquire_span, (10, 20));
|
||||
|
||||
// Root `c` is NOT marked in receiver_class_group — the gin/context FP
|
||||
// Root `c` is NOT marked in receiver_class_group, the gin/context FP
|
||||
// the band-aid was guarding against can no longer reappear.
|
||||
assert!(
|
||||
state.receiver_class_group.is_empty(),
|
||||
|
|
@ -1564,7 +1552,7 @@ mod tests {
|
|||
#[test]
|
||||
fn chain_proxy_distinct_chains_dont_collide() {
|
||||
// `c.mu.Lock()` and `c.other.Lock()` are independent chain
|
||||
// receivers — each gets its own entry in chain_proxies.
|
||||
// receivers, each gets its own entry in chain_proxies.
|
||||
let interner = SymbolInterner::new();
|
||||
let class_group = crate::cfg::BodyId(3);
|
||||
|
||||
|
|
@ -1610,7 +1598,7 @@ mod tests {
|
|||
#[test]
|
||||
fn single_dot_proxy_acquire_uses_symbol_id_path() {
|
||||
// REGRESSION: single-dot callees keep the existing SymbolId-based
|
||||
// path — `f.acquireMine()` records against
|
||||
// path, `f.acquireMine()` records against
|
||||
// `receiver_class_group[sym_f]`, NOT `chain_proxies["f"]`. This
|
||||
// preserves all existing 1-dot proxy semantics (leak detection,
|
||||
// finding attribution).
|
||||
|
|
@ -1716,7 +1704,7 @@ mod tests {
|
|||
fn chain_proxy_lattice_join_unions_keys() {
|
||||
// Sanity check: the lattice join unions chain_proxies keys.
|
||||
// Branch A: `c.mu` OPEN. Branch B: `c.other` OPEN. Join must
|
||||
// contain both — this is the dataflow-correctness invariant
|
||||
// contain both, this is the dataflow-correctness invariant
|
||||
// for chain tracking across branches.
|
||||
use crate::state::lattice::Lattice;
|
||||
let mut a = ProductState::initial();
|
||||
|
|
@ -1745,7 +1733,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn chain_proxy_lattice_join_merges_lifecycle() {
|
||||
// Same chain key on two branches — the lifecycle is OR-joined
|
||||
// Same chain key on two branches, the lifecycle is OR-joined
|
||||
// (OPEN ∪ CLOSED). Mirrors the `ResourceLifecycle::join`
|
||||
// bitflag-or semantics already used for SymbolId-based tracking.
|
||||
use crate::state::lattice::Lattice;
|
||||
|
|
@ -1775,7 +1763,7 @@ mod tests {
|
|||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// Pointer-analysis Phase 2: PtrProxyHint::FieldOnly routes
|
||||
// Pointer-analysis: PtrProxyHint::FieldOnly routes
|
||||
// single-dot proxy-acquire to chain_proxies, suppressing the
|
||||
// SymbolId path that would otherwise mark the field-aliased local
|
||||
// as a leakable resource.
|
||||
|
|
@ -1783,7 +1771,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn field_only_hint_routes_single_dot_acquire_to_chain_proxies() {
|
||||
// Models `m := c.mu; m.Lock()` — `m`'s pt set is `{Field(SelfParam, mu)}`,
|
||||
// Models `m := c.mu; m.Lock()`, `m`'s pt set is `{Field(SelfParam, mu)}`,
|
||||
// so PtrProxyHint::FieldOnly applies. The acquire must record
|
||||
// `m` in chain_proxies, NOT in receiver_class_group, so the
|
||||
// leak detector does not later flag `m` as an OPEN-at-exit
|
||||
|
|
@ -1845,7 +1833,7 @@ mod tests {
|
|||
#[test]
|
||||
fn field_only_hint_release_transitions_chain_entry_to_closed() {
|
||||
// Acquire + Release pair on the field-aliased local both route
|
||||
// through chain_proxies — the entry transitions OPEN → CLOSED
|
||||
// through chain_proxies, the entry transitions OPEN → CLOSED
|
||||
// exactly as the existing chain-receiver path does.
|
||||
let mut interner = SymbolInterner::new();
|
||||
let _sym_m = interner.intern_scoped(None, "m");
|
||||
|
|
@ -1909,7 +1897,7 @@ mod tests {
|
|||
#[test]
|
||||
fn no_hint_falls_through_to_existing_symbol_id_path() {
|
||||
// REGRESSION: when `ptr_proxy_hints` is `None`, the single-dot
|
||||
// proxy-acquire branch behaves exactly as today — the SymbolId
|
||||
// proxy-acquire branch behaves exactly as today, the SymbolId
|
||||
// path fires, `chain_proxies` stays empty. Strict-additive
|
||||
// contract: pointer analysis disabled ⇒ no behavioural change.
|
||||
let mut interner = SymbolInterner::new();
|
||||
|
|
@ -1951,7 +1939,7 @@ mod tests {
|
|||
fn empty_hint_map_does_not_redirect() {
|
||||
// REGRESSION: an empty hint map means "every name resolves to
|
||||
// PtrProxyHint::Other". The single-dot branch must fall
|
||||
// through to the SymbolId path — not silently route to
|
||||
// through to the SymbolId path, not silently route to
|
||||
// chain_proxies because the map happened to be empty.
|
||||
let mut interner = SymbolInterner::new();
|
||||
let sym_f = interner.intern_scoped(None, "f");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue