// nyx/src/cfg_analysis/resources.rs

use super::dominators;
use super::rules;
use super::{AnalysisContext, CfgAnalysis, CfgFinding, Confidence};
use crate::cfg::{EdgeKind, StmtKind};
use crate::patterns::Severity;
use crate::symbol::Lang;
use petgraph::graph::NodeIndex;
use petgraph::visit::EdgeRef;
use std::collections::HashSet;

/// CFG analysis pass registered under the name `"resource-misuse"`.
///
/// For every resource pair returned by `rules::resource_pairs` for the
/// context's language, it looks for acquire calls that are not released on
/// every path to the CFG exit node and reports them as `cfg-resource-leak`
/// (or `cfg-lock-not-released` for the `mutex` pair).
pub struct ResourceMisuse;

/// Tells `obj.connect("event-name", handler)` event-handler registrations
/// apart from genuine database-connection acquires.
///
/// The handler shape is: the first argument is a string literal that is not
/// URL-like (contains no `://`), and the second positional argument
/// references at least one identifier (the callable being registered).
/// Calls such as SQLAlchemy `engine.connect()` or
/// `sqlite3.connect("path.db")` have no args or just one string, so they do
/// not match and the leak check still applies to them.
///
/// This lives outside the static `exclude_acquire` list because that list
/// only matches on the callee name, whereas this check has to look at the
/// argument shape recorded on the call node.
fn is_event_handler_register_shape(info: &crate::cfg::NodeInfo) -> bool {
    // First positional argument: must be a string literal and must not look
    // like a connection URL.
    let first_is_event_name = matches!(
        info.call.arg_string_literals.first(),
        Some(Some(literal)) if !literal.contains("://")
    );

    // Second positional argument: `callback` is recorded as `["callback"]`
    // and `self._on_status` as `["self", "_on_status"]`; both count as a
    // handler.  A missing second argument, or one that references no
    // identifier at all (e.g. the string in `connect("user", "pass", ...)`,
    // recorded as an empty list), does not.
    let second_is_callable_ref = info
        .call
        .arg_uses
        .get(1)
        .is_some_and(|idents| !idents.is_empty());

    first_is_event_name && second_is_callable_ref
}

/// Find nodes matching acquire patterns for a given resource pair,
/// excluding any that match `exclude_patterns`.
///
/// A node qualifies when it is a `StmtKind::Call` with a known callee whose
/// ASCII-lowercased name ends with (or equals) one of `acquire_patterns`,
/// and does not end with any of `exclude_patterns`.  Matching is ASCII
/// case-insensitive on both sides.
///
/// Returns the matching node indices in `node_indices()` order.
fn find_acquire_nodes(
    ctx: &AnalysisContext,
    acquire_patterns: &[&str],
    exclude_patterns: &[&str],
) -> Vec<NodeIndex> {
    /// Lowercase a pattern list once so it is not redone for every node.
    fn lowercased(patterns: &[&str]) -> Vec<String> {
        patterns.iter().map(|p| p.to_ascii_lowercase()).collect()
    }

    /// `true` if `callee` ends with any pattern; a suffix match already
    /// covers the exact-equality case.
    fn ends_with_any(callee: &str, patterns: &[String]) -> bool {
        patterns.iter().any(|p| callee.ends_with(p.as_str()))
    }

    let acquire_lower = lowercased(acquire_patterns);
    let exclude_lower = lowercased(exclude_patterns);

    ctx.cfg
        .node_indices()
        .filter(|&idx| {
            let info = &ctx.cfg[idx];
            if info.kind != StmtKind::Call {
                return false;
            }
            let Some(callee) = &info.call.callee else {
                return false;
            };
            let callee_lower = callee.to_ascii_lowercase();
            // Exclusions win: a callee that matches an exclude pattern is
            // NOT an acquire even if it also matches an acquire pattern
            // (e.g. `freopen` ends with `fopen`).
            !ends_with_any(&callee_lower, &exclude_lower)
                && ends_with_any(&callee_lower, &acquire_lower)
        })
        .collect()
}

/// Find nodes matching release patterns for a given resource pair.
///
/// Includes both direct release calls (`info.call.callee`) and inner-arg
/// release calls (`info.arg_callees`), so wrapper shapes like Go testify
/// `require.NoError(t, f.Close())` and `errs = append(errs, f.Close())`
/// register the close site even though the outer callee is the wrapper.
///
/// A callee matches when its ASCII-lowercased name ends with (or equals)
/// one of the ASCII-lowercased `release_patterns`.  Returns the matching
/// node indices in `node_indices()` order.
fn find_release_nodes(ctx: &AnalysisContext, release_patterns: &[&str]) -> Vec<NodeIndex> {
    // Lowercase the patterns once; they are invariant across all nodes.
    let patterns_lower: Vec<String> = release_patterns
        .iter()
        .map(|p| p.to_ascii_lowercase())
        .collect();
    // A suffix match already covers the exact-equality case.
    let matches_release = |callee: &str| -> bool {
        let callee_lower = callee.to_ascii_lowercase();
        patterns_lower
            .iter()
            .any(|p| callee_lower.ends_with(p.as_str()))
    };

    ctx.cfg
        .node_indices()
        .filter(|&idx| {
            let info = &ctx.cfg[idx];
            // Direct release: the node itself is a call to a release function.
            let direct_release = info.kind == StmtKind::Call
                && info
                    .call
                    .callee
                    .as_deref()
                    .is_some_and(|callee| matches_release(callee));
            // Inner-call-release-in-arg: any node kind, the close lives in
            // an argument to the outer wrapper.
            direct_release
                || info
                    .arg_callees
                    .iter()
                    .filter_map(|c| c.as_deref())
                    .any(|callee| matches_release(callee))
        })
        .collect()
}

/// Decide whether every path from `acquire` to `exit` goes through one of
/// `release_nodes`.
///
/// Edges that represent "the acquired variable is null" on an
/// `if (acquire_var)` / `if (!acquire_var)` guard are treated as
/// not-applicable: on such a path the resource was never produced, so no
/// release is needed.  This is what makes the canonical
/// `FILE *f = fopen(...); if (f) fclose(f);` idiom pass; otherwise the
/// false edge of the null check would be an acquire→exit path without a
/// release and would yield a may-leak false positive.
fn release_on_all_exit_paths(
    ctx: &AnalysisContext,
    acquire: NodeIndex,
    release_nodes: &[NodeIndex],
    exit: NodeIndex,
) -> bool {
    // Fast path: a release that post-dominates the acquire is on every path
    // to the exit, so no path search is needed.
    let post_dominated = dominators::compute_post_dominators(ctx.cfg).is_some_and(|post_doms| {
        release_nodes
            .iter()
            .any(|&release| dominators::dominates(&post_doms, release, acquire))
    });
    if post_dominated {
        return true;
    }

    // Slow path: explicit path search with null-guard / err-guard pruning,
    // keyed on the variables the acquire node defines.
    let acquire_info = &ctx.cfg[acquire];
    let release_set: HashSet<NodeIndex> = release_nodes.iter().copied().collect();
    all_paths_pass_through(
        ctx,
        acquire,
        exit,
        &release_set,
        acquire_info.taint.defines.as_deref(),
        &acquire_info.taint.extra_defines,
    )
}

/// Report whether the edge of kind `edge_kind` leaving `src` is the branch
/// of a null guard on `acquire_var` on which the variable is null/falsy,
/// i.e. the branch where the resource was not actually acquired.
///
/// Accepted guard shapes:
///   * `if (var)`, where the false edge is the null branch
///   * `if (!var)`, where the true edge is the null branch
///
/// Anything whose condition text, after removing leading `!` and
/// surrounding parentheses, is not exactly the variable name is rejected:
/// comparisons (`if (var != NULL)`), method calls (`if (var.is_valid())`),
/// and composite conditions (`if (var && cond)`).
fn is_null_guard_false_edge(
    ctx: &AnalysisContext,
    src: NodeIndex,
    edge_kind: EdgeKind,
    acquire_var: &str,
) -> bool {
    let info = &ctx.cfg[src];

    // Only an `if` whose condition mentions exactly the acquired variable
    // can be a null guard for it.
    let guards_only_acquire_var = info.kind == StmtKind::If
        && info.condition_vars.len() == 1
        && info.condition_vars[0] == acquire_var;
    if !guards_only_acquire_var {
        return false;
    }

    // Peel negation and parentheses; what remains must be the bare variable.
    let bare_condition = info.condition_text.as_deref().map(|text| {
        text.trim()
            .trim_start_matches('!')
            .trim()
            .trim_matches(['(', ')'])
            .trim()
    });
    if bare_condition != Some(acquire_var) {
        return false;
    }

    // A negated check takes its true edge when the variable is null; a plain
    // truthiness check takes its false edge.
    match info.condition_negated {
        true => edge_kind == EdgeKind::True,
        false => edge_kind == EdgeKind::False,
    }
}

/// Detect Go's err-companion guard, `if err != nil { return err }`, where
/// `err` is one of the acquire's companion defines
/// (`f, err := os.Open(...)`).  On the branch where `err` is non-nil the
/// acquire returned the zero value, so nothing was acquired and the path is
/// not a leak path.
///
/// Returns `true` only for the edge that follows the err-non-nil branch.
/// Requirements: `src` is an `If` whose condition references exactly one
/// variable, that variable is listed in `extra_defines`, the
/// whitespace-stripped condition text contains `<var>!=nil` or
/// `<var>==nil`, and `edge_kind` has the matching polarity.
fn is_err_companion_guard_edge(
    ctx: &AnalysisContext,
    src: NodeIndex,
    edge_kind: EdgeKind,
    extra_defines: &[String],
) -> bool {
    let info = &ctx.cfg[src];
    if extra_defines.is_empty() || info.kind != StmtKind::If || info.condition_vars.len() != 1 {
        return false;
    }

    let cond_var = &info.condition_vars[0];
    let is_companion = extra_defines.iter().any(|e| e == cond_var);
    let (true, Some(text)) = (is_companion, info.condition_text.as_deref()) else {
        return false;
    };

    // Strip all whitespace so `err!=nil` and `err != nil` compare alike.
    let compact: String = text.chars().filter(|c| !c.is_whitespace()).collect();

    // Does a true condition mean "err is non-nil"?  `!=` is tried before
    // `==`; a condition with neither comparison is not an err guard.
    let true_means_err = if compact.contains(&format!("{cond_var}!=nil")) {
        true
    } else if compact.contains(&format!("{cond_var}==nil")) {
        false
    } else {
        return false;
    };

    // A negated condition flips which edge carries the err-non-nil case.
    let err_edge = if true_means_err != info.condition_negated {
        EdgeKind::True
    } else {
        EdgeKind::False
    };
    edge_kind == err_edge
}

/// Check if all paths from `from` to `to` pass through at least one node in `through`,
/// pruning null-guard-false edges for the acquired variable so the canonical
/// `if (var) release(var);` idiom is recognised as a complete release.
fn all_paths_pass_through(
    ctx: &AnalysisContext,
    from: NodeIndex,
    to: NodeIndex,
    through: &HashSet<NodeIndex>,
    acquire_var: Option<&str>,
    extra_defines: &[String],
) -> bool {
    use std::collections::VecDeque;

    if through.contains(&from) {
        return true;
    }

    // BFS, tracking whether we've passed through a required node
    let mut visited = HashSet::new();
    let mut queue = VecDeque::new();
    queue.push_back((from, false));
    visited.insert((from, false));

    while let Some((node, passed)) = queue.pop_front() {
        if node == to {
            if !passed {
                return false; // Found a path to exit without passing through release
            }
            continue;
        }

        // Treat a Return-of-err-companion as a passing exit: in Go's
        // `f, err := os.Open(...); if err != nil { return err }` shape the
        // err-return path does not actually own a resource (acquire returned
        // the zero value), so reaching such a Return is not a leak.
        let info = &ctx.cfg[node];
        if info.kind == StmtKind::Return
            && !extra_defines.is_empty()
            && !info.taint.uses.is_empty()
            && info
                .taint
                .uses
                .iter()
                .all(|u| extra_defines.iter().any(|e| e == u))
        {
            continue;
        }

        for edge in ctx.cfg.edges(node) {
            // Prune null-guard-false edges: those represent "var is null",
            // a path on which the resource was never actually acquired.
            if let Some(var) = acquire_var
                && is_null_guard_false_edge(ctx, node, *edge.weight(), var)
            {
                continue;
            }
            // Prune Go err-companion guard edges: `if err != nil { return err }`
            // after `f, err := os.Open(...)` takes the err branch on which the
            // resource handle is the zero value and was never acquired.
            if is_err_companion_guard_edge(ctx, node, *edge.weight(), extra_defines) {
                continue;
            }
            let succ = edge.target();
            let new_passed = passed || through.contains(&succ);
            let state = (succ, new_passed);
            if visited.insert(state) {
                queue.push_back(state);
            }
        }
    }

    true
}

/// Check whether the acquired variable is stored into a struct field (ownership
/// transfer) downstream of the acquire node.  Patterns recognised:
///   - `ptr->field = var`   (C arrow operator)
///   - `obj.field = var`    (C dot / generic field store)
///   - `list->next = ...`   (linked-list insertion)
///
/// If the variable is transferred, there is no leak, the receiving struct is
/// responsible for the lifetime.
///
/// Walks the CFG breadth-first from the successors of `acquire`.  A node
/// counts as a transfer when it uses or defines the acquired variable and
/// its source span contains `->` or a `.field =` store.  A node that
/// redefines the variable without such a store ends exploration of that
/// path.  Returns `false` when the acquire node defines no variable or no
/// transfer is found.
fn is_ownership_transferred(ctx: &AnalysisContext, acquire: NodeIndex) -> bool {
    use std::collections::VecDeque;

    let Some(acquired_var) = ctx.cfg[acquire].taint.defines.as_deref() else {
        return false;
    };

    let mut visited: HashSet<NodeIndex> = HashSet::new();
    let mut queue: VecDeque<NodeIndex> = VecDeque::new();
    queue.extend(
        ctx.cfg
            .neighbors(acquire)
            .filter(|&succ| visited.insert(succ)),
    );

    while let Some(node) = queue.pop_front() {
        let info = &ctx.cfg[node];
        let redefines_var = info.taint.defines.as_deref() == Some(acquired_var);
        let references_var =
            redefines_var || info.taint.uses.iter().any(|u| u == acquired_var);

        if references_var {
            // Inspect the node's source text for a struct-field store.  Spans
            // that are empty or fall outside the source buffer are ignored.
            let (start, end) = info.ast.span;
            if start < end && end <= ctx.source_bytes.len() {
                let span_text = &ctx.source_bytes[start..end];
                // `->` anywhere in the span means pointer-to-member store;
                // otherwise look for `.field = var` (but not `==`).
                if span_text.windows(2).any(|w| w == b"->")
                    || has_dot_field_assignment(span_text)
                {
                    return true;
                }
            }
        }

        // Reaching here with `redefines_var` set means the span held no
        // field-store pattern (that case returned above), so this is a
        // genuine redefinition: the original handle is gone on this path.
        if redefines_var {
            continue;
        }

        queue.extend(
            ctx.cfg
                .neighbors(node)
                .filter(|&succ| visited.insert(succ)),
        );
    }

    false
}

/// Report whether `span_text` contains a dot-field store such as
/// `obj.field = var`.
///
/// For each `.` in the text, the bytes after it are read as an identifier
/// (ASCII alphanumerics and `_`, possibly none), then optional ASCII
/// whitespace; the text matches when the next byte is a single `=` that is
/// not immediately followed by another `=`.  So `obj.method(...)` and
/// `a.b == c` do not match.
fn has_dot_field_assignment(span_text: &[u8]) -> bool {
    span_text
        .iter()
        .enumerate()
        .filter(|&(_, &byte)| byte == b'.')
        .any(|(dot, _)| {
            // Everything after the dot, minus the field name and any
            // whitespace that follows it.
            let mut after_field = span_text[dot + 1..]
                .iter()
                .copied()
                .skip_while(|b| b.is_ascii_alphanumeric() || *b == b'_')
                .skip_while(|b| b.is_ascii_whitespace());
            // Must be `=`, and the byte after it (if any) must not be `=`.
            after_field.next() == Some(b'=') && after_field.next() != Some(b'=')
        })
}

/// Report whether the acquired variable is handed to an ownership-taking
/// function (e.g. `FileResponse(f)`, `send_file(f)`) somewhere downstream
/// of the acquire node.  Such functions take over the handle, so there is
/// no leak.
///
/// Walks the CFG breadth-first from the successors of `acquire`.  A node
/// counts when it uses the acquired variable and either is a call whose
/// lowercased callee ends with a consuming-sink name, or has a lowercased
/// source span that contains one.  Returns `false` when the acquire node
/// defines no variable.
fn is_consumed_by_owner(ctx: &AnalysisContext, acquire: NodeIndex) -> bool {
    use std::collections::VecDeque;

    static CONSUMING_SINKS: &[&str] = &[
        "fileresponse",
        "streaminghttpresponse",
        "send_file",
        "make_response",
    ];

    let Some(acquired_var) = ctx.cfg[acquire].taint.defines.as_deref() else {
        return false;
    };

    let mut seen = HashSet::new();
    let mut worklist = VecDeque::new();
    worklist.extend(ctx.cfg.neighbors(acquire).filter(|&n| seen.insert(n)));

    while let Some(current) = worklist.pop_front() {
        let info = &ctx.cfg[current];

        // Both checks below only apply to nodes that use the variable.
        if info.taint.uses.iter().any(|u| u == acquired_var) {
            // Direct call to a consuming sink.
            let callee_is_sink = info.kind == StmtKind::Call
                && info.call.callee.as_deref().is_some_and(|callee| {
                    let callee_lower = callee.to_ascii_lowercase();
                    CONSUMING_SINKS
                        .iter()
                        .any(|sink| callee_lower.ends_with(*sink))
                });
            if callee_is_sink {
                return true;
            }

            // Sink name anywhere in the node's source text; this catches a
            // call embedded in another statement such as
            // `return FileResponse(f)`.
            let (start, end) = info.ast.span;
            if start < end && end <= ctx.source_bytes.len() {
                let span_lower = ctx.source_bytes[start..end].to_ascii_lowercase();
                let span_mentions_sink = CONSUMING_SINKS.iter().any(|sink| {
                    span_lower
                        .windows(sink.len())
                        .any(|window| window == sink.as_bytes())
                });
                if span_mentions_sink {
                    return true;
                }
            }
        }

        worklist.extend(ctx.cfg.neighbors(current).filter(|&n| seen.insert(n)));
    }

    false
}

/// For mutex pairs: report whether some call node in the CFG explicitly
/// locks the variable defined by the `acquire` node.  A lock call is one
/// whose lowercased callee ends with `.acquire` or `.lock`, or equals
/// `pthread_mutex_lock`, and which uses the variable.  When only the
/// constructor (e.g. `threading.Lock()`) is present, this returns `false`
/// so the caller can skip the finding.
fn has_explicit_lock_acquire(ctx: &AnalysisContext, acquire: NodeIndex) -> bool {
    let Some(lock_var) = ctx.cfg[acquire].taint.defines.as_deref() else {
        return false;
    };

    ctx.cfg.node_indices().any(|idx| {
        let info = &ctx.cfg[idx];
        if info.kind != StmtKind::Call {
            return false;
        }
        let Some(callee) = info.call.callee.as_deref() else {
            return false;
        };
        let callee_lower = callee.to_ascii_lowercase();
        let is_lock_call = callee_lower == "pthread_mutex_lock"
            || [".acquire", ".lock"]
                .iter()
                .any(|suffix| callee_lower.ends_with(*suffix));
        is_lock_call && info.taint.uses.iter().any(|u| u == lock_var)
    })
}

impl CfgAnalysis for ResourceMisuse {
    /// Identifier of this analysis pass.
    fn name(&self) -> &'static str {
        "resource-misuse"
    }

    /// Run the resource-leak check over the CFG in `ctx`.
    ///
    /// For each resource pair of the context's language, every acquire node
    /// goes through a series of suppression gates; one finding is emitted
    /// for each acquire that survives them and is not released on all paths
    /// to the exit node.  Returns no findings when the CFG has no exit node.
    fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding> {
        let pairs = rules::resource_pairs(ctx.lang);
        let Some(exit) = dominators::find_exit_node(ctx.cfg) else {
            return Vec::new();
        };

        let mut findings = Vec::new();

        for pair in pairs {
            let is_mutex_pair = pair.resource_name == "mutex";
            let is_db_connection_pair = pair.resource_name == "db connection";
            let acquire_nodes = find_acquire_nodes(ctx, pair.acquire, pair.exclude_acquire);
            let release_nodes = find_release_nodes(ctx, pair.release);

            for &acquire in &acquire_nodes {
                let info = &ctx.cfg[acquire];
                let acquired_var = info.taint.defines.as_deref();

                // Resources inside managed cleanup scopes (Python `with`,
                // Java try-with-resources) are released by the scope.
                if info.managed_resource {
                    continue;
                }

                // `obj.connect("event-name", callback)` event-handler
                // registrations share the `connect` / `cursor` callee
                // suffix with real DB acquires.  Sphinx
                // app.connect("config-inited", on_init), Flask blueprint
                // handlers, and MQTT client.connect("topic", on_msg) all
                // pass a string-literal event name plus a callable
                // identifier, whereas SQLAlchemy `engine.connect()` and
                // `sqlite3.connect("path.db")` pass no args or a single
                // string.  Only applied to the `db connection` pair so
                // file/socket/mutex pairs are untouched.
                if is_db_connection_pair && is_event_handler_register_shape(info) {
                    continue;
                }

                // SAFE-FOR-FIELD-LHS (Go only): an acquire assigned to a
                // member expression, e.g. `b.cpuprof = os.Create(...)`,
                // transfers ownership to the containing struct; closing is
                // the job of a paired Stop()/Release() method on the
                // struct's lifecycle.  Mirrors the gate in
                // src/state/transfer.rs::apply_call.  Production trigger:
                // prometheus cmd/promtool/tsdb.go::startProfiling cluster.
                // Go only, because TS/JS class-field acquires
                // (`this.fd = fs.openSync(...)`) are still expected to be
                // tracked and the leak fixtures rely on it.
                if ctx.lang == Lang::Go && acquired_var.is_some_and(|var| var.contains('.')) {
                    continue;
                }

                // A deferred release of the same variable (Go
                // `defer f.Close()`) guarantees cleanup on all exit paths,
                // early returns included.
                let released_in_defer = acquired_var.is_some_and(|var| {
                    release_nodes.iter().any(|&release| {
                        let release_info = &ctx.cfg[release];
                        release_info.in_defer && release_info.taint.uses.iter().any(|u| u == var)
                    })
                });
                if released_in_defer {
                    continue;
                }

                let may_leak = !release_on_all_exit_paths(ctx, acquire, &release_nodes, exit)
                    && !is_ownership_transferred(ctx, acquire)
                    && !is_consumed_by_owner(ctx, acquire);
                if !may_leak {
                    continue;
                }

                // Mutex pairs are only reported when an explicit
                // .acquire()/.lock() call on the variable exists.
                if is_mutex_pair && !has_explicit_lock_acquire(ctx, acquire) {
                    continue;
                }

                // A sibling closure / event handler in this file may
                // release the same variable.  Common JS/TS shape:
                // `const ws = new WebSocket(url);
                // socket.on("close", () => ws.close())`.  That release
                // sits in a nested body the per-body CFG can't see, so the
                // structural check above would fire erroneously.  Matching
                // is by acquired variable name; closure captures share the
                // binding name with the outer handle.
                let released_in_closure = acquired_var.is_some_and(|var| {
                    ctx.closure_released_var_names
                        .map(|names| names.contains(var))
                        .unwrap_or(false)
                });
                if released_in_closure {
                    continue;
                }

                let callee_desc = info.call.callee.as_deref().unwrap_or("(acquire)");
                let rule_id = if is_mutex_pair {
                    "cfg-lock-not-released"
                } else {
                    "cfg-resource-leak"
                };

                findings.push(CfgFinding {
                    rule_id: rule_id.to_string(),
                    title: format!("{} may leak", pair.resource_name),
                    severity: Severity::Medium,
                    confidence: Confidence::Medium,
                    span: info.ast.span,
                    message: format!(
                        "`{callee_desc}` acquires {} but not all exit paths \
                         release it",
                        pair.resource_name
                    ),
                    evidence: vec![acquire],
                    score: None,
                });
            }
        }

        findings
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::cfg::{CallMeta, NodeInfo, StmtKind};

    /// Build an `obj.connect(...)` call node.  `literals[i]` is the string
    /// literal passed at position `i` (`None` when that argument is not a
    /// literal); `uses[i]` lists the identifiers referenced by argument `i`.
    fn connect_call(literals: &[Option<&str>], uses: &[&[&str]]) -> NodeInfo {
        let call = CallMeta {
            callee: Some("obj.connect".into()),
            arg_string_literals: literals.iter().map(|lit| lit.map(Into::into)).collect(),
            arg_uses: uses
                .iter()
                .map(|idents| idents.iter().map(|&ident| ident.into()).collect())
                .collect(),
            ..Default::default()
        };
        NodeInfo {
            kind: StmtKind::Call,
            call,
            ..Default::default()
        }
    }

    #[test]
    fn event_handler_shape_recognises_sphinx_connect() {
        // app.connect("config-inited", _on_init)
        let node = connect_call(&[Some("config-inited"), None], &[&[], &["_on_init"]]);
        assert!(is_event_handler_register_shape(&node));
    }

    #[test]
    fn event_handler_shape_recognises_self_method_callback() {
        // client.connect("device/+", self._on_status)
        let node = connect_call(
            &[Some("device/+"), None],
            &[&[], &["self", "_on_status"]],
        );
        assert!(is_event_handler_register_shape(&node));
    }

    #[test]
    fn event_handler_shape_rejects_url_first_arg() {
        // engine.connect("postgres://localhost/mydb")
        let node = connect_call(&[Some("postgres://localhost/mydb")], &[&[]]);
        assert!(!is_event_handler_register_shape(&node));
    }

    #[test]
    fn event_handler_shape_rejects_oracle_string_args() {
        // cx_Oracle.connect("user", "pass", "dsn"): the second argument is
        // a literal, so `arg_uses[1]` holds no identifier.
        let node = connect_call(
            &[Some("user"), Some("pass"), Some("dsn")],
            &[&[], &[], &[]],
        );
        assert!(!is_event_handler_register_shape(&node));
    }

    #[test]
    fn event_handler_shape_rejects_no_args() {
        // engine.connect()
        let node = connect_call(&[], &[]);
        assert!(!is_event_handler_register_shape(&node));
    }

    #[test]
    fn event_handler_shape_rejects_single_string_arg() {
        // sqlite3.connect("path.db")
        let node = connect_call(&[Some("path.db")], &[&[]]);
        assert!(!is_event_handler_register_shape(&node));
    }

    #[test]
    fn event_handler_shape_rejects_ident_first_arg() {
        // signal.connect(receiver_func, sender=...) is already covered by
        // the static exclude list entry `signal.connect`, but the shape
        // check must reject it too: the first argument is not a string
        // literal.
        let node = connect_call(&[None], &[&["receiver_func"]]);
        assert!(!is_event_handler_register_shape(&node));
    }
}