Python FP and docs updates (#58)

* refactor: Update comments for clarity and add expectations.json files for performance metrics
* feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks
* feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks
* refactor: Simplify code formatting for better readability in multiple files
* refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration
* feat: Update Java and Python patterns to include new security rules
* refactor: Improve comment clarity and consistency across multiple Rust files
* refactor: Simplify code formatting for improved readability in integration tests and module files
* refactor: Improve comment formatting and enhance clarity in assertions across multiple files
2026-06-21 20:18:06 +02:00 · 2026-04-29 19:53:34 -04:00 · 2026-04-29 19:53:34 -04:00 · a438886217
commit a438886217
parent 4db0805de6
291 changed files with 9485 additions and 3851 deletions
--- a/tests/abstract_transfer_tests.rs
+++ b/tests/abstract_transfer_tests.rs
@ -187,7 +187,7 @@ fn interval_join_clamped_widens_range() {
 #[test]
 fn interval_join_identity_vs_clamped_is_top() {
    // Different flow shapes cannot be combined into a single bounded
-    // form — conservative fallback is Top.
+    // form; conservative fallback is Top.
    let a = IntervalTransfer::Identity;
    let b = IntervalTransfer::Clamped { lo: 0, hi: 10 };
    assert_eq!(a.join(&b), IntervalTransfer::Top);
@ -296,7 +296,7 @@ fn transfer_apply_combines_subdomains() {
    // Interval identity forwards the caller-known bound.
    assert_eq!(out.interval.lo, Some(8080));
    assert_eq!(out.interval.hi, Some(8080));
-    // String literal-prefix overrides the caller-side input — the
+    // String literal-prefix overrides the caller-side input; the
    // callee's structural fact wins.
    assert_eq!(out.string.prefix.as_deref(), Some("https://safe.com/"));
    // Bit subdomain is always Top on cross-file transfer by design.
--- a/tests/auth_analysis_tests.rs
+++ b/tests/auth_analysis_tests.rs
@ -649,7 +649,7 @@ fn hashmap_local_noise_is_clean() {
 #[test]
 fn row_ownership_equality_is_clean() {
    // `if owner_id != user.id { return ... }` is a row-level
-    // ownership check — both the row-fetching call and any downstream
+    // ownership check; both the row-fetching call and any downstream
    // uses of the row's fields should be considered authorized.
    assert_absent(
        "row_ownership_equality.rs",
@ -670,7 +670,7 @@ fn row_ownership_no_early_exit_flags() {
 #[test]
 fn helper_scoped_params_is_clean() {
    // A library helper whose internal work is `result.insert(..)`
-    // on a locally-constructed HashSet is not a sink — the call is
+    // on a locally-constructed HashSet is not a sink; the call is
    // classified as non-sink because the receiver is the locally-bound
    // collection.
    assert_absent("helper_scoped_params.rs", "rs.auth.missing_ownership_check");
@ -688,7 +688,7 @@ fn self_scoped_user_is_clean() {
 fn true_positive_missing_check_flags() {
    // Positive control: an authenticated handler that deletes a doc
    // and publishes against a group without any ownership/membership
-    // check — must still flag.
+    // check; must still flag.
    assert_has(
        "true_positive_missing_check.rs",
        "rs.auth.missing_ownership_check",
@ -763,7 +763,7 @@ fn db_connection_type_inferred_is_clean() {
    // inferred as a `DatabaseConnection` via SSA `constructor_type`
    // (through `peel_identity_suffix`).  The handler logs the caller's
    // own id; no foreign scoped id reaches the sink, so the ownership
-    // gate has nothing to flag — the type-facts refinement must not
+    // gate has nothing to flag; the type-facts refinement must not
    // introduce a false positive here.
    assert_absent(
        "db_connection_type_inferred.rs",
--- a/tests/benchmark/RESULTS.md
+++ b/tests/benchmark/RESULTS.md
@ -4,13 +4,13 @@ Current baseline (2026-04-29):

 | Metric    | File-level | Rule-level | CI floor |
 |-----------|------------|------------|----------|
-| Precision | 0.991      | 0.991      | 0.861    |
-| Recall    | 0.995      | 0.995      | 0.944    |
-| F1        | 0.993      | 0.993      | 0.901    |
+| Precision | 0.996      | 0.996      | 0.861    |
+| Recall    | 1.000      | 1.000      | 0.944    |
+| F1        | 0.998      | 0.998      | 0.901    |

-Corpus: 433 cases across 10 languages, 432 evaluated (1 disabled). Per-run JSON lands in `tests/benchmark/results/` (`latest.json` plus dated snapshots). See `README.md` for what the scoring modes mean and how to run a subset.
+Corpus: 451 cases across 10 languages, 449 evaluated (no disabled). Per-run JSON lands in `tests/benchmark/results/` (`latest.json` plus dated snapshots). See `README.md` for what the scoring modes mean and how to run a subset.

-The corpus is mostly synthetic 8-20 line fixtures, one vulnerability or one safe pattern per file. A smaller real-CVE replay set under `cve_corpus/` covers 18 published CVEs across all 10 languages. Both contribute to the headline numbers.
+The corpus is mostly synthetic 8-20 line fixtures, one vulnerability or one safe pattern per file. A smaller real-CVE replay set under `cve_corpus/` covers 20 published CVEs across all 10 languages. Both contribute to the headline numbers.

 ## Real CVE coverage

@ -20,14 +20,19 @@ Real disclosed CVEs reduced to minimal reproducers, vulnerable + patched pair pe
 |----------------|------------|----------------------------|----------------------|-----------------|----------|
 | CVE-2023-48022 | Python     | Ray                        | Apache-2.0           | CMDI            | detected |
 | CVE-2017-18342 | Python     | PyYAML                     | MIT                  | Deserialization | detected |
+| CVE-2025-69662 | Python     | geopandas                  | BSD-3-Clause         | SQL Injection   | detected |
+| CVE-2026-33626 | Python     | LMDeploy                   | Apache-2.0           | SSRF            | detected |
 | CVE-2019-14939 | JavaScript | mongo-express              | MIT                  | code_exec       | detected |
 | CVE-2025-64430 | JavaScript | Parse Server               | Apache-2.0           | SSRF            | detected |
 | CVE-2023-26159 | TypeScript | follow-redirects           | MIT                  | SSRF            | detected |
+| GHSA-4x48-cgf9-q33f | TypeScript | Novu                       | MIT                  | SSRF            | detected |
 | CVE-2022-30323 | Go         | hashicorp/go-getter        | MPL-2.0              | CMDI            | detected |
 | CVE-2023-3188  | Go         | owncast                    | MIT                  | SSRF            | detected |
 | CVE-2024-31450 | Go         | owncast                    | MIT                  | path_traversal  | detected |
 | CVE-2015-7501  | Java       | Apache Commons Collections | Apache-2.0           | Deserialization | detected |
 | CVE-2017-12629 | Java       | Apache Solr                | Apache-2.0           | CMDI            | detected |
+| CVE-2022-1471  | Java       | SnakeYAML                  | Apache-2.0           | Deserialization | detected |
+| CVE-2022-42889 | Java       | Apache Commons Text        | Apache-2.0           | code_exec       | detected |
 | CVE-2013-0156  | Ruby       | Ruby on Rails              | MIT                  | Deserialization | detected |
 | CVE-2020-8130  | Ruby       | Rake                       | MIT                  | CMDI            | detected |
 | CVE-2017-9841  | PHP        | PHPUnit                    | BSD-3-Clause         | code_exec       | detected |
@ -60,6 +65,9 @@ Most recent first. Metrics are rule-level on the corpus size at that point.

 | Date       | Change                                                                       | Corpus | P     | R     | F1    |
 |------------|------------------------------------------------------------------------------|--------|-------|-------|-------|
+| 2026-04-29 | Java SnakeYAML + Text4Shell patterns; CVE-2022-1471 and CVE-2022-42889 detected | 449 | 0.996 | 1.000 | 0.998 |
+| 2026-04-29 | Indirect-validator branch narrowing (`const err = validate(x); if (err) throw …;`) + helper-summary all_validated propagation; Novu GHSA-4x48-cgf9-q33f detected | 445 | 0.991 | 1.000 | 0.995 |
+| 2026-04-29 | Python f-string SQLi pattern + bindparams sanitizer + HttpClient SSRF rules; CVE-2025-69662 (geopandas) and CVE-2026-33626 (LMDeploy) detected | 439 | 0.991 | 1.000 | 0.995 |
 | 2026-04-29 | Phantom-Param-aware field suppression: CVE-2023-3188 detected, FP guards hold | 432    | 0.995 | 1.000 | 0.998 |
 | 2026-04-28 | Ruby bare `Kernel#open` CMDI sink, exact-match sigil on label matchers        | 428    | 0.995 | 1.000 | 0.998 |
 | 2026-04-28 | Go SSRF/FILE_IO sink expansion (`http.DefaultClient.*`, `os.Remove`/`WriteFile`) plus Decode-writeback container op | 426 | 0.995 | 1.000 | 0.998 |
--- a/tests/benchmark/corpus/go/safe/safe_test_helper_fatal.go
+++ b/tests/benchmark/corpus/go/safe/safe_test_helper_fatal.go
@ -0,0 +1,62 @@
+// go-safe-realrepo-006 — distilled from minio cmd/admin-handlers-users_test.go
+// (and the identical pattern across xl-storage_test.go, erasure-healing_test.go,
+// 49+34+12+11+9+7+7+5 findings on minio test files alone).
+//
+// `cfg-error-fallthrough` looks for `if err != nil { … }` whose body fails to
+// terminate.  Test code idiomatically writes
+//
+//     if err != nil { c.Fatalf("...", err) }
+//     postSink(...)
+//
+// where `c.Fatalf` (a `*testing.T` method) calls `runtime.Goexit()` and the
+// `postSink` line is unreachable on the error path.  The rule classified
+// this as fall-through because `Fatalf` looks like an ordinary call.  Engine
+// fix: `src/cfg_analysis/error_handling.rs::call_never_returns` recognises
+// `Fatal*`, `Panic*`, `FailNow`, `os.Exit`, `runtime.Goexit`, `log.Fatal*`,
+// `panic`, etc. as terminators inside `terminates_on_all_paths`.
+
+package safe
+
+import (
+	"context"
+	"log"
+	"os"
+	"testing"
+)
+
+type clientHelper struct {
+	bucket string
+}
+
+func (c *clientHelper) MakeBucket(ctx context.Context, name string) error { return nil }
+func (c *clientHelper) PutObject(ctx context.Context, name string) error  { return nil }
+
+func setupBucket(t *testing.T, c *clientHelper, ctx context.Context) {
+	if err := c.MakeBucket(ctx, c.bucket); err != nil {
+		t.Fatalf("bucket creat error: %v", err)
+	}
+	if err := c.PutObject(ctx, "obj"); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func runWithExit(c *clientHelper, ctx context.Context) {
+	if err := c.MakeBucket(ctx, c.bucket); err != nil {
+		log.Fatalf("init failed: %v", err)
+	}
+	c.PutObject(ctx, "obj")
+}
+
+func runWithOsExit(c *clientHelper, ctx context.Context) {
+	if err := c.MakeBucket(ctx, c.bucket); err != nil {
+		os.Exit(1)
+	}
+	c.PutObject(ctx, "obj")
+}
+
+func runWithPanic(c *clientHelper, ctx context.Context) {
+	if err := c.MakeBucket(ctx, c.bucket); err != nil {
+		panic(err)
+	}
+	c.PutObject(ctx, "obj")
+}
--- a/tests/benchmark/corpus/javascript/safe/safe_localised_gherkin_regex.js
+++ b/tests/benchmark/corpus/javascript/safe/safe_localised_gherkin_regex.js
@ -0,0 +1,26 @@
+// js-safe-realrepo-006 — distilled from gogs `public/plugins/codemirror-5.17.0/
+// mode/gherkin/gherkin.js` line 107.  The CodeMirror Gherkin tokenizer ships
+// localised feature-keyword aliases as one large regex inside a boolean
+// sub-condition.  The CFG builder textualises every sub-condition of a
+// boolean chain and truncates that text to MAX_CONDITION_TEXT_LEN (256
+// bytes) for diagnostics; naive byte-slice truncation panicked when byte
+// 256 landed inside a multi-byte UTF-8 character (here Gurmukhi `ਖ`,
+// 3-byte UTF-8).  Engine fix:
+// `src/utils/snippet.rs::truncate_at_char_boundary`, applied at three CFG
+// sites and two symex display sites.  Invariant: scanning this file must
+// terminate without panicking the rayon worker, regardless of where byte
+// 256 lands inside the regex.
+
+function tokenLocalisedFeatureKeyword(stream, state) {
+    if (
+        !state.inKeywordLine &&
+        state.allowFeature &&
+        stream.match(/(機能|功能|フィーチャ|기능|โครงหลัก|ความสามารถ|ความต้องการทางธุรกิจ|ಹೆಚ್ಚಳ|గుణము|ಮುಹಾಂದರಾ|ਮੁਹਾਂਦਰਾ|ਨਕਸ਼ ਨੁਹਾਰ|ਖਾਸੀਅਤ|रूप लेख|وِیژگی|خاصية|תכונה|Функціонал|Функция|Функционалност|Функционал|Үзенчәлеклелек|Свойство|Особина|Мөмкинлек|Могућност|Λειτουργία|Δυνατότητα|Właściwość|Vlastnosť|Trajto|Tính năng|Savybė|Požiadavka|Požadavek|Potrzeba biznesowa|Özellik|Osobina|Ominaisuus|Omadus|Mogućnost|Mogucnost|Jellemző|Funzionalità|Funktionalitéit|Funktionalität|Funkcja|Funkcionalnost|Funkcionalitāte|Funkcia|Fungsi|Functionaliteit|Funcționalitate|Funcţionalitate|Functionalitate|Funcionalitat|Funcionalidade|Fonctionnalité|Fitur|Fīča|Feature|Eiginleiki|Egenskap|Egenskab|Característica|Caracteristica|Business Need|Aspekt|Arwedd|Ability):/)
+    ) {
+        state.inKeywordLine = true;
+        return "keyword";
+    }
+    return null;
+}
+
+module.exports = { tokenLocalisedFeatureKeyword };
--- a/tests/benchmark/corpus/python/auth/vuln_fastapi_route_no_dependencies.py
+++ b/tests/benchmark/corpus/python/auth/vuln_fastapi_route_no_dependencies.py
@ -0,0 +1,19 @@
+"""
+Vulnerable counterpart to safe_fastapi_route_dependencies_auth.py: same
+shape but with NO `dependencies=[Depends(...)]` keyword arg on the route
+decorator.  The FastAPI ownership-check rule must still fire — the
+recognizer must not blanket-suppress every FastAPI route, only those
+with an actual dependency-injected auth check.
+"""
+from fastapi import FastAPI
+
+router = FastAPI()
+
+
+@router.delete("/{connection_id}")
+def delete_connection(connection_id: str, session):
+    """No auth — must still fire missing_ownership_check."""
+    connection = session.scalar(select(Connection).filter_by(conn_id=connection_id))
+    if connection is None:
+        raise HTTPException(404, "not found")
+    session.delete(connection)
--- a/tests/benchmark/corpus/python/safe/safe_fastapi_route_dependencies_auth.py
+++ b/tests/benchmark/corpus/python/safe/safe_fastapi_route_dependencies_auth.py
@ -0,0 +1,43 @@
+"""
+Distilled from airflow `airflow-core/src/airflow/api_fastapi/core_api/routes/public/connections.py`:
+    @connections_router.delete(
+        "/{connection_id}",
+        dependencies=[Depends(requires_access_connection(method="DELETE"))],
+    )
+    def delete_connection(connection_id: str, session: SessionDep):
+        connection = session.scalar(select(Connection).filter_by(conn_id=connection_id))
+        ...
+        session.delete(connection)
+
+The route's `dependencies=[Depends(requires_access_*)]` declares the auth gate at
+the FastAPI level.  The ownership-check rule must recognise the dependency-
+injected check and not flag the row-fetch / mutation as missing ownership.
+"""
+from fastapi import Depends, FastAPI
+
+router = FastAPI()
+
+
+def requires_access_connection(method: str):
+    def check():
+        ...
+    return check
+
+
+@router.delete(
+    "/{connection_id}",
+    dependencies=[Depends(requires_access_connection(method="DELETE"))],
+)
+def delete_connection(connection_id: str, session):
+    connection = session.scalar(select(Connection).filter_by(conn_id=connection_id))
+    if connection is None:
+        raise HTTPException(404, "not found")
+    session.delete(connection)
+
+
+@router.get(
+    "/{connection_id}",
+    dependencies=[Depends(requires_access_connection(method="GET"))],
+)
+def get_connection(connection_id: str, session):
+    return session.scalar(select(Connection).filter_by(conn_id=connection_id))
--- a/tests/benchmark/corpus/python/safe/safe_fastapi_route_level_row_fetch.py
+++ b/tests/benchmark/corpus/python/safe/safe_fastapi_route_level_row_fetch.py
@ -0,0 +1,79 @@
+"""
+Distilled from airflow `airflow-core/src/airflow/api_fastapi/core_api/routes/public/dag_run.py`:
+
+    @dag_run_router.post(
+        "",
+        dependencies=[Depends(requires_access_dag(method="POST", access_entity=DagAccessEntity.RUN))],
+    )
+    def trigger_dag_run(dag_id, body, dag_bag, user, session, request):
+        dm = session.scalar(select(DagModel).where(DagModel.dag_id == dag_id))
+        ...
+        dag = get_latest_version_of_dag(dag_bag, dag_id, session)
+        dag_run = dag.create_dagrun(run_id=params["run_id"], ...)
+
+The route-level `dependencies=[Depends(requires_access_dag(method="POST",
+access_entity=...))]` decorator authorizes the entire handler — the
+handler body's `dag.create_dagrun(...)` call (where `dag` is a row
+fetched using the auth-checked `dag_id`) must be covered too, even
+though the call's subject is the bare row variable rather than the
+original id.
+
+Before the route-level fix, `auth_check_covers_subject` walked
+`check.subjects` (empty for decorator-level checks whose inner call
+carries no per-arg ValueRef) and never matched.  After the fix,
+`is_route_level=true` short-circuits coverage to true for any
+non-login-guard route-level check, suppressing both the row-fetch
+ownership flag and the downstream method-call ownership flag.
+"""
+
+from fastapi import Depends, FastAPI
+
+router = FastAPI()
+
+
+def requires_access_dag(method: str, access_entity=None):
+    def check():
+        ...
+    return check
+
+
+def get_latest_version_of_dag(dag_bag, dag_id, session):
+    return dag_bag.get(dag_id)
+
+
+@router.get(
+    "/{dag_id}/runs/{run_id}",
+    dependencies=[Depends(requires_access_dag(method="GET"))],
+)
+def get_dag_run(dag_id: str, run_id: str, session):
+    """
+    Route-level guard authorizes the entire handler.  The
+    `filter_by(dag_id=dag_id, run_id=run_id)` ORM call must NOT trip
+    `py.auth.missing_ownership_check` even though the per-arg subjects
+    are id-shaped — the route-level decorator covers them.
+    """
+    dag_run = session.scalar(
+        select(DagRun).filter_by(dag_id=dag_id, run_id=run_id)
+    )
+    if dag_run is None:
+        raise HTTPException(404, "not found")
+    return dag_run
+
+
+@router.delete(
+    "/{dag_id}",
+    dependencies=[Depends(requires_access_dag(method="DELETE"))],
+)
+def delete_dag(dag_id: str, session):
+    """
+    Same shape, DELETE method.  The row fetch and row-variable
+    method call must also be fully covered by the route-level guard.
+    `dag` is fetched using the auth-checked `dag_id`; without the
+    `is_route_level` short-circuit, the per-name walk would mismatch
+    `dag.<method>` (subject is the row var) against the check's
+    empty subjects vec.
+    """
+    dag = session.scalar(select(DagModel).where(DagModel.dag_id == dag_id))
+    if dag is None:
+        raise HTTPException(404, "not found")
+    dag.cleanup_runs(session=session)
--- a/tests/benchmark/corpus/python/safe/safe_pytest_sqlalchemy_session.py
+++ b/tests/benchmark/corpus/python/safe/safe_pytest_sqlalchemy_session.py
@ -0,0 +1,33 @@
+"""
+Distilled from airflow `tests/unit/models/test_backfill.py` and
+`providers/google/tests/unit/google/cloud/hooks/test_dlp.py`: pytest test
+methods that take a SQLAlchemy `session` fixture by name and call
+`session.commit()` / `session.add(...)` / `session.scalar(...)`.
+
+Bare `session.<sqlalchemy_verb>` was previously classified as auth Session
+context, which triggered `unit_has_user_input_evidence` even though the
+test function takes no user input — the `session` fixture is the
+SQLAlchemy ORM Session, not the auth/HTTP session.  After the engine
+classifier narrowing, only `session.<identity_field>` (`session.user`,
+`session.user_id`, ...) is treated as auth context; SQLAlchemy verbs
+do not contribute user-input evidence on their own.
+"""
+
+
+def test_reverse_and_depends_on_past_fails(dep_on_past, dag_maker, session):
+    with dag_maker() as dag:
+        pass
+    session.commit()
+    b = _create_backfill(
+        dag_id=dag.dag_id,
+        from_date="2021-01-01",
+        to_date="2021-01-05",
+    )
+    if dep_on_past:
+        assert b is None
+
+
+def test_create_deidentify_template_with_org_id(self, get_conn, mock_project_id):
+    get_conn.return_value.create_deidentify_template.return_value = {}
+    result = self.hook.create_deidentify_template(organization_id="ORG_ID")
+    assert result == {}
--- a/tests/benchmark/corpus/rust/auth/db_connection_type_inferred.rs
+++ b/tests/benchmark/corpus/rust/auth/db_connection_type_inferred.rs
@ -2,7 +2,7 @@
 // produces a `DatabaseConnection` via SSA `constructor_type` (through
 // `peel_identity_suffix`, which strips `.unwrap()` before matching).  The
 // handler then calls `conn.execute(..)`, a callee name that appears in
-// neither `mutation_indicator_names` nor `read_indicator_names` for Rust —
+// neither `mutation_indicator_names` nor `read_indicator_names` for Rust;
 // name-based classification returns `None`, so the ownership gate
 // already cannot flag the call.  The type-map refinement should *still*
 // leave the call unflagged (the type map produces `DbMutation`, but
--- a/tests/benchmark/corpus/rust/auth/hashmap_local_noise.rs
+++ b/tests/benchmark/corpus/rust/auth/hashmap_local_noise.rs
@ -16,7 +16,7 @@ pub async fn handle_list_peer_docs(req: Req, ctx: Ctx) -> Result<String, ()> {
    let user = auth::require_auth(&req, &ctx).await?;
    let doc_ids: Vec<i64> = vec![1, 2, 3];

-    // Pure in-memory bookkeeping — no authorization decision here.
+    // Pure in-memory bookkeeping, no authorization decision here.
    let mut counts: HashMap<i64, usize> = HashMap::new();
    let mut seen: HashSet<i64> = HashSet::new();
    for doc_id in &doc_ids {
--- a/tests/benchmark/corpus/rust/auth/row_fetch_then_authorize.rs
+++ b/tests/benchmark/corpus/rust/auth/row_fetch_then_authorize.rs
@ -2,7 +2,7 @@
 // the row by id first to obtain the resource it needs to authorize, then
 // calls a named authorization function on the fetched row.  This is the
 // canonical pattern in Lemmy's Actix handlers (and most row-level Rails /
-// Django authz code) — the authorization check appears textually after the
+// Django authz code); the authorization check appears textually after the
 // fetch but is the first thing the function does on the row.

 use std::result::Result;
--- a/tests/benchmark/corpus/rust/auth/row_ownership_equality.rs
+++ b/tests/benchmark/corpus/rust/auth/row_ownership_equality.rs
@ -41,7 +41,7 @@ pub async fn handle_delete_doc(req: Req, ctx: Ctx, doc_id: i64) -> Result<String
        return json_err("cannot delete another user's doc", 403);
    }

-    // By construction, the row belongs to `user` — so any id read from it is authorized.
+    // By construction, the row belongs to `user`, so any id read from it is authorized.
    let group_id = existing.get_i64("group_id");
    realtime::publish_to_group(group_id, "doc_deleted");
    Ok("ok".into())
--- a/tests/benchmark/corpus/rust/auth/row_ownership_no_early_exit.rs
+++ b/tests/benchmark/corpus/rust/auth/row_ownership_no_early_exit.rs
@ -31,7 +31,7 @@ pub async fn handle_update_doc(req: Req, ctx: Ctx, doc_id: i64) -> Result<String
    );
    let owner_id = existing.get_i64("user_id");

-    // Equality compared but no early exit — the check has no effect.
+    // Equality compared but no early exit, so the check has no effect.
    if owner_id != user.id {
        // missing return
        println!("not your doc (but proceeding anyway)");
--- a/tests/benchmark/corpus/rust/auth/safe_dto_int_field_axum.rs
+++ b/tests/benchmark/corpus/rust/auth/safe_dto_int_field_axum.rs
@ -1,7 +1,7 @@
 // Phase 6 D05: an Axum `Json<UpdateDoc>` extractor whose `doc_id`
 // field is declared as `i64`.  The DTO field-level taint analysis
 // proves the value reaching `db.exec` is numeric and exempts
-// `dto.doc_id` from the auth subject classifier — the rule must NOT
+// `dto.doc_id` from the auth subject classifier; the rule must NOT
 // fire because numeric DTO fields cannot bypass ownership.
 use axum::extract::Json;

--- a/tests/benchmark/corpus/rust/auth/safe_local_collection_param_types.rs
+++ b/tests/benchmark/corpus/rust/auth/safe_local_collection_param_types.rs
@ -0,0 +1,70 @@
+// Function-parameter type annotations naming an in-memory container
+// (`RoaringBitmap`, `HashMap<K, V>`, `HashSet<T>`, ...) classify the
+// receiver as `TypeKind::LocalCollection`, which the auth analyser
+// maps to `SinkClass::InMemoryLocal` (always non-auth-relevant).
+// Without this, the verb-name dispatch (`is_mutation: insert/remove`)
+// classified `unsharded.insert(docid)` /
+// `task_ids.insert(task_id)` as `DbMutation` and fired
+// `missing_ownership_check` whenever the function had at least one
+// id-shaped parameter to pass `unit_has_user_input_evidence`.
+//
+// Cluster surfaced from
+// meilisearch/index-scheduler/src/scheduler/enterprise_edition/network.rs::balance_shards
+// (`unsharded: RoaringBitmap` typed parameter) and same-pattern
+// helpers across the index-scheduler.
+
+use std::collections::{BTreeSet, HashMap, HashSet};
+
+struct RoaringBitmap;
+impl RoaringBitmap {
+    fn new() -> Self { Self }
+    fn insert(&mut self, _x: u32) -> bool { true }
+    fn remove(&mut self, _x: u32) -> bool { true }
+    fn contains(&self, _x: u32) -> bool { true }
+}
+
+// 1. Bare-typed RoaringBitmap parameter, function has id-like param
+//    `docid` so user-input-evidence fires; the receiver type proves
+//    the operation is in-memory bookkeeping.
+fn balance_shards(mut unsharded: RoaringBitmap, docid: u32) {
+    unsharded.insert(docid);
+    unsharded.remove(docid);
+}
+
+// 2. `&mut RoaringBitmap` reference; ref-stripping must reach the
+//    underlying type head.
+fn process_docids(docids: &mut RoaringBitmap, docid: u32) {
+    docids.insert(docid);
+    docids.remove(docid);
+    let _ = docids.contains(docid);
+}
+
+// 3. Lifetime-annotated reference: `&'a mut HashMap<...>`.
+//    Module-path prefix would also be dropped; head matches `HashMap`.
+fn store_shard_docids<'a>(
+    new_shard_docids: &'a mut HashMap<String, u32>,
+    shard: String,
+    docid: u32,
+) {
+    new_shard_docids.insert(shard, docid);
+}
+
+// 4. Std-collection HashSet typed param.
+fn add_user_id(ids: &mut HashSet<u64>, user_id: u64) {
+    ids.insert(user_id);
+    ids.remove(&user_id);
+}
+
+// 5. Local var bound from constructor, already covered, but pinned
+//    here as a regression guard for the `RoaringBitmap::new()`
+//    constructor entry.
+fn build_local_set(task_id: u32) -> RoaringBitmap {
+    let mut s = RoaringBitmap::new();
+    s.insert(task_id);
+    s
+}
+
+// 6. BTreeSet typed param.
+fn collect_seen(seen: &mut BTreeSet<u32>, item_id: u32) {
+    seen.insert(item_id);
+}
--- a/tests/benchmark/corpus/rust/auth/safe_local_user_view_extractor.rs
+++ b/tests/benchmark/corpus/rust/auth/safe_local_user_view_extractor.rs
@ -1,7 +1,7 @@
 // Real-repo motivation (lemmy `LocalUserView` extractor).
 //
 // Lemmy's authenticated-actor extractor type is named `LocalUserView`
-// — every route handler signature is
+// and every route handler signature is
 // `pub async fn handler(.., local_user_view: LocalUserView)`.  The
 // previous exact-name list in `is_self_actor_type_text`
 // (`CurrentUser`, `SessionUser`, `AuthUser`, `AdminUser`,
@ -44,7 +44,7 @@ pub async fn write_self_note(
    pool: &mut Pool,
    local_user_view: LocalUserView,
 ) -> Result<(), ()> {
-    // Login predicate on the actor itself — subject is the actor.
+    // Login predicate on the actor itself; subject is the actor.
    // No additional ownership check needed because the subject is the
    // caller's own row.
    let _ = is_admin(&local_user_view);
--- a/tests/benchmark/corpus/rust/auth/safe_param_type_segment_idents.rs
+++ b/tests/benchmark/corpus/rust/auth/safe_param_type_segment_idents.rs
@ -0,0 +1,78 @@
+// Internal helper whose parameter list contains type-segment idents
+// that lowercase-match the framework-request-name allow-list (`path`,
+// `request`, `ctx`, `body`).  Before the
+// `collect_param_names` Rust-parameter arm, the recursive default arm
+// pulled `std`, `path`, `Path` out of `dst: &std::path::Path` and
+// pushed them into `unit.params`; `path` then matched the
+// framework-name list and gated `unit_has_user_input_evidence` open,
+// firing `missing_ownership_check` at every id-shaped operation in
+// the body.
+//
+// Cluster surfaced from
+// meilisearch/index-scheduler/src/scheduler/process_snapshot_creation.rs::remove_tasks
+// (`unsafe fn remove_tasks(tasks: &[Task], dst: &std::path::Path,
+// index_base_map_size: usize)`).  None of the actual params (`tasks`,
+// `dst`, `sz`) match the user-input-evidence heuristic, so the rule
+// must NOT fire on the internal task-cleanup loop.
+
+struct Task {
+    uid: u32,
+}
+
+struct Database;
+
+impl Database {
+    fn delete(&self, _w: &mut u32, _u: &u32) -> Result<(), ()> {
+        Ok(())
+    }
+}
+
+struct TaskQueue {
+    all_tasks: Database,
+    canceled_by: Database,
+}
+
+fn remove_tasks(
+    tasks: &[Task],
+    dst: &std::path::Path,
+    sz: usize,
+) -> Result<(), ()> {
+    let _ = (dst, sz);
+    let mut wtxn = 0u32;
+    let task_queue = TaskQueue {
+        all_tasks: Database,
+        canceled_by: Database,
+    };
+    let TaskQueue {
+        all_tasks,
+        canceled_by,
+    } = task_queue;
+    for task in tasks {
+        all_tasks.delete(&mut wtxn, &task.uid)?;
+        canceled_by.delete(&mut wtxn, &task.uid)?;
+    }
+    Ok(())
+}
+
+// Same shape with a typed wrapper whose tail segment lowercases to
+// `path` (`PathBuf` → `pathbuf` does NOT match, but `Path` does).
+// Confirms the Rust `parameter` arm in `collect_param_names` keeps
+// `Path` out of `unit.params` even when wrapped in a generic.
+
+struct Wrapper<T>(T);
+struct PathHandle;
+struct Item {
+    uid: u32,
+}
+struct Repo;
+impl Repo {
+    fn delete(&self, _u: &u32) {}
+}
+
+fn cleanup_internal(out: Wrapper<PathHandle>, items: &[Item]) {
+    let _ = out;
+    let repo = Repo;
+    for item in items {
+        repo.delete(&item.uid);
+    }
+}
--- a/tests/benchmark/corpus/rust/auth/safe_row_fetch_multiline_let.rs
+++ b/tests/benchmark/corpus/rust/auth/safe_row_fetch_multiline_let.rs
@ -4,7 +4,7 @@
 // (the call body wraps onto the next line for readability).  Before
 // the line-counting fix, `row_population_data` recorded the
 // `let_declaration`'s start row while `op.line` saw the inner call's
-// start row — they differed by one and the row-fetch exemption
+// start row; they differed by one and the row-fetch exemption
 // missed.  Recording the **call**'s start line aligns the two and
 // the exemption fires for the multi-line shape too.

@ -52,7 +52,7 @@ pub async fn lock_comment(
    let comment_id = req.comment_id;
    let local_instance_id = local_user_view.person.instance_id;

-    // Multi-line let — the let_declaration starts on this line, but
+    // Multi-line let: the let_declaration starts on this line, but
    // the inner `CommentView::read(..)` call starts on the next line.
    // `op.line` for the read sink is the call's line, not the let's.
    let orig_comment =
--- a/tests/benchmark/corpus/rust/auth/safe_row_population_reverse_walk.rs
+++ b/tests/benchmark/corpus/rust/auth/safe_row_population_reverse_walk.rs
@ -4,7 +4,7 @@
 // `let community = Community::read(pool, req.community_id)` records
 // `community → [req.community_id]` in `row_population_data`.  An auth
 // check `check_community_user_action(&user, &community, ..)` then
-// authorises the row — and any **downstream** operation that re-uses
+// authorises the row, and any **downstream** operation that re-uses
 // `req.community_id` (a later mutation by the same id, or a related
 // view fetched by the same id) is materially covered by that check.
 //
@ -71,7 +71,7 @@ pub async fn transfer_community(
    pool: &mut Pool,
    local_user_view: LocalUserView,
 ) -> Result<(), ()> {
-    // Row fetch — `community` is populated from `req.community_id`.
+    // Row fetch: `community` is populated from `req.community_id`.
    let community = Community::read(pool, req.community_id)?;

    // Authorisation check on the fetched row.  Subject = `community`
@ -84,7 +84,7 @@ pub async fn transfer_community(
    // the row that was fetched with this id).
    CommunityActions::delete_mods_for_community(pool, req.community_id)?;

-    // Local alias of the same request field — `var_alias_chain`
+    // Local alias of the same request field, `var_alias_chain`
    // records `community_id → "req.community_id"` so the reverse-walk
    // also covers downstream sinks that pass the bare alias.  Before
    // the alias-chain fix, the next read fired
--- a/tests/benchmark/corpus/rust/auth/safe_typed_path_int_extractor.rs
+++ b/tests/benchmark/corpus/rust/auth/safe_typed_path_int_extractor.rs
@ -1,7 +1,7 @@
 // Phase 5 typed-extractor exclusion: an Axum-style `Path<i64>`
 // parameter is a framework-validated numeric extractor.  The runtime
 // guarantees a numeric value, so even though `project_id` reaches a
-// SQL helper, the rule must NOT fire — the value cannot carry an
+// SQL helper, the rule must NOT fire, the value cannot carry an
 // injection payload nor bypass ownership.
 use axum::extract::Path;

--- a/tests/benchmark/corpus/rust/auth/self_publish_email.rs
+++ b/tests/benchmark/corpus/rust/auth/self_publish_email.rs
@ -36,7 +36,7 @@ mod serde_json {
 }

 // Real-repo shape from website/src/handlers/social.rs:
-//   `realtime::publish_to_user(&ctx.env, &user.email, ...)` — publish
+//   `realtime::publish_to_user(&ctx.env, &user.email, ...)`, publish
 //   to the authed user's OWN channel keyed by their email.  The
 //   `email` / `username` / `handle` fields of a self-actor binding
 //   reference the actor's own identity, just like `id` / `user_id`,
--- a/tests/benchmark/corpus/rust/auth/self_scoped_user.rs
+++ b/tests/benchmark/corpus/rust/auth/self_scoped_user.rs
@ -5,7 +5,7 @@ mod auth { pub async fn require_auth(_r: &super::Req, _c: &super::Ctx) -> Result

 // The handler's `get_peer_ids(&db, user.id)` call below must not be
 // flagged. `user` is bound from `auth::require_auth(..)` so `user.id`
-// is the caller's own id — the call is self-referential, not a foreign
+// is the caller's own id, the call is self-referential, not a foreign
 // scoped id. The library-style helper below is a pass-through so its
 // body contains no DB sinks (the internal `user_id` → DB flow is a
 // separate pattern covered by helper-summary lifting).
--- a/tests/benchmark/corpus/rust/auth/sql_join_acl.rs
+++ b/tests/benchmark/corpus/rust/auth/sql_join_acl.rs
@ -2,7 +2,7 @@
 // against an ACL table (`group_members`) with a WHERE clause that pins
 // the row to the current user (`gm.user_id = ?1` bound to `user.id`).
 // Every returned row is membership-gated by construction, so downstream
-// uses of the row's columns (`group_id` here) are authorized — the
+// uses of the row's columns (`group_id` here) are authorized, the
 // `realtime::publish_to_group` call MUST NOT be flagged as missing an
 // ownership check after B3.
 struct Ctx;
--- a/tests/benchmark/corpus/rust/auth/transitive_helper.rs
+++ b/tests/benchmark/corpus/rust/auth/transitive_helper.rs
@ -1,7 +1,7 @@
 // target: authorization happens inside `validate_target`, which
 // internally calls `authz::require_membership` against the same
 // `group_id` the handler subsequently mutates. The current rule cannot
-// see this transitively — B4 lifts per-function auth-check summaries
+// see this transitively, B4 lifts per-function auth-check summaries
 // (which positional params are auth-checked) so the handler-level call
 // to `validate_target(&db, group_id, user.id)` is recognised as an
 // auth check covering `group_id`. Result: `db.exec(..)` MUST NOT flag
@ -45,7 +45,7 @@ pub async fn handle_create_comment(
    let user = auth::require_auth(&req, &ctx).await?;
    let db = Db;

-    // Authorization happens inside validate_target — helper-summary
+    // Authorization happens inside validate_target, helper-summary
    // lifting propagates the per-param auth check so this covers
    // `group_id`.
    validate_target(&db, group_id, user.id).await?;
--- a/tests/benchmark/corpus/rust/auth/unsafe_dto_string_field_axum.rs
+++ b/tests/benchmark/corpus/rust/auth/unsafe_dto_string_field_axum.rs
@ -1,7 +1,7 @@
 // Phase 6 D06 (negative): same DTO shape as
 // `safe_dto_int_field_axum.rs` but the flow uses the `doc_id` field
 // whose declared type is `String`.  Phase 6 must NOT exempt the
-// member-access subject — String DTO fields can carry an injection
+// member-access subject, String DTO fields can carry an injection
 // payload, so the auth rule must continue to fire.
 use axum::extract::Json;

--- a/tests/benchmark/corpus/rust/auth/unsafe_handler_local_collection_does_not_blanket_suppress.rs
+++ b/tests/benchmark/corpus/rust/auth/unsafe_handler_local_collection_does_not_blanket_suppress.rs
@ -0,0 +1,28 @@
+// Vulnerable counterpart to `safe_local_collection_param_types.rs`
+// and `safe_param_type_segment_idents.rs`.  Proves the LocalCollection
+// receiver-type override and the Rust `parameter` arm in
+// `collect_param_names` don't blanket-suppress real handlers that mix
+// in-memory containers with persistent-store calls (`db.update`).
+// Scoped identifier (`req.target_user_id`) flows into a real DB
+// mutation with no preceding ownership check; it must still fire.
+
+use std::collections::HashMap;
+
+struct DocumentRequest {
+    target_user_id: u64,
+    new_owner: u64,
+}
+
+struct DbConnection;
+impl DbConnection {
+    fn update_owner(&self, _doc_id: u64, _owner: u64) {}
+}
+
+// `cache: &mut HashMap<u64, String>` is a local container, its
+// mutations are non-auth-relevant.  But `db.update_owner` is a
+// real persistent-store write, classified as `DbMutation`, and the
+// handler still has no auth check on `req.target_user_id`.
+async fn change_owner(req: DocumentRequest, cache: &mut HashMap<u64, String>, db: DbConnection) {
+    cache.remove(&req.target_user_id); // local container op, OK
+    db.update_owner(req.target_user_id, req.new_owner); // <-- IDOR sink
+}
--- a/tests/benchmark/corpus/rust/auth/unsafe_row_fetch_no_authz.rs
+++ b/tests/benchmark/corpus/rust/auth/unsafe_row_fetch_no_authz.rs
@ -1,4 +1,4 @@
-// Vulnerable counterpart to `row_fetch_then_authorize.rs` — the row is
+// Vulnerable counterpart to `row_fetch_then_authorize.rs`, the row is
 // fetched by user-supplied id but no authorization function names it.
 // The row-fetch exemption must NOT fire here; the rule should still
 // flag the read as missing an ownership/membership check.
--- a/tests/benchmark/corpus/rust/auth/unsafe_row_population_no_check.rs
+++ b/tests/benchmark/corpus/rust/auth/unsafe_row_population_no_check.rs
@ -33,12 +33,12 @@ pub async fn transfer_community(
    req: TransferCommunity,
    pool: &mut Pool,
 ) -> Result<(), ()> {
-    // Row fetch — populates `community → [req.community_id]` — but
+    // Row fetch, populates `community → [req.community_id]`, but
    // no `check_*_action(&user, &community, ..)` follows.
    let _community = Community::read(pool, req.community_id)?;

    // Mutation by id with no preceding ownership/membership check.
-    // This is the genuine IDOR — must flag.
+    // This is the genuine IDOR; the rule must flag it.
    CommunityActions::delete_mods_for_community(pool, req.community_id)?;

    Ok(())
--- a/tests/benchmark/corpus/rust/sqli/sqli_metachar_gate_wrong_sink.rs
+++ b/tests/benchmark/corpus/rust/sqli/sqli_metachar_gate_wrong_sink.rs
@ -3,7 +3,7 @@ use rusqlite::Connection;

 fn main() {
    let user_id = env::var("USER_ID").unwrap();
-    // Rejecting shell metacharacters does NOT make SQL injection safe —
+    // Rejecting shell metacharacters does NOT make SQL injection safe,
    // the metachar gate only covers shell-family sinks.
    if user_id.contains(";") || user_id.contains("|") {
        return;
--- a/tests/benchmark/corpus/rust/traversal/traversal_no_sanitizer.rs
+++ b/tests/benchmark/corpus/rust/traversal/traversal_no_sanitizer.rs
@ -1,6 +1,6 @@
 // rs-path-006: Negative-case guard for PathFact.
 //
-// No sanitiser and no narrowing — PathFact stays Top on every axis, so
+// No sanitiser and no narrowing, PathFact stays Top on every axis, so
 // the FILE_IO sink MUST fire.  This fixture guards against PathFact
 // over-suppression sneaking into `is_path_safe_for_sink`.
 use std::env;
--- a/tests/benchmark/corpus/typescript/auth/safe_local_collection_receiver.ts
+++ b/tests/benchmark/corpus/typescript/auth/safe_local_collection_receiver.ts
@ -0,0 +1,96 @@
+// Real-repo shape from excalidraw's element manipulation libraries
+// (`packages/element/src/binding.ts`, `frame.ts`, `duplicate.ts`,
+// `DebugCanvas.tsx`).  In a pure data-manipulation function whose
+// receiver is a JS built-in collection (`Map`, `Set`, `WeakMap`,
+// `WeakSet`, `Array`) — either declared inline (`new Map()`),
+// annotated directly (`m: Map<K, V>`), or aliased via a same-file
+// `type X = Map<K, V>` — the call site is a container operation,
+// not a data-layer read/mutation, and `js.auth.missing_ownership_check`
+// must not flag.
+//
+// Closes the excalidraw FP cluster (66 → ~9 on
+// `js.auth.missing_ownership_check`).  The fix lives at the deepest
+// representable layer: SSA `TypeFacts::constructor_type` recognises
+// `new Map()` / `new Set()` constructors as
+// `TypeKind::LocalCollection`; `cfg::params::ts_type_to_local_collection`
+// extends `classify_param_type_ts` so explicitly-typed params resolve
+// to `LocalCollection` independent of NestJS decorator presence;
+// `cfg::dto::collect_type_alias_local_collections` populates a
+// per-file `TYPE_ALIAS_LC` set so same-file `type X = Map<...>`
+// aliases also resolve.  The auth analyser already exempts
+// `LocalCollection`-typed receivers via
+// `auth_analysis::sink_class_for_type → InMemoryLocal`.
+
+type ElementsMap = Map<string, { id: string; frameId?: string }>;
+type IdMap = Map<string, string>;
+type GroupSet = Set<string>;
+type ElementArray = readonly { id: string }[];
+
+interface BindingFix {
+  elementId: string;
+}
+
+// ── 1. Direct Map<...> annotation on a parameter ────────────────────
+function lookupBinding(
+  binding: BindingFix,
+  origIdToDuplicateId: Map<string, string>,
+): string | undefined {
+  return origIdToDuplicateId.get(binding.elementId);
+}
+
+// ── 2. Same-file `type X = Map<...>` alias ─────────────────────────
+function debugRender(elementsMap: ElementsMap, id: string) {
+  const bindable = elementsMap.get(id);
+  if (!bindable) return null;
+  return bindable;
+}
+
+// ── 3. Set / WeakMap annotation ────────────────────────────────────
+function trackVisited(visited: Set<string>, key: string) {
+  if (!visited.has(key)) {
+    visited.add(key);
+  }
+  return visited.size;
+}
+
+function rememberElement(
+  cache: WeakMap<object, string>,
+  obj: object,
+  v: string,
+) {
+  cache.set(obj, v);
+  return cache.get(obj);
+}
+
+// ── 4. Array generics (`T[]`, `Array<T>`, `ReadonlyArray<T>`) ──────
+function findItemArr(arr: { id: string }[], targetId: string) {
+  return arr.find((x) => x.id === targetId);
+}
+
+function findItemReadonly(arr: ElementArray, targetId: string) {
+  return arr.find((x) => x.id === targetId);
+}
+
+function findItemGeneric(arr: Array<string>, v: string) {
+  return arr.find((x) => x === v);
+}
+
+// ── 5. Local `new Map()` / `new Set()` constructors ────────────────
+function buildIndex(items: { id: string; v: string }[]) {
+  const idx = new Map<string, string>();
+  for (const it of items) {
+    idx.set(it.id, it.v);
+  }
+  return idx.get(items[0]?.id ?? "");
+}
+
+// ── 6. Type-alias chain (alias of alias) ───────────────────────────
+function aliasOfAlias(m: IdMap, k: string) {
+  return m.get(k);
+}
+
+// ── 7. Set with `add` / `has` (mutation-side) ──────────────────────
+function trackGroup(groups: GroupSet, g: string) {
+  groups.add(g);
+  return groups.has(g);
+}
--- a/tests/benchmark/corpus/typescript/auth/vuln_local_collection_does_not_blanket_suppress.ts
+++ b/tests/benchmark/corpus/typescript/auth/vuln_local_collection_does_not_blanket_suppress.ts
@ -0,0 +1,28 @@
+// Vulnerable counterpart to `safe_local_collection_receiver.ts`.
+//
+// Pinned to prove the LocalCollection-receiver fix does NOT
+// blanket-suppress missing-ownership findings on real DB / API
+// receivers that happen to share method names (`get`, `find`, `set`)
+// with JS built-in collections.  When the receiver type is a real
+// `Prisma` / `Repository` / `db` chain — not a tracked Map / Set /
+// Array — the auth analyser must still fire.
+
+interface PrismaClient {
+  user: {
+    findUnique(args: { where: { id: string } }): Promise<{ id: string } | null>;
+    update(args: { where: { id: string }; data: object }): Promise<void>;
+  };
+}
+
+declare const prisma: PrismaClient;
+
+// User passes an attacker-controlled id.  No prior auth check; receiver
+// is a Prisma client (NOT a Map / Set / Array), so the missing-ownership
+// rule must fire on `prisma.user.findUnique`.
+export async function dangerousFetch(targetUserId: string) {
+  return prisma.user.findUnique({ where: { id: targetUserId } });
+}
+
+export async function dangerousMutate(targetUserId: string, data: object) {
+  return prisma.user.update({ where: { id: targetUserId }, data });
+}
--- a/tests/benchmark/corpus/typescript/safe/safe_helper_with_validator.ts
+++ b/tests/benchmark/corpus/typescript/safe/safe_helper_with_validator.ts
@ -0,0 +1,33 @@
+// Helper-summary all_validated propagation (precision regression
+// guard).  The helper performs an indirect-validator check on
+// `child.webhookUrl` and throws on failure; callers passing tainted
+// `child` should NOT see the helper's `param_to_sink` summary refire
+// because the validator inside the helper proved the path safe.
+//
+// Pinned by tests/lib::helper_with_validator_does_not_propagate_to_caller_via_summary.
+
+import express, { Request, Response } from 'express';
+import axios from 'axios';
+
+interface IWebhookFilterPart {
+    webhookUrl?: string;
+}
+
+declare function validateUrlSsrf(url: string): Promise<string | null>;
+
+async function getWebhookResponse(child: IWebhookFilterPart) {
+    const ssrfError = await validateUrlSsrf(child.webhookUrl);
+    if (ssrfError) {
+        throw new Error('blocked');
+    }
+    return await axios.post(child.webhookUrl, {});
+}
+
+const app = express();
+app.use(express.json());
+
+app.post('/run', async (req: Request, res: Response) => {
+    const child: IWebhookFilterPart = req.body.filter;
+    const r = await getWebhookResponse(child);
+    res.json({ r });
+});
--- a/tests/benchmark/corpus/typescript/safe/safe_indirect_validator.ts
+++ b/tests/benchmark/corpus/typescript/safe/safe_indirect_validator.ts
@ -0,0 +1,23 @@
+// Indirect-validator branch narrowing (precision regression guard).
+// Pattern: `const err = validateXxx(input); if (err) throw …;` —
+// the validator's input is treated as validated on the success
+// branch, so the downstream sink does not refire.
+//
+// Pinned by tests/lib::indirect_validator_narrowing_marks_arg_validated.
+
+import express, { Request, Response } from 'express';
+import axios from 'axios';
+
+declare function validateUrlSsrf(url: string): Promise<string | null>;
+
+const app = express();
+
+app.get('/proxy', async (req: Request, res: Response) => {
+    const target = req.query.url as string;
+    const ssrfError = await validateUrlSsrf(target);
+    if (ssrfError) {
+        throw new Error('blocked');
+    }
+    const response = await axios.get(target);
+    res.send(response.data);
+});
--- a/tests/benchmark/corpus/typescript/safe/safe_strapi_db_query_chain.ts
+++ b/tests/benchmark/corpus/typescript/safe/safe_strapi_db_query_chain.ts
@ -0,0 +1,37 @@
+// Strapi-style ORM accessor chain — `<obj>.db.query(MODEL_UID).<orm_method>(...)`.
+// MODEL_UID is a literal model identifier, not raw SQL; the trailing
+// findOne/findMany/create/update/delete/count are intrinsically parameterised
+// by the ORM (per-call values arrive through field-keyed object literals
+// that the driver escapes).  Should NOT fire as a SQL-injection sink.
+
+declare const strapi: any;
+
+async function getApiToken(whereParams: Record<string, unknown>) {
+    const token = await strapi.db.query('admin::api-token').findOne({
+        select: ['id', 'name'],
+        where: whereParams,
+    });
+    return token;
+}
+
+async function listTokens() {
+    return strapi.db.query('admin::api-token').findMany({
+        where: { type: 'read-only' },
+    });
+}
+
+async function createToken(data: unknown) {
+    return strapi.db.query('admin::api-token').create({ data });
+}
+
+async function updateToken(id: number, data: unknown) {
+    return strapi.db.query('admin::api-token').update({ where: { id }, data });
+}
+
+async function deleteToken(id: number) {
+    return strapi.db.query('admin::api-token').delete({ where: { id } });
+}
+
+async function countTokens() {
+    return strapi.db.query('admin::api-token').count();
+}
--- a/tests/benchmark/corpus/typescript/sqli/sqli_db_query_concat.ts
+++ b/tests/benchmark/corpus/typescript/sqli/sqli_db_query_concat.ts
@ -0,0 +1,26 @@
+// Vulnerable counterpart — bare `connection.query(...)` and chained
+// `db.query(...).then(...)` whose arg 0 is concatenated with attacker
+// input.  Both must still fire as SQL_QUERY sinks: the chain has no
+// ORM-method outer call (`.then` is a Promise method, not an ORM
+// accessor), and arg 0 is not a string literal in the second case.
+
+import express, { Request, Response } from 'express';
+
+declare const connection: any;
+declare const db: any;
+
+const app = express();
+
+app.get('/user', (req: Request, res: Response) => {
+    const name = req.query.name as string;
+    // bare SQL — real SQLi sink, no chain
+    connection.query(`SELECT * FROM users WHERE name = '${name}'`);
+});
+
+app.get('/by-id', async (req: Request, res: Response) => {
+    const id = req.query.id as string;
+    // chained `.then` is a Promise method, not an ORM accessor; arg 0 is
+    // also a binary_expression (not a string literal) so the ORM-shape
+    // recogniser refuses to suppress.
+    db.query("SELECT * FROM users WHERE id = " + id).then((rows: any) => res.json(rows[0]));
+});
--- a/tests/benchmark/cve_corpus/java/CVE-2022-1471/patched.java
+++ b/tests/benchmark/cve_corpus/java/CVE-2022-1471/patched.java
@ -0,0 +1,37 @@
+// Nyx CVE benchmark fixture (patched counterpart).
+//
+// CVE:      CVE-2022-1471
+// Project:  SnakeYAML (snakeyaml/snakeyaml)
+// License:  Apache-2.0
+//           (https://github.com/snakeyaml/snakeyaml/blob/master/LICENSE.txt)
+// Advisory: https://github.com/advisories/GHSA-mjmj-j48q-9wg2
+//
+// Patched variant: the parser is constructed with `SafeConstructor`,
+// which restricts the YAML tag handler set to primitives + standard
+// collections. SnakeYAML 2.0 ships with `SafeConstructor` as the
+// default; pre-2.0 consumers patched their own call sites to pass
+// `SafeConstructor` explicitly (the form below).
+//
+// Patched-fix simplification: the upstream remediation also covers
+// callers that need richer types via custom `Constructor` subclasses
+// with declared safe types — those are out of scope for this fixture.
+
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+import org.yaml.snakeyaml.LoaderOptions;
+import org.yaml.snakeyaml.Yaml;
+import org.yaml.snakeyaml.constructor.SafeConstructor;
+
+public class YamlConfigServlet extends HttpServlet {
+    @Override
+    protected void doPost(HttpServletRequest req, HttpServletResponse res) throws Exception {
+        String body = req.getReader().readLine();
+        // Patched: SafeConstructor forbids arbitrary class tags;
+        // any non-primitive `!!…` payload throws ConstructorException.
+        Yaml yaml = new Yaml(new SafeConstructor(new LoaderOptions()));
+        Object loaded = yaml.load(body);
+        res.setHeader("X-Yaml-Class", loaded.getClass().getName());
+        res.setStatus(HttpServletResponse.SC_OK);
+    }
+}
--- a/tests/benchmark/cve_corpus/java/CVE-2022-1471/vulnerable.java
+++ b/tests/benchmark/cve_corpus/java/CVE-2022-1471/vulnerable.java
@ -0,0 +1,43 @@
+// Nyx CVE benchmark fixture.
+//
+// CVE:        CVE-2022-1471
+// Project:    SnakeYAML (snakeyaml/snakeyaml; consumed via any app
+//             that constructs `new Yaml()` and calls `.load()` on
+//             attacker-controlled bytes)
+// License:    Apache-2.0
+//             (https://github.com/snakeyaml/snakeyaml/blob/master/LICENSE.txt)
+// Advisory:   https://github.com/advisories/GHSA-mjmj-j48q-9wg2
+//             https://nvd.nist.gov/vuln/detail/CVE-2022-1471
+// Vulnerable: SnakeYAML <= 1.33; the default `Constructor` accepts
+//             arbitrary tags (`!!javax.script.ScriptEngineManager`,
+//             `!!java.net.URLClassLoader`, etc.) and instantiates any
+//             class via reflection, reaching RCE on consumers that
+//             feed network input straight into Yaml.load().
+//
+// Verbatim load-bearing lines: the unsafe `new Yaml()` construction
+// and the `yaml.load(body)` call mirror the call-site shape called
+// out in the advisory's "vulnerable code" example. The patched fix
+// (next file) shows the SnakeYAML 2.0 fix pattern of explicitly
+// passing `new SafeConstructor(new LoaderOptions())`.
+//
+// Trims: imports trimmed to just SnakeYAML and Servlet API; no
+// helper / logging code.
+
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+import org.yaml.snakeyaml.Yaml;
+
+public class YamlConfigServlet extends HttpServlet {
+    @Override
+    protected void doPost(HttpServletRequest req, HttpServletResponse res) throws Exception {
+        String body = req.getReader().readLine();
+        // Vulnerable: default Constructor allows arbitrary class
+        // instantiation via YAML tag handlers — `body` may contain
+        // `!!javax.script.ScriptEngineManager` and friends.
+        Yaml yaml = new Yaml();
+        Object loaded = yaml.load(body);
+        res.setHeader("X-Yaml-Class", loaded.getClass().getName());
+        res.setStatus(HttpServletResponse.SC_OK);
+    }
+}
--- a/tests/benchmark/cve_corpus/java/CVE-2022-42889/patched.java
+++ b/tests/benchmark/cve_corpus/java/CVE-2022-42889/patched.java
@ -0,0 +1,33 @@
+// Nyx CVE benchmark fixture (patched counterpart).
+//
+// CVE:      CVE-2022-42889 ("Text4Shell")
+// Project:  Apache Commons Text (apache/commons-text)
+// License:  Apache-2.0
+//           (https://github.com/apache/commons-text/blob/master/LICENSE.txt)
+// Advisory: https://github.com/advisories/GHSA-599f-7c49-w659
+//
+// Patched variant: the substitutor is built with `new StringSubstitutor()`
+// (no factory) so the lookup map is empty — `${anything}` becomes a
+// literal pass-through. This is the recommended app-side mitigation
+// for callers that cannot upgrade past 1.9, and it is also the
+// behaviour of the 1.10.0 default `createDefault()` factory which
+// drops the `script:` / `dns:` / `url:` interpolation lookups.
+
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+import org.apache.commons.text.StringSubstitutor;
+
+public class TemplateRenderServlet extends HttpServlet {
+    @Override
+    protected void doGet(HttpServletRequest req, HttpServletResponse res) throws Exception {
+        String input = req.getParameter("template");
+        // Patched: no interpolator constructed; the substitutor has
+        // no lookups registered, so `${…}` is left as a literal in
+        // the rendered output. No script/dns/url evaluation.
+        StringSubstitutor substitutor = new StringSubstitutor();
+        String rendered = substitutor.replace(input);
+        res.setHeader("X-Rendered-Length", String.valueOf(rendered.length()));
+        res.setStatus(HttpServletResponse.SC_OK);
+    }
+}
--- a/tests/benchmark/cve_corpus/java/CVE-2022-42889/vulnerable.java
+++ b/tests/benchmark/cve_corpus/java/CVE-2022-42889/vulnerable.java
@ -0,0 +1,45 @@
+// Nyx CVE benchmark fixture.
+//
+// CVE:        CVE-2022-42889 (a.k.a. "Text4Shell")
+// Project:    Apache Commons Text (apache/commons-text); consumed via
+//             any app that calls `StringSubstitutor.createInterpolator()`
+//             on attacker-controlled input.
+// License:    Apache-2.0
+//             (https://github.com/apache/commons-text/blob/master/LICENSE.txt)
+// Advisory:   https://github.com/advisories/GHSA-599f-7c49-w659
+//             https://nvd.nist.gov/vuln/detail/CVE-2022-42889
+// Vulnerable: commons-text 1.5 .. 1.9. `createInterpolator()`
+//             enables the `script:`, `dns:`, and `url:` lookups by
+//             default, so a substitution like `${script:javascript:…}`
+//             evaluates JavaScript via the JSR-223 ScriptEngineManager
+//             — full RCE on any consumer that feeds untrusted input
+//             through `.replace()`.
+//
+// Verbatim load-bearing lines: the `StringSubstitutor.createInterpolator()`
+// factory call and the `interpolator.replace(input)` sink mirror the
+// minimal triggering pattern published in the OSS-Security advisory
+// (https://www.openwall.com/lists/oss-security/2022/10/13/4) and the
+// vendor mitigation guidance for 1.10.0.
+//
+// Trims: imports limited to commons-text + servlet; no surrounding
+// templating boilerplate.
+
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+import org.apache.commons.text.StringSubstitutor;
+
+public class TemplateRenderServlet extends HttpServlet {
+    @Override
+    protected void doGet(HttpServletRequest req, HttpServletResponse res) throws Exception {
+        String input = req.getParameter("template");
+        // Vulnerable: createInterpolator() enables script:/dns:/url:
+        // lookups by default; .replace() evaluates them against
+        // `input` — `${script:js:…}` → arbitrary JavaScript via the
+        // JDK ScriptEngineManager.
+        StringSubstitutor interpolator = StringSubstitutor.createInterpolator();
+        String rendered = interpolator.replace(input);
+        res.setHeader("X-Rendered-Length", String.valueOf(rendered.length()));
+        res.setStatus(HttpServletResponse.SC_OK);
+    }
+}
--- a/tests/benchmark/cve_corpus/python/CVE-2025-69662/patched.py
+++ b/tests/benchmark/cve_corpus/python/CVE-2025-69662/patched.py
@ -0,0 +1,47 @@
+# Nyx CVE benchmark fixture (patched counterpart).
+#
+# CVE:      CVE-2025-69662
+# Project:  geopandas (geopandas/geopandas)
+# License:  BSD-3-Clause  (https://github.com/geopandas/geopandas/blob/main/LICENSE.txt)
+# Advisory: https://github.com/advisories/GHSA-6497-prx7-gpmq
+# Patched:  6aa8ef14ffdee4ba1044349ab948e1a1fbfaf419 geopandas/io/sql.py:432-438
+#
+# Fix: replace the f-string-built Find_SRID probe with a
+# bound-parameter SQLAlchemy text() statement; SQLAlchemy passes the
+# values via the driver's parameter binding, so attacker-supplied
+# identifiers can no longer break out of the literal context.
+#
+# Trims:
+#   - Same scaffolding trim as vulnerable.py — `.fetchone()[0]` (post-
+#     sink result extraction) removed.
+#   - Patched-fix simplification: the upstream fix nests
+#     `text(...).bindparams(...)` directly inside `connection.execute(...)`.
+#     The fixture lifts the bound-parameter clause into a local `stmt`
+#     so the `.bindparams` call is a top-level CFG node — without this
+#     reshape, cfg-unguarded-sink fires on the surrounding execute
+#     because the inlined sanitizer-in-arg shape is not yet recognised
+#     by the dominator-based guard check.  The verbatim bytes of the
+#     `text(...).bindparams(...)` clause are preserved.
+
+from flask import Flask, request
+from sqlalchemy import create_engine, text
+
+app = Flask(__name__)
+engine = create_engine("postgresql://localhost/geo")
+
+
+@app.post("/upload-layer")
+def upload_layer():
+    body = request.get_json(force=True) or {}
+    geom_name = body.get("geom_name", "geom")
+    name = body.get("table", "data")
+    schema_name = body.get("schema", "public")
+    with engine.begin() as connection:
+        # Verbatim bytes from sql.py:433-437 — bound-parameter probe.
+        stmt = text(
+            "SELECT Find_SRID(:schema_name, :name, :geom_name);"
+        ).bindparams(
+            schema_name=schema_name, name=name, geom_name=geom_name
+        )
+        connection.execute(stmt)
+    return {"ok": True}
--- a/tests/benchmark/cve_corpus/python/CVE-2025-69662/vulnerable.py
+++ b/tests/benchmark/cve_corpus/python/CVE-2025-69662/vulnerable.py
@ -0,0 +1,46 @@
+# Nyx CVE benchmark fixture.
+#
+# CVE:        CVE-2025-69662
+# Project:    geopandas (geopandas/geopandas)
+# License:    BSD-3-Clause  (https://github.com/geopandas/geopandas/blob/main/LICENSE.txt)
+# Advisory:   https://github.com/advisories/GHSA-6497-prx7-gpmq
+# Vulnerable: c301579e0ac4034c19bece63c08bf628613700b4 geopandas/io/sql.py:432-435
+#
+# geopandas.GeoDataFrame.to_postgis() concatenated the GeoDataFrame's
+# geometry column name (and the schema/table names) into a Find_SRID
+# probe via f-string. A user uploading a GeoDataFrame whose geometry
+# column was named with embedded SQL (e.g. "geom'); DROP TABLE...--")
+# achieved arbitrary SQL execution against the target Postgres database.
+#
+# Trims:
+#   - Surrounding to_postgis() body (CRS lookup, EWKB conversion, dtype
+#     dict construction at L399-422) that scaffolds the vulnerable
+#     Find_SRID probe.
+#   - Trailing `.fetchone()[0]` on the connection.execute(...) result —
+#     downstream of the sink (result extraction), not on the flow path.
+#
+# Only the source statement (geom_name from request input), the
+# f-string SQL builder, and the connection.execute(text(...)) sink are
+# preserved verbatim from sql.py:432-435.
+
+from flask import Flask, request
+from sqlalchemy import create_engine, text
+
+app = Flask(__name__)
+engine = create_engine("postgresql://localhost/geo")
+
+
+@app.post("/upload-layer")
+def upload_layer():
+    body = request.get_json(force=True) or {}
+    # geom_name is supplied by the API caller — no validation upstream.
+    geom_name = body.get("geom_name", "geom")
+    name = body.get("table", "data")
+    schema_name = body.get("schema", "public")
+    with engine.begin() as connection:
+        # Verbatim from sql.py:432-435 — Find_SRID probe with
+        # f-string-interpolated identifiers.
+        connection.execute(
+            text(f"SELECT Find_SRID('{schema_name}', '{name}', '{geom_name}');")
+        )
+    return {"ok": True}
--- a/tests/benchmark/cve_corpus/python/CVE-2026-33626/patched.py
+++ b/tests/benchmark/cve_corpus/python/CVE-2026-33626/patched.py
@ -0,0 +1,79 @@
+# Nyx CVE benchmark fixture.
+#
+# CVE:      CVE-2026-33626
+# Project:  LMDeploy (InternLM/lmdeploy)
+# License:  Apache-2.0  (https://github.com/InternLM/lmdeploy/blob/main/LICENSE)
+# Advisory: https://github.com/advisories/GHSA-25c5-rg58-mhxh
+# Patched:  71d64a339edb901e9005358e0633fbbab367d626 lmdeploy/vl/media/connection.py:24-69
+#
+# Fix: introduce `_is_safe_url(url)` which resolves the hostname via
+# `socket.getaddrinfo`, walks every returned IP, and rejects any that
+# aren't `is_global` (covers loopback, RFC1918 private, link-local,
+# multicast, reserved, unspecified).  The vulnerable scheme-only check
+# is replaced by this allowlist gate before the fetch.
+#
+# Trims: same scaffolding trim as vulnerable.py — MediaIO generic
+# plumbing replaced with a Flask handler; fetch_timeout env-var
+# resolution collapsed to a literal.  The `_is_safe_url` body, the
+# replacement gate at L55-58, and the `client.get(...,
+# allow_redirects=True)` fetch are preserved verbatim from the fix
+# commit.
+
+import ipaddress
+import socket
+from urllib.parse import urlparse
+
+import requests
+from flask import Flask, request
+
+app = Flask(__name__)
+headers = {"User-Agent": "Mozilla/5.0"}
+
+
+def _is_safe_url(url: str) -> tuple[bool, str]:
+    """Check if the URL is safe to fetch (not internal/private)."""
+    try:
+        parsed = urlparse(url)
+        if parsed.scheme not in ('http', 'https'):
+            return False, f'Unsupported scheme: {parsed.scheme}'
+
+        hostname = parsed.hostname
+        if not hostname:
+            return False, 'Could not parse hostname from URL'
+
+        # check all IPs (IPv4 + IPv6) using getaddrinfo
+        try:
+            infos = socket.getaddrinfo(hostname, None)
+        except socket.gaierror:
+            return False, 'Hostname resolution failed'
+
+        for info in infos:
+            ip = ipaddress.ip_address(info[4][0])
+            # block any IP that is not globally routable
+            if not ip.is_global:
+                return False, f'Blocked non-global IP detected: {ip}'
+
+        return True, 'URL is safe'
+    except Exception as e:
+        return False, f'URL validation failed: {str(e)}'
+
+
+@app.post("/load-image")
+def load_image():
+    body = request.get_json(force=True) or {}
+    url = body.get("url", "")
+    url_spec = urlparse(url)
+    # Verbatim from connection.py:55-58 — replaces the scheme-only
+    # check with a private-IP-blocking allowlist.
+    is_safe, reason = _is_safe_url(url_spec.geturl())
+    if not is_safe:
+        raise ValueError(f'URL is blocked for security reasons: {reason}')
+
+    fetch_timeout = 10
+    client = requests.Session()
+    client.max_redirects = 3
+    response = client.get(
+        url_spec.geturl(), headers=headers, timeout=fetch_timeout, allow_redirects=True
+    )
+    response.raise_for_status()
+    return {"size": len(response.content)}
--- a/tests/benchmark/cve_corpus/python/CVE-2026-33626/vulnerable.py
+++ b/tests/benchmark/cve_corpus/python/CVE-2026-33626/vulnerable.py
@ -0,0 +1,51 @@
+# Nyx CVE benchmark fixture.
+#
+# CVE:        CVE-2026-33626
+# Project:    LMDeploy (InternLM/lmdeploy)
+# License:    Apache-2.0  (https://github.com/InternLM/lmdeploy/blob/main/LICENSE)
+# Advisory:   https://github.com/advisories/GHSA-25c5-rg58-mhxh
+# Vulnerable: 819a80836e991ca3f427b0e85faca159083d3d40 lmdeploy/vl/media/connection.py:23-37
+#
+# LMDeploy's vision-language image loader accepted user-supplied
+# image URLs from the chat-completion request and fetched them via
+# `requests.Session().get(url)` after only a scheme check.  Attackers
+# embedded URLs pointing at internal network services or cloud
+# metadata endpoints (e.g. http://169.254.169.254/...) and exfiltrated
+# the response back through the model output.
+#
+# Trims:
+#   - Surrounding _load_data_url / file-URL branches that don't reach
+#     the HTTP sink (lines 41+).
+#   - The scheme-only allowlist check at L24-25 of upstream.  The
+#     CVE is host-based SSRF (private IP / cloud-metadata host); the
+#     scheme check was the insufficient validation the fix replaces.
+#     Removing it keeps the load-bearing source → sink flow intact.
+#   - The fetch_timeout env-var resolution (L28-31) — collapsed to a
+#     literal so the fixture is self-contained.
+#   - MediaIO[_M] generic plumbing — replaced with a Flask handler so
+#     the source is a concrete request flow.
+#
+# The verbatim load-bearing lines are the `client = requests.Session()`
+# constructor and the `client.get(url_spec.geturl(), headers=headers,
+# timeout=fetch_timeout)` fetch site at lines 33-34 of upstream.
+
+from urllib.parse import urlparse
+
+import requests
+from flask import Flask, request
+
+app = Flask(__name__)
+headers = {"User-Agent": "Mozilla/5.0"}
+
+
+@app.post("/load-image")
+def load_image():
+    body = request.get_json(force=True) or {}
+    url = body.get("url", "")
+    url_spec = urlparse(url)
+    fetch_timeout = 10
+    # Verbatim from connection.py:33-34 — Session().get(url).
+    client = requests.Session()
+    response = client.get(url_spec.geturl(), headers=headers, timeout=fetch_timeout)
+    response.raise_for_status()
+    return {"size": len(response.content)}
--- a/tests/benchmark/cve_corpus/rust/CVE-2018-20997/patched.rs
+++ b/tests/benchmark/cve_corpus/rust/CVE-2018-20997/patched.rs
@ -2,7 +2,7 @@
 //
 // CVE:      CVE-2018-20997
 // Advisory: https://rustsec.org/advisories/RUSTSEC-2018-0003
-// Project:  tar-rs — zip-slip fix
+// Project:  tar-rs, zip-slip fix
 // License:  MIT OR Apache-2.0
 //
 // Patched variant: the extractor rejects any entry path that contains
--- a/tests/benchmark/cve_corpus/rust/CVE-2018-20997/vulnerable.rs
+++ b/tests/benchmark/cve_corpus/rust/CVE-2018-20997/vulnerable.rs
@ -2,18 +2,18 @@
 //
 // CVE:      CVE-2018-20997
 // Advisory: https://rustsec.org/advisories/RUSTSEC-2018-0003
-// Project:  tar-rs (alexcrichton/tar-rs) — "zip slip" on tar extraction
+// Project:  tar-rs (alexcrichton/tar-rs), "zip slip" on tar extraction
 // License:  MIT OR Apache-2.0  (https://github.com/alexcrichton/tar-rs/blob/main/LICENSE-MIT)
 //
 // tar-rs <= 0.4.15 trusted tar entry paths verbatim when unpacking.
 // A crafted archive with an entry named `../../etc/shadow` would cause
 // `Archive::unpack` to write outside the destination directory, giving
 // malicious tarballs arbitrary file write.  Every consumer that
-// streamed user-supplied archives — package managers, OCI tooling,
-// container image importers — inherited the traversal.
+// streamed user-supplied archives (package managers, OCI tooling,
+// container image importers) inherited the traversal.
 //
-// This fixture is a minimal reproducer of the unsafe sink pattern —
-// attacker-controlled archive entry path -> fs::File::create(path) — not
+// This fixture is a minimal reproducer of the unsafe sink pattern,
+// attacker-controlled archive entry path -> fs::File::create(path), not
 // an excerpt of tar-rs internals.  The entry path is modelled as an env
 // var so the single-file benchmark harness sees the flow; in a real
 // extractor the same shape fires for `archive.entries()?.map(|e|
--- a/tests/benchmark/cve_corpus/rust/CVE-2022-36113/patched.rs
+++ b/tests/benchmark/cve_corpus/rust/CVE-2022-36113/patched.rs
@ -2,7 +2,7 @@
 //
 // CVE:      CVE-2022-36113
 // Advisory: https://blog.rust-lang.org/2022/09/14/cargo-cves.html
-// Project:  cargo — `.cargo-ok` symlink follow fix
+// Project:  cargo, `.cargo-ok` symlink follow fix
 // License:  MIT OR Apache-2.0
 //
 // Patched variant: the crate name is passed through
--- a/tests/benchmark/cve_corpus/rust/CVE-2022-36113/vulnerable.rs
+++ b/tests/benchmark/cve_corpus/rust/CVE-2022-36113/vulnerable.rs
@ -3,7 +3,7 @@
 // CVE:      CVE-2022-36113
 // Advisory: https://blog.rust-lang.org/2022/09/14/cargo-cves.html
 //           https://rustsec.org/advisories/RUSTSEC-2022-0064
-// Project:  cargo (rust-lang/cargo) — "Arbitrary file corruption through
+// Project:  cargo (rust-lang/cargo), "Arbitrary file corruption through
 //           crate extraction" (`.cargo-ok` symlink following)
 // License:  MIT OR Apache-2.0  (https://github.com/rust-lang/cargo/blob/master/LICENSE-MIT)
 //
@ -15,9 +15,9 @@
 // switched the marker open to `OpenOptions::create_new(true)` so a
 // pre-existing symlink aborts the extraction.
 //
-// This fixture is a minimal reproducer of the unsafe sink pattern —
+// This fixture is a minimal reproducer of the unsafe sink pattern,
 // attacker-controlled crate name plumbed into the marker path ->
-// fs::File::create(marker) through a symlink — not an excerpt of cargo
+// fs::File::create(marker) through a symlink, not an excerpt of cargo
 // internals.
 use std::env;
 use std::fs::File;
--- a/tests/benchmark/cve_corpus/rust/CVE-2024-24576/patched.rs
+++ b/tests/benchmark/cve_corpus/rust/CVE-2024-24576/patched.rs
@ -2,7 +2,7 @@
 //
 // CVE:      CVE-2024-24576
 // Advisory: https://rustsec.org/advisories/RUSTSEC-2024-0003
-// Project:  Rust standard library — "BatBadBut"
+// Project:  Rust standard library, "BatBadBut"
 // License:  MIT OR Apache-2.0
 //
 // Patched variant: the caller filters the argument through a cmd.exe-
--- a/tests/benchmark/cve_corpus/rust/CVE-2024-24576/vulnerable.rs
+++ b/tests/benchmark/cve_corpus/rust/CVE-2024-24576/vulnerable.rs
@ -3,7 +3,7 @@
 // CVE:      CVE-2024-24576
 // Advisory: https://rustsec.org/advisories/RUSTSEC-2024-0003
 // Blog:     https://blog.rust-lang.org/2024/04/09/cve-2024-24576.html
-// Project:  Rust standard library (std::process::Command) — "BatBadBut"
+// Project:  Rust standard library (std::process::Command), "BatBadBut"
 // License:  MIT OR Apache-2.0  (https://github.com/rust-lang/rust/blob/master/COPYRIGHT)
 //
 // Rust < 1.77.2 on Windows built the argv for .bat/.cmd invocations by
@ -14,8 +14,8 @@
 // line, and every consumer of `std::process::Command::new("...bat")`
 // on Windows inherited the RCE.
 //
-// This fixture is a minimal reproducer of the unsafe sink pattern —
-// caller-supplied input -> Command::new("update.bat").arg(name) — not
+// This fixture is a minimal reproducer of the unsafe sink pattern,
+// caller-supplied input -> Command::new("update.bat").arg(name), not
 // an excerpt of rustc / libstd internals.  The source is modelled as
 // `env::var` so the single-file benchmark harness sees the flow; in a
 // real deployment the same shape fires for an Axum/Actix/Rocket handler
--- a/tests/benchmark/cve_corpus/typescript/GHSA-4x48-cgf9-q33f/patched.ts
+++ b/tests/benchmark/cve_corpus/typescript/GHSA-4x48-cgf9-q33f/patched.ts
@ -0,0 +1,62 @@
+// Nyx CVE benchmark fixture (patched counterpart).
+//
+// CVE:        GHSA-4x48-cgf9-q33f (no CVE id assigned)
+// Project:    Novu (novuhq/novu)
+// License:    MIT  (libs/application-generic — see LICENSE-MIT)
+// Advisory:   https://github.com/novuhq/novu/security/advisories/GHSA-4x48-cgf9-q33f
+// Patched:    87d965eb88340ac7cd262dd52c8015acd092dc68
+//             libs/application-generic/src/usecases/conditions-filter/conditions-filter.usecase.ts:241-289
+//
+// The fix performs the existing call-site SSRF check `validateUrlSsrf`
+// (already used by the HTTP-Request workflow step) before the webhook
+// POST. The branch validates protocol/host and rejects when the URL
+// hits localhost/private/cloud-metadata addresses; only on success
+// does control reach axios.post.
+//
+// Patched-fix simplification: validateUrlSsrf is sourced from
+// '../../utils/ssrf-url-validation.ts' upstream — inlined here as a
+// no-op signature so the fixture parses without the larger novu
+// monorepo. The branch shape (early throw on truthy ssrfError) is
+// verbatim from the patch.
+
+import express, { Request, Response } from 'express';
+import axios from 'axios';
+
+interface IWebhookFilterPart {
+    webhookUrl?: string;
+}
+
+declare function validateUrlSsrf(url: string): Promise<string | null>;
+
+async function getWebhookResponse(
+    child: IWebhookFilterPart,
+): Promise<Record<string, unknown> | undefined> {
+    if (!child.webhookUrl) return undefined;
+
+    const payload = {};
+    const config: { headers: Record<string, string> } = { headers: {} };
+
+    const ssrfError = await validateUrlSsrf(child.webhookUrl);
+
+    if (ssrfError) {
+        throw new Error(
+            JSON.stringify({
+                message: ssrfError,
+                data: 'Webhook URL blocked by SSRF protection.',
+            })
+        );
+    }
+
+    return await axios.post(child.webhookUrl, payload, config).then((response) => {
+        return response.data as Record<string, unknown>;
+    });
+}
+
+const app = express();
+app.use(express.json());
+
+app.post('/conditions-filter/run', async (req: Request, res: Response) => {
+    const child: IWebhookFilterPart = req.body.filter;
+    const result = await getWebhookResponse(child);
+    res.json({ result });
+});
--- a/tests/benchmark/cve_corpus/typescript/GHSA-4x48-cgf9-q33f/vulnerable.ts
+++ b/tests/benchmark/cve_corpus/typescript/GHSA-4x48-cgf9-q33f/vulnerable.ts
@ -0,0 +1,53 @@
+// Nyx CVE benchmark fixture.
+//
+// CVE:        GHSA-4x48-cgf9-q33f (no CVE id assigned)
+// Project:    Novu (novuhq/novu)
+// License:    MIT  (libs/application-generic — see LICENSE-MIT)
+// Advisory:   https://github.com/novuhq/novu/security/advisories/GHSA-4x48-cgf9-q33f
+// Vulnerable: 87d965eb88340ac7cd262dd52c8015acd092dc68^
+//             libs/application-generic/src/usecases/conditions-filter/conditions-filter.usecase.ts:241-272
+//
+// `getWebhookResponse` POSTs to a user-configured webhook URL using raw
+// `axios.post(child.webhookUrl, ...)` with no SSRF validation. The
+// `child` filter part is sourced from a workflow filter config the
+// caller controls, so the URL flows attacker-influenced into axios.
+//
+// Trims:
+//   - HMAC config branch (verbatim kept; not on the flow path but
+//     trivial scaffolding to keep the call shape).
+//   - buildHmac, buildPayload, processFilter dispatcher, environment
+//     repository lookups, decryptApiKey usage. Verbatim load-bearing
+//     lines are the IWebhookFilterPart param shape and the
+//     axios.post(child.webhookUrl, payload, config) call.
+
+import express, { Request, Response } from 'express';
+import axios from 'axios';
+
+interface IWebhookFilterPart {
+    webhookUrl?: string;
+}
+
+async function getWebhookResponse(
+    child: IWebhookFilterPart,
+): Promise<Record<string, unknown> | undefined> {
+    if (!child.webhookUrl) return undefined;
+
+    const payload = {};
+
+    const config: { headers: Record<string, string> } = {
+        headers: {},
+    };
+
+    return await axios.post(child.webhookUrl, payload, config).then((response) => {
+        return response.data as Record<string, unknown>;
+    });
+}
+
+const app = express();
+app.use(express.json());
+
+app.post('/conditions-filter/run', async (req: Request, res: Response) => {
+    const child: IWebhookFilterPart = req.body.filter;
+    const result = await getWebhookResponse(child);
+    res.json({ result });
+});
--- a/tests/benchmark/ground_truth.json
+++ b/tests/benchmark/ground_truth.json
@ -3,7 +3,7 @@
  "metadata": {
    "description": "Nyx benchmark ground truth",
    "created": "2026-03-20",
-    "corpus_size": 433
+    "corpus_size": 458
  },
  "cases": [
    {
@ -8394,6 +8394,35 @@
      "disabled": false,
      "notes": "Prisma $queryRawUnsafe \u2014 TS-specific ORM sink"
    },
+    {
+      "case_id": "ts-sqli-003",
+      "file": "typescript/sqli/sqli_db_query_concat.ts",
+      "language": "typescript",
+      "is_vulnerable": true,
+      "vuln_class": "sqli",
+      "cwe": "CWE-89",
+      "provenance": "synthetic",
+      "equivalence_tier": "exact",
+      "match_mode": "rule_match",
+      "expected_rule_ids": [
+        "taint-unsanitised-flow"
+      ],
+      "allowed_alternative_rule_ids": [
+        "cfg-unguarded-sink"
+      ],
+      "forbidden_rule_ids": [],
+      "expected_severity": "MEDIUM",
+      "expected_category": "Security",
+      "expected_sink_lines": null,
+      "expected_source_lines": null,
+      "tags": [
+        "sqli",
+        "real-repo-precision-2026-04-29",
+        "regression-guard"
+      ],
+      "disabled": false,
+      "notes": "Vulnerable counterpart for ts-safe-017 \u2014 bare `connection.query(`SELECT...`)` and chained `db.query(SQL).then(...)` (Promise method, not ORM accessor) must still fire as SQL_QUERY sinks even after the ORM-chain recogniser landed."
+    },
    {
      "case_id": "ts-cmdi-001",
      "file": "typescript/cmdi/cmdi_exec_template.ts",
@ -9560,6 +9589,72 @@
      "disabled": false,
      "notes": "CVE-2023-26159 patched counterpart: URL allowlist check guards axios.get; regression guard that Nyx does not refire on the fix"
    },
+    {
+      "case_id": "cve-ts-ghsa-4x48-cgf9-q33f-vulnerable",
+      "file": "cve_corpus/typescript/GHSA-4x48-cgf9-q33f/vulnerable.ts",
+      "language": "typescript",
+      "is_vulnerable": true,
+      "vuln_class": "ssrf",
+      "cwe": "CWE-918",
+      "provenance": "real_cve",
+      "equivalence_tier": "exact",
+      "match_mode": "rule_match",
+      "expected_rule_ids": [
+        "taint-unsanitised-flow"
+      ],
+      "allowed_alternative_rule_ids": [],
+      "forbidden_rule_ids": [],
+      "expected_severity": "MEDIUM",
+      "expected_category": "Security",
+      "expected_sink_lines": [
+        [
+          51,
+          51
+        ]
+      ],
+      "expected_source_lines": [
+        [
+          50,
+          50
+        ]
+      ],
+      "tags": [
+        "cve",
+        "novu",
+        "ssrf",
+        "vulnerable"
+      ],
+      "disabled": false,
+      "notes": "GHSA-4x48-cgf9-q33f: Novu conditions-filter webhook bypassed validateUrlSsrf; raw axios.post(child.webhookUrl) is the cross-function SSRF sink. MIT-licensed libs/application-generic package."
+    },
+    {
+      "case_id": "cve-ts-ghsa-4x48-cgf9-q33f-patched",
+      "file": "cve_corpus/typescript/GHSA-4x48-cgf9-q33f/patched.ts",
+      "language": "typescript",
+      "is_vulnerable": false,
+      "vuln_class": "safe",
+      "cwe": "N/A",
+      "provenance": "real_cve",
+      "equivalence_tier": "exact",
+      "match_mode": "file_presence",
+      "expected_rule_ids": [],
+      "allowed_alternative_rule_ids": [],
+      "forbidden_rule_ids": [
+        "taint-unsanitised-flow"
+      ],
+      "expected_severity": null,
+      "expected_category": "Security",
+      "expected_sink_lines": [],
+      "expected_source_lines": [],
+      "tags": [
+        "cve",
+        "novu",
+        "patched",
+        "negative"
+      ],
+      "disabled": false,
+      "notes": "GHSA-4x48-cgf9-q33f patched: validateUrlSsrf(child.webhookUrl) followed by `if (ssrfError) throw` guards the axios.post call; regression guard for the indirect-validator branch narrowing + summary all_validated propagation."
+    },
    {
      "case_id": "cve-py-2017-18342-vulnerable",
      "file": "cve_corpus/python/CVE-2017-18342/vulnerable.py",
@ -9629,6 +9724,144 @@
      "disabled": false,
      "notes": "CVE-2017-18342 patched counterpart: yaml.safe_load replaces yaml.load; regression guard that Nyx does not refire on the fix"
    },
+    {
+      "case_id": "cve-py-2025-69662-vulnerable",
+      "file": "cve_corpus/python/CVE-2025-69662/vulnerable.py",
+      "language": "python",
+      "is_vulnerable": true,
+      "vuln_class": "sql_injection",
+      "cwe": "CWE-89",
+      "provenance": "real_cve",
+      "equivalence_tier": "exact",
+      "match_mode": "rule_match",
+      "expected_rule_ids": [
+        "py.sqli.text_format"
+      ],
+      "allowed_alternative_rule_ids": [
+        "taint-unsanitised-flow"
+      ],
+      "forbidden_rule_ids": [],
+      "expected_severity": "MEDIUM",
+      "expected_category": "Security",
+      "expected_sink_lines": [
+        [
+          43,
+          44
+        ]
+      ],
+      "expected_source_lines": [
+        [
+          35,
+          35
+        ]
+      ],
+      "tags": [
+        "cve",
+        "geopandas",
+        "sql_injection",
+        "flask",
+        "sqlalchemy"
+      ],
+      "disabled": false,
+      "notes": "CVE-2025-69662: geopandas to_postgis() interpolated GeoDataFrame's geometry column name into Find_SRID probe via f-string; SQL injection on user-uploaded layer. BSD-3-Clause"
+    },
+    {
+      "case_id": "cve-py-2025-69662-patched",
+      "file": "cve_corpus/python/CVE-2025-69662/patched.py",
+      "language": "python",
+      "is_vulnerable": false,
+      "vuln_class": "safe",
+      "cwe": "N/A",
+      "provenance": "real_cve",
+      "equivalence_tier": "exact",
+      "match_mode": "file_presence",
+      "expected_rule_ids": [],
+      "allowed_alternative_rule_ids": [],
+      "forbidden_rule_ids": [
+        "py.sqli.text_format",
+        "py.sqli.execute_format",
+        "taint-unsanitised-flow"
+      ],
+      "expected_severity": null,
+      "expected_category": "Security",
+      "expected_sink_lines": [],
+      "expected_source_lines": [],
+      "tags": [
+        "cve",
+        "geopandas",
+        "patched",
+        "negative"
+      ],
+      "disabled": false,
+      "notes": "CVE-2025-69662 patched counterpart: text(...).bindparams() replaces f-string interpolation; regression guard that Nyx does not refire on the fix"
+    },
+    {
+      "case_id": "cve-py-2026-33626-vulnerable",
+      "file": "cve_corpus/python/CVE-2026-33626/vulnerable.py",
+      "language": "python",
+      "is_vulnerable": true,
+      "vuln_class": "ssrf",
+      "cwe": "CWE-918",
+      "provenance": "real_cve",
+      "equivalence_tier": "exact",
+      "match_mode": "rule_match",
+      "expected_rule_ids": [
+        "taint-unsanitised-flow"
+      ],
+      "allowed_alternative_rule_ids": [],
+      "forbidden_rule_ids": [],
+      "expected_severity": "MEDIUM",
+      "expected_category": "Security",
+      "expected_sink_lines": [
+        [
+          49,
+          49
+        ]
+      ],
+      "expected_source_lines": [
+        [
+          43,
+          43
+        ]
+      ],
+      "tags": [
+        "cve",
+        "lmdeploy",
+        "ssrf",
+        "flask",
+        "requests"
+      ],
+      "disabled": false,
+      "notes": "CVE-2026-33626: LMDeploy vision-language image loader fetched user-supplied URLs via requests.Session().get without private-IP guard; SSRF / cloud-metadata exfil. Apache-2.0"
+    },
+    {
+      "case_id": "cve-py-2026-33626-patched",
+      "file": "cve_corpus/python/CVE-2026-33626/patched.py",
+      "language": "python",
+      "is_vulnerable": false,
+      "vuln_class": "safe",
+      "cwe": "N/A",
+      "provenance": "real_cve",
+      "equivalence_tier": "exact",
+      "match_mode": "file_presence",
+      "expected_rule_ids": [],
+      "allowed_alternative_rule_ids": [],
+      "forbidden_rule_ids": [
+        "taint-unsanitised-flow"
+      ],
+      "expected_severity": null,
+      "expected_category": "Security",
+      "expected_sink_lines": [],
+      "expected_source_lines": [],
+      "tags": [
+        "cve",
+        "lmdeploy",
+        "patched",
+        "negative"
+      ],
+      "disabled": false,
+      "notes": "CVE-2026-33626 patched counterpart: _is_safe_url private-IP allowlist gate replaces scheme-only check; regression guard that Nyx does not refire on the fix"
+    },
    {
      "case_id": "cve-php-2017-9841-vulnerable",
      "file": "cve_corpus/php/CVE-2017-9841/vulnerable.php",
@ -10694,6 +10927,147 @@
      "disabled": false,
      "notes": "CVE-2017-12629 patched counterpart: transformer name allowlist + in-process secure TransformerFactory removes the Runtime.exec path; regression guard that Nyx does not refire on the fix"
    },
+    {
+      "case_id": "cve-java-2022-1471-vulnerable",
+      "file": "cve_corpus/java/CVE-2022-1471/vulnerable.java",
+      "language": "java",
+      "is_vulnerable": true,
+      "vuln_class": "deserialization",
+      "cwe": "CWE-502",
+      "provenance": "real_cve",
+      "equivalence_tier": "exact",
+      "match_mode": "rule_match",
+      "expected_rule_ids": [
+        "java.deser.snakeyaml_unsafe_constructor"
+      ],
+      "allowed_alternative_rule_ids": [
+        "taint-unsanitised-flow"
+      ],
+      "forbidden_rule_ids": [],
+      "expected_severity": "MEDIUM",
+      "expected_category": "Security",
+      "expected_sink_lines": [
+        [
+          38,
+          38
+        ]
+      ],
+      "expected_source_lines": [
+        [
+          34,
+          34
+        ]
+      ],
+      "tags": [
+        "cve",
+        "snakeyaml",
+        "deserialization",
+        "servlet"
+      ],
+      "disabled": false,
+      "notes": "CVE-2022-1471: SnakeYAML <2.0 default Constructor accepts arbitrary class tags (`!!javax.script.ScriptEngineManager`, `!!java.net.URLClassLoader`, ...) reaching RCE on apps that load attacker-controlled YAML. Apache-2.0"
+    },
+    {
+      "case_id": "cve-java-2022-1471-patched",
+      "file": "cve_corpus/java/CVE-2022-1471/patched.java",
+      "language": "java",
+      "is_vulnerable": false,
+      "vuln_class": "safe",
+      "cwe": "N/A",
+      "provenance": "real_cve",
+      "equivalence_tier": "exact",
+      "match_mode": "file_presence",
+      "expected_rule_ids": [],
+      "allowed_alternative_rule_ids": [],
+      "forbidden_rule_ids": [
+        "java.deser.snakeyaml_unsafe_constructor",
+        "java.deser.readobject",
+        "taint-unsanitised-flow"
+      ],
+      "expected_severity": null,
+      "expected_category": "Security",
+      "expected_sink_lines": [],
+      "expected_source_lines": [],
+      "tags": [
+        "cve",
+        "snakeyaml",
+        "patched",
+        "negative"
+      ],
+      "disabled": false,
+      "notes": "CVE-2022-1471 patched counterpart: explicit SafeConstructor argument restricts the YAML tag handler set to primitives + standard collections; regression guard that Nyx does not refire on the safe form"
+    },
+    {
+      "case_id": "cve-java-2022-42889-vulnerable",
+      "file": "cve_corpus/java/CVE-2022-42889/vulnerable.java",
+      "language": "java",
+      "is_vulnerable": true,
+      "vuln_class": "code_exec",
+      "cwe": "CWE-94",
+      "provenance": "real_cve",
+      "equivalence_tier": "exact",
+      "match_mode": "rule_match",
+      "expected_rule_ids": [
+        "java.code_exec.text4shell_interpolator"
+      ],
+      "allowed_alternative_rule_ids": [
+        "taint-unsanitised-flow"
+      ],
+      "forbidden_rule_ids": [],
+      "expected_severity": "MEDIUM",
+      "expected_category": "Security",
+      "expected_sink_lines": [
+        [
+          40,
+          40
+        ]
+      ],
+      "expected_source_lines": [
+        [
+          35,
+          35
+        ]
+      ],
+      "tags": [
+        "cve",
+        "commons-text",
+        "text4shell",
+        "code-exec",
+        "servlet"
+      ],
+      "disabled": false,
+      "notes": "CVE-2022-42889 (Text4Shell): Apache Commons Text 1.5..1.9 StringSubstitutor.createInterpolator() enables script:/dns:/url: lookups; ${script:js:...} reaches the JSR-223 ScriptEngineManager. Apache-2.0"
+    },
+    {
+      "case_id": "cve-java-2022-42889-patched",
+      "file": "cve_corpus/java/CVE-2022-42889/patched.java",
+      "language": "java",
+      "is_vulnerable": false,
+      "vuln_class": "safe",
+      "cwe": "N/A",
+      "provenance": "real_cve",
+      "equivalence_tier": "exact",
+      "match_mode": "file_presence",
+      "expected_rule_ids": [],
+      "allowed_alternative_rule_ids": [],
+      "forbidden_rule_ids": [
+        "java.code_exec.text4shell_interpolator",
+        "taint-unsanitised-flow"
+      ],
+      "expected_severity": null,
+      "expected_category": "Security",
+      "expected_sink_lines": [],
+      "expected_source_lines": [],
+      "tags": [
+        "cve",
+        "commons-text",
+        "text4shell",
+        "patched",
+        "negative"
+      ],
+      "disabled": false,
+      "notes": "CVE-2022-42889 patched counterpart: substitutor built directly with `new StringSubstitutor()` so the lookup map is empty; ${...} pass-through. No script/dns/url evaluation."
+    },
    {
      "case_id": "rs-auth-001",
      "file": "rust/auth/actix_scoped_write_missing.rs",
@ -12233,6 +12607,89 @@
      "disabled": false,
      "notes": "TS cross-function bool validator; deferred \u2014 same reason as js-safe-016."
    },
+    {
+      "case_id": "ts-safe-017",
+      "file": "typescript/safe/safe_strapi_db_query_chain.ts",
+      "language": "typescript",
+      "is_vulnerable": false,
+      "vuln_class": "safe",
+      "cwe": "CWE-89",
+      "provenance": "real-repo-distilled",
+      "equivalence_tier": "exact",
+      "match_mode": "rule_match",
+      "expected_rule_ids": [],
+      "allowed_alternative_rule_ids": [],
+      "forbidden_rule_ids": [
+        "cfg-unguarded-sink",
+        "taint-unsanitised-flow"
+      ],
+      "expected_severity": "NONE",
+      "expected_category": "Security",
+      "expected_sink_lines": [],
+      "expected_source_lines": [],
+      "tags": [
+        "real-repo-precision-2026-04-29",
+        "strapi",
+        "orm-chain"
+      ],
+      "disabled": false,
+      "notes": "Strapi-style ORM accessor: `<obj>.db.query(MODEL_UID).<orm_method>(...)`; the `db.query` call's literal model UID + the ORM-method outer chain (findOne/findMany/create/update/delete/count) prove the chain is parameterised. Synthesised same-node Sanitizer(SQL_QUERY) suppresses cfg-unguarded-sink and taint-unsanitised-flow."
+    },
+    {
+      "case_id": "ts-safe-018",
+      "file": "typescript/safe/safe_indirect_validator.ts",
+      "language": "typescript",
+      "is_vulnerable": false,
+      "vuln_class": "safe",
+      "cwe": "N/A",
+      "provenance": "synthetic",
+      "equivalence_tier": "exact",
+      "match_mode": "file_presence",
+      "expected_rule_ids": [],
+      "allowed_alternative_rule_ids": [],
+      "forbidden_rule_ids": [
+        "taint-unsanitised-flow",
+        "cfg-unguarded-sink"
+      ],
+      "expected_severity": null,
+      "expected_category": "Security",
+      "expected_sink_lines": [],
+      "expected_source_lines": [],
+      "tags": [
+        "indirect-validator",
+        "ssrf",
+        "negative"
+      ],
+      "disabled": false,
+      "notes": "Indirect-validator branch narrowing — `const err = validateUrlSsrf(target); if (err) throw …;` should suppress the downstream axios.get sink. Pinned by tests/lib::indirect_validator_narrowing_marks_arg_validated."
+    },
+    {
+      "case_id": "ts-safe-019",
+      "file": "typescript/safe/safe_helper_with_validator.ts",
+      "language": "typescript",
+      "is_vulnerable": false,
+      "vuln_class": "safe",
+      "cwe": "N/A",
+      "provenance": "synthetic",
+      "equivalence_tier": "exact",
+      "match_mode": "file_presence",
+      "expected_rule_ids": [],
+      "allowed_alternative_rule_ids": [],
+      "forbidden_rule_ids": [
+        "taint-unsanitised-flow"
+      ],
+      "expected_severity": null,
+      "expected_category": "Security",
+      "expected_sink_lines": [],
+      "expected_source_lines": [],
+      "tags": [
+        "helper-summary",
+        "ssrf",
+        "negative"
+      ],
+      "disabled": false,
+      "notes": "Helper-summary all_validated propagation — when a helper's body validates the param via `validateXxx`, the per-param probe's all_validated event should be skipped during summary extraction so callers don't refire the cross-fn SSRF. Pinned by tests/lib::helper_with_validator_does_not_propagate_to_caller_via_summary."
+    },
    {
      "case_id": "py-auth-decorator-001",
      "file": "python/safe/safe_login_required_decorator.py",
@ -12897,6 +13354,31 @@
      "disabled": false,
      "notes": "Happy-path `if (!data.error && Array.isArray(...))` and body-mentioning-err do not fire `cfg-error-fallthrough` (website/public/app/core/app.js)"
    },
+    {
+      "case_id": "js-safe-realrepo-006",
+      "file": "javascript/safe/safe_localised_gherkin_regex.js",
+      "language": "javascript",
+      "is_vulnerable": false,
+      "vuln_class": "safe",
+      "cwe": "N/A",
+      "provenance": "synthetic",
+      "equivalence_tier": "exact",
+      "match_mode": "rule_match",
+      "expected_rule_ids": [],
+      "allowed_alternative_rule_ids": [],
+      "forbidden_rule_ids": [],
+      "expected_severity": null,
+      "expected_category": "Security",
+      "expected_sink_lines": [],
+      "expected_source_lines": [],
+      "tags": [
+        "panic-guard",
+        "negative",
+        "real-repo-precision-2026-04-29"
+      ],
+      "disabled": false,
+      "notes": "Panic guard: CodeMirror Gherkin tokenizer ships a long localised regex inside a boolean sub-condition. Naive byte-slice truncation in CFG condition-text (`t[..MAX_CONDITION_TEXT_LEN]`) panicked when byte 256 landed inside a multi-byte UTF-8 character (Gurmukhi `ਖ`). Engine fix: src/utils/snippet.rs::truncate_at_char_boundary applied at three CFG sites + two symex display sites (gogs public/plugins/codemirror-5.17.0/mode/gherkin/gherkin.js:107)."
+    },
    {
      "case_id": "go-safe-realrepo-001",
      "file": "go/safe/safe_error_log_only_function.go",
@ -13126,6 +13608,33 @@
      "disabled": false,
      "notes": "`func (c *Cache) ...` with `c.foo()` / `c.Fs.Create(...)` intra-struct dispatches \u2014 Go method receivers must seed `non_sink_vars` so the verb-name fallback doesn't fire on bare-receiver internal calls.  Closes the hugo cache/filecache.go cluster (~48 hits)."
    },
+    {
+      "case_id": "go-safe-realrepo-006",
+      "file": "go/safe/safe_test_helper_fatal.go",
+      "language": "go",
+      "is_vulnerable": false,
+      "vuln_class": "safe",
+      "cwe": "N/A",
+      "provenance": "synthetic",
+      "equivalence_tier": "exact",
+      "match_mode": "rule_match",
+      "expected_rule_ids": [],
+      "allowed_alternative_rule_ids": [],
+      "forbidden_rule_ids": [
+        "cfg-error-fallthrough"
+      ],
+      "expected_severity": null,
+      "expected_category": "Security",
+      "expected_sink_lines": [],
+      "expected_source_lines": [],
+      "tags": [
+        "cfg",
+        "negative",
+        "real-repo-precision-2026-04-29"
+      ],
+      "disabled": false,
+      "notes": "`if err != nil { c.Fatalf(...) }` / `os.Exit` / `log.Fatalf` / `panic(err)` are documented terminators (Goexit-class).  cfg-error-fallthrough must walk through them as terminating paths.  Closes the minio test-file cluster (49+34+12+11+9+7+7 hits across xl-storage_test.go, erasure-healing_test.go, format-erasure_test.go, \u2026).  Engine fix: src/cfg_analysis/error_handling.rs::call_never_returns."
+    },
    {
      "case_id": "go-auth-realrepo-001",
      "file": "go/auth/vuln_repo_findbyid_no_auth.go",
@ -13429,6 +13938,59 @@
      "disabled": false,
      "notes": "Regression guard: same TRPC handler shape as ts-auth-realrepo-004 but the SQL parameter is `input.targetUserId` (request body field), not `ctx.user.id`.  The TRPC ctx self-actor exemption must apply ONLY to ctx.user.<id-like> subjects, never to other paths in the same param."
    },
+    {
+      "case_id": "ts-auth-realrepo-006",
+      "file": "typescript/auth/safe_local_collection_receiver.ts",
+      "language": "typescript",
+      "is_vulnerable": false,
+      "vuln_class": "safe",
+      "cwe": "N/A",
+      "provenance": "real-repo",
+      "equivalence_tier": "exact",
+      "match_mode": "rule_match",
+      "expected_rule_ids": [],
+      "allowed_alternative_rule_ids": [],
+      "forbidden_rule_ids": [
+        "js.auth.missing_ownership_check"
+      ],
+      "expected_severity": null,
+      "expected_category": "Security",
+      "expected_sink_lines": [],
+      "expected_source_lines": [],
+      "tags": [
+        "auth",
+        "negative",
+        "real-repo-precision-2026-04-29"
+      ],
+      "disabled": false,
+      "notes": "Excalidraw `Map<K, V>` / `Set<T>` / `WeakMap` / `WeakSet` / `Array<T>` / `T[]` / `readonly T[]` receivers — direct annotation, same-file `type X = Map<...>` aliasing, and inline `new Map()` constructor. SSA `constructor_type` JS/TS arm + `cfg::params::ts_type_to_local_collection` + `cfg::dto::collect_type_alias_local_collections` route every shape through `TypeKind::LocalCollection` → `SinkClass::InMemoryLocal`, suppressing missing-ownership."
+    },
+    {
+      "case_id": "ts-auth-realrepo-007",
+      "file": "typescript/auth/vuln_local_collection_does_not_blanket_suppress.ts",
+      "language": "typescript",
+      "is_vulnerable": true,
+      "vuln_class": "auth",
+      "cwe": "CWE-639",
+      "provenance": "real-repo",
+      "equivalence_tier": "exact",
+      "match_mode": "rule_match",
+      "expected_rule_ids": [
+        "js.auth.missing_ownership_check"
+      ],
+      "allowed_alternative_rule_ids": [],
+      "forbidden_rule_ids": [],
+      "expected_severity": "MEDIUM",
+      "expected_category": "Security",
+      "expected_sink_lines": [],
+      "expected_source_lines": [],
+      "tags": [
+        "auth",
+        "real-repo-precision-2026-04-29"
+      ],
+      "disabled": false,
+      "notes": "Vulnerable counterpart to ts-auth-realrepo-006: `prisma.user.findUnique` / `prisma.user.update` with attacker-supplied id and no preceding auth check.  Receiver is NOT a tracked Map / Set / Array, so the LocalCollection fix must NOT suppress this — proves the type-aware suppression doesn't blanket-cover real DB clients that share method names (`get`, `find`, `update`) with JS containers."
+    },
    {
      "case_id": "rs-auth-realrepo-009",
      "file": "rust/auth/safe_local_user_view_extractor.rs",
@ -13484,6 +14046,89 @@
      "disabled": false,
      "notes": "Negative counterpart for the LocalUserView recogniser: handler takes the typed extractor but mutates a row by `req.target_user_id` (foreign id) without any ownership check \u2014 must still flag.  Guards against an over-broad recogniser that would treat any handler with a self-actor extractor as authorised by default."
    },
+    {
+      "case_id": "rs-auth-realrepo-011",
+      "file": "rust/auth/safe_param_type_segment_idents.rs",
+      "language": "rust",
+      "is_vulnerable": false,
+      "vuln_class": "safe",
+      "cwe": "N/A",
+      "provenance": "synthetic",
+      "equivalence_tier": "exact",
+      "match_mode": "rule_match",
+      "expected_rule_ids": [],
+      "allowed_alternative_rule_ids": [],
+      "forbidden_rule_ids": [
+        "rs.auth.missing_ownership_check"
+      ],
+      "expected_severity": null,
+      "expected_category": "Security",
+      "expected_sink_lines": [],
+      "expected_source_lines": [],
+      "tags": [
+        "auth",
+        "negative",
+        "real-repo-precision-2026-04-29",
+        "noise-budget-zero"
+      ],
+      "disabled": false,
+      "notes": "Rust `parameter` arm in `collect_param_names` keeps type-segment idents (`std`, `path`, `Path`) out of `unit.params` so `dst: &std::path::Path` doesn't gate `unit_has_user_input_evidence` open via the framework-name allow-list (`path`).  Surfaced from meilisearch/index-scheduler/scheduler/process_snapshot_creation.rs::remove_tasks where `dst: &std::path::Path` made every `db.delete(task.uid)` fire missing-ownership-check."
+    },
+    {
+      "case_id": "rs-auth-realrepo-012",
+      "file": "rust/auth/safe_local_collection_param_types.rs",
+      "language": "rust",
+      "is_vulnerable": false,
+      "vuln_class": "safe",
+      "cwe": "N/A",
+      "provenance": "synthetic",
+      "equivalence_tier": "exact",
+      "match_mode": "rule_match",
+      "expected_rule_ids": [],
+      "allowed_alternative_rule_ids": [],
+      "forbidden_rule_ids": [
+        "rs.auth.missing_ownership_check"
+      ],
+      "expected_severity": null,
+      "expected_category": "Security",
+      "expected_sink_lines": [],
+      "expected_source_lines": [],
+      "tags": [
+        "auth",
+        "negative",
+        "real-repo-precision-2026-04-29",
+        "noise-budget-zero"
+      ],
+      "disabled": false,
+      "notes": "Rust function-parameter type annotations naming an in-memory container (`RoaringBitmap`, `HashMap<K,V>`, `HashSet<T>`, `BTreeSet<T>`) classify the receiver as `TypeKind::LocalCollection` \u2192 `SinkClass::InMemoryLocal`, suppressing the verb-name dispatch's DbMutation classification.  Surfaced from meilisearch/index-scheduler/scheduler/enterprise_edition/network.rs::balance_shards (`unsharded: RoaringBitmap`).  Mirrors the JS/TS `ts_type_to_local_collection` fix from 2026-04-29."
+    },
+    {
+      "case_id": "rs-auth-realrepo-013",
+      "file": "rust/auth/unsafe_handler_local_collection_does_not_blanket_suppress.rs",
+      "language": "rust",
+      "is_vulnerable": true,
+      "vuln_class": "auth",
+      "cwe": "CWE-285",
+      "provenance": "synthetic",
+      "equivalence_tier": "exact",
+      "match_mode": "rule_match",
+      "expected_rule_ids": [
+        "rs.auth.missing_ownership_check"
+      ],
+      "allowed_alternative_rule_ids": [],
+      "forbidden_rule_ids": [],
+      "expected_severity": "High",
+      "expected_category": "Security",
+      "expected_sink_lines": [],
+      "expected_source_lines": [],
+      "tags": [
+        "auth",
+        "positive",
+        "real-repo-precision-2026-04-29"
+      ],
+      "disabled": false,
+      "notes": "Negative-counterpart guard for the LocalCollection / parameter-name fixes: handler takes a HashMap typed param (in-memory bookkeeping) but ALSO calls `db.update_owner(req.target_user_id, ...)` (real DbMutation).  The cache mutation must not blanket-suppress the persistent-store mutation \u2014 the rule must still fire on `db.update_owner`."
+    },
    {
      "case_id": "ruby-safe-ar-query-shapes-001",
      "file": "ruby/safe/safe_active_record_query_shapes.rb",
@ -13715,6 +14360,120 @@
      ],
      "disabled": false,
      "notes": "Concatenated SQL passed to em.createQuery(...) \u2014 receiver-chain walk sees binary_expression at arg 0, refuses to synthesise sanitizer, structural sink fires. Regression guard for the JPA parameterised-execute fix."
+    },
+    {
+      "case_id": "py-auth-realrepo-005",
+      "file": "python/safe/safe_fastapi_route_dependencies_auth.py",
+      "language": "python",
+      "is_vulnerable": false,
+      "vuln_class": "safe",
+      "cwe": "N/A",
+      "provenance": "real-repo",
+      "equivalence_tier": "exact",
+      "match_mode": "rule_match",
+      "expected_rule_ids": [],
+      "allowed_alternative_rule_ids": [],
+      "forbidden_rule_ids": [
+        "py.auth.missing_ownership_check"
+      ],
+      "expected_severity": null,
+      "expected_category": "Security",
+      "expected_sink_lines": [],
+      "expected_source_lines": [],
+      "tags": [
+        "auth",
+        "fastapi",
+        "real-repo-precision-2026-04-29"
+      ],
+      "disabled": false,
+      "notes": "Distilled from airflow api_fastapi/core_api/routes/public/connections.py: FastAPI route decorator carries `dependencies=[Depends(requires_access_connection(method=\"DELETE\"))]`; the Flask extractor's new `dependencies=` kwarg walker plus inject_middleware_auth subject synthesis recognises the auth gate."
+    },
+    {
+      "case_id": "py-auth-realrepo-007",
+      "file": "python/safe/safe_fastapi_route_level_row_fetch.py",
+      "language": "python",
+      "is_vulnerable": false,
+      "vuln_class": "safe",
+      "cwe": "N/A",
+      "provenance": "real-repo",
+      "equivalence_tier": "exact",
+      "match_mode": "rule_match",
+      "expected_rule_ids": [],
+      "allowed_alternative_rule_ids": [],
+      "forbidden_rule_ids": [
+        "py.auth.missing_ownership_check"
+      ],
+      "expected_severity": null,
+      "expected_category": "Security",
+      "expected_sink_lines": [],
+      "expected_source_lines": [],
+      "tags": [
+        "auth",
+        "fastapi",
+        "real-repo-precision-2026-04-29"
+      ],
+      "disabled": false,
+      "notes": "Distilled from airflow api_fastapi/core_api/routes/public/dag_run.py: FastAPI route decorator carries `dependencies=[Depends(requires_access_dag(method=\"GET\"))]`; the route-level guard must cover not only direct path-param subjects (filter_by(dag_id=dag_id)) but also row-variable receivers (`dag.cleanup_runs(...)` after `dag = session.scalar(select(DagModel)...)`).  Pinned by the `is_route_level` short-circuit in `auth_check_covers_subject` plus the kind-aware `function_params_route_handler` that includes id-like Python typed params (`dag_id: str`) in `unit.params`."
+    },
+    {
+      "case_id": "py-auth-realrepo-006",
+      "file": "python/safe/safe_pytest_sqlalchemy_session.py",
+      "language": "python",
+      "is_vulnerable": false,
+      "vuln_class": "safe",
+      "cwe": "N/A",
+      "provenance": "real-repo",
+      "equivalence_tier": "exact",
+      "match_mode": "rule_match",
+      "expected_rule_ids": [],
+      "allowed_alternative_rule_ids": [],
+      "forbidden_rule_ids": [
+        "py.auth.missing_ownership_check",
+        "py.auth.token_override_without_validation"
+      ],
+      "expected_severity": null,
+      "expected_category": "Security",
+      "expected_sink_lines": [],
+      "expected_source_lines": [],
+      "tags": [
+        "auth",
+        "pytest",
+        "real-repo-precision-2026-04-29"
+      ],
+      "disabled": false,
+      "notes": "Distilled from airflow tests/unit/models/test_backfill.py: pytest test methods with SQLAlchemy `session` fixture and `session.commit()` calls.  Bare `session.<sqlalchemy_verb>` no longer counts as auth Session evidence; only `session.<identity_field>` (user/user_id/...) does."
+    },
+    {
+      "case_id": "py-auth-realrepo-008",
+      "file": "python/auth/vuln_fastapi_route_no_dependencies.py",
+      "language": "python",
+      "is_vulnerable": true,
+      "vuln_class": "auth",
+      "cwe": "CWE-862",
+      "provenance": "real-repo",
+      "equivalence_tier": "exact",
+      "match_mode": "rule_match",
+      "expected_rule_ids": [
+        "py.auth.missing_ownership_check"
+      ],
+      "allowed_alternative_rule_ids": [],
+      "forbidden_rule_ids": [],
+      "expected_severity": "HIGH",
+      "expected_category": "Security",
+      "expected_sink_lines": [
+        [
+          15,
+          15
+        ]
+      ],
+      "expected_source_lines": [],
+      "tags": [
+        "auth",
+        "fastapi",
+        "real-repo-precision-2026-04-29"
+      ],
+      "disabled": false,
+      "notes": "Vulnerable counterpart to py-auth-realrepo-005: same FastAPI route shape but no `dependencies=[Depends(...)]` keyword arg.  Regression guard: the dependency-injection recogniser must not blanket-suppress every FastAPI route."
    }
  ]
 }
--- a/tests/benchmark/results/latest.json
+++ b/tests/benchmark/results/latest.json
@ -1,6 +1,6 @@
 {
  "benchmark_version": "1.0",
-  "timestamp": "2026-04-29T05:42:03Z",
+  "timestamp": "2026-04-29T21:50:34Z",
  "scanner_version": "0.5.0",
  "scanner_config": {
    "analysis_mode": "Full",
@ -9,9 +9,9 @@
    "state_analysis_enabled": true,
    "worker_threads": 1
  },
-  "ground_truth_hash": "sha256:3e034f1fc5c7bb7838f1fb2c63de5ca5a36aacfdf5d66cf25f30bff99f25f1cf",
-  "corpus_size": 433,
-  "cases_run": 432,
+  "ground_truth_hash": "sha256:5b391d654f88673e5a200af875d513cf83812af747739395e8315768b8983ce3",
+  "corpus_size": 458,
+  "cases_run": 457,
  "cases_skipped": 1,
  "outcomes": [
    {
@ -1306,6 +1306,74 @@
      "security_finding_count": 2,
      "non_security_finding_count": 0
    },
+    {
+      "case_id": "cve-java-2022-1471-patched",
+      "file": "cve_corpus/java/CVE-2022-1471/patched.java",
+      "language": "java",
+      "vuln_class": "safe",
+      "is_vulnerable": false,
+      "outcome_file_level": "TN",
+      "outcome_rule_level": "TN",
+      "outcome_location_level": null,
+      "matched_rule_ids": [],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [],
+      "security_finding_count": 0,
+      "non_security_finding_count": 0
+    },
+    {
+      "case_id": "cve-java-2022-1471-vulnerable",
+      "file": "cve_corpus/java/CVE-2022-1471/vulnerable.java",
+      "language": "java",
+      "vuln_class": "deserialization",
+      "is_vulnerable": true,
+      "outcome_file_level": "TP",
+      "outcome_rule_level": "TP",
+      "outcome_location_level": "TP",
+      "matched_rule_ids": [
+        "java.deser.snakeyaml_unsafe_constructor"
+      ],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [
+        "java.deser.snakeyaml_unsafe_constructor"
+      ],
+      "security_finding_count": 1,
+      "non_security_finding_count": 0
+    },
+    {
+      "case_id": "cve-java-2022-42889-patched",
+      "file": "cve_corpus/java/CVE-2022-42889/patched.java",
+      "language": "java",
+      "vuln_class": "safe",
+      "is_vulnerable": false,
+      "outcome_file_level": "TN",
+      "outcome_rule_level": "TN",
+      "outcome_location_level": null,
+      "matched_rule_ids": [],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [],
+      "security_finding_count": 0,
+      "non_security_finding_count": 0
+    },
+    {
+      "case_id": "cve-java-2022-42889-vulnerable",
+      "file": "cve_corpus/java/CVE-2022-42889/vulnerable.java",
+      "language": "java",
+      "vuln_class": "code_exec",
+      "is_vulnerable": true,
+      "outcome_file_level": "TP",
+      "outcome_rule_level": "TP",
+      "outcome_location_level": "TP",
+      "matched_rule_ids": [
+        "java.code_exec.text4shell_interpolator"
+      ],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [
+        "java.code_exec.text4shell_interpolator"
+      ],
+      "security_finding_count": 1,
+      "non_security_finding_count": 0
+    },
    {
      "case_id": "cve-js-2019-14939-patched",
      "file": "cve_corpus/javascript/CVE-2019-14939/patched.js",
@ -1520,6 +1588,76 @@
      "security_finding_count": 2,
      "non_security_finding_count": 0
    },
+    {
+      "case_id": "cve-py-2025-69662-patched",
+      "file": "cve_corpus/python/CVE-2025-69662/patched.py",
+      "language": "python",
+      "vuln_class": "safe",
+      "is_vulnerable": false,
+      "outcome_file_level": "TN",
+      "outcome_rule_level": "TN",
+      "outcome_location_level": null,
+      "matched_rule_ids": [],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [],
+      "security_finding_count": 0,
+      "non_security_finding_count": 0
+    },
+    {
+      "case_id": "cve-py-2025-69662-vulnerable",
+      "file": "cve_corpus/python/CVE-2025-69662/vulnerable.py",
+      "language": "python",
+      "vuln_class": "sql_injection",
+      "is_vulnerable": true,
+      "outcome_file_level": "TP",
+      "outcome_rule_level": "TP",
+      "outcome_location_level": "TP",
+      "matched_rule_ids": [
+        "taint-unsanitised-flow (source 35:12)",
+        "py.sqli.text_format"
+      ],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [
+        "taint-unsanitised-flow (source 35:12)",
+        "py.sqli.text_format"
+      ],
+      "security_finding_count": 2,
+      "non_security_finding_count": 0
+    },
+    {
+      "case_id": "cve-py-2026-33626-patched",
+      "file": "cve_corpus/python/CVE-2026-33626/patched.py",
+      "language": "python",
+      "vuln_class": "safe",
+      "is_vulnerable": false,
+      "outcome_file_level": "TN",
+      "outcome_rule_level": "TN",
+      "outcome_location_level": null,
+      "matched_rule_ids": [],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [],
+      "security_finding_count": 0,
+      "non_security_finding_count": 0
+    },
+    {
+      "case_id": "cve-py-2026-33626-vulnerable",
+      "file": "cve_corpus/python/CVE-2026-33626/vulnerable.py",
+      "language": "python",
+      "vuln_class": "ssrf",
+      "is_vulnerable": true,
+      "outcome_file_level": "TP",
+      "outcome_rule_level": "TP",
+      "outcome_location_level": "TP",
+      "matched_rule_ids": [
+        "taint-unsanitised-flow (source 43:12)"
+      ],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [
+        "taint-unsanitised-flow (source 43:12)"
+      ],
+      "security_finding_count": 1,
+      "non_security_finding_count": 0
+    },
    {
      "case_id": "cve-rb-2013-0156-patched",
      "file": "cve_corpus/ruby/CVE-2013-0156/patched.rb",
@ -1737,6 +1875,40 @@
      "security_finding_count": 1,
      "non_security_finding_count": 0
    },
+    {
+      "case_id": "cve-ts-ghsa-4x48-cgf9-q33f-patched",
+      "file": "cve_corpus/typescript/GHSA-4x48-cgf9-q33f/patched.ts",
+      "language": "typescript",
+      "vuln_class": "safe",
+      "is_vulnerable": false,
+      "outcome_file_level": "TN",
+      "outcome_rule_level": "TN",
+      "outcome_location_level": null,
+      "matched_rule_ids": [],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [],
+      "security_finding_count": 0,
+      "non_security_finding_count": 0
+    },
+    {
+      "case_id": "cve-ts-ghsa-4x48-cgf9-q33f-vulnerable",
+      "file": "cve_corpus/typescript/GHSA-4x48-cgf9-q33f/vulnerable.ts",
+      "language": "typescript",
+      "vuln_class": "ssrf",
+      "is_vulnerable": true,
+      "outcome_file_level": "TP",
+      "outcome_rule_level": "TP",
+      "outcome_location_level": "TP",
+      "matched_rule_ids": [
+        "taint-unsanitised-flow (source 50:5)"
+      ],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [
+        "taint-unsanitised-flow (source 50:5)"
+      ],
+      "security_finding_count": 1,
+      "non_security_finding_count": 0
+    },
    {
      "case_id": "go-auth-realrepo-001",
      "file": "go/auth/vuln_repo_findbyid_no_auth.go",
@ -2371,6 +2543,21 @@
      "security_finding_count": 0,
      "non_security_finding_count": 0
    },
+    {
+      "case_id": "go-safe-realrepo-006",
+      "file": "go/safe/safe_test_helper_fatal.go",
+      "language": "go",
+      "vuln_class": "safe",
+      "is_vulnerable": false,
+      "outcome_file_level": "TN",
+      "outcome_rule_level": "TN",
+      "outcome_location_level": null,
+      "matched_rule_ids": [],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [],
+      "security_finding_count": 0,
+      "non_security_finding_count": 0
+    },
    {
      "case_id": "go-sqli-001",
      "file": "go/sqli/sqli_concat.go",
@ -3590,6 +3777,21 @@
      "security_finding_count": 0,
      "non_security_finding_count": 0
    },
+    {
+      "case_id": "js-safe-realrepo-006",
+      "file": "javascript/safe/safe_localised_gherkin_regex.js",
+      "language": "javascript",
+      "vuln_class": "safe",
+      "is_vulnerable": false,
+      "outcome_file_level": "TN",
+      "outcome_rule_level": "TN",
+      "outcome_location_level": null,
+      "matched_rule_ids": [],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [],
+      "security_finding_count": 0,
+      "non_security_finding_count": 0
+    },
    {
      "case_id": "js-sqli-001",
      "file": "javascript/sqli/sqli_concat.js",
@ -4497,6 +4699,70 @@
      "security_finding_count": 1,
      "non_security_finding_count": 0
    },
+    {
+      "case_id": "py-auth-realrepo-005",
+      "file": "python/safe/safe_fastapi_route_dependencies_auth.py",
+      "language": "python",
+      "vuln_class": "safe",
+      "is_vulnerable": false,
+      "outcome_file_level": "TN",
+      "outcome_rule_level": "TN",
+      "outcome_location_level": null,
+      "matched_rule_ids": [],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [],
+      "security_finding_count": 0,
+      "non_security_finding_count": 0
+    },
+    {
+      "case_id": "py-auth-realrepo-006",
+      "file": "python/safe/safe_pytest_sqlalchemy_session.py",
+      "language": "python",
+      "vuln_class": "safe",
+      "is_vulnerable": false,
+      "outcome_file_level": "TN",
+      "outcome_rule_level": "TN",
+      "outcome_location_level": null,
+      "matched_rule_ids": [],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [],
+      "security_finding_count": 0,
+      "non_security_finding_count": 0
+    },
+    {
+      "case_id": "py-auth-realrepo-007",
+      "file": "python/safe/safe_fastapi_route_level_row_fetch.py",
+      "language": "python",
+      "vuln_class": "safe",
+      "is_vulnerable": false,
+      "outcome_file_level": "TN",
+      "outcome_rule_level": "TN",
+      "outcome_location_level": null,
+      "matched_rule_ids": [],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [],
+      "security_finding_count": 0,
+      "non_security_finding_count": 0
+    },
+    {
+      "case_id": "py-auth-realrepo-007",
+      "file": "python/auth/vuln_fastapi_route_no_dependencies.py",
+      "language": "python",
+      "vuln_class": "auth",
+      "is_vulnerable": true,
+      "outcome_file_level": "TP",
+      "outcome_rule_level": "TP",
+      "outcome_location_level": "TP",
+      "matched_rule_ids": [
+        "py.auth.missing_ownership_check"
+      ],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [
+        "py.auth.missing_ownership_check"
+      ],
+      "security_finding_count": 1,
+      "non_security_finding_count": 0
+    },
    {
      "case_id": "py-cmdi-001",
      "file": "python/cmdi/cmdi_direct.py",
@ -5630,6 +5896,55 @@
      "security_finding_count": 1,
      "non_security_finding_count": 0
    },
+    {
+      "case_id": "rs-auth-realrepo-011",
+      "file": "rust/auth/safe_param_type_segment_idents.rs",
+      "language": "rust",
+      "vuln_class": "safe",
+      "is_vulnerable": false,
+      "outcome_file_level": "TN",
+      "outcome_rule_level": "TN",
+      "outcome_location_level": null,
+      "matched_rule_ids": [],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [],
+      "security_finding_count": 0,
+      "non_security_finding_count": 0
+    },
+    {
+      "case_id": "rs-auth-realrepo-012",
+      "file": "rust/auth/safe_local_collection_param_types.rs",
+      "language": "rust",
+      "vuln_class": "safe",
+      "is_vulnerable": false,
+      "outcome_file_level": "TN",
+      "outcome_rule_level": "TN",
+      "outcome_location_level": null,
+      "matched_rule_ids": [],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [],
+      "security_finding_count": 0,
+      "non_security_finding_count": 0
+    },
+    {
+      "case_id": "rs-auth-realrepo-013",
+      "file": "rust/auth/unsafe_handler_local_collection_does_not_blanket_suppress.rs",
+      "language": "rust",
+      "vuln_class": "auth",
+      "is_vulnerable": true,
+      "outcome_file_level": "TP",
+      "outcome_rule_level": "TP",
+      "outcome_location_level": null,
+      "matched_rule_ids": [
+        "rs.auth.missing_ownership_check"
+      ],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [
+        "rs.auth.missing_ownership_check"
+      ],
+      "security_finding_count": 1,
+      "non_security_finding_count": 0
+    },
    {
      "case_id": "rs-auth-typed-extractors-001",
      "file": "rust/auth/safe_typed_path_int_extractor.rs",
@ -7043,6 +7358,42 @@
      "security_finding_count": 1,
      "non_security_finding_count": 0
    },
+    {
+      "case_id": "ts-auth-realrepo-006",
+      "file": "typescript/auth/safe_local_collection_receiver.ts",
+      "language": "typescript",
+      "vuln_class": "safe",
+      "is_vulnerable": false,
+      "outcome_file_level": "TN",
+      "outcome_rule_level": "TN",
+      "outcome_location_level": null,
+      "matched_rule_ids": [],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [],
+      "security_finding_count": 0,
+      "non_security_finding_count": 0
+    },
+    {
+      "case_id": "ts-auth-realrepo-007",
+      "file": "typescript/auth/vuln_local_collection_does_not_blanket_suppress.ts",
+      "language": "typescript",
+      "vuln_class": "auth",
+      "is_vulnerable": true,
+      "outcome_file_level": "TP",
+      "outcome_rule_level": "TP",
+      "outcome_location_level": null,
+      "matched_rule_ids": [
+        "js.auth.missing_ownership_check",
+        "js.auth.missing_ownership_check"
+      ],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [
+        "js.auth.missing_ownership_check",
+        "js.auth.missing_ownership_check"
+      ],
+      "security_finding_count": 2,
+      "non_security_finding_count": 0
+    },
    {
      "case_id": "ts-cmdi-001",
      "file": "typescript/cmdi/cmdi_exec_template.ts",
@ -7493,6 +7844,53 @@
      "security_finding_count": 0,
      "non_security_finding_count": 0
    },
+    {
+      "case_id": "ts-safe-017",
+      "file": "typescript/safe/safe_strapi_db_query_chain.ts",
+      "language": "typescript",
+      "vuln_class": "safe",
+      "is_vulnerable": false,
+      "outcome_file_level": "TN",
+      "outcome_rule_level": "TN",
+      "outcome_location_level": null,
+      "matched_rule_ids": [],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [
+        "ts.quality.any_annotation"
+      ],
+      "security_finding_count": 0,
+      "non_security_finding_count": 1
+    },
+    {
+      "case_id": "ts-safe-018",
+      "file": "typescript/safe/safe_indirect_validator.ts",
+      "language": "typescript",
+      "vuln_class": "safe",
+      "is_vulnerable": false,
+      "outcome_file_level": "TN",
+      "outcome_rule_level": "TN",
+      "outcome_location_level": null,
+      "matched_rule_ids": [],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [],
+      "security_finding_count": 0,
+      "non_security_finding_count": 0
+    },
+    {
+      "case_id": "ts-safe-019",
+      "file": "typescript/safe/safe_helper_with_validator.ts",
+      "language": "typescript",
+      "vuln_class": "safe",
+      "is_vulnerable": false,
+      "outcome_file_level": "TN",
+      "outcome_rule_level": "TN",
+      "outcome_location_level": null,
+      "matched_rule_ids": [],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [],
+      "security_finding_count": 0,
+      "non_security_finding_count": 0
+    },
    {
      "case_id": "ts-secrets-001",
      "file": "typescript/secrets/fallback_secret.ts",
@ -7552,6 +7950,30 @@
      "security_finding_count": 2,
      "non_security_finding_count": 0
    },
+    {
+      "case_id": "ts-sqli-003",
+      "file": "typescript/sqli/sqli_db_query_concat.ts",
+      "language": "typescript",
+      "vuln_class": "sqli",
+      "is_vulnerable": true,
+      "outcome_file_level": "TP",
+      "outcome_rule_level": "TP",
+      "outcome_location_level": null,
+      "matched_rule_ids": [
+        "taint-unsanitised-flow (source 15:5)",
+        "taint-unsanitised-flow (source 21:5)"
+      ],
+      "unexpected_rule_ids": [],
+      "all_finding_ids": [
+        "ts.quality.any_annotation",
+        "ts.quality.any_annotation",
+        "taint-unsanitised-flow (source 15:5)",
+        "taint-unsanitised-flow (source 21:5)",
+        "ts.quality.any_annotation"
+      ],
+      "security_finding_count": 2,
+      "non_security_finding_count": 3
+    },
    {
      "case_id": "ts-ssrf-001",
      "file": "typescript/ssrf/ssrf_axios_user_url.ts",
@ -7771,22 +8193,22 @@
    }
  ],
  "aggregate_file_level": {
-    "tp": 216,
+    "tp": 225,
    "fp": 1,
    "fn_": 0,
-    "tn": 215,
-    "precision": 0.9953917050691244,
+    "tn": 231,
+    "precision": 0.995575221238938,
    "recall": 1.0,
-    "f1": 0.997690531177829
+    "f1": 0.9977827050997783
  },
  "aggregate_rule_level": {
-    "tp": 216,
+    "tp": 225,
    "fp": 1,
    "fn_": 0,
-    "tn": 215,
-    "precision": 0.9953917050691244,
+    "tn": 231,
+    "precision": 0.995575221238938,
    "recall": 1.0,
-    "f1": 0.997690531177829
+    "f1": 0.9977827050997783
  },
  "by_language": {
    "c": {
@ -7811,16 +8233,16 @@
      "tp": 25,
      "fp": 1,
      "fn_": 0,
-      "tn": 27,
+      "tn": 28,
      "precision": 0.9615384615384616,
      "recall": 1.0,
      "f1": 0.9803921568627451
    },
    "java": {
-      "tp": 17,
+      "tp": 19,
      "fp": 0,
      "fn_": 0,
-      "tn": 18,
+      "tn": 20,
      "precision": 1.0,
      "recall": 1.0,
      "f1": 1.0
@ -7829,7 +8251,7 @@
      "tp": 19,
      "fp": 0,
      "fn_": 0,
-      "tn": 23,
+      "tn": 24,
      "precision": 1.0,
      "recall": 1.0,
      "f1": 1.0
@ -7844,10 +8266,10 @@
      "f1": 1.0
    },
    "python": {
-      "tp": 23,
+      "tp": 26,
      "fp": 0,
      "fn_": 0,
-      "tn": 23,
+      "tn": 28,
      "precision": 1.0,
      "recall": 1.0,
      "f1": 1.0
@ -7862,19 +8284,19 @@
      "f1": 1.0
    },
    "rust": {
-      "tp": 33,
+      "tp": 34,
      "fp": 0,
      "fn_": 0,
-      "tn": 37,
+      "tn": 39,
      "precision": 1.0,
      "recall": 1.0,
      "f1": 1.0
    },
    "typescript": {
-      "tp": 29,
+      "tp": 32,
      "fp": 0,
      "fn_": 0,
-      "tn": 18,
+      "tn": 23,
      "precision": 1.0,
      "recall": 1.0,
      "f1": 1.0
@ -7882,7 +8304,7 @@
  },
  "by_vuln_class": {
    "auth": {
-      "tp": 13,
+      "tp": 16,
      "fp": 0,
      "fn_": 0,
      "tn": 0,
@ -7909,7 +8331,7 @@
      "f1": 1.0
    },
    "code_exec": {
-      "tp": 2,
+      "tp": 3,
      "fp": 0,
      "fn_": 0,
      "tn": 0,
@ -7945,7 +8367,7 @@
      "f1": 1.0
    },
    "deserialization": {
-      "tp": 4,
+      "tp": 5,
      "fp": 0,
      "fn_": 0,
      "tn": 0,
@ -8002,7 +8424,7 @@
      "tp": 0,
      "fp": 1,
      "fn_": 0,
-      "tn": 215,
+      "tn": 231,
      "precision": 0.0,
      "recall": 1.0,
      "f1": 0.0
@ -8016,8 +8438,17 @@
      "recall": 1.0,
      "f1": 1.0
    },
+    "sql_injection": {
+      "tp": 1,
+      "fp": 0,
+      "fn_": 0,
+      "tn": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "f1": 1.0
+    },
    "sqli": {
-      "tp": 29,
+      "tp": 30,
      "fp": 0,
      "fn_": 0,
      "tn": 0,
@ -8026,7 +8457,7 @@
      "f1": 1.0
    },
    "ssrf": {
-      "tp": 26,
+      "tp": 28,
      "fp": 0,
      "fn_": 0,
      "tn": 0,
@ -8046,31 +8477,31 @@
  },
  "by_confidence": {
    ">=High": {
-      "tp": 90,
-      "fp": 90,
-      "fn_": 126,
-      "tn": 126,
-      "precision": 0.5,
-      "recall": 0.4166666666666667,
-      "f1": 0.45454545454545453
+      "tp": 79,
+      "fp": 104,
+      "fn_": 146,
+      "tn": 128,
+      "precision": 0.43169398907103823,
+      "recall": 0.3511111111111111,
+      "f1": 0.3872549019607843
    },
    ">=Low": {
-      "tp": 94,
-      "fp": 102,
-      "fn_": 122,
-      "tn": 114,
-      "precision": 0.47959183673469385,
-      "recall": 0.4351851851851852,
-      "f1": 0.4563106796116505
+      "tp": 81,
+      "fp": 116,
+      "fn_": 144,
+      "tn": 116,
+      "precision": 0.41116751269035534,
+      "recall": 0.36,
+      "f1": 0.3838862559241706
    },
    ">=Medium": {
-      "tp": 94,
-      "fp": 102,
-      "fn_": 122,
-      "tn": 114,
-      "precision": 0.47959183673469385,
-      "recall": 0.4351851851851852,
-      "f1": 0.4563106796116505
+      "tp": 81,
+      "fp": 116,
+      "fn_": 144,
+      "tn": 116,
+      "precision": 0.41116751269035534,
+      "recall": 0.36,
+      "f1": 0.3838862559241706
    }
  }
 }
--- a/tests/benchmark_test.rs
+++ b/tests/benchmark_test.rs
@ -191,7 +191,7 @@ struct BenchmarkResults {
 // ── Scanning ─────────────────────────────────────────────────────────

 fn scan_corpus_file(corpus_root: &Path, relative_path: &str) -> Vec<Diag> {
-    // `cve_corpus/*` cases live in a sibling of `corpus/` — see
+    // `cve_corpus/*` cases live in a sibling of `corpus/`, see
    // `tests/benchmark/cve_corpus/`.
    let source = if relative_path.starts_with("cve_corpus/") {
        corpus_root
@ -679,7 +679,7 @@ fn benchmark_evaluation() {
    // on this corpus, so 5pp is generous enough to absorb honest
    // FP↔TN trades while still catching a real regression in a
    // vulnerability class.  When you land a durable, measurable
-    // improvement, tighten these floors — do not relax them to paper
+    // improvement, tighten these floors, do not relax them to paper
    // over a regression.
    let rule = &results.aggregate_rule_level;
    assert!(
@ -790,7 +790,7 @@ fn score_rule_level_with_diags(

 fn sha256_hex(data: &[u8]) -> String {
    use std::io::Write;
-    // Simple SHA-256 via command — avoids adding a crypto dependency.
+    // Simple SHA-256 via command, avoids adding a crypto dependency.
    let mut child = std::process::Command::new("shasum")
        .args(["-a", "256"])
        .stdin(std::process::Stdio::piped())
--- a/tests/cli_validation_tests.rs
+++ b/tests/cli_validation_tests.rs
@ -3,7 +3,7 @@
 //! Nyx's surface is a `clap` parser plus a handful of downstream validators
 //! (`SeverityFilter::parse`, `Severity::from_str`, `Confidence::from_str`,
 //! `apply_profile`).  These tests lock in the user-visible contract that
-//! bad input exits non-zero with a message that names the offending flag —
+//! bad input exits non-zero with a message that names the offending flag;
 //! a scanner that silently accepts a typo'd severity and returns zero
 //! findings is a footgun in CI.
 //!
@ -268,7 +268,7 @@ fn scan_quiet_suppresses_preview_banner() {
        .stderr(predicate::str::contains("Preview for C/C++").not());
 }

-/// JSON output format must not print the Preview banner either — machine-
+/// JSON output format must not print the Preview banner either, machine-
 /// readable output has to stay clean on both stdout and stderr.
 #[test]
 fn scan_json_format_suppresses_preview_banner() {
--- a/tests/common/mod.rs
+++ b/tests/common/mod.rs
@ -179,7 +179,7 @@ pub fn validate_expectations(diags: &[Diag], fixture_dir: &Path) {
        }
    }

-    // Noise budget (optional — omitted on tight safe-code fixtures)
+    // Noise budget (optional, omitted on tight safe-code fixtures)
    if let Some(budget) = &exp.noise_budget {
        assert_max_findings(diags, budget.max_total_findings, budget.max_high_findings);
    }
--- a/tests/concurrent_scan_tests.rs
+++ b/tests/concurrent_scan_tests.rs
@ -3,8 +3,8 @@
 //! Production defaults run the scanner with `worker_threads > 1`, and callers
 //! embedding `nyx_scanner` (the forthcoming `serve` UI, CI wrappers, scripted
 //! harnesses) may invoke `scan_no_index` from multiple threads in the same
-//! process.  Shared engine state — label tables, framework-detection caches,
-//! tree-sitter thread-local parsers, rayon globals, `once_cell` statics —
+//! process.  Shared engine state (label tables, framework-detection caches,
+//! tree-sitter thread-local parsers, rayon globals, `once_cell` statics)
 //! must tolerate two simultaneous walks without races, panics, or diverging
 //! outputs.
 //!
@ -86,7 +86,7 @@ fn build_tree(root: &Path) {
 }

 /// Canonicalize a diag list for equality comparison.  Finding output ordering
-/// depends on rayon scheduling — the individual fields must be identical but
+/// depends on rayon scheduling, the individual fields must be identical but
 /// the sequence is not.  We sort by a stable composite key and stringify
 /// (Diag itself doesn't derive Ord).
 fn canonical_fingerprint(diags: &[Diag]) -> Vec<String> {
@ -104,7 +104,7 @@ fn two_concurrent_scans_produce_identical_findings() {
    let root = tmp.path().to_path_buf();
    build_tree(&root);

-    // Capture an initial single-threaded run so we have a reference point —
+    // Capture an initial single-threaded run so we have a reference point;
    // if the concurrent run produced a subset we want to know whether that
    // matches a known-good baseline or diverges from it.
    let baseline = scan_no_index(&root, &test_cfg()).expect("baseline scan must succeed");
@ -138,7 +138,7 @@ fn two_concurrent_scans_produce_identical_findings() {
    );
 }

-/// Four concurrent scans over the same tree — larger blast radius for
+/// Four concurrent scans over the same tree, larger blast radius for
 /// serialization bugs in shared caches.  Runs on a small tree to keep
 /// CI time reasonable.
 #[test]
--- a/tests/cross_file_abstract_tests.rs
+++ b/tests/cross_file_abstract_tests.rs
@ -4,16 +4,16 @@
 //! Three fixtures cover the documented transfer forms currently tractable
 //! against the JS/Python abstract-suppression pipelines:
 //!
-//! * `cross_file_abstract_port_range` (Python) — Identity transfer on an
+//! * `cross_file_abstract_port_range` (Python), Identity transfer on an
 //!   integer-typed passthrough.  The caller's literal `8080` crosses the
 //!   file boundary and SHELL_ESCAPE suppression fires on the bounded int.
-//! * `cross_file_abstract_bounded_index` (Python) — Clamped transfer
+//! * `cross_file_abstract_bounded_index` (Python), Clamped transfer
 //!   derived from a baseline-invariant fact.  The callee returns a
 //!   literal `42`; the per-parameter transfer attaches it as
 //!   `Clamped { 42, 42 }` and the caller sees a bounded integer
 //!   without the return-abstract channel alone carrying the fact
 //!   through summary resolution ambiguity.
-//! * `cross_file_abstract_url_prefix_lock` (JS) — String-prefix transfer
+//! * `cross_file_abstract_url_prefix_lock` (JS), String-prefix transfer
 //!   across an Identity wrapper.  The caller writes
 //!   `url = asIs('https://internal/...' + userPath)` and passes `url` to
 //!   `axios.get`.  The CFG node's `string_prefix` is consumed by the
@ -21,7 +21,7 @@
 //!   prefix locks the host and SSRF suppression fires.
 //!
 //! Each fixture's `expectations.json` treats the cross-file SHELL/SSRF
-//! sink as *forbidden* on the main file — if cross-file abstract
+//! sink as *forbidden* on the main file, if cross-file abstract
 //! propagation regresses, the sink fires and the forbidden-finding
 //! assertion trips.

--- a/tests/cross_file_alias_tests.rs
+++ b/tests/cross_file_alias_tests.rs
@ -6,7 +6,7 @@
 //! Three fixtures cover distinct structural shapes of the summary
 //! channel:
 //!
-//! * `cross_file_alias_mutating_helper` (Java) — a void-returning
+//! * `cross_file_alias_mutating_helper` (Java), a void-returning
 //!   helper that stores its second argument into a field of its first
 //!   argument.  Without the points-to channel the cross-file summary
 //!   loses every taint edge (void return, no container-op in
@ -14,20 +14,20 @@
 //!   edge and the caller observes the field write through the argument
 //!   alias, producing a Runtime.exec finding.
 //!
-//! * `cross_file_alias_returned_alias` (JS) — a passthrough helper
+//! * `cross_file_alias_returned_alias` (JS), a passthrough helper
 //!   whose return aliases its first parameter.  `param_to_return` with
 //!   `Identity` already covered the taint cap; the points-to channel
 //!   adds the heap-identity alias `Param(0) → Return` so the caller
 //!   threads the points-to set through the call.  The existing
-//!   shell-exec sink must still fire — a regression guard on the
+//!   shell-exec sink must still fire, a regression guard on the
 //!   return-alias channel.
 //!
-//! * `cross_file_alias_bounded_graph` (Python) — a helper with a 20-
+//! * `cross_file_alias_bounded_graph` (Python), a helper with a 20-
 //!   edge alias graph that intentionally overflows `MAX_ALIAS_EDGES`.
 //!   The assertion is that the scan *terminates* under the bounded
 //!   analysis and falls back to the conservative
 //!   `PointsToSummary::overflow` behaviour, not a specific finding
-//!   count — overflow is an operational guarantee, not a precision one.
+//!   count, overflow is an operational guarantee, not a precision one.

 mod common;

@ -76,7 +76,7 @@ fn cross_file_container_factory() {
 }

 /// Receiver-chain regression: tainted receiver flows through
-/// `tainted.trim().toLowerCase()` — both zero-arg — and into
+/// `tainted.trim().toLowerCase()`, both zero-arg, and into
 /// `Runtime.exec`.  Pins the existing receiver-fallback behaviour so
 /// heap-aliasing changes do not regress it.
 #[test]
--- a/tests/cross_file_body_loading_tests.rs
+++ b/tests/cross_file_body_loading_tests.rs
@ -4,7 +4,7 @@
 //! The body-loading path is pure plumbing: the taint engine carries a
 //! `cross_file_bodies` field on `SsaTaintTransfer` that the cross-file
 //! inline path consumes.  This test guards the *availability*
-//! invariant — if pass 1 stops populating `bodies_by_key`, the inline
+//! invariant, if pass 1 stops populating `bodies_by_key`, the inline
 //! path would silently fall back to summary resolution even when
 //! cross-file bodies could have given context-sensitive precision.
 //!
@ -19,7 +19,7 @@ use nyx_scanner::symbol::Lang;
 use nyx_scanner::utils::config::{AnalysisMode, Config};
 use std::path::Path;

-/// Test-local config mirror of `tests/common/mod.rs::test_config` —
+/// Test-local config mirror of `tests/common/mod.rs::test_config`,
 /// kept inline so this file does not need to pull in the shared module
 /// (which `cargo test --test cross_file_body_loading_tests` would
 /// require extra wiring for).
@ -39,7 +39,7 @@ fn test_config() -> Config {
 /// Replay the pass-1 body-collection logic from `scan_filesystem` on a
 /// handful of files and return the resulting `GlobalSummaries`.
 ///
-/// This mirrors the fold-body of `scan_filesystem`'s pass-1 rayon loop —
+/// This mirrors the fold-body of `scan_filesystem`'s pass-1 rayon loop;
 /// the production code uses the same `analyse_file_fused` entry point
 /// and the same `insert` / `insert_ssa` / `insert_body` trio.  Keeping
 /// the test close to that shape catches drift between the fused pipeline
@ -72,7 +72,7 @@ fn cross_file_body_loading_smoke_python_two_files() {
    let root = tmp.path();

    // `a.py` defines a helper that takes one parameter, does a trivial
-    // string op, and returns.  The body is intentionally small — we only
+    // string op, and returns.  The body is intentionally small, we only
    // care that *any* eligible body is produced, not that it has
    // interesting taint content.
    let a_py = root.join("a.py");
@ -133,7 +133,7 @@ fn cross_file_body_loading_smoke_python_two_files() {
        body.param_count
    );

-    // Quick sanity on the SSA shape — an eligible body must have at
+    // Quick sanity on the SSA shape, an eligible body must have at
    // least one block.  Zero blocks would mean we stored an empty stub,
    // which would let the inline path silently do nothing on every
    // inline attempt.
@ -146,7 +146,7 @@ fn cross_file_body_loading_smoke_python_two_files() {
 #[test]
 fn cross_file_body_loading_empty_without_callees() {
    // A single file with no inter-procedural flow is still expected to
-    // produce a body for its one function — that's what body loading
+    // produce a body for its one function, that's what body loading
    // enables.  The *empty* case this test guards is "bodies_by_key
    // returns None when no bodies are loaded," which keeps the
    // threaded-through `Option` explicit for inline consumers.
--- a/tests/cross_file_context_off_tests.rs
+++ b/tests/cross_file_context_off_tests.rs
@ -8,8 +8,8 @@
 //! findings.
 //!
 //! This binary is split from `cross_file_context_tests.rs` because
-//! Cargo compiles each `tests/*.rs` file into its own test binary —
-//! separate processes — so the `NYX_CONTEXT_SENSITIVE` env flip here
+//! Cargo compiles each `tests/*.rs` file into its own test binary
+//! (separate processes), so the `NYX_CONTEXT_SENSITIVE` env flip here
 //! does not race against the default-on tests running in parallel.
 //!
 //! The switch is read by `AnalysisOptions::current()` via the legacy
--- a/tests/cross_file_context_tests.rs
+++ b/tests/cross_file_context_tests.rs
@ -6,24 +6,24 @@
 //! The four fixtures under `tests/fixtures/cross_file_context_*` cover
 //! the documented precision wins and guardrails:
 //!
-//! * `cross_file_context_two_call_sites` (Python) — two calls to the same
+//! * `cross_file_context_two_call_sites` (Python), two calls to the same
 //!   cross-file helper, one tainted and one with a constant literal.
 //!   Asserts the tainted call still produces a finding.
-//! * `cross_file_context_callback` (JS) — cross-file helper invokes a
+//! * `cross_file_context_callback` (JS), cross-file helper invokes a
 //!   caller-side function passed as a callback.  Inline re-analysis of
 //!   the helper must resolve the callback binding and surface the
 //!   flow through `child_process.exec`.
-//! * `cross_file_context_sanitizer` (JS) — cross-file sanitizer applied
+//! * `cross_file_context_sanitizer` (JS), cross-file sanitizer applied
 //!   before an HTML sink.  Regression guard: cross-file inline must not
 //!   introduce a taint finding when the sanitiser is recognised.
-//! * `cross_file_context_deep_chain` (Python) — A -> B -> C chain with
+//! * `cross_file_context_deep_chain` (Python), A -> B -> C chain with
 //!   the sink in C.  k=1 means B->C resolves via summary; the end-to-end
 //!   finding must still surface so callers cannot lose recall on deep
 //!   chains.
 //!
 //! The `bodies_by_key_populated_for_cross_file_fixtures` test is a
 //! direct `GlobalSummaries`-level assertion that pass 1 loaded cross-file
-//! SSA bodies for each fixture — i.e. the cross-file inline path has
+//! SSA bodies for each fixture, i.e. the cross-file inline path has
 //! something to consult.  If this assertion flips to zero, cross-file
 //! inline would silently fall back to summary resolution and every
 //! expectations.json check above would be driven by the less precise
@ -65,7 +65,7 @@ fn test_config() -> Config {
 /// Walk a fixture directory and replay the pass-1 body collection that
 /// `scan_filesystem` does, returning the merged `GlobalSummaries`.
 ///
-/// This is used purely for the availability assertion — the actual
+/// This is used purely for the availability assertion, the actual
 /// scans under test go through the regular `scan_no_index` entry point.
 fn pass1_bodies(root: &Path) -> GlobalSummaries {
    let cfg = test_config();
@ -132,7 +132,7 @@ fn cross_file_context_sanitizer() {
 }

 /// Three-file deep chain (A -> B -> C) with the sink in C.  The
-/// end-to-end flow must still surface — k=1 depth cap on inline does
+/// end-to-end flow must still surface, k=1 depth cap on inline does
 /// not drop recall because B -> C resolves via summary.
 #[test]
 fn cross_file_context_deep_chain() {
--- a/tests/cross_file_phi_tests.rs
+++ b/tests/cross_file_phi_tests.rs
@ -4,18 +4,18 @@
 //! Three fixtures cover distinct structural shapes of the per-return-path
 //! transform:
 //!
-//! * `cross_file_phi_validated_branch` (Python) — a callee whose two
+//! * `cross_file_phi_validated_branch` (Python), a callee whose two
 //!   return branches are both `Identity` on the value, differing only in
 //!   the predicate gate.  The required SQLi finding confirms the
 //!   summary-application path does not regress on the common "union is
 //!   precise enough" case.
-//! * `cross_file_phi_partial_sanitiser` (JS) — the callee has two
+//! * `cross_file_phi_partial_sanitiser` (JS), the callee has two
 //!   returns with *different* transforms (Identity vs
 //!   StripBits(HTML_ESCAPE)).  The caller invokes the unsanitised branch,
-//!   so the XSS sink must still fire — a regression guard against a
+//!   so the XSS sink must still fire, a regression guard against a
 //!   per-path application that over-eagerly attributes sanitation across
 //!   all branches.
-//! * `cross_file_phi_both_branches_safe` (Go) — both return paths run
+//! * `cross_file_phi_both_branches_safe` (Go), both return paths run
 //!   the same sanitising validator.  The SQL sink is on the forbidden
 //!   list: if the per-path decomposition regresses to "either branch
 //!   could be raw" the caller would pick up a false positive.
--- a/tests/db_corruption_tests.rs
+++ b/tests/db_corruption_tests.rs
@ -6,10 +6,10 @@
 //! clear error instead of panicking, hanging, or producing nonsense
 //! findings.  These tests exercise both classes of corruption:
 //!
-//!   1. Truncation to zero bytes — SQLite treats a zero-length file as a
+//!   1. Truncation to zero bytes, SQLite treats a zero-length file as a
 //!      fresh empty DB.  We expect the indexer to bootstrap the schema and
 //!      carry on.
-//!   2. Arbitrary garbage in the header — SQLite rejects this with
+//!   2. Arbitrary garbage in the header, SQLite rejects this with
 //!      `SQLITE_NOTADB` during pragma/schema execution.  We expect the
 //!      indexer to return a structured error, not a panic.
 //!
@ -122,7 +122,7 @@ fn zero_truncated_db_rebuilds_on_init() {
    let pool = Indexer::init(&db_path)
        .expect("Indexer::init should bootstrap a schema into an empty file");

-    // After init, the DB is empty of prior state — an indexed scan should
+    // After init, the DB is empty of prior state, an indexed scan should
    // still run end-to-end but will effectively be acting like a cold
    // rebuild.  We don't re-call build_index here because the plan is to
    // confirm the raw init path is resilient.
@ -143,14 +143,14 @@ fn zero_truncated_db_rebuilds_on_init() {
 }

 /// Clobber the SQLite magic header with garbage bytes.  This is the
-/// "actual corruption" case — SQLite rejects it with `SQLITE_NOTADB` the
+/// "actual corruption" case, SQLite rejects it with `SQLITE_NOTADB` the
 /// first time pragma or SQL is executed, which surfaces as
 /// `NyxError::Sql(_)` from `Indexer::init`.
 #[test]
 fn garbage_header_db_returns_structured_error() {
    let (_project_name, db_path, _project, _db_dir) = build_indexed_project();

-    // Write 100 bytes of `0xFF` — guaranteed not to match SQLite's header
+    // Write 100 bytes of `0xFF`, guaranteed not to match SQLite's header
    // magic "SQLite format 3\0".
    clobber_header(&db_path, 0xFF, 100);

@ -186,7 +186,7 @@ fn garbage_header_db_returns_structured_error() {
 // NOTE: A mid-file corruption test (garbage at bytes 100..200, preserving
 // SQLite magic) was attempted and is deliberately omitted.  That shape
 // triggers a slow corruption-detection path in SQLite where `Indexer::init`
-// takes 150–200 seconds before returning — unsuitable for CI wall-clock
+// takes 150–200 seconds before returning, unsuitable for CI wall-clock
 // budgets.  The two tests above already cover the "corrupt-on-arrival"
 // cases that users actually hit (crash-truncated file, deliberate clobber).
 // A follow-up should either short-circuit `PRAGMA integrity_check` up
--- a/tests/dedup_alternative_paths_tests.rs
+++ b/tests/dedup_alternative_paths_tests.rs
@ -2,7 +2,7 @@
 //! [`nyx_scanner::taint::analyse_file`] must preserve distinct flows
 //! that share a source but differ on validation status or intermediate
 //! variables.  Historically the dedup collapsed all `(body_id, sink,
-//! source)` siblings, preferring the validated one — so an unguarded
+//! source)` siblings, preferring the validated one, so an unguarded
 //! exploit on a sibling branch was silently dropped in favour of a
 //! neighbouring guarded flow.
 //!
@ -35,7 +35,7 @@ fn dedup_preserves_validated_and_unvalidated_flows() {
    validate_expectations(&diags, &dir);

    // Load-bearing assertion: the two flows live on distinct sink
-    // lines (6 and 8 in the source — actual lines depend on the
+    // lines (6 and 8 in the source, actual lines depend on the
    // fixture file format, so we only assert distinct sinks).
    let taint: Vec<&nyx_scanner::commands::scan::Diag> = diags
        .iter()
@ -58,7 +58,7 @@ fn dedup_preserves_validated_and_unvalidated_flows() {
            .collect::<Vec<_>>(),
    );

-    // The two findings must live on different source lines — if the
+    // The two findings must live on different source lines, if the
    // engine collapses them into one, the test will fail here even
    // when the count assertion above coincidentally passes (e.g. if
    // a future change started emitting one validated and one
@ -73,7 +73,7 @@ fn dedup_preserves_validated_and_unvalidated_flows() {

    // Every taint finding must carry a stable `finding_id` that
    // downstream formatters can reference.  This is the plumbing that
-    // feeds alternative-path cross-linking — verify it is non-empty
+    // feeds alternative-path cross-linking, verify it is non-empty
    // for every taint finding so regressions in `analyse_file`'s
    // post-dedup `make_finding_id` pass surface here.
    for d in &taint {
@ -87,7 +87,7 @@ fn dedup_preserves_validated_and_unvalidated_flows() {
        );
    }

-    // At least one validated/unvalidated split must be present — the
+    // At least one validated/unvalidated split must be present, the
    // whole point of the fixture is that a guarded branch and an
    // unguarded branch reach `exec(input)` and both must report.
    // We do not require an exact split since future sanitization
@ -103,7 +103,7 @@ fn dedup_preserves_validated_and_unvalidated_flows() {
         is not behind any allowlist. Found only validated findings.",
    );
    // `validated` may legitimately be empty if the engine does not yet
-    // recognise `isWhitelisted` as a predicate — the fixture is still
+    // recognise `isWhitelisted` as a predicate, the fixture is still
    // load-bearing because the `min_count: 2` in expectations.json
    // asserts both findings surface regardless of which is classified
    // as validated.  Drop the assertion to avoid gating the regression
--- a/tests/determinism_threads_tests.rs
+++ b/tests/determinism_threads_tests.rs
@ -2,9 +2,9 @@
 //!
 //! The scanner's two-pass pipeline runs rayon `par_iter` over files in
 //! both pass-1 (summary extraction) and pass-2 (rule evaluation), and
-//! merges summaries via `try_reduce`.  A latent ordering bug — a
+//! merges summaries via `try_reduce`.  A latent ordering bug, a
 //! shared mutable state hit unprotected from multiple threads, or a
-//! `HashMap` iteration order leaking into a finding identity — can
+//! `HashMap` iteration order leaking into a finding identity, can
 //! surface as a diagnostic that appears with 4 workers but not with 1.
 //!
 //! This test runs the same fixture under worker-thread counts of 1,
@ -14,7 +14,7 @@
 //! assertion fires only on real output divergence.
 //!
 //! If this test ever flakes, prefer investigating the engine over
-//! weakening the normaliser — engine-level determinism across thread
+//! weakening the normaliser, engine-level determinism across thread
 //! counts is load-bearing for reproducible CI runs.
 mod common;

--- a/tests/engine_notes_rank_tests.rs
+++ b/tests/engine_notes_rank_tests.rs
@ -26,7 +26,7 @@ use nyx_scanner::rank::{compute_attack_rank, rank_diags};
 // ── Diag factories ─────────────────────────────────────────────────────

 /// A converged taint finding that the points-based scorer will score
-/// as `Confidence::High`.  Used as the "clean" baseline — any delta
+/// as `Confidence::High`.  Used as the "clean" baseline, any delta
 /// against this must come from attached engine notes.
 fn high_confidence_taint_diag(path: &str, line: u32) -> Diag {
    Diag {
@ -204,7 +204,7 @@ fn rank_diags_sorts_converged_above_capped_at_same_severity() {
 #[test]
 fn rank_diags_preserves_severity_tier_under_bail() {
    // High + Bail must still outrank Medium + clean at the same
-    // evidence-strength baseline — this is the tier-boundary invariant
+    // evidence-strength baseline, this is the tier-boundary invariant
    // that the -8 completeness magnitude is calibrated for.
    let mut high_bailed = high_confidence_taint_diag("a.rs", 1);
    attach_notes(
@ -421,7 +421,7 @@ fn sarif_omits_loss_direction_for_informational_only() {
 fn every_engine_note_direction_is_documented() {
    // Enumerate every EngineNote variant and assert its direction.
    // The intent is that a contributor adding a new variant will cause
-    // this test to fail to compile (no match arm) — a structural guard
+    // this test to fail to compile (no match arm), a structural guard
    // against silent misclassification.
    fn check(note: EngineNote, expected: LossDirection) {
        assert_eq!(
--- a/tests/engine_notes_tests.rs
+++ b/tests/engine_notes_tests.rs
@ -2,7 +2,7 @@
 //! test forces a specific cap-site to fire on a tiny fixture by
 //! overriding the engine's safety cap, then asserts either that the
 //! corresponding observability counter moved *or* that the note
-//! propagated to a produced finding — whichever is the more stable
+//! propagated to a produced finding, whichever is the more stable
 //! signal for that cap.

 mod common;
@ -19,7 +19,7 @@ use std::path::Path;
 use std::sync::Mutex;

 /// Process-wide atomics for cap overrides mean tests that fiddle with
-/// them must run serially — cargo test defaults to parallel.
+/// them must run serially, cargo test defaults to parallel.
 static CAP_GUARD: Mutex<()> = Mutex::new(());

 fn fixture(name: &str) -> std::path::PathBuf {
@ -32,7 +32,7 @@ fn fixture(name: &str) -> std::path::PathBuf {
 fn worklist_cap_trips_observability_counter() {
    let _guard = CAP_GUARD.lock().unwrap_or_else(|e| e.into_inner());
    // Force a very tight worklist budget so every body with > 0 blocks
-    // trips the cap.  The observability counter is the stable signal —
+    // trips the cap.  The observability counter is the stable signal;
    // note attribution to a specific finding may be lost on bodies that
    // capped *before* emitting their sink event.
    reset_worklist_observability();
@ -59,7 +59,7 @@ fn origins_cap_trips_observability_on_multi_source_fixture() {
    // Set origins to 1 and scan a fixture with multiple top-level
    // sources flowing into the same sink.  Any non-trivial taint flow
    // will produce at least one tainted value whose origin list hit the
-    // cap — detected by the post-hoc saturation scan at the end of
+    // cap, detected by the post-hoc saturation scan at the end of
    // `run_ssa_taint_internal`.
    reset_origins_observability();
    set_max_origins_override(1);
--- a/tests/fetch_data_exfil_integration_tests.rs
+++ b/tests/fetch_data_exfil_integration_tests.rs
@ -0,0 +1,125 @@
+//! Integration tests for the `Cap::DATA_EXFIL` detector class.
+//!
+//! Validates per-cap attribution at multi-gate call sites: a single `fetch`
+//! call carries both an SSRF gate (URL flow) and a DATA_EXFIL gate (body /
+//! headers / json flow), and a tainted body must not surface as SSRF and
+//! vice versa.  Also sanity-checks the SARIF output so the new finding
+//! class produces a distinct rule id.
+
+mod common;
+
+use common::scan_fixture_dir;
+use nyx_scanner::commands::scan::Diag;
+use nyx_scanner::utils::config::AnalysisMode;
+use std::path::PathBuf;
+
+fn js_fixture_dir() -> PathBuf {
+    PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+        .join("tests")
+        .join("fixtures")
+        .join("js")
+}
+
+fn diags_for(file: &str) -> Vec<Diag> {
+    let dir = js_fixture_dir();
+    let all = scan_fixture_dir(&dir, AnalysisMode::Full);
+    all.into_iter().filter(|d| d.path.ends_with(file)).collect()
+}
+
+#[test]
+fn fetch_body_data_exfil_emits_data_exfil_not_ssrf() {
+    let diags = diags_for("fetch_body_data_exfil.js");
+    let exfil = diags
+        .iter()
+        .filter(|d| d.id.starts_with("taint-data-exfiltration"))
+        .count();
+    let plain_taint = diags
+        .iter()
+        .filter(|d| d.id.starts_with("taint-unsanitised-flow"))
+        .count();
+    assert!(
+        exfil >= 1,
+        "expected at least one taint-data-exfiltration finding, got 0.\n\
+         Diags: {:#?}",
+        diags.iter().map(|d| &d.id).collect::<Vec<_>>(),
+    );
+    assert_eq!(
+        plain_taint,
+        0,
+        "fixed-URL fetch with tainted body must NOT emit SSRF \
+         (taint-unsanitised-flow), got {plain_taint}.\n\
+         Diags: {:#?}",
+        diags.iter().map(|d| &d.id).collect::<Vec<_>>(),
+    );
+}
+
+#[test]
+fn fetch_ssrf_url_tainted_emits_ssrf_not_data_exfil() {
+    let diags = diags_for("fetch_ssrf_url_tainted.js");
+    let ssrf = diags
+        .iter()
+        .filter(|d| d.id.starts_with("taint-unsanitised-flow"))
+        .count();
+    let exfil = diags
+        .iter()
+        .filter(|d| d.id.starts_with("taint-data-exfiltration"))
+        .count();
+    assert!(
+        ssrf >= 1,
+        "expected at least one taint-unsanitised-flow (SSRF) finding, got 0.\n\
+         Diags: {:#?}",
+        diags.iter().map(|d| &d.id).collect::<Vec<_>>(),
+    );
+    assert_eq!(
+        exfil,
+        0,
+        "tainted-URL fetch must NOT emit DATA_EXFIL, got {exfil}.\n\
+         Diags: {:#?}",
+        diags.iter().map(|d| &d.id).collect::<Vec<_>>(),
+    );
+}
+
+#[test]
+fn sarif_distinguishes_data_exfil_rule_id_from_ssrf() {
+    use nyx_scanner::output::build_sarif;
+
+    let dir = js_fixture_dir();
+    let diags = scan_fixture_dir(&dir, AnalysisMode::Full);
+    let sarif = build_sarif(&diags, &dir);
+
+    let rules = sarif["runs"][0]["tool"]["driver"]["rules"]
+        .as_array()
+        .expect("SARIF rules array");
+    let rule_ids: Vec<&str> = rules.iter().filter_map(|r| r["id"].as_str()).collect();
+
+    assert!(
+        rule_ids.contains(&"taint-data-exfiltration"),
+        "SARIF rules must contain taint-data-exfiltration, got: {rule_ids:?}"
+    );
+    assert!(
+        rule_ids.contains(&"taint-unsanitised-flow"),
+        "SARIF rules must contain taint-unsanitised-flow, got: {rule_ids:?}"
+    );
+
+    // Each finding should reference exactly one rule, and the cap-specific
+    // class must not be folded back into the generic taint bucket.
+    let results = sarif["runs"][0]["results"]
+        .as_array()
+        .expect("SARIF results array");
+    let exfil_results = results
+        .iter()
+        .filter(|r| r["ruleId"].as_str() == Some("taint-data-exfiltration"))
+        .count();
+    let ssrf_results = results
+        .iter()
+        .filter(|r| r["ruleId"].as_str() == Some("taint-unsanitised-flow"))
+        .count();
+    assert!(
+        exfil_results >= 1,
+        "expected >= 1 SARIF result with ruleId taint-data-exfiltration, got {exfil_results}",
+    );
+    assert!(
+        ssrf_results >= 1,
+        "expected >= 1 SARIF result with ruleId taint-unsanitised-flow, got {ssrf_results}",
+    );
+}
--- a/tests/fixtures/async_rust/main.rs
+++ b/tests/fixtures/async_rust/main.rs
@ -1,7 +1,7 @@
 // Regression fixture: Rust async flow through `tokio::process::Command`.
 //
 // Per docs/language-maturity.md, Rust's Tokio process variants are not
-// yet covered — the Tokio async process APIs are a known gap.  The
+// yet covered, the Tokio async process APIs are a known gap.  The
 // fixture is checked in so that when Rust async-process coverage lands,
 // the engine begins producing the intended finding and the
 // `forbidden_findings` assertion forces whoever adds the coverage to
--- a/tests/fixtures/auth_analysis/cross_file_helper_authz.rs
+++ b/tests/fixtures/auth_analysis/cross_file_helper_authz.rs
@ -1,7 +1,7 @@
 // Target: authorization happens inside `require_owner`, which
 // delegates to `require_group_member` (a configured authorization
 // check name).  The handler in `cross_file_helper_handler.rs`
-// delegates ownership validation to this helper — cross-file helper
+// delegates ownership validation to this helper, cross-file helper
 // lifting should recognise the call as an auth check covering the
 // supplied `row`.
 struct Db;
--- a/tests/fixtures/auth_analysis/db_connection_type_inferred.rs
+++ b/tests/fixtures/auth_analysis/db_connection_type_inferred.rs
@ -2,7 +2,7 @@
 // produces a `DatabaseConnection` via SSA `constructor_type` (through
 // `peel_identity_suffix`, which strips `.unwrap()` before matching).  The
 // handler then calls `conn.execute(..)`, a callee name that appears in
-// neither `mutation_indicator_names` nor `read_indicator_names` for Rust —
+// neither `mutation_indicator_names` nor `read_indicator_names` for Rust,
 // name-based classification returns `None`, so the ownership gate
 // already cannot flag the call.  The type-map refinement should *still*
 // leave the call unflagged (the type map produces `DbMutation`, but
--- a/tests/fixtures/auth_analysis/hashmap_local_noise.rs
+++ b/tests/fixtures/auth_analysis/hashmap_local_noise.rs
@ -16,7 +16,7 @@ pub async fn handle_list_peer_docs(req: Req, ctx: Ctx) -> Result<String, ()> {
    let user = auth::require_auth(&req, &ctx).await?;
    let doc_ids: Vec<i64> = vec![1, 2, 3];

-    // Pure in-memory bookkeeping — no authorization decision here.
+    // Pure in-memory bookkeeping, no authorization decision here.
    let mut counts: HashMap<i64, usize> = HashMap::new();
    let mut seen: HashSet<i64> = HashSet::new();
    for doc_id in &doc_ids {
--- a/tests/fixtures/auth_analysis/helper_no_auth_lift.rs
+++ b/tests/fixtures/auth_analysis/helper_no_auth_lift.rs
@ -1,5 +1,5 @@
 // B4 regression guard: `format_target` does NOT auth-check
-// `group_id` — it just constructs a string from it. The helper-lift
+// `group_id`, it just constructs a string from it. The helper-lift
 // pass must not synthesise a covering AuthCheck on the handler's call
 // site, so the subsequent `db.exec("INSERT INTO comments …", &[group_id])`
 // MUST still flag.
@ -19,7 +19,7 @@ mod auth {
 }

 fn format_target(group_id: i64, suffix: &str) -> String {
-    // No auth check here — pure formatting.
+    // No auth check here, pure formatting.
    format!("group:{}{}", group_id, suffix)
 }

--- a/tests/fixtures/auth_analysis/row_ownership_equality.rs
+++ b/tests/fixtures/auth_analysis/row_ownership_equality.rs
@ -41,7 +41,7 @@ pub async fn handle_delete_doc(req: Req, ctx: Ctx, doc_id: i64) -> Result<String
        return json_err("cannot delete another user's doc", 403);
    }

-    // By construction, the row belongs to `user` — so any id read from it is authorized.
+    // By construction, the row belongs to `user`, so any id read from it is authorized.
    let group_id = existing.get_i64("group_id");
    realtime::publish_to_group(group_id, "doc_deleted");
    Ok("ok".into())
--- a/tests/fixtures/auth_analysis/row_ownership_no_early_exit.rs
+++ b/tests/fixtures/auth_analysis/row_ownership_no_early_exit.rs
@ -31,7 +31,7 @@ pub async fn handle_update_doc(req: Req, ctx: Ctx, doc_id: i64) -> Result<String
    );
    let owner_id = existing.get_i64("user_id");

-    // Equality compared but no early exit — the check has no effect.
+    // Equality compared but no early exit, the check has no effect.
    if owner_id != user.id {
        // missing return
        println!("not your doc (but proceeding anyway)");
--- a/tests/fixtures/auth_analysis/self_scoped_user.rs
+++ b/tests/fixtures/auth_analysis/self_scoped_user.rs
@ -5,7 +5,7 @@ mod auth { pub async fn require_auth(_r: &super::Req, _c: &super::Ctx) -> Result

 // The handler's `get_peer_ids(&db, user.id)` call below must not be
 // flagged. `user` is bound from `auth::require_auth(..)` so `user.id`
-// is the caller's own id — the call is self-referential, not a foreign
+// is the caller's own id, the call is self-referential, not a foreign
 // scoped id. The library-style helper below is a pass-through so its
 // body contains no DB sinks (the internal `user_id` → DB flow is a
 // separate pattern covered by helper-summary lifting).
--- a/tests/fixtures/auth_analysis/sql_join_acl.rs
+++ b/tests/fixtures/auth_analysis/sql_join_acl.rs
@ -2,7 +2,7 @@
 // against an ACL table (`group_members`) with a WHERE clause that pins
 // the row to the current user (`gm.user_id = ?1` bound to `user.id`).
 // Every returned row is membership-gated by construction, so downstream
-// uses of the row's columns (`group_id` here) are authorized — the
+// uses of the row's columns (`group_id` here) are authorized, the
 // `realtime::publish_to_group` call MUST NOT be flagged as missing an
 // ownership check after B3.
 struct Ctx;
--- a/tests/fixtures/auth_analysis/sql_no_acl_join_flags.rs
+++ b/tests/fixtures/auth_analysis/sql_no_acl_join_flags.rs
@ -1,7 +1,7 @@
 // B3 regression guard: the SELECT JOINs through `audit_log` (NOT in
 // the configured ACL list) and the WHERE clause pins on
 // `al.user_id = ?1`. The audit-log row's user is the audit subject,
-// not the doc owner — so this query does NOT prove caller ownership
+// not the doc owner, so this query does NOT prove caller ownership
 // of the returned `doc_id`. The downstream realtime publish MUST
 // still flag for a missing ownership check after B3.
 struct Ctx;
--- a/tests/fixtures/auth_analysis/transitive_helper.rs
+++ b/tests/fixtures/auth_analysis/transitive_helper.rs
@ -1,7 +1,7 @@
 // target: authorization happens inside `validate_target`, which
 // internally calls `authz::require_membership` against the same
 // `group_id` the handler subsequently mutates. The current rule cannot
-// see this transitively — B4 lifts per-function auth-check summaries
+// see this transitively, B4 lifts per-function auth-check summaries
 // (which positional params are auth-checked) so the handler-level call
 // to `validate_target(&db, group_id, user.id)` is recognised as an
 // auth check covering `group_id`. Result: `db.exec(..)` MUST NOT flag
@ -45,7 +45,7 @@ pub async fn handle_create_comment(
    let user = auth::require_auth(&req, &ctx).await?;
    let db = Db;

-    // Authorization happens inside validate_target — helper-summary
+    // Authorization happens inside validate_target, helper-summary
    // lifting propagates the per-param auth check so this covers
    // `group_id`.
    validate_target(&db, group_id, user.id).await?;
--- a/tests/fixtures/fp_guards/auth_local_collection_receiver/App.ts
+++ b/tests/fixtures/fp_guards/auth_local_collection_receiver/App.ts
@ -0,0 +1,31 @@
+// FP guard for `js.auth.missing_ownership_check` — JS built-in
+// container receivers must not be classified as data-layer sinks.
+// See `tests/benchmark/corpus/typescript/auth/safe_local_collection_receiver.ts`
+// for the full real-repo distillation.
+
+type ElementsMap = Map<string, { id: string }>;
+
+function fromAlias(elementsMap: ElementsMap, id: string) {
+  return elementsMap.get(id);
+}
+
+function fromDirectGeneric(m: Map<string, string>, k: string) {
+  return m.get(k);
+}
+
+function fromArrayShorthand(arr: { id: string }[], targetId: string) {
+  return arr.find((x) => x.id === targetId);
+}
+
+function fromLocalConstructor() {
+  const cache = new Map<string, string>();
+  cache.set("a", "x");
+  return cache.get("a");
+}
+
+function fromSet(visited: Set<string>, k: string) {
+  if (!visited.has(k)) {
+    visited.add(k);
+  }
+  return visited.size;
+}
--- a/tests/fixtures/fp_guards/auth_local_collection_receiver/expectations.json
+++ b/tests/fixtures/fp_guards/auth_local_collection_receiver/expectations.json
@ -0,0 +1,16 @@
+{
+  "required_findings": [],
+  "forbidden_findings": [
+    { "id_prefix": "js.auth.missing_ownership_check" }
+  ],
+  "noise_budget": {
+    "max_total_findings": 1,
+    "max_high_findings": 0
+  },
+  "performance_expectations": {
+    "max_ms_no_index": 1000,
+    "max_ms_index_cold": 1500,
+    "max_ms_index_warm": 500,
+    "ci_mode": "lenient"
+  }
+}
--- a/tests/fixtures/fp_guards/auth_rust_param_typed_local_collection/expectations.json
+++ b/tests/fixtures/fp_guards/auth_rust_param_typed_local_collection/expectations.json
@ -0,0 +1,16 @@
+{
+  "required_findings": [],
+  "forbidden_findings": [
+    { "id_prefix": "rs.auth.missing_ownership_check" }
+  ],
+  "noise_budget": {
+    "max_total_findings": 2,
+    "max_high_findings": 0
+  },
+  "performance_expectations": {
+    "max_ms_no_index": 1000,
+    "max_ms_index_cold": 1500,
+    "max_ms_index_warm": 500,
+    "ci_mode": "lenient"
+  }
+}
--- a/tests/fixtures/fp_guards/auth_rust_param_typed_local_collection/snapshot.rs
+++ b/tests/fixtures/fp_guards/auth_rust_param_typed_local_collection/snapshot.rs
@ -0,0 +1,93 @@
+// Real-repo precision guard mirroring meilisearch's index-scheduler
+// shape:
+// crates/index-scheduler/src/scheduler/process_snapshot_creation.rs::remove_tasks
+// (`unsafe fn remove_tasks(tasks: &[Task], dst: &std::path::Path,
+// index_base_map_size: usize)` plus per-loop bitmap mutations on
+// destructured heed `Database` handles), plus the LocalCollection
+// receiver-type cluster
+// (`crates/index-scheduler/src/scheduler/enterprise_edition/network.rs::balance_shards`,
+// `unsharded: RoaringBitmap`).
+//
+// Both engine fixes must hold: the Rust `parameter` arm in
+// `collect_param_names` (only descends into `pattern`, never `type`)
+// and the Rust LocalCollection type-text classifier
+// (`rust_type_to_local_collection`).  Without either, this file would
+// produce missing-ownership-check findings on internal helpers /
+// in-memory bitmap mutations.
+
+use std::collections::{BTreeSet, HashMap, HashSet};
+
+struct RoaringBitmap;
+impl RoaringBitmap {
+    fn new() -> Self { Self }
+    fn insert(&mut self, _x: u32) -> bool { true }
+    fn remove(&mut self, _x: u32) -> bool { true }
+    fn contains(&self, _x: u32) -> bool { true }
+}
+
+struct Task { uid: u32 }
+
+struct Database;
+impl Database {
+    fn delete(&self, _w: &mut u32, _u: &u32) -> Result<(), ()> { Ok(()) }
+}
+
+struct TaskQueue {
+    all_tasks: Database,
+    canceled_by: Database,
+}
+
+// Rust `parameter` arm: type-segment idents (`std`, `path`, `Path`)
+// must NOT pollute `unit.params` and gate user-input-evidence open.
+unsafe fn remove_tasks(
+    tasks: &[Task],
+    dst: &std::path::Path,
+    sz: usize,
+) -> Result<(), ()> {
+    let _ = (dst, sz);
+    let mut wtxn = 0u32;
+    let task_queue = TaskQueue {
+        all_tasks: Database,
+        canceled_by: Database,
+    };
+    let TaskQueue { all_tasks, canceled_by } = task_queue;
+    for task in tasks {
+        all_tasks.delete(&mut wtxn, &task.uid)?;
+        canceled_by.delete(&mut wtxn, &task.uid)?;
+    }
+    Ok(())
+}
+
+// LocalCollection typed param: `unsharded: RoaringBitmap` resolves to
+// `TypeKind::LocalCollection`, so `unsharded.insert(docid)` /
+// `unsharded.remove(docid)` classify as `SinkClass::InMemoryLocal`
+// (non-auth-relevant).
+fn balance_shards(mut unsharded: RoaringBitmap, docid: u32) {
+    unsharded.insert(docid);
+    unsharded.remove(docid);
+}
+
+// `&'a mut HashMap<...>` reference + lifetime: ref-stripping must
+// reach the type head.
+fn store_shard_docids<'a>(
+    new_shard_docids: &'a mut HashMap<String, u32>,
+    shard: String,
+    docid: u32,
+) {
+    new_shard_docids.insert(shard, docid);
+}
+
+fn add_user_id(ids: &mut HashSet<u64>, user_id: u64) {
+    ids.insert(user_id);
+    ids.remove(&user_id);
+}
+
+fn collect_seen(seen: &mut BTreeSet<u32>, item_id: u32) {
+    seen.insert(item_id);
+}
+
+fn build_local_set(task_id: u32) -> RoaringBitmap {
+    let mut s = RoaringBitmap::new();
+    s.insert(task_id);
+    s
+}
--- a/tests/fixtures/fp_guards/cfg_utf8_long_condition/App.js
+++ b/tests/fixtures/fp_guards/cfg_utf8_long_condition/App.js
@ -0,0 +1,41 @@
+// FP guard / panic guard — CFG condition-text truncation must be UTF-8 safe.
+//
+// Reproduces the gogs scan crash where a CodeMirror Gherkin tokenizer ships a
+// long localised regex inside a boolean sub-condition (`stream.match(/.../) &&
+// other`).  When `push_condition_node` textualises the sub-expression, the
+// regex literal exceeds MAX_CONDITION_TEXT_LEN (256 bytes); naive byte-slice
+// truncation panicked when byte 256 landed inside a multi-byte UTF-8
+// character (here Gurmukhi `ਖ`, three bytes).  Engine fix in
+// `src/utils/snippet.rs::truncate_at_char_boundary`, applied at three CFG
+// sites + two symex display sites.
+//
+// Invariant: scanning this file must terminate without panicking the rayon
+// worker, regardless of where byte 256 lands.
+
+function tokenLocalisedFeatureKeyword(stream, state) {
+    if (
+        !state.inKeywordLine &&
+        state.allowFeature &&
+        stream.match(/(機能|功能|フィーチャ|기능|โครงหลัก|ความสามารถ|ความต้องการทางธุรกิจ|ಹೆಚ್ಚಳ|గుణము|ಮುಹಾಂದರಾ|ਮੁਹਾਂਦਰਾ|ਨਕਸ਼ ਨੁਹਾਰ|ਖਾਸੀਅਤ|रूप लेख|وِیژگی|خاصية|תכונה|Функціонал|Функция|Функционалност|Функционал|Үзенчәлеклелек|Свойство|Особина|Мөмкинлек|Могућност|Λειτουργία|Δυνατότητα|Właściwość|Vlastnosť|Trajto|Tính năng|Savybė|Požiadavka|Požadavek|Potrzeba biznesowa|Özellik|Osobina|Ominaisuus|Omadus|Mogućnost|Mogucnost|Jellemző|Funzionalità|Funktionalitéit|Funktionalität|Funkcja|Funkcionalnost|Funkcionalitāte|Funkcia|Fungsi|Functionaliteit|Funcționalitate|Funcţionalitate|Functionalitate|Funcionalitat|Funcionalidade|Fonctionnalité|Fitur|Fīča|Feature|Eiginleiki|Egenskap|Egenskab|Característica|Caracteristica|Business Need|Aspekt|Arwedd|Ability):/)
+    ) {
+        state.inKeywordLine = true;
+        return "keyword";
+    }
+    return null;
+}
+
+// Sanity: also exercise the let-match-guard truncation site
+// (`emit_rust_match_guard_if`) by way of a JS analogue with a CFG-relevant
+// boolean chain that wraps localised text into the second branch.  The CFG
+// builder still has to textualise both arms.
+function classify(s) {
+    if (
+        s.length > 0 &&
+        s.indexOf("ਨਕਸ਼ ਨੁਹਾਰ ਖਾਸੀਅਤ रूप लेख وِیژگی خاصية תכונה Функціонал Функция Функционалност Функционал Үзенчәлеклелек Свойство Особина Мөмкинлек Могућност Λειτουργία Δυνατότητα") >= 0
+    ) {
+        return "localised";
+    }
+    return "ascii";
+}
+
+module.exports = { tokenLocalisedFeatureKeyword, classify };
--- a/tests/fixtures/fp_guards/cfg_utf8_long_condition/expectations.json
+++ b/tests/fixtures/fp_guards/cfg_utf8_long_condition/expectations.json
@ -0,0 +1,14 @@
+{
+  "required_findings": [],
+  "forbidden_findings": [],
+  "noise_budget": {
+    "max_total_findings": 0,
+    "max_high_findings": 0
+  },
+  "performance_expectations": {
+    "max_ms_no_index": 1000,
+    "max_ms_index_cold": 1500,
+    "max_ms_index_warm": 500,
+    "ci_mode": "lenient"
+  }
+}
--- a/tests/fixtures/fp_guards/framework_fastapi_route_level_auth/App.py
+++ b/tests/fixtures/fp_guards/framework_fastapi_route_level_auth/App.py
@ -0,0 +1,51 @@
+"""
+FP guard for FastAPI / Flask route-level dependency-injection auth.
+
+The `dependencies=[Depends(requires_access_dag(...))]` decorator
+authorises the entire handler — every value the handler receives,
+every row it fetches, and every operation downstream.  The
+`is_route_level` flag on the injected AuthCheck tells
+`auth_check_covers_subject` to short-circuit `true`, suppressing
+`py.auth.missing_ownership_check` on the body's ORM calls (`filter_by`,
+`scalar`, …) and on row-variable receivers (`dag.cleanup_runs(...)`).
+
+A bare route with no `dependencies=` keyword is a true positive, not
+an FP — the engine must still flag it.  The vulnerable
+counterpart lives in
+`tests/benchmark/corpus/python/auth/vuln_fastapi_route_no_dependencies.py`.
+"""
+from fastapi import Depends, FastAPI
+
+router = FastAPI()
+
+
+def requires_access_dag(method: str, access_entity=None):
+    def check():
+        ...
+    return check
+
+
+@router.get(
+    "/{dag_id}/runs/{run_id}",
+    dependencies=[Depends(requires_access_dag(method="GET"))],
+)
+def get_dag_run(dag_id: str, run_id: str, session):
+    """Path params + ORM call covered by route-level guard."""
+    dag_run = session.scalar(
+        select(DagRun).filter_by(dag_id=dag_id, run_id=run_id)
+    )
+    if dag_run is None:
+        raise HTTPException(404, "not found")
+    return dag_run
+
+
+@router.delete(
+    "/{dag_id}",
+    dependencies=[Depends(requires_access_dag(method="DELETE"))],
+)
+def delete_dag(dag_id: str, session):
+    """Row fetch + row-variable method call covered by route-level guard."""
+    dag = session.scalar(select(DagModel).where(DagModel.dag_id == dag_id))
+    if dag is None:
+        raise HTTPException(404, "not found")
+    dag.cleanup_runs(session=session)
--- a/tests/fixtures/fp_guards/framework_fastapi_route_level_auth/expectations.json
+++ b/tests/fixtures/fp_guards/framework_fastapi_route_level_auth/expectations.json
@ -0,0 +1,16 @@
+{
+  "required_findings": [],
+  "forbidden_findings": [
+    { "id_prefix": "py.auth.missing_ownership_check" }
+  ],
+  "noise_budget": {
+    "max_total_findings": 2,
+    "max_high_findings": 0
+  },
+  "performance_expectations": {
+    "max_ms_no_index": 1500,
+    "max_ms_index_cold": 2000,
+    "max_ms_index_warm": 800,
+    "ci_mode": "lenient"
+  }
+}
--- a/tests/fixtures/fp_guards/framework_strapi_db_query_chain/App.ts
+++ b/tests/fixtures/fp_guards/framework_strapi_db_query_chain/App.ts
@ -0,0 +1,40 @@
+// Strapi-style ORM accessor: `<obj>.db.query(MODEL_UID).<orm_method>(...)`.
+// MODEL_UID is a literal model identifier (not raw SQL); the trailing
+// findOne/findMany/create/update/delete/count are intrinsically
+// parameterised — the actual SQL is generated by the ORM and per-call
+// values arrive through field-keyed object literals the driver escapes.
+//
+// FP-guard: cfg-unguarded-sink and taint-unsanitised-flow must NOT
+// fire on this shape.
+
+declare const strapi: any;
+
+async function getApiToken(whereParams: Record<string, unknown>) {
+    return strapi.db.query('admin::api-token').findOne({
+        select: ['id', 'name', 'lastUsedAt'],
+        populate: ['permissions'],
+        where: whereParams,
+    });
+}
+
+async function listTokens() {
+    return strapi.db.query('admin::api-token').findMany({
+        where: { type: 'read-only' },
+    });
+}
+
+async function createToken(data: unknown) {
+    return strapi.db.query('admin::api-token').create({ data });
+}
+
+async function updateToken(id: number, data: unknown) {
+    return strapi.db.query('admin::api-token').update({ where: { id }, data });
+}
+
+async function deleteToken(id: number) {
+    return strapi.db.query('admin::api-token').delete({ where: { id } });
+}
+
+async function countTokens() {
+    return strapi.db.query('admin::api-token').count();
+}
--- a/tests/fixtures/fp_guards/framework_strapi_db_query_chain/expectations.json
+++ b/tests/fixtures/fp_guards/framework_strapi_db_query_chain/expectations.json
@ -0,0 +1,17 @@
+{
+  "required_findings": [],
+  "forbidden_findings": [
+    { "id_prefix": "cfg-unguarded-sink" },
+    { "id_prefix": "taint-unsanitised-flow" }
+  ],
+  "noise_budget": {
+    "max_total_findings": 3,
+    "max_high_findings": 0
+  },
+  "performance_expectations": {
+    "max_ms_no_index": 1000,
+    "max_ms_index_cold": 1500,
+    "max_ms_index_warm": 500,
+    "ci_mode": "lenient"
+  }
+}
--- a/tests/fixtures/js/fetch_body_data_exfil.js
+++ b/tests/fixtures/js/fetch_body_data_exfil.js
@ -0,0 +1,13 @@
+// DATA_EXFIL fixture: a fixed destination URL and an attacker-influenced
+// body.  SSRF must NOT fire (destination is hardcoded) but `Cap::DATA_EXFIL`
+// must fire on the body field — request-bound bytes are leaving the process
+// via the outbound request payload.
+//
+// Driven by `fetch_data_exfil_integration_tests.rs`.
+function leakBody(req) {
+    var payload = req.body.message;
+    fetch('/endpoint', {
+        method: 'POST',
+        body: payload,
+    });
+}
--- a/tests/fixtures/js/fetch_ssrf_url_tainted.js
+++ b/tests/fixtures/js/fetch_ssrf_url_tainted.js
@ -0,0 +1,10 @@
+// SSRF regression fixture: attacker-controlled destination URL.  SSRF must
+// fire on the URL flow (arg 0) and `Cap::DATA_EXFIL` must NOT fire — the two
+// classes share the callee but cap attribution is per-position so a tainted
+// URL never surfaces as data exfiltration.
+//
+// Driven by `fetch_data_exfil_integration_tests.rs`.
+function proxy(req) {
+    var target = req.query.target;
+    fetch(target);
+}
--- a/tests/fixtures/mixed_project/config.rs
+++ b/tests/fixtures/mixed_project/config.rs
@ -2,7 +2,7 @@ use std::env;
 use std::fs;
 use std::process::Command;

-/// Infrastructure provisioning tool — Rust core.
+/// Infrastructure provisioning tool, Rust core.
 /// Reads infrastructure config from environment and executes provisioning commands.

 struct InfraConfig {
@ -56,7 +56,7 @@ fn apply_terraform() {
        .unwrap();
 }

-/// Destroys infrastructure — reads target from env.
+/// Destroys infrastructure, reads target from env.
 /// VULN: env var flows into Command
 fn destroy_cluster() {
    let cluster = env::var("DESTROY_TARGET").unwrap();
--- a/tests/fixtures/patterns/java/negative.java
+++ b/tests/fixtures/patterns/java/negative.java
@ -1,5 +1,9 @@
 import java.sql.*;
 import java.security.SecureRandom;
+import org.yaml.snakeyaml.Yaml;
+import org.yaml.snakeyaml.LoaderOptions;
+import org.yaml.snakeyaml.constructor.SafeConstructor;
+import org.apache.commons.text.StringSubstitutor;

 class Negative {
    // Safe: parameterized query
@ -19,4 +23,17 @@ class Negative {
    void safeLiteralQuery(Statement stmt) throws Exception {
        stmt.executeQuery("SELECT COUNT(*) FROM users");
    }
+
+    // Safe: SnakeYAML 2.0 / explicit SafeConstructor — CVE-2022-1471 fix shape.
+    void safeSnakeyamlSafeConstructor(String body) {
+        LoaderOptions opts = new LoaderOptions();
+        Yaml yaml = new Yaml(new SafeConstructor(opts));
+        Object data = yaml.load(body);
+    }
+
+    // Safe: empty StringSubstitutor — no interpolator factory — CVE-2022-42889 fix shape.
+    String safeStringSubstitutorPassthrough(String input) {
+        StringSubstitutor s = new StringSubstitutor();
+        return s.replace(input);
+    }
 }
--- a/tests/fixtures/patterns/java/positive.java
+++ b/tests/fixtures/patterns/java/positive.java
@ -1,6 +1,8 @@
 import java.io.*;
 import java.util.Random;
 import java.security.MessageDigest;
+import org.yaml.snakeyaml.Yaml;
+import org.apache.commons.text.StringSubstitutor;

 class Positive {
    // java.deser.readobject
@ -45,4 +47,16 @@ class Positive {
    void triggerGetWriterPrint(javax.servlet.http.HttpServletResponse resp) throws Exception {
        resp.getWriter().println("<html>" + "data" + "</html>");
    }
+
+    // java.deser.snakeyaml_unsafe_constructor — CVE-2022-1471 regression guard.
+    void triggerSnakeyamlUnsafeConstructor() throws Exception {
+        Yaml yaml = new Yaml();
+        Object data = yaml.load("payload");
+    }
+
+    // java.code_exec.text4shell_interpolator — CVE-2022-42889 regression guard.
+    String triggerText4ShellInterpolator(String input) {
+        StringSubstitutor s = StringSubstitutor.createInterpolator();
+        return s.replace(input);
+    }
 }
--- a/tests/fixtures/patterns/python/positive.py
+++ b/tests/fixtures/patterns/python/positive.py
@ -42,6 +42,14 @@ def trigger_yaml(data):
 def trigger_sql_concat(cursor, user):
    cursor.execute("SELECT * FROM users WHERE name = '" + user + "'")

+# py.sqli.execute_format (f-string variant)
+def trigger_sql_fstring(cursor, user):
+    cursor.execute(f"SELECT * FROM users WHERE name = '{user}'")
+
+# py.sqli.text_format
+def trigger_sqlalchemy_text_fstring(connection, user):
+    connection.execute(text(f"SELECT * FROM users WHERE name = '{user}'"))
+
 # py.crypto.md5
 def trigger_md5(data):
    hashlib.md5(data)
--- a/tests/fixtures/phase_c_auth_taint/handler_sanitized.rs
+++ b/tests/fixtures/phase_c_auth_taint/handler_sanitized.rs
@ -23,7 +23,7 @@ mod auth {

 // Negative control: the handler validates ownership via
 // `authz::require_group_member(...)?` before the realtime publish.  Phase C
-// should NOT emit `rs.auth.missing_ownership_check.taint` here — the
+// should NOT emit `rs.auth.missing_ownership_check.taint` here, the
 // sanitizer clears `UNAUTHORIZED_ID` from the argument SSA values.
 pub async fn handle_publish_checked(Path(group_id): Path<i64>) -> Result<&'static str, ()> {
    let user = auth::current_user();
--- a/tests/fixtures/real_world/javascript/taint/fetch_object_url_tainted_fires.expect.json
+++ b/tests/fixtures/real_world/javascript/taint/fetch_object_url_tainted_fires.expect.json
@ -1,6 +1,6 @@
 {
-  "description": "fetch({url: taintedUrl, body: fixed}) — destination-aware object-literal case. url is tainted, must fire.",
-  "tags": ["taint", "ssrf", "fetch", "destination-aware", "object-config"],
+  "description": "fetch({url: taintedUrl, body: fixed}) — destination-aware object-literal case. url is tainted (SSRF), body is fixed. SSRF must fire and the cross-boundary data-exfiltration class (Cap::DATA_EXFIL) must NOT fire — the two classes share the callee but cap attribution is per-position.",
+  "tags": ["taint", "ssrf", "fetch", "destination-aware", "object-config", "cap-attribution"],
  "modes": ["full"],
  "expected": [
    {
@ -10,6 +10,12 @@
      "line_range": [6, 14],
      "evidence_contains": [],
      "notes": "req.query.target → fetch({url: target, ...}) — tainted destination field under object-literal shape."
+    },
+    {
+      "rule_id": "taint-data-exfiltration",
+      "must_not_match": true,
+      "line_range": [6, 14],
+      "notes": "body is a fixed literal '{}' — DATA_EXFIL must NOT fire on this site (regression guard for per-cap attribution)."
    }
  ]
 }
--- a/Show more
+++ b/Show more