Python FP and docs updates (#58)

* refactor: Update comments for clarity and add expectations.json files for performance metrics

* feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks

* feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks

* refactor: Simplify code formatting for better readability in multiple files

* refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration

* feat: Update Java and Python patterns to include new security rules

* refactor: Improve comment clarity and consistency across multiple Rust files

* refactor: Simplify code formatting for improved readability in integration tests and module files

* refactor: Improve comment formatting and enhance clarity in assertions across multiple files
This commit is contained in:
Eli Peter 2026-04-29 19:53:34 -04:00 committed by GitHub
parent 4db0805de6
commit a438886217
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
291 changed files with 9485 additions and 3851 deletions

View file

@ -187,7 +187,7 @@ fn interval_join_clamped_widens_range() {
#[test]
fn interval_join_identity_vs_clamped_is_top() {
// Different flow shapes cannot be combined into a single bounded
// form conservative fallback is Top.
// form, conservative fallback is Top.
let a = IntervalTransfer::Identity;
let b = IntervalTransfer::Clamped { lo: 0, hi: 10 };
assert_eq!(a.join(&b), IntervalTransfer::Top);
@ -296,7 +296,7 @@ fn transfer_apply_combines_subdomains() {
// Interval identity forwards the caller-known bound.
assert_eq!(out.interval.lo, Some(8080));
assert_eq!(out.interval.hi, Some(8080));
// String literal-prefix overrides the caller-side input the
// String literal-prefix overrides the caller-side input, the
// callee's structural fact wins.
assert_eq!(out.string.prefix.as_deref(), Some("https://safe.com/"));
// Bit subdomain is always Top on cross-file transfer by design.

View file

@ -649,7 +649,7 @@ fn hashmap_local_noise_is_clean() {
#[test]
fn row_ownership_equality_is_clean() {
// `if owner_id != user.id { return ... }` is a row-level
// ownership check both the row-fetching call and any downstream
// ownership check, both the row-fetching call and any downstream
// uses of the row's fields should be considered authorized.
assert_absent(
"row_ownership_equality.rs",
@ -670,7 +670,7 @@ fn row_ownership_no_early_exit_flags() {
#[test]
fn helper_scoped_params_is_clean() {
// A library helper whose internal work is `result.insert(..)`
// on a locally-constructed HashSet is not a sink the call is
// on a locally-constructed HashSet is not a sink, the call is
// classified as non-sink because the receiver is the locally-bound
// collection.
assert_absent("helper_scoped_params.rs", "rs.auth.missing_ownership_check");
@ -688,7 +688,7 @@ fn self_scoped_user_is_clean() {
fn true_positive_missing_check_flags() {
// Positive control: an authenticated handler that deletes a doc
// and publishes against a group without any ownership/membership
// check must still flag.
// check, must still flag.
assert_has(
"true_positive_missing_check.rs",
"rs.auth.missing_ownership_check",
@ -763,7 +763,7 @@ fn db_connection_type_inferred_is_clean() {
// inferred as a `DatabaseConnection` via SSA `constructor_type`
// (through `peel_identity_suffix`). The handler logs the caller's
// own id; no foreign scoped id reaches the sink, so the ownership
// gate has nothing to flag the type-facts refinement must not
// gate has nothing to flag, the type-facts refinement must not
// introduce a false positive here.
assert_absent(
"db_connection_type_inferred.rs",

View file

@ -4,13 +4,13 @@ Current baseline (2026-04-29):
| Metric | File-level | Rule-level | CI floor |
|-----------|------------|------------|----------|
| Precision | 0.991 | 0.991 | 0.861 |
| Recall | 0.995 | 0.995 | 0.944 |
| F1 | 0.993 | 0.993 | 0.901 |
| Precision | 0.996 | 0.996 | 0.861 |
| Recall | 1.000 | 1.000 | 0.944 |
| F1 | 0.998 | 0.998 | 0.901 |
Corpus: 433 cases across 10 languages, 432 evaluated (1 disabled). Per-run JSON lands in `tests/benchmark/results/` (`latest.json` plus dated snapshots). See `README.md` for what the scoring modes mean and how to run a subset.
Corpus: 451 cases across 10 languages, 449 evaluated (no disabled). Per-run JSON lands in `tests/benchmark/results/` (`latest.json` plus dated snapshots). See `README.md` for what the scoring modes mean and how to run a subset.
The corpus is mostly synthetic 8-20 line fixtures, one vulnerability or one safe pattern per file. A smaller real-CVE replay set under `cve_corpus/` covers 18 published CVEs across all 10 languages. Both contribute to the headline numbers.
The corpus is mostly synthetic 8-20 line fixtures, one vulnerability or one safe pattern per file. A smaller real-CVE replay set under `cve_corpus/` covers 20 published CVEs across all 10 languages. Both contribute to the headline numbers.
## Real CVE coverage
@ -20,14 +20,19 @@ Real disclosed CVEs reduced to minimal reproducers, vulnerable + patched pair pe
|----------------|------------|----------------------------|----------------------|-----------------|----------|
| CVE-2023-48022 | Python | Ray | Apache-2.0 | CMDI | detected |
| CVE-2017-18342 | Python | PyYAML | MIT | Deserialization | detected |
| CVE-2025-69662 | Python | geopandas | BSD-3-Clause | SQL Injection | detected |
| CVE-2026-33626 | Python | LMDeploy | Apache-2.0 | SSRF | detected |
| CVE-2019-14939 | JavaScript | mongo-express | MIT | code_exec | detected |
| CVE-2025-64430 | JavaScript | Parse Server | Apache-2.0 | SSRF | detected |
| CVE-2023-26159 | TypeScript | follow-redirects | MIT | SSRF | detected |
| GHSA-4x48-cgf9-q33f | TypeScript | Novu | MIT | SSRF | detected |
| CVE-2022-30323 | Go | hashicorp/go-getter | MPL-2.0 | CMDI | detected |
| CVE-2023-3188 | Go | owncast | MIT | SSRF | detected |
| CVE-2024-31450 | Go | owncast | MIT | path_traversal | detected |
| CVE-2015-7501 | Java | Apache Commons Collections | Apache-2.0 | Deserialization | detected |
| CVE-2017-12629 | Java | Apache Solr | Apache-2.0 | CMDI | detected |
| CVE-2022-1471 | Java | SnakeYAML | Apache-2.0 | Deserialization | detected |
| CVE-2022-42889 | Java | Apache Commons Text | Apache-2.0 | code_exec | detected |
| CVE-2013-0156 | Ruby | Ruby on Rails | MIT | Deserialization | detected |
| CVE-2020-8130 | Ruby | Rake | MIT | CMDI | detected |
| CVE-2017-9841 | PHP | PHPUnit | BSD-3-Clause | code_exec | detected |
@ -60,6 +65,9 @@ Most recent first. Metrics are rule-level on the corpus size at that point.
| Date | Change | Corpus | P | R | F1 |
|------------|------------------------------------------------------------------------------|--------|-------|-------|-------|
| 2026-04-29 | Java SnakeYAML + Text4Shell patterns; CVE-2022-1471 and CVE-2022-42889 detected | 449 | 0.996 | 1.000 | 0.998 |
| 2026-04-29 | Indirect-validator branch narrowing (`const err = validate(x); if (err) throw …;`) + helper-summary all_validated propagation; Novu GHSA-4x48-cgf9-q33f detected | 445 | 0.991 | 1.000 | 0.995 |
| 2026-04-29 | Python f-string SQLi pattern + bindparams sanitizer + HttpClient SSRF rules; CVE-2025-69662 (geopandas) and CVE-2026-33626 (LMDeploy) detected | 439 | 0.991 | 1.000 | 0.995 |
| 2026-04-29 | Phantom-Param-aware field suppression: CVE-2023-3188 detected, FP guards hold | 432 | 0.995 | 1.000 | 0.998 |
| 2026-04-28 | Ruby bare `Kernel#open` CMDI sink, exact-match sigil on label matchers | 428 | 0.995 | 1.000 | 0.998 |
| 2026-04-28 | Go SSRF/FILE_IO sink expansion (`http.DefaultClient.*`, `os.Remove`/`WriteFile`) plus Decode-writeback container op | 426 | 0.995 | 1.000 | 0.998 |

View file

@ -0,0 +1,62 @@
// go-safe-realrepo-006 — distilled from minio cmd/admin-handlers-users_test.go
// (and the identical pattern across xl-storage_test.go, erasure-healing_test.go,
// 49+34+12+11+9+7+7+5 findings on minio test files alone).
//
// `cfg-error-fallthrough` looks for `if err != nil { … }` whose body fails to
// terminate. Test code idiomatically writes
//
// if err != nil { c.Fatalf("...", err) }
// postSink(...)
//
// where `c.Fatalf` (a `*testing.T` method) calls `runtime.Goexit()` and the
// `postSink` line is unreachable on the error path. The rule classified
// this as fall-through because `Fatalf` looks like an ordinary call. Engine
// fix: `src/cfg_analysis/error_handling.rs::call_never_returns` recognises
// `Fatal*`, `Panic*`, `FailNow`, `os.Exit`, `runtime.Goexit`, `log.Fatal*`,
// `panic`, etc. as terminators inside `terminates_on_all_paths`.
package safe
import (
"context"
"log"
"os"
"testing"
)
// clientHelper is a minimal stand-in for a storage client. It carries only
// the bucket name that the functions below pass to MakeBucket.
type clientHelper struct {
bucket string
}
// MakeBucket is a stub: it ignores its arguments and always returns nil.
func (c *clientHelper) MakeBucket(ctx context.Context, name string) error { return nil }
// PutObject is a stub: it ignores its arguments and always returns nil.
func (c *clientHelper) PutObject(ctx context.Context, name string) error { return nil }
// setupBucket creates the helper's bucket and then uploads "obj". Each error
// branch ends in a *testing.T fatal call, which the file header lists as a
// never-returning terminator, so `cfg-error-fallthrough` must stay silent.
func setupBucket(t *testing.T, c *clientHelper, ctx context.Context) {
if err := c.MakeBucket(ctx, c.bucket); err != nil {
// Formatted fatal (`Fatal*` family): the error path stops here.
t.Fatalf("bucket creat error: %v", err)
}
if err := c.PutObject(ctx, "obj"); err != nil {
// Unformatted variant of the same terminator.
t.Fatal(err)
}
}
// runWithExit covers the `log.Fatal*` terminator: the error branch ends in
// log.Fatalf, so the PutObject call after it is not an error fall-through.
func runWithExit(c *clientHelper, ctx context.Context) {
if err := c.MakeBucket(ctx, c.bucket); err != nil {
log.Fatalf("init failed: %v", err)
}
// The error returned by PutObject is discarded in this fixture.
c.PutObject(ctx, "obj")
}
// runWithOsExit covers the `os.Exit` terminator: the error branch exits with
// status 1, so the PutObject call after it is not an error fall-through.
func runWithOsExit(c *clientHelper, ctx context.Context) {
if err := c.MakeBucket(ctx, c.bucket); err != nil {
os.Exit(1)
}
// The error returned by PutObject is discarded in this fixture.
c.PutObject(ctx, "obj")
}
// runWithPanic covers the builtin `panic` terminator: the error branch
// panics with the error, so the PutObject call after it is not an error
// fall-through.
func runWithPanic(c *clientHelper, ctx context.Context) {
if err := c.MakeBucket(ctx, c.bucket); err != nil {
panic(err)
}
// The error returned by PutObject is discarded in this fixture.
c.PutObject(ctx, "obj")
}

View file

@ -0,0 +1,26 @@
// js-safe-realrepo-006 — distilled from gogs `public/plugins/codemirror-5.17.0/
// mode/gherkin/gherkin.js` line 107. The CodeMirror Gherkin tokenizer ships
// localised feature-keyword aliases as one large regex inside a boolean
// sub-condition. The CFG builder textualises every sub-condition of a
// boolean chain and truncates that text to MAX_CONDITION_TEXT_LEN (256
// bytes) for diagnostics; naive byte-slice truncation panicked when byte
// 256 landed inside a multi-byte UTF-8 character (here Gurmukhi `ਖ`,
// 3-byte UTF-8). Engine fix:
// `src/utils/snippet.rs::truncate_at_char_boundary`, applied at three CFG
// sites and two symex display sites. Invariant: scanning this file must
// terminate without panicking the rayon worker, regardless of where byte
// 256 lands inside the regex.
/**
 * Tokenizer step for localised Gherkin feature keywords.
 *
 * Returns "keyword" (and sets `state.inKeywordLine`) when the state is not
 * already inside a keyword line, `state.allowFeature` is truthy, and
 * `stream.match` succeeds on one of the localised aliases followed by ":".
 * Returns null otherwise.
 *
 * The regex literal is the payload of this fixture: it is long and contains
 * multi-byte UTF-8 characters, so the condition text must stay unchanged.
 *
 * @param stream object exposing `match(regex)`; presumably a CodeMirror
 *   StringStream (TODO confirm).
 * @param state tokenizer state; reads `inKeywordLine` and `allowFeature`,
 *   writes `inKeywordLine`.
 */
function tokenLocalisedFeatureKeyword(stream, state) {
if (
!state.inKeywordLine &&
state.allowFeature &&
stream.match(/(機能|功能|フィーチャ|기능|โครงหลัก|ความสามารถ|ความต้องการทางธุรกิจ|ಹೆಚ್ಚಳ|గుణము|ಮುಹಾಂದರಾ|ਮੁਹਾਂਦਰਾ|ਨਕਸ਼ ਨੁਹਾਰ|ਖਾਸੀਅਤ|रूप लेख|وِیژگی|خاصية|תכונה|Функціонал|Функция|Функционалност|Функционал|Үзенчәлеклелек|Свойство|Особина|Мөмкинлек|Могућност|Λειτουργία|Δυνατότητα|Właściwość|Vlastnosť|Trajto|Tính năng|Savybė|Požiadavka|Požadavek|Potrzeba biznesowa|Özellik|Osobina|Ominaisuus|Omadus|Mogućnost|Mogucnost|Jellemző|Funzionalità|Funktionalitéit|Funktionalität|Funkcja|Funkcionalnost|Funkcionalitāte|Funkcia|Fungsi|Functionaliteit|Funcționalitate|Funcţionalitate|Functionalitate|Funcionalitat|Funcionalidade|Fonctionnalité|Fitur|Fīča|Feature|Eiginleiki|Egenskap|Egenskab|Característica|Caracteristica|Business Need|Aspekt|Arwedd|Ability):/)
) {
// Remember that the rest of this line belongs to a keyword line.
state.inKeywordLine = true;
return "keyword";
}
// No localised feature keyword at the current position.
return null;
}
module.exports = { tokenLocalisedFeatureKeyword };

View file

@ -0,0 +1,19 @@
"""
Vulnerable counterpart to safe_fastapi_route_dependencies_auth.py: same
shape but with NO `dependencies=[Depends(...)]` keyword arg on the route
decorator. The FastAPI ownership-check rule must still fire; the
recognizer must not blanket-suppress every FastAPI route, only those
with an actual dependency-injected auth check.
"""
from fastapi import FastAPI
router = FastAPI()
# Vulnerable handler: the route decorator has no `dependencies=[...]` argument,
# so nothing authorizes `connection_id` before the row is fetched and deleted.
@router.delete("/{connection_id}")
def delete_connection(connection_id: str, session):
"""No auth — must still fire missing_ownership_check."""
# Row fetch keyed by the caller-supplied id.
connection = session.scalar(select(Connection).filter_by(conn_id=connection_id))
if connection is None:
raise HTTPException(404, "not found")
# Mutation of the fetched row with no preceding ownership check.
session.delete(connection)

View file

@ -0,0 +1,43 @@
"""
Distilled from airflow `airflow-core/src/airflow/api_fastapi/core_api/routes/public/connections.py`:
@connections_router.delete(
"/{connection_id}",
dependencies=[Depends(requires_access_connection(method="DELETE"))],
)
def delete_connection(connection_id: str, session: SessionDep):
connection = session.scalar(select(Connection).filter_by(conn_id=connection_id))
...
session.delete(connection)
The route's `dependencies=[Depends(requires_access_*)]` declares the auth gate at
the FastAPI level. The ownership-check rule must recognise the dependency-
injected check and not flag the row-fetch / mutation as missing ownership.
"""
from fastapi import Depends, FastAPI
router = FastAPI()
# Stub auth-dependency factory: returns an inner `check` callable whose body is
# a bare `...`. `method` is accepted but not used by the stub.
def requires_access_connection(method: str):
def check():
...
return check
# Safe handler: the route-level `dependencies=[Depends(...)]` argument declares
# the auth gate, so the row fetch and delete below must not be flagged as
# missing an ownership check.
@router.delete(
"/{connection_id}",
dependencies=[Depends(requires_access_connection(method="DELETE"))],
)
def delete_connection(connection_id: str, session):
# Row fetch keyed by the route parameter.
connection = session.scalar(select(Connection).filter_by(conn_id=connection_id))
if connection is None:
raise HTTPException(404, "not found")
# Mutation of the fetched row; covered by the route-level dependency.
session.delete(connection)
# Read-only variant of the same shape: a GET route guarded by the same
# dependency factory; the row fetch by `connection_id` must not be flagged.
@router.get(
"/{connection_id}",
dependencies=[Depends(requires_access_connection(method="GET"))],
)
def get_connection(connection_id: str, session):
return session.scalar(select(Connection).filter_by(conn_id=connection_id))

View file

@ -0,0 +1,79 @@
"""
Distilled from airflow `airflow-core/src/airflow/api_fastapi/core_api/routes/public/dag_run.py`:
@dag_run_router.post(
"",
dependencies=[Depends(requires_access_dag(method="POST", access_entity=DagAccessEntity.RUN))],
)
def trigger_dag_run(dag_id, body, dag_bag, user, session, request):
dm = session.scalar(select(DagModel).where(DagModel.dag_id == dag_id))
...
dag = get_latest_version_of_dag(dag_bag, dag_id, session)
dag_run = dag.create_dagrun(run_id=params["run_id"], ...)
The route-level `dependencies=[Depends(requires_access_dag(method="POST",
access_entity=...))]` decorator authorizes the entire handler; the
handler body's `dag.create_dagrun(...)` call (where `dag` is a row
fetched using the auth-checked `dag_id`) must be covered too, even
though the call's subject is the bare row variable rather than the
original id.
Before the route-level fix, `auth_check_covers_subject` walked
`check.subjects` (empty for decorator-level checks whose inner call
carries no per-arg ValueRef) and never matched. After the fix,
`is_route_level=true` short-circuits coverage to true for any
non-login-guard route-level check, suppressing both the row-fetch
ownership flag and the downstream method-call ownership flag.
"""
from fastapi import Depends, FastAPI
router = FastAPI()
# Stub auth-dependency factory: returns an inner `check` callable whose body is
# a bare `...`. `method` and `access_entity` are accepted but not used.
def requires_access_dag(method: str, access_entity=None):
def check():
...
return check
# Stub lookup helper: returns `dag_bag.get(dag_id)`; `session` is unused here.
def get_latest_version_of_dag(dag_bag, dag_id, session):
return dag_bag.get(dag_id)
# GET route guarded at the decorator level by `requires_access_dag`.
@router.get(
"/{dag_id}/runs/{run_id}",
dependencies=[Depends(requires_access_dag(method="GET"))],
)
def get_dag_run(dag_id: str, run_id: str, session):
"""
Route-level guard authorizes the entire handler. The
`filter_by(dag_id=dag_id, run_id=run_id)` ORM call must NOT trip
`py.auth.missing_ownership_check` even though the per-arg subjects
are id-shaped; the route-level decorator covers them.
"""
# Row fetch keyed by both route parameters.
dag_run = session.scalar(
select(DagRun).filter_by(dag_id=dag_id, run_id=run_id)
)
if dag_run is None:
raise HTTPException(404, "not found")
return dag_run
# DELETE route guarded at the decorator level by `requires_access_dag`.
@router.delete(
"/{dag_id}",
dependencies=[Depends(requires_access_dag(method="DELETE"))],
)
def delete_dag(dag_id: str, session):
"""
Same shape, DELETE method. The row fetch and row-variable
method call must also be fully covered by the route-level guard.
`dag` is fetched using the auth-checked `dag_id`; without the
`is_route_level` short-circuit, the per-name walk would mismatch
`dag.<method>` (subject is the row var) against the check's
empty subjects vec.
"""
# Row fetch keyed by the route parameter.
dag = session.scalar(select(DagModel).where(DagModel.dag_id == dag_id))
if dag is None:
raise HTTPException(404, "not found")
# Method call whose subject is the fetched row variable, not the original id.
dag.cleanup_runs(session=session)

View file

@ -0,0 +1,33 @@
"""
Distilled from airflow `tests/unit/models/test_backfill.py` and
`providers/google/tests/unit/google/cloud/hooks/test_dlp.py`: pytest test
methods that take a SQLAlchemy `session` fixture by name and call
`session.commit()` / `session.add(...)` / `session.scalar(...)`.
Bare `session.<sqlalchemy_verb>` was previously classified as auth Session
context, which triggered `unit_has_user_input_evidence` even though the
test function takes no user input; the `session` fixture is the
SQLAlchemy ORM Session, not the auth/HTTP session. After the engine
classifier narrowing, only `session.<identity_field>` (`session.user`,
`session.user_id`, ...) is treated as auth context; SQLAlchemy verbs
do not contribute user-input evidence on their own.
"""
# Pytest-style test that takes a `session` fixture by name. Its only use of the
# fixture is the bare `session.commit()` below, which must not count as
# user-input evidence.
def test_reverse_and_depends_on_past_fails(dep_on_past, dag_maker, session):
with dag_maker() as dag:
pass
# SQLAlchemy verb on the fixture, not an auth/HTTP session access.
session.commit()
b = _create_backfill(
dag_id=dag.dag_id,
from_date="2021-01-01",
to_date="2021-01-05",
)
if dep_on_past:
assert b is None
# Second test shape, with no `session` parameter: configures a mocked return
# value, calls the hook with a literal organization id, and asserts the result
# is the empty dict.
def test_create_deidentify_template_with_org_id(self, get_conn, mock_project_id):
get_conn.return_value.create_deidentify_template.return_value = {}
result = self.hook.create_deidentify_template(organization_id="ORG_ID")
assert result == {}

View file

@ -2,7 +2,7 @@
// produces a `DatabaseConnection` via SSA `constructor_type` (through
// `peel_identity_suffix`, which strips `.unwrap()` before matching). The
// handler then calls `conn.execute(..)`, a callee name that appears in
// neither `mutation_indicator_names` nor `read_indicator_names` for Rust
// neither `mutation_indicator_names` nor `read_indicator_names` for Rust;
// name-based classification returns `None`, so the ownership gate
// already cannot flag the call. The type-map refinement should *still*
// leave the call unflagged (the type map produces `DbMutation`, but

View file

@ -16,7 +16,7 @@ pub async fn handle_list_peer_docs(req: Req, ctx: Ctx) -> Result<String, ()> {
let user = auth::require_auth(&req, &ctx).await?;
let doc_ids: Vec<i64> = vec![1, 2, 3];
// Pure in-memory bookkeeping no authorization decision here.
// Pure in-memory bookkeeping, no authorization decision here.
let mut counts: HashMap<i64, usize> = HashMap::new();
let mut seen: HashSet<i64> = HashSet::new();
for doc_id in &doc_ids {

View file

@ -2,7 +2,7 @@
// the row by id first to obtain the resource it needs to authorize, then
// calls a named authorization function on the fetched row. This is the
// canonical pattern in Lemmy's Actix handlers (and most row-level Rails /
// Django authz code) the authorization check appears textually after the
// Django authz code), the authorization check appears textually after the
// fetch but is the first thing the function does on the row.
use std::result::Result;

View file

@ -41,7 +41,7 @@ pub async fn handle_delete_doc(req: Req, ctx: Ctx, doc_id: i64) -> Result<String
return json_err("cannot delete another user's doc", 403);
}
// By construction, the row belongs to `user` so any id read from it is authorized.
// By construction, the row belongs to `user`, so any id read from it is authorized.
let group_id = existing.get_i64("group_id");
realtime::publish_to_group(group_id, "doc_deleted");
Ok("ok".into())

View file

@ -31,7 +31,7 @@ pub async fn handle_update_doc(req: Req, ctx: Ctx, doc_id: i64) -> Result<String
);
let owner_id = existing.get_i64("user_id");
// Equality compared but no early exit the check has no effect.
// Equality compared but no early exit, the check has no effect.
if owner_id != user.id {
// missing return
println!("not your doc (but proceeding anyway)");

View file

@ -1,7 +1,7 @@
// Phase 6 D05: an Axum `Json<UpdateDoc>` extractor whose `doc_id`
// field is declared as `i64`. The DTO field-level taint analysis
// proves the value reaching `db.exec` is numeric and exempts
// `dto.doc_id` from the auth subject classifier the rule must NOT
// `dto.doc_id` from the auth subject classifier, the rule must NOT
// fire because numeric DTO fields cannot bypass ownership.
use axum::extract::Json;

View file

@ -0,0 +1,70 @@
// Function-parameter type annotations naming an in-memory container
// (`RoaringBitmap`, `HashMap<K, V>`, `HashSet<T>`, ...) classify the
// receiver as `TypeKind::LocalCollection`, which the auth analyser
// maps to `SinkClass::InMemoryLocal` (always non-auth-relevant).
// Without this, the verb-name dispatch (`is_mutation: insert/remove`)
// classified `unsharded.insert(docid)` /
// `task_ids.insert(task_id)` as `DbMutation` and fired
// `missing_ownership_check` whenever the function had at least one
// id-shaped parameter to pass `unit_has_user_input_evidence`.
//
// Cluster surfaced from
// meilisearch/index-scheduler/src/scheduler/enterprise_edition/network.rs::balance_shards
// (`unsharded: RoaringBitmap` typed parameter) and same-pattern
// helpers across the index-scheduler.
use std::collections::{BTreeSet, HashMap, HashSet};
// Unit-struct stand-in for the `RoaringBitmap` type named in the header
// comment. Every method is a stub.
struct RoaringBitmap;
impl RoaringBitmap {
// Constructor; called by `build_local_set`.
fn new() -> Self { Self }
// Stub mutators and query: each ignores its argument and returns `true`.
fn insert(&mut self, _x: u32) -> bool { true }
fn remove(&mut self, _x: u32) -> bool { true }
fn contains(&self, _x: u32) -> bool { true }
}
// 1. Bare-typed `RoaringBitmap` parameter. The function has an id-like
//    param (`docid`), so user-input evidence fires; the receiver type
//    proves the operations are in-memory bookkeeping.
fn balance_shards(mut unsharded: RoaringBitmap, docid: u32) {
// Both calls use mutation verbs (`insert` / `remove`) on the typed param.
unsharded.insert(docid);
unsharded.remove(docid);
}
// 2. `&mut RoaringBitmap` reference: ref-stripping must reach the
//    underlying type head.
fn process_docids(docids: &mut RoaringBitmap, docid: u32) {
docids.insert(docid);
docids.remove(docid);
// Read-style call on the same receiver; its result is discarded.
let _ = docids.contains(docid);
}
// 3. Lifetime-annotated reference: `&'a mut HashMap<...>`.
//    A module-path prefix would also be dropped; the head matches `HashMap`.
fn store_shard_docids<'a>(
new_shard_docids: &'a mut HashMap<String, u32>,
shard: String,
docid: u32,
) {
// `insert` on the std `HashMap` param, keyed by `shard`.
new_shard_docids.insert(shard, docid);
}
// 4. Std-collection `HashSet` typed param, with an id-shaped `user_id`
//    argument flowing into both mutation verbs.
fn add_user_id(ids: &mut HashSet<u64>, user_id: u64) {
ids.insert(user_id);
ids.remove(&user_id);
}
// 5. Local variable bound from a constructor. This case is already covered,
//    but it is pinned here as a regression guard for the
//    `RoaringBitmap::new()` constructor entry.
fn build_local_set(task_id: u32) -> RoaringBitmap {
let mut s = RoaringBitmap::new();
// `insert` on the locally constructed receiver.
s.insert(task_id);
s
}
// 6. `BTreeSet` typed param: `insert` of an id-shaped `item_id` into the
//    std ordered set.
fn collect_seen(seen: &mut BTreeSet<u32>, item_id: u32) {
seen.insert(item_id);
}

View file

@ -1,7 +1,7 @@
// Real-repo motivation (lemmy `LocalUserView` extractor).
//
// Lemmy's authenticated-actor extractor type is named `LocalUserView`
// every route handler signature is
// and every route handler signature is
// `pub async fn handler(.., local_user_view: LocalUserView)`. The
// previous exact-name list in `is_self_actor_type_text`
// (`CurrentUser`, `SessionUser`, `AuthUser`, `AdminUser`,
@ -44,7 +44,7 @@ pub async fn write_self_note(
pool: &mut Pool,
local_user_view: LocalUserView,
) -> Result<(), ()> {
// Login predicate on the actor itself subject is the actor.
// Login predicate on the actor itself, subject is the actor.
// No additional ownership check needed because the subject is the
// caller's own row.
let _ = is_admin(&local_user_view);

View file

@ -0,0 +1,78 @@
// Internal helper whose parameter list contains type-segment idents
// that lowercase-match the framework-request-name allow-list (`path`,
// `request`, `ctx`, `body`). Before the
// `collect_param_names` Rust-parameter arm, the recursive default arm
// pulled `std`, `path`, `Path` out of `dst: &std::path::Path` and
// pushed them into `unit.params`, `path` then matched the
// framework-name list and gated `unit_has_user_input_evidence` open,
// firing `missing_ownership_check` at every id-shaped operation in
// the body.
//
// Cluster surfaced from
// meilisearch/index-scheduler/src/scheduler/process_snapshot_creation.rs::remove_tasks
// (`unsafe fn remove_tasks(tasks: &[Task], dst: &std::path::Path,
// index_base_map_size: usize)`). None of the actual params (`tasks`,
// `dst`, `sz`) match the user-input-evidence heuristic, so the rule
// must NOT fire on the internal task-cleanup loop.
// Minimal task record; `uid` is the id-shaped field passed to the
// `delete` calls in `remove_tasks`.
struct Task {
uid: u32,
}
// Unit-struct stand-in for a database handle.
struct Database;
impl Database {
// Stub delete: ignores both arguments and always returns `Ok(())`.
fn delete(&self, _w: &mut u32, _u: &u32) -> Result<(), ()> {
Ok(())
}
}
// Holds the two `Database` handles that `remove_tasks` destructures and
// deletes from.
struct TaskQueue {
all_tasks: Database,
canceled_by: Database,
}
// Fixture under test. None of the parameters (`tasks`, `dst`, `sz`) match
// the user-input-evidence heuristic, so `missing_ownership_check` must not
// fire on the delete loop. The `std::path::Path` segments in the type
// annotation of `dst` must not be collected as parameter names.
fn remove_tasks(
tasks: &[Task],
dst: &std::path::Path,
sz: usize,
) -> Result<(), ()> {
// `dst` and `sz` are referenced once here and otherwise unused.
let _ = (dst, sz);
// Stand-in value passed as the first argument of each `delete` call.
let mut wtxn = 0u32;
let task_queue = TaskQueue {
all_tasks: Database,
canceled_by: Database,
};
// Destructure so the `delete` receivers are bare local bindings.
let TaskQueue {
all_tasks,
canceled_by,
} = task_queue;
// Internal cleanup loop: id-shaped `task.uid` reaches both deletes.
for task in tasks {
all_tasks.delete(&mut wtxn, &task.uid)?;
canceled_by.delete(&mut wtxn, &task.uid)?;
}
Ok(())
}
// Same shape with a typed wrapper whose tail segment lowercases to
// `path` (`PathBuf` → `pathbuf` does NOT match, but `Path` does).
// Confirms the Rust `parameter` arm in `collect_param_names` keeps
// `Path` out of `unit.params` even when wrapped in a generic.
// NOTE(review): the parameter below is typed `Wrapper<PathHandle>`, and
// `PathHandle` lowercases to `pathhandle`, not `path`. Confirm the fixture
// still exercises the `Path` case this comment describes.
//
// Generic single-field tuple struct used as the wrapper type.
struct Wrapper<T>(T);
// Unit struct used as the type argument in `Wrapper<PathHandle>`.
struct PathHandle;
// Minimal record; `uid` is the id-shaped field passed to `Repo::delete`.
struct Item {
uid: u32,
}
// Unit-struct stand-in for a repository handle.
struct Repo;
impl Repo {
// Stub delete: ignores its argument and returns nothing.
fn delete(&self, _u: &u32) {}
}
// Second fixture under test: an internal helper whose first parameter is a
// generic wrapper type. Type-segment identifiers from the annotation must
// not be collected as parameter names, so the delete loop stays unflagged.
fn cleanup_internal(out: Wrapper<PathHandle>, items: &[Item]) {
// `out` is referenced once here and otherwise unused.
let _ = out;
let repo = Repo;
// Id-shaped `item.uid` reaches the delete on a locally built `Repo`.
for item in items {
repo.delete(&item.uid);
}
}

View file

@ -4,7 +4,7 @@
// (the call body wraps onto the next line for readability). Before
// the line-counting fix, `row_population_data` recorded the
// `let_declaration`'s start row while `op.line` saw the inner call's
// start row they differed by one and the row-fetch exemption
// start row, they differed by one and the row-fetch exemption
// missed. Recording the **call**'s start line aligns the two and
// the exemption fires for the multi-line shape too.
@ -52,7 +52,7 @@ pub async fn lock_comment(
let comment_id = req.comment_id;
let local_instance_id = local_user_view.person.instance_id;
// Multi-line let the let_declaration starts on this line, but
// Multi-line let, the let_declaration starts on this line, but
// the inner `CommentView::read(..)` call starts on the next line.
// `op.line` for the read sink is the call's line, not the let's.
let orig_comment =

View file

@ -4,7 +4,7 @@
// `let community = Community::read(pool, req.community_id)` records
// `community → [req.community_id]` in `row_population_data`. An auth
// check `check_community_user_action(&user, &community, ..)` then
// authorises the row and any **downstream** operation that re-uses
// authorises the row, and any **downstream** operation that re-uses
// `req.community_id` (a later mutation by the same id, or a related
// view fetched by the same id) is materially covered by that check.
//
@ -71,7 +71,7 @@ pub async fn transfer_community(
pool: &mut Pool,
local_user_view: LocalUserView,
) -> Result<(), ()> {
// Row fetch `community` is populated from `req.community_id`.
// Row fetch, `community` is populated from `req.community_id`.
let community = Community::read(pool, req.community_id)?;
// Authorisation check on the fetched row. Subject = `community`
@ -84,7 +84,7 @@ pub async fn transfer_community(
// the row that was fetched with this id).
CommunityActions::delete_mods_for_community(pool, req.community_id)?;
// Local alias of the same request field `var_alias_chain`
// Local alias of the same request field, `var_alias_chain`
// records `community_id → "req.community_id"` so the reverse-walk
// also covers downstream sinks that pass the bare alias. Before
// the alias-chain fix, the next read fired

View file

@ -1,7 +1,7 @@
// Phase 5 typed-extractor exclusion: an Axum-style `Path<i64>`
// parameter is a framework-validated numeric extractor. The runtime
// guarantees a numeric value, so even though `project_id` reaches a
// SQL helper, the rule must NOT fire the value cannot carry an
// SQL helper, the rule must NOT fire, the value cannot carry an
// injection payload nor bypass ownership.
use axum::extract::Path;

View file

@ -36,7 +36,7 @@ mod serde_json {
}
// Real-repo shape from website/src/handlers/social.rs:
// `realtime::publish_to_user(&ctx.env, &user.email, ...)` publish
// `realtime::publish_to_user(&ctx.env, &user.email, ...)`, publish
// to the authed user's OWN channel keyed by their email. The
// `email` / `username` / `handle` fields of a self-actor binding
// reference the actor's own identity, just like `id` / `user_id`,

View file

@ -5,7 +5,7 @@ mod auth { pub async fn require_auth(_r: &super::Req, _c: &super::Ctx) -> Result
// The handler's `get_peer_ids(&db, user.id)` call below must not be
// flagged. `user` is bound from `auth::require_auth(..)` so `user.id`
// is the caller's own id the call is self-referential, not a foreign
// is the caller's own id, the call is self-referential, not a foreign
// scoped id. The library-style helper below is a pass-through so its
// body contains no DB sinks (the internal `user_id` → DB flow is a
// separate pattern covered by helper-summary lifting).

View file

@ -2,7 +2,7 @@
// against an ACL table (`group_members`) with a WHERE clause that pins
// the row to the current user (`gm.user_id = ?1` bound to `user.id`).
// Every returned row is membership-gated by construction, so downstream
// uses of the row's columns (`group_id` here) are authorized the
// uses of the row's columns (`group_id` here) are authorized, the
// `realtime::publish_to_group` call MUST NOT be flagged as missing an
// ownership check after B3.
struct Ctx;

View file

@ -1,7 +1,7 @@
// target: authorization happens inside `validate_target`, which
// internally calls `authz::require_membership` against the same
// `group_id` the handler subsequently mutates. The current rule cannot
// see this transitively B4 lifts per-function auth-check summaries
// see this transitively, B4 lifts per-function auth-check summaries
// (which positional params are auth-checked) so the handler-level call
// to `validate_target(&db, group_id, user.id)` is recognised as an
// auth check covering `group_id`. Result: `db.exec(..)` MUST NOT flag
@ -45,7 +45,7 @@ pub async fn handle_create_comment(
let user = auth::require_auth(&req, &ctx).await?;
let db = Db;
// Authorization happens inside validate_target helper-summary
// Authorization happens inside validate_target, helper-summary
// lifting propagates the per-param auth check so this covers
// `group_id`.
validate_target(&db, group_id, user.id).await?;

View file

@ -1,7 +1,7 @@
// Phase 6 D06 (negative): same DTO shape as
// `safe_dto_int_field_axum.rs` but the flow uses the `doc_id` field
// whose declared type is `String`. Phase 6 must NOT exempt the
// member-access subject String DTO fields can carry an injection
// member-access subject, String DTO fields can carry an injection
// payload, so the auth rule must continue to fire.
use axum::extract::Json;

View file

@ -0,0 +1,28 @@
// Vulnerable counterpart to `safe_local_collection_param_types.rs`
// and `safe_param_type_segment_idents.rs`. Proves the LocalCollection
// receiver-type override and the Rust `parameter` arm in
// `collect_param_names` don't blanket-suppress real handlers that mix
// in-memory containers with persistent-store calls (`db.update_owner`).
// Scoped identifier (`req.target_user_id`) flows into a real DB
// mutation with no preceding ownership check, must still fire.
use std::collections::HashMap;
// Request payload for `change_owner`. `target_user_id` is the scoped
// identifier that reaches the DB mutation without an ownership check.
struct DocumentRequest {
target_user_id: u64,
new_owner: u64,
}
// Unit-struct stand-in for a persistent-store connection.
struct DbConnection;
impl DbConnection {
// Stub write: ignores both arguments and has an empty body.
fn update_owner(&self, _doc_id: u64, _owner: u64) {}
}
// `cache: &mut HashMap<u64, String>` is a local container, its
// mutations are non-auth-relevant. But `db.update_owner` is a
// real persistent-store write, classified as `DbMutation`, and the
// handler still has no auth check on `req.target_user_id`.
// Fixture handler: removes the `cache` entry keyed by
// `req.target_user_id`, then passes `req.target_user_id` and
// `req.new_owner` to `db.update_owner`. No check of any kind runs
// before either call.
async fn change_owner(req: DocumentRequest, cache: &mut HashMap<u64, String>, db: DbConnection) {
    cache.remove(&req.target_user_id); // local container op, OK
    db.update_owner(req.target_user_id, req.new_owner); // <-- IDOR sink
}

View file

@ -1,4 +1,4 @@
// Vulnerable counterpart to `row_fetch_then_authorize.rs` the row is
// Vulnerable counterpart to `row_fetch_then_authorize.rs`, the row is
// fetched by user-supplied id but no authorization function names it.
// The row-fetch exemption must NOT fire here; the rule should still
// flag the read as missing an ownership/membership check.

View file

@ -33,12 +33,12 @@ pub async fn transfer_community(
req: TransferCommunity,
pool: &mut Pool,
) -> Result<(), ()> {
// Row fetch — populates `community → [req.community_id]` — but
// Row fetch, populates `community → [req.community_id]`, but
// no `check_*_action(&user, &community, ..)` follows.
let _community = Community::read(pool, req.community_id)?;
// Mutation by id with no preceding ownership/membership check.
// This is the genuine IDOR must flag.
// This is the genuine IDOR, must flag.
CommunityActions::delete_mods_for_community(pool, req.community_id)?;
Ok(())

View file

@ -3,7 +3,7 @@ use rusqlite::Connection;
fn main() {
let user_id = env::var("USER_ID").unwrap();
// Rejecting shell metacharacters does NOT make SQL injection safe
// Rejecting shell metacharacters does NOT make SQL injection safe;
// the metachar gate only covers shell-family sinks.
if user_id.contains(";") || user_id.contains("|") {
return;

View file

@ -1,6 +1,6 @@
// rs-path-006: Negative-case guard for PathFact.
//
// No sanitiser and no narrowing PathFact stays Top on every axis, so
// No sanitiser and no narrowing, PathFact stays Top on every axis, so
// the FILE_IO sink MUST fire. This fixture guards against PathFact
// over-suppression sneaking into `is_path_safe_for_sink`.
use std::env;

View file

@ -0,0 +1,96 @@
// Real-repo shape from excalidraw's element manipulation libraries
// (`packages/element/src/binding.ts`, `frame.ts`, `duplicate.ts`,
// `DebugCanvas.tsx`). In a pure data-manipulation function whose
// receiver is a JS built-in collection (`Map`, `Set`, `WeakMap`,
// `WeakSet`, `Array`) — either declared inline (`new Map()`),
// annotated directly (`m: Map<K, V>`), or aliased via a same-file
// `type X = Map<K, V>` — the call site is a container operation,
// not a data-layer read/mutation, and `js.auth.missing_ownership_check`
// must not flag.
//
// Closes the excalidraw FP cluster (66 → ~9 on
// `js.auth.missing_ownership_check`). The fix lives at the deepest
// representable layer: SSA `TypeFacts::constructor_type` recognises
// `new Map()` / `new Set()` constructors as
// `TypeKind::LocalCollection`; `cfg::params::ts_type_to_local_collection`
// extends `classify_param_type_ts` so explicitly-typed params resolve
// to `LocalCollection` independent of NestJS decorator presence;
// `cfg::dto::collect_type_alias_local_collections` populates a
// per-file `TYPE_ALIAS_LC` set so same-file `type X = Map<...>`
// aliases also resolve. The auth analyser already exempts
// `LocalCollection`-typed receivers via
// `auth_analysis::sink_class_for_type → InMemoryLocal`.
// Same-file aliases over built-in collection types; the functions in
// this file use them as parameter types.
type ElementsMap = Map<string, { id: string; frameId?: string }>;
type IdMap = Map<string, string>;
type GroupSet = Set<string>;
type ElementArray = readonly { id: string }[];
// Shape of the `binding` argument to `lookupBinding`; only `elementId`
// is declared.
interface BindingFix {
  elementId: string;
}
// ── 1. Direct Map<...> annotation on a parameter ────────────────────
// Returns the `origIdToDuplicateId` entry keyed by `binding.elementId`;
// the declared return type allows `undefined` for a missing key.
function lookupBinding(
  binding: BindingFix,
  origIdToDuplicateId: Map<string, string>,
): string | undefined {
  return origIdToDuplicateId.get(binding.elementId);
}
// ── 2. Same-file `type X = Map<...>` alias ─────────────────────────
// Looks `id` up in `elementsMap` and returns the entry, or `null` when
// the lookup result is falsy.
function debugRender(elementsMap: ElementsMap, id: string) {
  const bindable = elementsMap.get(id);
  if (!bindable) return null;
  return bindable;
}
// ── 3. Set / WeakMap / WeakSet annotation ──────────────────────────
// Adds `key` to `visited` when it is not already present, then returns
// the set's size.
function trackVisited(visited: Set<string>, key: string) {
  if (!visited.has(key)) {
    visited.add(key);
  }
  return visited.size;
}
// Stores `v` under `obj` in the `WeakMap` and returns the value read
// back for the same key.
function rememberElement(
  cache: WeakMap<object, string>,
  obj: object,
  v: string,
) {
  cache.set(obj, v);
  return cache.get(obj);
}
// ── 4. Array generics (`T[]`, `Array<T>`, `ReadonlyArray<T>`) ──────
// `T[]`-annotated parameter: returns the first element whose `id`
// strictly equals `targetId`.
function findItemArr(arr: { id: string }[], targetId: string) {
  return arr.find((x) => x.id === targetId);
}
// Same lookup as `findItemArr`, but the parameter is typed through the
// `ElementArray` alias (a readonly array type).
function findItemReadonly(arr: ElementArray, targetId: string) {
  return arr.find((x) => x.id === targetId);
}
// `Array<T>`-annotated parameter: returns the first element strictly
// equal to `v`.
function findItemGeneric(arr: Array<string>, v: string) {
  return arr.find((x) => x === v);
}
// ── 5. Local `new Map()` / `new Set()` constructors ────────────────
// Builds a local `Map` from each item's `id` to its `v`, then returns
// the entry for the first item's id.
function buildIndex(items: { id: string; v: string }[]) {
  const idx = new Map<string, string>();
  for (const it of items) {
    idx.set(it.id, it.v);
  }
  // When `items` is empty the lookup key falls back to the empty string.
  return idx.get(items[0]?.id ?? "");
}
// ── 6. Type-alias chain (alias of alias) ───────────────────────────
// Returns the `m` entry for key `k`; `m` is typed through the `IdMap`
// alias.
// NOTE(review): `IdMap` is declared in this file directly as
// `Map<string, string>`, so this exercises a single alias hop; confirm
// whether a true alias-of-alias declaration was intended here.
function aliasOfAlias(m: IdMap, k: string) {
  return m.get(k);
}
// ── 7. Set with `add` / `has` (mutation-side) ──────────────────────
// Adds `g` to the `GroupSet`-typed set and returns the result of
// `has(g)` on the same set.
function trackGroup(groups: GroupSet, g: string) {
  groups.add(g);
  return groups.has(g);
}

View file

@ -0,0 +1,28 @@
// Vulnerable counterpart to `safe_local_collection_receiver.ts`.
//
// Pinned to prove the LocalCollection-receiver fix does NOT
// blanket-suppress missing-ownership findings on real DB / API
// receivers that happen to share method names (`get`, `find`, `set`)
// with JS built-in collections. When the receiver type is a real
// `Prisma` / `Repository` / `db` chain — not a tracked Map / Set /
// Array — the auth analyser must still fire.
// Minimal structural stand-in declared locally for this fixture: a
// `user` member exposing `findUnique` and `update`, both filtered by
// `where.id`.
interface PrismaClient {
  user: {
    findUnique(args: { where: { id: string } }): Promise<{ id: string } | null>;
    update(args: { where: { id: string }; data: object }): Promise<void>;
  };
}
// Ambient declaration only; no value is constructed in this file.
declare const prisma: PrismaClient;
// User passes an attacker-controlled id. No prior auth check; receiver
// is a Prisma client (NOT a Map / Set / Array), so the missing-ownership
// rule must fire on `prisma.user.findUnique`.
// Passes `targetUserId` straight into the `where.id` filter of
// `prisma.user.findUnique`; nothing in this function inspects the id
// first.
export async function dangerousFetch(targetUserId: string) {
  return prisma.user.findUnique({ where: { id: targetUserId } });
}
// Mutation-side twin of `dangerousFetch`: forwards `targetUserId` as
// the `where.id` filter and `data` as the update payload, with no
// preceding check in this function.
export async function dangerousMutate(targetUserId: string, data: object) {
  return prisma.user.update({ where: { id: targetUserId }, data });
}

View file

@ -0,0 +1,33 @@
// Helper-summary all_validated propagation (precision regression
// guard). The helper performs an indirect-validator check on
// `child.webhookUrl` and throws on failure; callers passing tainted
// `child` should NOT see the helper's `param_to_sink` summary refire
// because the validator inside the helper proved the path safe.
//
// Pinned by tests/lib::helper_with_validator_does_not_propagate_to_caller_via_summary.
import express, { Request, Response } from 'express';
import axios from 'axios';
// Filter part carrying an optional webhook URL.
interface IWebhookFilterPart {
  webhookUrl?: string;
}
// Ambient signature only (no body in this file). The caller below
// treats a truthy resolved value as a rejection.
declare function validateUrlSsrf(url: string): Promise<string | null>;
// Validates `child.webhookUrl` with `validateUrlSsrf`, throws when the
// validator resolves to a truthy value, and otherwise POSTs an empty
// object to that URL and returns the awaited result.
async function getWebhookResponse(child: IWebhookFilterPart) {
  const ssrfError = await validateUrlSsrf(child.webhookUrl);
  if (ssrfError) {
    throw new Error('blocked');
  }
  // Reached only when the validator result was falsy.
  return await axios.post(child.webhookUrl, {});
}
const app = express();
app.use(express.json());
// POST /run: takes `req.body.filter` as the filter part, hands it to
// `getWebhookResponse`, and returns the result as JSON under `r`.
app.post('/run', async (req: Request, res: Response) => {
  const child: IWebhookFilterPart = req.body.filter;
  const r = await getWebhookResponse(child);
  res.json({ r });
});

View file

@ -0,0 +1,23 @@
// Indirect-validator branch narrowing (precision regression guard).
// Pattern: `const err = validateXxx(input); if (err) throw …;` —
// the validator's input is treated as validated on the success
// branch, so the downstream sink does not refire.
//
// Pinned by tests/lib::indirect_validator_narrowing_marks_arg_validated.
import express, { Request, Response } from 'express';
import axios from 'axios';
// Ambient signature only (no body in this file).
declare function validateUrlSsrf(url: string): Promise<string | null>;
const app = express();
// GET /proxy: reads `url` from the query string, validates it, and
// relays the fetched response data.
app.get('/proxy', async (req: Request, res: Response) => {
  const target = req.query.url as string;
  // A truthy validator result is treated as a rejection.
  const ssrfError = await validateUrlSsrf(target);
  if (ssrfError) {
    throw new Error('blocked');
  }
  // Reached only when the validator result was falsy.
  const response = await axios.get(target);
  res.send(response.data);
});

View file

@ -0,0 +1,37 @@
// Strapi-style ORM accessor chain — `<obj>.db.query(MODEL_UID).<orm_method>(...)`.
// MODEL_UID is a literal model identifier, not raw SQL; the trailing
// findOne/findMany/create/update/delete/count are intrinsically parameterised
// by the ORM (per-call values arrive through field-keyed object literals
// that the driver escapes). Should NOT fire as a SQL-injection sink.
// Ambient, untyped (`any`) handle; every function in this file calls
// `strapi.db.query('admin::api-token')` and chains one method on it.
declare const strapi: any;
// Chains `.findOne` with a fixed `select` list; `whereParams` is passed
// as the `where` value of the options object.
async function getApiToken(whereParams: Record<string, unknown>) {
  const token = await strapi.db.query('admin::api-token').findOne({
    select: ['id', 'name'],
    where: whereParams,
  });
  return token;
}
// Chains `.findMany` with a literal `where` filter; takes no
// caller-supplied input.
async function listTokens() {
  return strapi.db.query('admin::api-token').findMany({
    where: { type: 'read-only' },
  });
}
// Chains `.create`, passing `data` under the `data` key of the options
// object.
async function createToken(data: unknown) {
  return strapi.db.query('admin::api-token').create({ data });
}
// Chains `.update`, filtering on `id` and passing `data` under the
// `data` key.
async function updateToken(id: number, data: unknown) {
  return strapi.db.query('admin::api-token').update({ where: { id }, data });
}
// Chains `.delete`, filtering on `id`.
async function deleteToken(id: number) {
  return strapi.db.query('admin::api-token').delete({ where: { id } });
}
// Chains `.count` with no arguments.
async function countTokens() {
  return strapi.db.query('admin::api-token').count();
}

View file

@ -0,0 +1,26 @@
// Vulnerable counterpart — bare `connection.query(...)` and chained
// `db.query(...).then(...)` whose arg 0 is concatenated with attacker
// input. Both must still fire as SQL_QUERY sinks: the chain has no
// ORM-method outer call (`.then` is a Promise method, not an ORM
// accessor), and arg 0 is not a string literal in the second case.
import express, { Request, Response } from 'express';
// Ambient, untyped (`any`) handles; nothing is constructed in this file.
declare const connection: any;
declare const db: any;
const app = express();
// GET /user: interpolates the `name` query parameter into a template
// literal passed as arg 0 of `connection.query`.
app.get('/user', (req: Request, res: Response) => {
  const name = req.query.name as string;
  // bare SQL — real SQLi sink, no chain
  connection.query(`SELECT * FROM users WHERE name = '${name}'`);
});
// GET /by-id: concatenates the `id` query parameter onto a SQL string
// passed to `db.query`, then responds with the first row.
app.get('/by-id', async (req: Request, res: Response) => {
  const id = req.query.id as string;
  // chained `.then` is a Promise method, not an ORM accessor; arg 0 is
  // also a binary_expression (not a string literal) so the ORM-shape
  // recogniser refuses to suppress.
  db.query("SELECT * FROM users WHERE id = " + id).then((rows: any) => res.json(rows[0]));
});

View file

@ -0,0 +1,37 @@
// Nyx CVE benchmark fixture (patched counterpart).
//
// CVE: CVE-2022-1471
// Project: SnakeYAML (snakeyaml/snakeyaml)
// License: Apache-2.0
// (https://github.com/snakeyaml/snakeyaml/blob/master/LICENSE.txt)
// Advisory: https://github.com/advisories/GHSA-mjmj-j48q-9wg2
//
// Patched variant: the parser is constructed with `SafeConstructor`,
// which restricts the YAML tag handler set to primitives + standard
// collections. SnakeYAML 2.0 ships with `SafeConstructor` as the
// default; pre-2.0 consumers patched their own call sites to pass
// `SafeConstructor` explicitly (the form below).
//
// Patched-fix simplification: the upstream remediation also covers
// callers that need richer types via custom `Constructor` subclasses
// with declared safe types; those are out of scope for this fixture.
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.yaml.snakeyaml.LoaderOptions;
import org.yaml.snakeyaml.Yaml;
import org.yaml.snakeyaml.constructor.SafeConstructor;
// Servlet fixture: reads one line of the request body, parses it with a
// `SafeConstructor`-backed `Yaml` instance, and reports the loaded
// object's class name in the `X-Yaml-Class` response header.
public class YamlConfigServlet extends HttpServlet {
    @Override
    protected void doPost(HttpServletRequest req, HttpServletResponse res) throws Exception {
        // Only the first line of the body is read.
        String body = req.getReader().readLine();
        // Patched: SafeConstructor forbids arbitrary class tags;
        // any non-primitive `!!` payload throws ConstructorException.
        Yaml yaml = new Yaml(new SafeConstructor(new LoaderOptions()));
        Object loaded = yaml.load(body);
        // NOTE(review): `loaded` is dereferenced without a null check;
        // confirm whether `yaml.load` can return null for empty input.
        res.setHeader("X-Yaml-Class", loaded.getClass().getName());
        res.setStatus(HttpServletResponse.SC_OK);
    }
}

View file

@ -0,0 +1,43 @@
// Nyx CVE benchmark fixture.
//
// CVE: CVE-2022-1471
// Project: SnakeYAML (snakeyaml/snakeyaml; consumed via any app
// that constructs `new Yaml()` and calls `.load()` on
// attacker-controlled bytes)
// License: Apache-2.0
// (https://github.com/snakeyaml/snakeyaml/blob/master/LICENSE.txt)
// Advisory: https://github.com/advisories/GHSA-mjmj-j48q-9wg2
// https://nvd.nist.gov/vuln/detail/CVE-2022-1471
// Vulnerable: SnakeYAML <= 1.33; the default `Constructor` accepts
// arbitrary tags (`!!javax.script.ScriptEngineManager`,
// `!!java.net.URLClassLoader`, etc.) and instantiates any
// class via reflection, reaching RCE on consumers that
// feed network input straight into Yaml.load().
//
// Verbatim load-bearing lines: the unsafe `new Yaml()` construction
// and the `yaml.load(body)` call mirror the call-site shape called
// out in the advisory's "vulnerable code" example. The patched fix
// (next file) shows the SnakeYAML 2.0 fix pattern of explicitly
// passing `new SafeConstructor(new LoaderOptions())`.
//
// Trims: imports trimmed to just SnakeYAML and Servlet API; no
// helper / logging code.
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.yaml.snakeyaml.Yaml;
// Servlet fixture: reads one line of the request body, parses it with a
// default-constructed `Yaml` instance, and reports the loaded object's
// class name in the `X-Yaml-Class` response header.
public class YamlConfigServlet extends HttpServlet {
    @Override
    protected void doPost(HttpServletRequest req, HttpServletResponse res) throws Exception {
        // Only the first line of the body is read.
        String body = req.getReader().readLine();
        // Vulnerable: default Constructor allows arbitrary class
        // instantiation via YAML tag handlers; `body` may contain
        // `!!javax.script.ScriptEngineManager` and friends.
        Yaml yaml = new Yaml();
        Object loaded = yaml.load(body);
        res.setHeader("X-Yaml-Class", loaded.getClass().getName());
        res.setStatus(HttpServletResponse.SC_OK);
    }
}

View file

@ -0,0 +1,33 @@
// Nyx CVE benchmark fixture (patched counterpart).
//
// CVE: CVE-2022-42889 ("Text4Shell")
// Project: Apache Commons Text (apache/commons-text)
// License: Apache-2.0
// (https://github.com/apache/commons-text/blob/master/LICENSE.txt)
// Advisory: https://github.com/advisories/GHSA-599f-7c49-w659
//
// Patched variant: the substitutor is built with `new StringSubstitutor()`
// (no factory) so the lookup map is empty, and `${anything}` becomes a
// literal pass-through. This is the recommended app-side mitigation
// for callers that cannot upgrade past 1.9, and it is also the
// behaviour of the 1.10.0 default `createDefault()` factory which
// drops the `script:` / `dns:` / `url:` interpolation lookups.
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.text.StringSubstitutor;
// Servlet fixture: runs the `template` request parameter through a
// default-constructed `StringSubstitutor` and reports the rendered
// length in the `X-Rendered-Length` response header.
public class TemplateRenderServlet extends HttpServlet {
    @Override
    protected void doGet(HttpServletRequest req, HttpServletResponse res) throws Exception {
        String input = req.getParameter("template");
        // Patched: no interpolator constructed; the substitutor has
        // no lookups registered, so `${}` is left as a literal in
        // the rendered output. No script/dns/url evaluation.
        StringSubstitutor substitutor = new StringSubstitutor();
        String rendered = substitutor.replace(input);
        // NOTE(review): `rendered` is dereferenced without a null check;
        // confirm the behaviour when the `template` parameter is absent.
        res.setHeader("X-Rendered-Length", String.valueOf(rendered.length()));
        res.setStatus(HttpServletResponse.SC_OK);
    }
}

View file

@ -0,0 +1,45 @@
// Nyx CVE benchmark fixture.
//
// CVE: CVE-2022-42889 (a.k.a. "Text4Shell")
// Project: Apache Commons Text (apache/commons-text); consumed via
// any app that calls `StringSubstitutor.createInterpolator()`
// on attacker-controlled input.
// License: Apache-2.0
// (https://github.com/apache/commons-text/blob/master/LICENSE.txt)
// Advisory: https://github.com/advisories/GHSA-599f-7c49-w659
// https://nvd.nist.gov/vuln/detail/CVE-2022-42889
// Vulnerable: commons-text 1.5 .. 1.9. `createInterpolator()`
// enables the `script:`, `dns:`, and `url:` lookups by
// default, so a substitution like `${script:javascript:}`
// evaluates JavaScript via the JSR-223 ScriptEngineManager,
// giving full RCE on any consumer that feeds untrusted input
// through `.replace()`.
//
// Verbatim load-bearing lines: the `StringSubstitutor.createInterpolator()`
// factory call and the `interpolator.replace(input)` sink mirror the
// minimal triggering pattern published in the OSS-Security advisory
// (https://www.openwall.com/lists/oss-security/2022/10/13/4) and the
// vendor mitigation guidance for 1.10.0.
//
// Trims: imports limited to commons-text + servlet; no surrounding
// templating boilerplate.
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.text.StringSubstitutor;
// Servlet fixture: runs the `template` request parameter through the
// substitutor returned by `StringSubstitutor.createInterpolator()` and
// reports the rendered length in the `X-Rendered-Length` response header.
public class TemplateRenderServlet extends HttpServlet {
    @Override
    protected void doGet(HttpServletRequest req, HttpServletResponse res) throws Exception {
        String input = req.getParameter("template");
        // Vulnerable: createInterpolator() enables script:/dns:/url:
        // lookups by default; .replace() evaluates them against
        // `input`, so a `${script:js:}` payload runs arbitrary
        // JavaScript via the JDK ScriptEngineManager.
        StringSubstitutor interpolator = StringSubstitutor.createInterpolator();
        String rendered = interpolator.replace(input);
        res.setHeader("X-Rendered-Length", String.valueOf(rendered.length()));
        res.setStatus(HttpServletResponse.SC_OK);
    }
}

View file

@ -0,0 +1,47 @@
# Nyx CVE benchmark fixture.
#
# CVE: CVE-2025-69662
# Project: geopandas (geopandas/geopandas)
# License: BSD-3-Clause (https://github.com/geopandas/geopandas/blob/main/LICENSE.txt)
# Advisory: https://github.com/advisories/GHSA-6497-prx7-gpmq
# Patched: 6aa8ef14ffdee4ba1044349ab948e1a1fbfaf419 geopandas/io/sql.py:432-438
#
# Fix: replace the f-string-built Find_SRID probe with a
# bound-parameter SQLAlchemy text() statement; SQLAlchemy passes the
# values via the driver's parameter binding, so attacker-supplied
# identifiers can no longer break out of the literal context.
#
# Trims:
# - Same scaffolding trim as vulnerable.py — `.fetchone()[0]` (post-
# sink result extraction) removed.
# - Patched-fix simplification: the upstream fix nests
# `text(...).bindparams(...)` directly inside `connection.execute(...)`.
# The fixture lifts the bound-parameter clause into a local `stmt`
# so the `.bindparams` call is a top-level CFG node — without this
# reshape, cfg-unguarded-sink fires on the surrounding execute
# because the inlined sanitizer-in-arg shape is not yet recognised
# by the dominator-based guard check. The verbatim bytes of the
# `text(...).bindparams(...)` clause are preserved.
from flask import Flask, request
from sqlalchemy import create_engine, text
app = Flask(__name__)
# Engine created once at import time against a fixed local Postgres URL.
engine = create_engine("postgresql://localhost/geo")


# POST /upload-layer: reads three names from the JSON body and runs the
# Find_SRID probe with all three supplied as bound parameters.
@app.post("/upload-layer")
def upload_layer():
    # `or {}` substitutes an empty dict when the parsed body is falsy.
    body = request.get_json(force=True) or {}
    # Each name falls back to a literal default when its key is missing.
    geom_name = body.get("geom_name", "geom")
    name = body.get("table", "data")
    schema_name = body.get("schema", "public")

    with engine.begin() as connection:
        # Verbatim bytes from sql.py:433-437 — bound-parameter probe.
        stmt = text(
            "SELECT Find_SRID(:schema_name, :name, :geom_name);"
        ).bindparams(
            schema_name=schema_name, name=name, geom_name=geom_name
        )
        # The statement result is not read; the handler returns a fixed body.
        connection.execute(stmt)
    return {"ok": True}

View file

@ -0,0 +1,46 @@
# Nyx CVE benchmark fixture.
#
# CVE: CVE-2025-69662
# Project: geopandas (geopandas/geopandas)
# License: BSD-3-Clause (https://github.com/geopandas/geopandas/blob/main/LICENSE.txt)
# Advisory: https://github.com/advisories/GHSA-6497-prx7-gpmq
# Vulnerable: c301579e0ac4034c19bece63c08bf628613700b4 geopandas/io/sql.py:432-435
#
# geopandas.GeoDataFrame.to_postgis() concatenated the GeoDataFrame's
# geometry column name (and the schema/table names) into a Find_SRID
# probe via f-string. A user uploading a GeoDataFrame whose geometry
# column was named with embedded SQL (e.g. "geom'); DROP TABLE...--")
# achieved arbitrary SQL execution against the target Postgres database.
#
# Trims:
# - Surrounding to_postgis() body (CRS lookup, EWKB conversion, dtype
# dict construction at L399-422) that scaffolds the vulnerable
# Find_SRID probe.
# - Trailing `.fetchone()[0]` on the connection.execute(...) result —
# downstream of the sink (result extraction), not on the flow path.
#
# Only the source statement (geom_name from request input), the
# f-string SQL builder, and the connection.execute(text(...)) sink are
# preserved verbatim from sql.py:432-435.
from flask import Flask, request
from sqlalchemy import create_engine, text
app = Flask(__name__)
# Engine created once at import time against a fixed local Postgres URL.
engine = create_engine("postgresql://localhost/geo")


# POST /upload-layer: reads three names from the JSON body and
# interpolates them into the Find_SRID SQL text via an f-string.
@app.post("/upload-layer")
def upload_layer():
    # `or {}` substitutes an empty dict when the parsed body is falsy.
    body = request.get_json(force=True) or {}
    # geom_name is supplied by the API caller — no validation upstream.
    geom_name = body.get("geom_name", "geom")
    name = body.get("table", "data")
    schema_name = body.get("schema", "public")

    with engine.begin() as connection:
        # Verbatim from sql.py:432-435 — Find_SRID probe with
        # f-string-interpolated identifiers.
        connection.execute(
            text(f"SELECT Find_SRID('{schema_name}', '{name}', '{geom_name}');")
        )
    return {"ok": True}

View file

@ -0,0 +1,79 @@
# Nyx CVE benchmark fixture.
#
# CVE: CVE-2026-33626
# Project: LMDeploy (InternLM/lmdeploy)
# License: Apache-2.0 (https://github.com/InternLM/lmdeploy/blob/main/LICENSE)
# Advisory: https://github.com/advisories/GHSA-25c5-rg58-mhxh
# Patched: 71d64a339edb901e9005358e0633fbbab367d626 lmdeploy/vl/media/connection.py:24-69
#
# Fix: introduce `_is_safe_url(url)` which resolves the hostname via
# `socket.getaddrinfo`, walks every returned IP, and rejects any that
# aren't `is_global` (covers loopback, RFC1918 private, link-local,
# multicast, reserved, unspecified). The vulnerable scheme-only check
# is replaced by this allowlist gate before the fetch.
#
# Trims: same scaffolding trim as vulnerable.py — MediaIO generic
# plumbing replaced with a Flask handler; fetch_timeout env-var
# resolution collapsed to a literal. The `_is_safe_url` body, the
# replacement gate at L55-58, and the `client.get(...,
# allow_redirects=True)` fetch are preserved verbatim from the fix
# commit.
import ipaddress
import socket
from urllib.parse import urlparse
import requests
from flask import Flask, request
app = Flask(__name__)
# Fixed request headers sent with the outbound fetch in `load_image`.
headers = {"User-Agent": "Mozilla/5.0"}
def _is_safe_url(url: str) -> tuple[bool, str]:
    """Check if the URL is safe to fetch (not internal/private)."""
    # Returns `(ok, message)`; every failure path returns `False` together
    # with a human-readable reason.
    try:
        parsed = urlparse(url)
        if parsed.scheme not in ('http', 'https'):
            return False, f'Unsupported scheme: {parsed.scheme}'

        hostname = parsed.hostname
        if not hostname:
            return False, 'Could not parse hostname from URL'

        # check all IPs (IPv4 + IPv6) using getaddrinfo
        try:
            infos = socket.getaddrinfo(hostname, None)
        except socket.gaierror:
            return False, 'Hostname resolution failed'

        # A single non-global address is enough to reject the whole URL.
        for info in infos:
            # `info[4][0]` is the address string of the resolved entry.
            ip = ipaddress.ip_address(info[4][0])
            # block any IP that is not globally routable
            if not ip.is_global:
                return False, f'Blocked non-global IP detected: {ip}'

        return True, 'URL is safe'
    except Exception as e:
        # Any unexpected error is reported as an unsafe URL rather than raised.
        return False, f'URL validation failed: {str(e)}'
# POST /load-image: validates the caller-supplied URL with `_is_safe_url`,
# then fetches it and returns the size of the response body.
@app.post("/load-image")
def load_image():
    # `or {}` substitutes an empty dict when the parsed body is falsy.
    body = request.get_json(force=True) or {}
    url = body.get("url", "")
    url_spec = urlparse(url)

    # Verbatim from connection.py:55-58 — replaces the scheme-only
    # check with a private-IP-blocking allowlist.
    is_safe, reason = _is_safe_url(url_spec.geturl())
    if not is_safe:
        raise ValueError(f'URL is blocked for security reasons: {reason}')

    fetch_timeout = 10
    client = requests.Session()
    client.max_redirects = 3
    # NOTE(review): the URL is validated once above, while the fetch below
    # follows up to 3 redirects; this code does not re-validate redirect
    # targets — confirm that is acceptable for the fixture's purpose.
    response = client.get(
        url_spec.geturl(), headers=headers, timeout=fetch_timeout, allow_redirects=True
    )
    response.raise_for_status()
    return {"size": len(response.content)}

View file

@ -0,0 +1,51 @@
# Nyx CVE benchmark fixture.
#
# CVE: CVE-2026-33626
# Project: LMDeploy (InternLM/lmdeploy)
# License: Apache-2.0 (https://github.com/InternLM/lmdeploy/blob/main/LICENSE)
# Advisory: https://github.com/advisories/GHSA-25c5-rg58-mhxh
# Vulnerable: 819a80836e991ca3f427b0e85faca159083d3d40 lmdeploy/vl/media/connection.py:23-37
#
# LMDeploy's vision-language image loader accepted user-supplied
# image URLs from the chat-completion request and fetched them via
# `requests.Session().get(url)` after only a scheme check. Attackers
# embedded URLs pointing at internal network services or cloud
# metadata endpoints (e.g. http://169.254.169.254/...) and exfiltrated
# the response back through the model output.
#
# Trims:
# - Surrounding _load_data_url / file-URL branches that don't reach
# the HTTP sink (lines 41+).
# - The scheme-only allowlist check at L24-25 of upstream. The
# CVE is host-based SSRF (private IP / cloud-metadata host); the
# scheme check was the insufficient validation the fix replaces.
# Removing it keeps the load-bearing source → sink flow intact.
# - The fetch_timeout env-var resolution (L28-31) — collapsed to a
# literal so the fixture is self-contained.
# - MediaIO[_M] generic plumbing — replaced with a Flask handler so
# the source is a concrete request flow.
#
# The verbatim load-bearing lines are the `client = requests.Session()`
# constructor and the `client.get(url_spec.geturl(), headers=headers,
# timeout=fetch_timeout)` fetch site at lines 33-34 of upstream.
from urllib.parse import urlparse
import requests
from flask import Flask, request
app = Flask(__name__)
# Fixed request headers sent with the outbound fetch below.
headers = {"User-Agent": "Mozilla/5.0"}


# POST /load-image: fetches the caller-supplied URL and returns the size
# of the response body. No check on the URL runs before the fetch.
@app.post("/load-image")
def load_image():
    # `or {}` substitutes an empty dict when the parsed body is falsy.
    body = request.get_json(force=True) or {}
    url = body.get("url", "")
    url_spec = urlparse(url)

    fetch_timeout = 10
    # Verbatim from connection.py:33-34 — Session().get(url).
    client = requests.Session()
    response = client.get(url_spec.geturl(), headers=headers, timeout=fetch_timeout)
    response.raise_for_status()
    return {"size": len(response.content)}

View file

@ -2,7 +2,7 @@
//
// CVE: CVE-2018-20997
// Advisory: https://rustsec.org/advisories/RUSTSEC-2018-0003
// Project: tar-rs zip-slip fix
// Project: tar-rs, zip-slip fix
// License: MIT OR Apache-2.0
//
// Patched variant: the extractor rejects any entry path that contains

View file

@ -2,18 +2,18 @@
//
// CVE: CVE-2018-20997
// Advisory: https://rustsec.org/advisories/RUSTSEC-2018-0003
// Project: tar-rs (alexcrichton/tar-rs) "zip slip" on tar extraction
// Project: tar-rs (alexcrichton/tar-rs), "zip slip" on tar extraction
// License: MIT OR Apache-2.0 (https://github.com/alexcrichton/tar-rs/blob/main/LICENSE-MIT)
//
// tar-rs <= 0.4.15 trusted tar entry paths verbatim when unpacking.
// A crafted archive with an entry named `../../etc/shadow` would cause
// `Archive::unpack` to write outside the destination directory, giving
// malicious tarballs arbitrary file write. Every consumer that
// streamed user-supplied archives package managers, OCI tooling,
// container image importers inherited the traversal.
// streamed user-supplied archives (package managers, OCI tooling,
// container image importers) inherited the traversal.
//
// This fixture is a minimal reproducer of the unsafe sink pattern
// attacker-controlled archive entry path -> fs::File::create(path) not
// This fixture is a minimal reproducer of the unsafe sink pattern
// (attacker-controlled archive entry path -> fs::File::create(path)), not
// an excerpt of tar-rs internals. The entry path is modelled as an env
// var so the single-file benchmark harness sees the flow; in a real
// extractor the same shape fires for `archive.entries()?.map(|e|

View file

@ -2,7 +2,7 @@
//
// CVE: CVE-2022-36113
// Advisory: https://blog.rust-lang.org/2022/09/14/cargo-cves.html
// Project: cargo `.cargo-ok` symlink follow fix
// Project: cargo, `.cargo-ok` symlink follow fix
// License: MIT OR Apache-2.0
//
// Patched variant: the crate name is passed through

View file

@ -3,7 +3,7 @@
// CVE: CVE-2022-36113
// Advisory: https://blog.rust-lang.org/2022/09/14/cargo-cves.html
// https://rustsec.org/advisories/RUSTSEC-2022-0064
// Project: cargo (rust-lang/cargo) "Arbitrary file corruption through
// Project: cargo (rust-lang/cargo), "Arbitrary file corruption through
// crate extraction" (`.cargo-ok` symlink following)
// License: MIT OR Apache-2.0 (https://github.com/rust-lang/cargo/blob/master/LICENSE-MIT)
//
@ -15,9 +15,9 @@
// switched the marker open to `OpenOptions::create_new(true)` so a
// pre-existing symlink aborts the extraction.
//
// This fixture is a minimal reproducer of the unsafe sink pattern
// This fixture is a minimal reproducer of the unsafe sink pattern:
// attacker-controlled crate name plumbed into the marker path ->
// fs::File::create(marker) through a symlink not an excerpt of cargo
// fs::File::create(marker) through a symlink, not an excerpt of cargo
// internals.
use std::env;
use std::fs::File;

View file

@ -2,7 +2,7 @@
//
// CVE: CVE-2024-24576
// Advisory: https://rustsec.org/advisories/RUSTSEC-2024-0003
// Project: Rust standard library "BatBadBut"
// Project: Rust standard library, "BatBadBut"
// License: MIT OR Apache-2.0
//
// Patched variant: the caller filters the argument through a cmd.exe-

View file

@ -3,7 +3,7 @@
// CVE: CVE-2024-24576
// Advisory: https://rustsec.org/advisories/RUSTSEC-2024-0003
// Blog: https://blog.rust-lang.org/2024/04/09/cve-2024-24576.html
// Project: Rust standard library (std::process::Command) "BatBadBut"
// Project: Rust standard library (std::process::Command), "BatBadBut"
// License: MIT OR Apache-2.0 (https://github.com/rust-lang/rust/blob/master/COPYRIGHT)
//
// Rust < 1.77.2 on Windows built the argv for .bat/.cmd invocations by
@ -14,8 +14,8 @@
// line, and every consumer of `std::process::Command::new("...bat")`
// on Windows inherited the RCE.
//
// This fixture is a minimal reproducer of the unsafe sink pattern
// caller-supplied input -> Command::new("update.bat").arg(name) not
// This fixture is a minimal reproducer of the unsafe sink pattern
// (caller-supplied input -> Command::new("update.bat").arg(name)), not
// an excerpt of rustc / libstd internals. The source is modelled as
// `env::var` so the single-file benchmark harness sees the flow; in a
// real deployment the same shape fires for an Axum/Actix/Rocket handler

View file

@ -0,0 +1,62 @@
// Nyx CVE benchmark fixture (patched counterpart).
//
// CVE: GHSA-4x48-cgf9-q33f (no CVE id assigned)
// Project: Novu (novuhq/novu)
// License: MIT (libs/application-generic — see LICENSE-MIT)
// Advisory: https://github.com/novuhq/novu/security/advisories/GHSA-4x48-cgf9-q33f
// Patched: 87d965eb88340ac7cd262dd52c8015acd092dc68
// libs/application-generic/src/usecases/conditions-filter/conditions-filter.usecase.ts:241-289
//
// The fix performs the existing call-site SSRF check `validateUrlSsrf`
// (already used by the HTTP-Request workflow step) before the webhook
// POST. The branch validates protocol/host and rejects when the URL
// hits localhost/private/cloud-metadata addresses; only on success
// does control reach axios.post.
//
// Patched-fix simplification: validateUrlSsrf is sourced from
// '../../utils/ssrf-url-validation.ts' upstream — inlined here as a
// no-op signature so the fixture parses without the larger novu
// monorepo. The branch shape (early throw on truthy ssrfError) is
// verbatim from the patch.
import express, { Request, Response } from 'express';
import axios from 'axios';
// Filter part carrying an optional webhook URL.
interface IWebhookFilterPart {
  webhookUrl?: string;
}
// Ambient signature only (no body in this file). The caller below
// treats a truthy resolved value as a rejection message.
declare function validateUrlSsrf(url: string): Promise<string | null>;
// Returns `undefined` when `child.webhookUrl` is falsy. Otherwise it
// validates the URL, throws when the validator resolves to a truthy
// value, and POSTs an empty payload to the URL, resolving to the
// response's `data`.
async function getWebhookResponse(
  child: IWebhookFilterPart,
): Promise<Record<string, unknown> | undefined> {
  if (!child.webhookUrl) return undefined;
  const payload = {};
  const config: { headers: Record<string, string> } = { headers: {} };
  const ssrfError = await validateUrlSsrf(child.webhookUrl);
  if (ssrfError) {
    // The thrown message is a JSON string carrying the validator's
    // result plus a fixed explanation.
    throw new Error(
      JSON.stringify({
        message: ssrfError,
        data: 'Webhook URL blocked by SSRF protection.',
      })
    );
  }
  // Reached only when the validator result was falsy.
  return await axios.post(child.webhookUrl, payload, config).then((response) => {
    return response.data as Record<string, unknown>;
  });
}
// Express entry point that feeds a request-controlled filter part into
// getWebhookResponse and returns its result as JSON.
const app = express();
// Parse JSON request bodies so req.body.filter is available in the handler.
app.use(express.json());
app.post('/conditions-filter/run', async (req: Request, res: Response) => {
// The type annotation is compile-time only; the body is not validated here.
const child: IWebhookFilterPart = req.body.filter;
const result = await getWebhookResponse(child);
res.json({ result });
});

View file

@ -0,0 +1,53 @@
// Nyx CVE benchmark fixture.
//
// CVE: GHSA-4x48-cgf9-q33f (no CVE id assigned)
// Project: Novu (novuhq/novu)
// License: MIT (libs/application-generic — see LICENSE-MIT)
// Advisory: https://github.com/novuhq/novu/security/advisories/GHSA-4x48-cgf9-q33f
// Vulnerable: 87d965eb88340ac7cd262dd52c8015acd092dc68^
// libs/application-generic/src/usecases/conditions-filter/conditions-filter.usecase.ts:241-272
//
// `getWebhookResponse` POSTs to a user-configured webhook URL using raw
// `axios.post(child.webhookUrl, ...)` with no SSRF validation. The
// `child` filter part is sourced from a workflow filter config the
// caller controls, so the URL flows attacker-influenced into axios.
//
// Trims:
// - HMAC config branch (dropped; it is not on the flow path, and the
// empty `headers` config is kept as scaffolding for the call shape).
// - buildHmac, buildPayload, processFilter dispatcher, environment
// repository lookups, decryptApiKey usage. Verbatim load-bearing
// lines are the IWebhookFilterPart param shape and the
// axios.post(child.webhookUrl, payload, config) call.
import express, { Request, Response } from 'express';
import axios from 'axios';
interface IWebhookFilterPart {
webhookUrl?: string; // optional; the only field this fixture models, read by getWebhookResponse
}
async function getWebhookResponse( // POSTs an empty payload to child.webhookUrl; no SSRF validation (vulnerable shape)
child: IWebhookFilterPart,
): Promise<Record<string, unknown> | undefined> {
if (!child.webhookUrl) return undefined; // presence check only; the URL value itself is never validated
const payload = {};
const config: { headers: Record<string, string> } = {
headers: {},
};
return await axios.post(child.webhookUrl, payload, config).then((response) => { // caller-supplied URL reaches axios.post unchecked
return response.data as Record<string, unknown>; // resolves to the response body
});
}
const app = express();
app.use(express.json()); // parse JSON request bodies so req.body.filter is available
app.post('/conditions-filter/run', async (req: Request, res: Response) => {
const child: IWebhookFilterPart = req.body.filter; // request-controlled value; the annotation is compile-time only
const result = await getWebhookResponse(child); // hands the unvalidated URL to the helper that calls axios.post
res.json({ result });
});

View file

@ -3,7 +3,7 @@
"metadata": {
"description": "Nyx benchmark ground truth",
"created": "2026-03-20",
"corpus_size": 433
"corpus_size": 458
},
"cases": [
{
@ -8394,6 +8394,35 @@
"disabled": false,
"notes": "Prisma $queryRawUnsafe \u2014 TS-specific ORM sink"
},
{
"case_id": "ts-sqli-003",
"file": "typescript/sqli/sqli_db_query_concat.ts",
"language": "typescript",
"is_vulnerable": true,
"vuln_class": "sqli",
"cwe": "CWE-89",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"taint-unsanitised-flow"
],
"allowed_alternative_rule_ids": [
"cfg-unguarded-sink"
],
"forbidden_rule_ids": [],
"expected_severity": "MEDIUM",
"expected_category": "Security",
"expected_sink_lines": null,
"expected_source_lines": null,
"tags": [
"sqli",
"real-repo-precision-2026-04-29",
"regression-guard"
],
"disabled": false,
"notes": "Vulnerable counterpart for ts-safe-017 \u2014 bare `connection.query(`SELECT...`)` and chained `db.query(SQL).then(...)` (Promise method, not ORM accessor) must still fire as SQL_QUERY sinks even after the ORM-chain recogniser landed."
},
{
"case_id": "ts-cmdi-001",
"file": "typescript/cmdi/cmdi_exec_template.ts",
@ -9560,6 +9589,72 @@
"disabled": false,
"notes": "CVE-2023-26159 patched counterpart: URL allowlist check guards axios.get; regression guard that Nyx does not refire on the fix"
},
{
"case_id": "cve-ts-ghsa-4x48-cgf9-q33f-vulnerable",
"file": "cve_corpus/typescript/GHSA-4x48-cgf9-q33f/vulnerable.ts",
"language": "typescript",
"is_vulnerable": true,
"vuln_class": "ssrf",
"cwe": "CWE-918",
"provenance": "real_cve",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"taint-unsanitised-flow"
],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [],
"expected_severity": "MEDIUM",
"expected_category": "Security",
"expected_sink_lines": [
[
51,
51
]
],
"expected_source_lines": [
[
50,
50
]
],
"tags": [
"cve",
"novu",
"ssrf",
"vulnerable"
],
"disabled": false,
"notes": "GHSA-4x48-cgf9-q33f: Novu conditions-filter webhook bypassed validateUrlSsrf; raw axios.post(child.webhookUrl) is the cross-function SSRF sink. MIT-licensed libs/application-generic package."
},
{
"case_id": "cve-ts-ghsa-4x48-cgf9-q33f-patched",
"file": "cve_corpus/typescript/GHSA-4x48-cgf9-q33f/patched.ts",
"language": "typescript",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "N/A",
"provenance": "real_cve",
"equivalence_tier": "exact",
"match_mode": "file_presence",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [
"taint-unsanitised-flow"
],
"expected_severity": null,
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"cve",
"novu",
"patched",
"negative"
],
"disabled": false,
"notes": "GHSA-4x48-cgf9-q33f patched: validateUrlSsrf(child.webhookUrl) followed by `if (ssrfError) throw` guards the axios.post call; regression guard for the indirect-validator branch narrowing + summary all_validated propagation."
},
{
"case_id": "cve-py-2017-18342-vulnerable",
"file": "cve_corpus/python/CVE-2017-18342/vulnerable.py",
@ -9629,6 +9724,144 @@
"disabled": false,
"notes": "CVE-2017-18342 patched counterpart: yaml.safe_load replaces yaml.load; regression guard that Nyx does not refire on the fix"
},
{
"case_id": "cve-py-2025-69662-vulnerable",
"file": "cve_corpus/python/CVE-2025-69662/vulnerable.py",
"language": "python",
"is_vulnerable": true,
"vuln_class": "sql_injection",
"cwe": "CWE-89",
"provenance": "real_cve",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"py.sqli.text_format"
],
"allowed_alternative_rule_ids": [
"taint-unsanitised-flow"
],
"forbidden_rule_ids": [],
"expected_severity": "MEDIUM",
"expected_category": "Security",
"expected_sink_lines": [
[
43,
44
]
],
"expected_source_lines": [
[
35,
35
]
],
"tags": [
"cve",
"geopandas",
"sql_injection",
"flask",
"sqlalchemy"
],
"disabled": false,
"notes": "CVE-2025-69662: geopandas to_postgis() interpolated GeoDataFrame's geometry column name into Find_SRID probe via f-string; SQL injection on user-uploaded layer. BSD-3-Clause"
},
{
"case_id": "cve-py-2025-69662-patched",
"file": "cve_corpus/python/CVE-2025-69662/patched.py",
"language": "python",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "N/A",
"provenance": "real_cve",
"equivalence_tier": "exact",
"match_mode": "file_presence",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [
"py.sqli.text_format",
"py.sqli.execute_format",
"taint-unsanitised-flow"
],
"expected_severity": null,
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"cve",
"geopandas",
"patched",
"negative"
],
"disabled": false,
"notes": "CVE-2025-69662 patched counterpart: text(...).bindparams() replaces f-string interpolation; regression guard that Nyx does not refire on the fix"
},
{
"case_id": "cve-py-2026-33626-vulnerable",
"file": "cve_corpus/python/CVE-2026-33626/vulnerable.py",
"language": "python",
"is_vulnerable": true,
"vuln_class": "ssrf",
"cwe": "CWE-918",
"provenance": "real_cve",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"taint-unsanitised-flow"
],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [],
"expected_severity": "MEDIUM",
"expected_category": "Security",
"expected_sink_lines": [
[
49,
49
]
],
"expected_source_lines": [
[
43,
43
]
],
"tags": [
"cve",
"lmdeploy",
"ssrf",
"flask",
"requests"
],
"disabled": false,
"notes": "CVE-2026-33626: LMDeploy vision-language image loader fetched user-supplied URLs via requests.Session().get without private-IP guard; SSRF / cloud-metadata exfil. Apache-2.0"
},
{
"case_id": "cve-py-2026-33626-patched",
"file": "cve_corpus/python/CVE-2026-33626/patched.py",
"language": "python",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "N/A",
"provenance": "real_cve",
"equivalence_tier": "exact",
"match_mode": "file_presence",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [
"taint-unsanitised-flow"
],
"expected_severity": null,
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"cve",
"lmdeploy",
"patched",
"negative"
],
"disabled": false,
"notes": "CVE-2026-33626 patched counterpart: _is_safe_url private-IP allowlist gate replaces scheme-only check; regression guard that Nyx does not refire on the fix"
},
{
"case_id": "cve-php-2017-9841-vulnerable",
"file": "cve_corpus/php/CVE-2017-9841/vulnerable.php",
@ -10694,6 +10927,147 @@
"disabled": false,
"notes": "CVE-2017-12629 patched counterpart: transformer name allowlist + in-process secure TransformerFactory removes the Runtime.exec path; regression guard that Nyx does not refire on the fix"
},
{
"case_id": "cve-java-2022-1471-vulnerable",
"file": "cve_corpus/java/CVE-2022-1471/vulnerable.java",
"language": "java",
"is_vulnerable": true,
"vuln_class": "deserialization",
"cwe": "CWE-502",
"provenance": "real_cve",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"java.deser.snakeyaml_unsafe_constructor"
],
"allowed_alternative_rule_ids": [
"taint-unsanitised-flow"
],
"forbidden_rule_ids": [],
"expected_severity": "MEDIUM",
"expected_category": "Security",
"expected_sink_lines": [
[
38,
38
]
],
"expected_source_lines": [
[
34,
34
]
],
"tags": [
"cve",
"snakeyaml",
"deserialization",
"servlet"
],
"disabled": false,
"notes": "CVE-2022-1471: SnakeYAML <2.0 default Constructor accepts arbitrary class tags (`!!javax.script.ScriptEngineManager`, `!!java.net.URLClassLoader`, ...) reaching RCE on apps that load attacker-controlled YAML. Apache-2.0"
},
{
"case_id": "cve-java-2022-1471-patched",
"file": "cve_corpus/java/CVE-2022-1471/patched.java",
"language": "java",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "N/A",
"provenance": "real_cve",
"equivalence_tier": "exact",
"match_mode": "file_presence",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [
"java.deser.snakeyaml_unsafe_constructor",
"java.deser.readobject",
"taint-unsanitised-flow"
],
"expected_severity": null,
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"cve",
"snakeyaml",
"patched",
"negative"
],
"disabled": false,
"notes": "CVE-2022-1471 patched counterpart: explicit SafeConstructor argument restricts the YAML tag handler set to primitives + standard collections; regression guard that Nyx does not refire on the safe form"
},
{
"case_id": "cve-java-2022-42889-vulnerable",
"file": "cve_corpus/java/CVE-2022-42889/vulnerable.java",
"language": "java",
"is_vulnerable": true,
"vuln_class": "code_exec",
"cwe": "CWE-94",
"provenance": "real_cve",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"java.code_exec.text4shell_interpolator"
],
"allowed_alternative_rule_ids": [
"taint-unsanitised-flow"
],
"forbidden_rule_ids": [],
"expected_severity": "MEDIUM",
"expected_category": "Security",
"expected_sink_lines": [
[
40,
40
]
],
"expected_source_lines": [
[
35,
35
]
],
"tags": [
"cve",
"commons-text",
"text4shell",
"code-exec",
"servlet"
],
"disabled": false,
"notes": "CVE-2022-42889 (Text4Shell): Apache Commons Text 1.5..1.9 StringSubstitutor.createInterpolator() enables script:/dns:/url: lookups; ${script:js:...} reaches the JSR-223 ScriptEngineManager. Apache-2.0"
},
{
"case_id": "cve-java-2022-42889-patched",
"file": "cve_corpus/java/CVE-2022-42889/patched.java",
"language": "java",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "N/A",
"provenance": "real_cve",
"equivalence_tier": "exact",
"match_mode": "file_presence",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [
"java.code_exec.text4shell_interpolator",
"taint-unsanitised-flow"
],
"expected_severity": null,
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"cve",
"commons-text",
"text4shell",
"patched",
"negative"
],
"disabled": false,
"notes": "CVE-2022-42889 patched counterpart: substitutor built directly with `new StringSubstitutor()` so the lookup map is empty; ${...} pass-through. No script/dns/url evaluation."
},
{
"case_id": "rs-auth-001",
"file": "rust/auth/actix_scoped_write_missing.rs",
@ -12233,6 +12607,89 @@
"disabled": false,
"notes": "TS cross-function bool validator; deferred \u2014 same reason as js-safe-016."
},
{
"case_id": "ts-safe-017",
"file": "typescript/safe/safe_strapi_db_query_chain.ts",
"language": "typescript",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "CWE-89",
"provenance": "real-repo-distilled",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [
"cfg-unguarded-sink",
"taint-unsanitised-flow"
],
"expected_severity": "NONE",
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"real-repo-precision-2026-04-29",
"strapi",
"orm-chain"
],
"disabled": false,
"notes": "Strapi-style ORM accessor: `<obj>.db.query(MODEL_UID).<orm_method>(...)`; the `db.query` call's literal model UID + the ORM-method outer chain (findOne/findMany/create/update/delete/count) prove the chain is parameterised. Synthesised same-node Sanitizer(SQL_QUERY) suppresses cfg-unguarded-sink and taint-unsanitised-flow."
},
{
"case_id": "ts-safe-018",
"file": "typescript/safe/safe_indirect_validator.ts",
"language": "typescript",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "N/A",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "file_presence",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [
"taint-unsanitised-flow",
"cfg-unguarded-sink"
],
"expected_severity": null,
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"indirect-validator",
"ssrf",
"negative"
],
"disabled": false,
"notes": "Indirect-validator branch narrowing — `const err = validateUrlSsrf(target); if (err) throw …;` should suppress the downstream axios.get sink. Pinned by tests/lib::indirect_validator_narrowing_marks_arg_validated."
},
{
"case_id": "ts-safe-019",
"file": "typescript/safe/safe_helper_with_validator.ts",
"language": "typescript",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "N/A",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "file_presence",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [
"taint-unsanitised-flow"
],
"expected_severity": null,
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"helper-summary",
"ssrf",
"negative"
],
"disabled": false,
"notes": "Helper-summary all_validated propagation — when a helper's body validates the param via `validateXxx`, the per-param probe's all_validated event should be skipped during summary extraction so callers don't refire the cross-fn SSRF. Pinned by tests/lib::helper_with_validator_does_not_propagate_to_caller_via_summary."
},
{
"case_id": "py-auth-decorator-001",
"file": "python/safe/safe_login_required_decorator.py",
@ -12897,6 +13354,31 @@
"disabled": false,
"notes": "Happy-path `if (!data.error && Array.isArray(...))` and body-mentioning-err do not fire `cfg-error-fallthrough` (website/public/app/core/app.js)"
},
{
"case_id": "js-safe-realrepo-006",
"file": "javascript/safe/safe_localised_gherkin_regex.js",
"language": "javascript",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "N/A",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [],
"expected_severity": null,
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"panic-guard",
"negative",
"real-repo-precision-2026-04-29"
],
"disabled": false,
"notes": "Panic guard: CodeMirror Gherkin tokenizer ships a long localised regex inside a boolean sub-condition. Naive byte-slice truncation in CFG condition-text (`t[..MAX_CONDITION_TEXT_LEN]`) panicked when byte 256 landed inside a multi-byte UTF-8 character (Gurmukhi `ਖ`). Engine fix: src/utils/snippet.rs::truncate_at_char_boundary applied at three CFG sites + two symex display sites (gogs public/plugins/codemirror-5.17.0/mode/gherkin/gherkin.js:107)."
},
{
"case_id": "go-safe-realrepo-001",
"file": "go/safe/safe_error_log_only_function.go",
@ -13126,6 +13608,33 @@
"disabled": false,
"notes": "`func (c *Cache) ...` with `c.foo()` / `c.Fs.Create(...)` intra-struct dispatches \u2014 Go method receivers must seed `non_sink_vars` so the verb-name fallback doesn't fire on bare-receiver internal calls. Closes the hugo cache/filecache.go cluster (~48 hits)."
},
{
"case_id": "go-safe-realrepo-006",
"file": "go/safe/safe_test_helper_fatal.go",
"language": "go",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "N/A",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [
"cfg-error-fallthrough"
],
"expected_severity": null,
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"cfg",
"negative",
"real-repo-precision-2026-04-29"
],
"disabled": false,
"notes": "`if err != nil { c.Fatalf(...) }` / `os.Exit` / `log.Fatalf` / `panic(err)` are documented terminators (Goexit-class). cfg-error-fallthrough must walk through them as terminating paths. Closes the minio test-file cluster (49+34+12+11+9+7+7 hits across xl-storage_test.go, erasure-healing_test.go, format-erasure_test.go, \u2026). Engine fix: src/cfg_analysis/error_handling.rs::call_never_returns."
},
{
"case_id": "go-auth-realrepo-001",
"file": "go/auth/vuln_repo_findbyid_no_auth.go",
@ -13429,6 +13938,59 @@
"disabled": false,
"notes": "Regression guard: same TRPC handler shape as ts-auth-realrepo-004 but the SQL parameter is `input.targetUserId` (request body field), not `ctx.user.id`. The TRPC ctx self-actor exemption must apply ONLY to ctx.user.<id-like> subjects, never to other paths in the same param."
},
{
"case_id": "ts-auth-realrepo-006",
"file": "typescript/auth/safe_local_collection_receiver.ts",
"language": "typescript",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "N/A",
"provenance": "real-repo",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [
"js.auth.missing_ownership_check"
],
"expected_severity": null,
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"auth",
"negative",
"real-repo-precision-2026-04-29"
],
"disabled": false,
"notes": "Excalidraw `Map<K, V>` / `Set<T>` / `WeakMap` / `WeakSet` / `Array<T>` / `T[]` / `readonly T[]` receivers — direct annotation, same-file `type X = Map<...>` aliasing, and inline `new Map()` constructor. SSA `constructor_type` JS/TS arm + `cfg::params::ts_type_to_local_collection` + `cfg::dto::collect_type_alias_local_collections` route every shape through `TypeKind::LocalCollection` → `SinkClass::InMemoryLocal`, suppressing missing-ownership."
},
{
"case_id": "ts-auth-realrepo-007",
"file": "typescript/auth/vuln_local_collection_does_not_blanket_suppress.ts",
"language": "typescript",
"is_vulnerable": true,
"vuln_class": "auth",
"cwe": "CWE-639",
"provenance": "real-repo",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"js.auth.missing_ownership_check"
],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [],
"expected_severity": "MEDIUM",
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"auth",
"real-repo-precision-2026-04-29"
],
"disabled": false,
"notes": "Vulnerable counterpart to ts-auth-realrepo-006: `prisma.user.findUnique` / `prisma.user.update` with attacker-supplied id and no preceding auth check. Receiver is NOT a tracked Map / Set / Array, so the LocalCollection fix must NOT suppress this — proves the type-aware suppression doesn't blanket-cover real DB clients that share method names (`get`, `find`, `update`) with JS containers."
},
{
"case_id": "rs-auth-realrepo-009",
"file": "rust/auth/safe_local_user_view_extractor.rs",
@ -13484,6 +14046,89 @@
"disabled": false,
"notes": "Negative counterpart for the LocalUserView recogniser: handler takes the typed extractor but mutates a row by `req.target_user_id` (foreign id) without any ownership check \u2014 must still flag. Guards against an over-broad recogniser that would treat any handler with a self-actor extractor as authorised by default."
},
{
"case_id": "rs-auth-realrepo-011",
"file": "rust/auth/safe_param_type_segment_idents.rs",
"language": "rust",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "N/A",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [
"rs.auth.missing_ownership_check"
],
"expected_severity": null,
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"auth",
"negative",
"real-repo-precision-2026-04-29",
"noise-budget-zero"
],
"disabled": false,
"notes": "Rust `parameter` arm in `collect_param_names` keeps type-segment idents (`std`, `path`, `Path`) out of `unit.params` so `dst: &std::path::Path` doesn't gate `unit_has_user_input_evidence` open via the framework-name allow-list (`path`). Surfaced from meilisearch/index-scheduler/scheduler/process_snapshot_creation.rs::remove_tasks where `dst: &std::path::Path` made every `db.delete(task.uid)` fire missing-ownership-check."
},
{
"case_id": "rs-auth-realrepo-012",
"file": "rust/auth/safe_local_collection_param_types.rs",
"language": "rust",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "N/A",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [
"rs.auth.missing_ownership_check"
],
"expected_severity": null,
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"auth",
"negative",
"real-repo-precision-2026-04-29",
"noise-budget-zero"
],
"disabled": false,
"notes": "Rust function-parameter type annotations naming an in-memory container (`RoaringBitmap`, `HashMap<K,V>`, `HashSet<T>`, `BTreeSet<T>`) classify the receiver as `TypeKind::LocalCollection` \u2192 `SinkClass::InMemoryLocal`, suppressing the verb-name dispatch's DbMutation classification. Surfaced from meilisearch/index-scheduler/scheduler/enterprise_edition/network.rs::balance_shards (`unsharded: RoaringBitmap`). Mirrors the JS/TS `ts_type_to_local_collection` fix from 2026-04-29."
},
{
"case_id": "rs-auth-realrepo-013",
"file": "rust/auth/unsafe_handler_local_collection_does_not_blanket_suppress.rs",
"language": "rust",
"is_vulnerable": true,
"vuln_class": "auth",
"cwe": "CWE-285",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"rs.auth.missing_ownership_check"
],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [],
"expected_severity": "HIGH",
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"auth",
"positive",
"real-repo-precision-2026-04-29"
],
"disabled": false,
"notes": "Negative-counterpart guard for the LocalCollection / parameter-name fixes: handler takes a HashMap typed param (in-memory bookkeeping) but ALSO calls `db.update_owner(req.target_user_id, ...)` (real DbMutation). The cache mutation must not blanket-suppress the persistent-store mutation \u2014 the rule must still fire on `db.update_owner`."
},
{
"case_id": "ruby-safe-ar-query-shapes-001",
"file": "ruby/safe/safe_active_record_query_shapes.rb",
@ -13715,6 +14360,120 @@
],
"disabled": false,
"notes": "Concatenated SQL passed to em.createQuery(...) \u2014 receiver-chain walk sees binary_expression at arg 0, refuses to synthesise sanitizer, structural sink fires. Regression guard for the JPA parameterised-execute fix."
},
{
"case_id": "py-auth-realrepo-005",
"file": "python/safe/safe_fastapi_route_dependencies_auth.py",
"language": "python",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "N/A",
"provenance": "real-repo",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [
"py.auth.missing_ownership_check"
],
"expected_severity": null,
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"auth",
"fastapi",
"real-repo-precision-2026-04-29"
],
"disabled": false,
"notes": "Distilled from airflow api_fastapi/core_api/routes/public/connections.py: FastAPI route decorator carries `dependencies=[Depends(requires_access_connection(method=\"DELETE\"))]`; the Flask extractor's new `dependencies=` kwarg walker plus inject_middleware_auth subject synthesis recognises the auth gate."
},
{
"case_id": "py-auth-realrepo-008",
"file": "python/safe/safe_fastapi_route_level_row_fetch.py",
"language": "python",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "N/A",
"provenance": "real-repo",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [
"py.auth.missing_ownership_check"
],
"expected_severity": null,
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"auth",
"fastapi",
"real-repo-precision-2026-04-29"
],
"disabled": false,
"notes": "Distilled from airflow api_fastapi/core_api/routes/public/dag_run.py: FastAPI route decorator carries `dependencies=[Depends(requires_access_dag(method=\"GET\"))]`; the route-level guard must cover not only direct path-param subjects (filter_by(dag_id=dag_id)) but also row-variable receivers (`dag.cleanup_runs(...)` after `dag = session.scalar(select(DagModel)...)`). Pinned by the `is_route_level` short-circuit in `auth_check_covers_subject` plus the kind-aware `function_params_route_handler` that includes id-like Python typed params (`dag_id: str`) in `unit.params`."
},
{
"case_id": "py-auth-realrepo-006",
"file": "python/safe/safe_pytest_sqlalchemy_session.py",
"language": "python",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "N/A",
"provenance": "real-repo",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [
"py.auth.missing_ownership_check",
"py.auth.token_override_without_validation"
],
"expected_severity": null,
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"auth",
"pytest",
"real-repo-precision-2026-04-29"
],
"disabled": false,
"notes": "Distilled from airflow tests/unit/models/test_backfill.py: pytest test methods with SQLAlchemy `session` fixture and `session.commit()` calls. Bare `session.<sqlalchemy_verb>` no longer counts as auth Session evidence; only `session.<identity_field>` (user/user_id/...) does."
},
{
"case_id": "py-auth-realrepo-007",
"file": "python/auth/vuln_fastapi_route_no_dependencies.py",
"language": "python",
"is_vulnerable": true,
"vuln_class": "auth",
"cwe": "CWE-862",
"provenance": "real-repo",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"py.auth.missing_ownership_check"
],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [],
"expected_severity": "HIGH",
"expected_category": "Security",
"expected_sink_lines": [
[
15,
15
]
],
"expected_source_lines": [],
"tags": [
"auth",
"fastapi",
"real-repo-precision-2026-04-29"
],
"disabled": false,
"notes": "Vulnerable counterpart to py-auth-realrepo-005: same FastAPI route shape but no `dependencies=[Depends(...)]` keyword arg. Regression guard: the dependency-injection recogniser must not blanket-suppress every FastAPI route."
}
]
}

View file

@ -1,6 +1,6 @@
{
"benchmark_version": "1.0",
"timestamp": "2026-04-29T05:42:03Z",
"timestamp": "2026-04-29T21:50:34Z",
"scanner_version": "0.5.0",
"scanner_config": {
"analysis_mode": "Full",
@ -9,9 +9,9 @@
"state_analysis_enabled": true,
"worker_threads": 1
},
"ground_truth_hash": "sha256:3e034f1fc5c7bb7838f1fb2c63de5ca5a36aacfdf5d66cf25f30bff99f25f1cf",
"corpus_size": 433,
"cases_run": 432,
"ground_truth_hash": "sha256:5b391d654f88673e5a200af875d513cf83812af747739395e8315768b8983ce3",
"corpus_size": 458,
"cases_run": 457,
"cases_skipped": 1,
"outcomes": [
{
@ -1306,6 +1306,74 @@
"security_finding_count": 2,
"non_security_finding_count": 0
},
{
"case_id": "cve-java-2022-1471-patched",
"file": "cve_corpus/java/CVE-2022-1471/patched.java",
"language": "java",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "cve-java-2022-1471-vulnerable",
"file": "cve_corpus/java/CVE-2022-1471/vulnerable.java",
"language": "java",
"vuln_class": "deserialization",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"java.deser.snakeyaml_unsafe_constructor"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"java.deser.snakeyaml_unsafe_constructor"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "cve-java-2022-42889-patched",
"file": "cve_corpus/java/CVE-2022-42889/patched.java",
"language": "java",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "cve-java-2022-42889-vulnerable",
"file": "cve_corpus/java/CVE-2022-42889/vulnerable.java",
"language": "java",
"vuln_class": "code_exec",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"java.code_exec.text4shell_interpolator"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"java.code_exec.text4shell_interpolator"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "cve-js-2019-14939-patched",
"file": "cve_corpus/javascript/CVE-2019-14939/patched.js",
@ -1520,6 +1588,76 @@
"security_finding_count": 2,
"non_security_finding_count": 0
},
{
"case_id": "cve-py-2025-69662-patched",
"file": "cve_corpus/python/CVE-2025-69662/patched.py",
"language": "python",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "cve-py-2025-69662-vulnerable",
"file": "cve_corpus/python/CVE-2025-69662/vulnerable.py",
"language": "python",
"vuln_class": "sql_injection",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-unsanitised-flow (source 35:12)",
"py.sqli.text_format"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-unsanitised-flow (source 35:12)",
"py.sqli.text_format"
],
"security_finding_count": 2,
"non_security_finding_count": 0
},
{
"case_id": "cve-py-2026-33626-patched",
"file": "cve_corpus/python/CVE-2026-33626/patched.py",
"language": "python",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "cve-py-2026-33626-vulnerable",
"file": "cve_corpus/python/CVE-2026-33626/vulnerable.py",
"language": "python",
"vuln_class": "ssrf",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-unsanitised-flow (source 43:12)"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-unsanitised-flow (source 43:12)"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "cve-rb-2013-0156-patched",
"file": "cve_corpus/ruby/CVE-2013-0156/patched.rb",
@ -1737,6 +1875,40 @@
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "cve-ts-ghsa-4x48-cgf9-q33f-patched",
"file": "cve_corpus/typescript/GHSA-4x48-cgf9-q33f/patched.ts",
"language": "typescript",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "cve-ts-ghsa-4x48-cgf9-q33f-vulnerable",
"file": "cve_corpus/typescript/GHSA-4x48-cgf9-q33f/vulnerable.ts",
"language": "typescript",
"vuln_class": "ssrf",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-unsanitised-flow (source 50:5)"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-unsanitised-flow (source 50:5)"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "go-auth-realrepo-001",
"file": "go/auth/vuln_repo_findbyid_no_auth.go",
@ -2371,6 +2543,21 @@
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "go-safe-realrepo-006",
"file": "go/safe/safe_test_helper_fatal.go",
"language": "go",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "go-sqli-001",
"file": "go/sqli/sqli_concat.go",
@ -3590,6 +3777,21 @@
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "js-safe-realrepo-006",
"file": "javascript/safe/safe_localised_gherkin_regex.js",
"language": "javascript",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "js-sqli-001",
"file": "javascript/sqli/sqli_concat.js",
@ -4497,6 +4699,70 @@
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "py-auth-realrepo-005",
"file": "python/safe/safe_fastapi_route_dependencies_auth.py",
"language": "python",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "py-auth-realrepo-006",
"file": "python/safe/safe_pytest_sqlalchemy_session.py",
"language": "python",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "py-auth-realrepo-007",
"file": "python/safe/safe_fastapi_route_level_row_fetch.py",
"language": "python",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "py-auth-realrepo-007",
"file": "python/auth/vuln_fastapi_route_no_dependencies.py",
"language": "python",
"vuln_class": "auth",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"py.auth.missing_ownership_check"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"py.auth.missing_ownership_check"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "py-cmdi-001",
"file": "python/cmdi/cmdi_direct.py",
@ -5630,6 +5896,55 @@
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "rs-auth-realrepo-011",
"file": "rust/auth/safe_param_type_segment_idents.rs",
"language": "rust",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "rs-auth-realrepo-012",
"file": "rust/auth/safe_local_collection_param_types.rs",
"language": "rust",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "rs-auth-realrepo-013",
"file": "rust/auth/unsafe_handler_local_collection_does_not_blanket_suppress.rs",
"language": "rust",
"vuln_class": "auth",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": null,
"matched_rule_ids": [
"rs.auth.missing_ownership_check"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"rs.auth.missing_ownership_check"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "rs-auth-typed-extractors-001",
"file": "rust/auth/safe_typed_path_int_extractor.rs",
@ -7043,6 +7358,42 @@
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "ts-auth-realrepo-006",
"file": "typescript/auth/safe_local_collection_receiver.ts",
"language": "typescript",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "ts-auth-realrepo-007",
"file": "typescript/auth/vuln_local_collection_does_not_blanket_suppress.ts",
"language": "typescript",
"vuln_class": "auth",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": null,
"matched_rule_ids": [
"js.auth.missing_ownership_check",
"js.auth.missing_ownership_check"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"js.auth.missing_ownership_check",
"js.auth.missing_ownership_check"
],
"security_finding_count": 2,
"non_security_finding_count": 0
},
{
"case_id": "ts-cmdi-001",
"file": "typescript/cmdi/cmdi_exec_template.ts",
@ -7493,6 +7844,53 @@
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "ts-safe-017",
"file": "typescript/safe/safe_strapi_db_query_chain.ts",
"language": "typescript",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [
"ts.quality.any_annotation"
],
"security_finding_count": 0,
"non_security_finding_count": 1
},
{
"case_id": "ts-safe-018",
"file": "typescript/safe/safe_indirect_validator.ts",
"language": "typescript",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "ts-safe-019",
"file": "typescript/safe/safe_helper_with_validator.ts",
"language": "typescript",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "ts-secrets-001",
"file": "typescript/secrets/fallback_secret.ts",
@ -7552,6 +7950,30 @@
"security_finding_count": 2,
"non_security_finding_count": 0
},
{
"case_id": "ts-sqli-003",
"file": "typescript/sqli/sqli_db_query_concat.ts",
"language": "typescript",
"vuln_class": "sqli",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": null,
"matched_rule_ids": [
"taint-unsanitised-flow (source 15:5)",
"taint-unsanitised-flow (source 21:5)"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"ts.quality.any_annotation",
"ts.quality.any_annotation",
"taint-unsanitised-flow (source 15:5)",
"taint-unsanitised-flow (source 21:5)",
"ts.quality.any_annotation"
],
"security_finding_count": 2,
"non_security_finding_count": 3
},
{
"case_id": "ts-ssrf-001",
"file": "typescript/ssrf/ssrf_axios_user_url.ts",
@ -7771,22 +8193,22 @@
}
],
"aggregate_file_level": {
"tp": 216,
"tp": 225,
"fp": 1,
"fn_": 0,
"tn": 215,
"precision": 0.9953917050691244,
"tn": 231,
"precision": 0.995575221238938,
"recall": 1.0,
"f1": 0.997690531177829
"f1": 0.9977827050997783
},
"aggregate_rule_level": {
"tp": 216,
"tp": 225,
"fp": 1,
"fn_": 0,
"tn": 215,
"precision": 0.9953917050691244,
"tn": 231,
"precision": 0.995575221238938,
"recall": 1.0,
"f1": 0.997690531177829
"f1": 0.9977827050997783
},
"by_language": {
"c": {
@ -7811,16 +8233,16 @@
"tp": 25,
"fp": 1,
"fn_": 0,
"tn": 27,
"tn": 28,
"precision": 0.9615384615384616,
"recall": 1.0,
"f1": 0.9803921568627451
},
"java": {
"tp": 17,
"tp": 19,
"fp": 0,
"fn_": 0,
"tn": 18,
"tn": 20,
"precision": 1.0,
"recall": 1.0,
"f1": 1.0
@ -7829,7 +8251,7 @@
"tp": 19,
"fp": 0,
"fn_": 0,
"tn": 23,
"tn": 24,
"precision": 1.0,
"recall": 1.0,
"f1": 1.0
@ -7844,10 +8266,10 @@
"f1": 1.0
},
"python": {
"tp": 23,
"tp": 26,
"fp": 0,
"fn_": 0,
"tn": 23,
"tn": 28,
"precision": 1.0,
"recall": 1.0,
"f1": 1.0
@ -7862,19 +8284,19 @@
"f1": 1.0
},
"rust": {
"tp": 33,
"tp": 34,
"fp": 0,
"fn_": 0,
"tn": 37,
"tn": 39,
"precision": 1.0,
"recall": 1.0,
"f1": 1.0
},
"typescript": {
"tp": 29,
"tp": 32,
"fp": 0,
"fn_": 0,
"tn": 18,
"tn": 23,
"precision": 1.0,
"recall": 1.0,
"f1": 1.0
@ -7882,7 +8304,7 @@
},
"by_vuln_class": {
"auth": {
"tp": 13,
"tp": 16,
"fp": 0,
"fn_": 0,
"tn": 0,
@ -7909,7 +8331,7 @@
"f1": 1.0
},
"code_exec": {
"tp": 2,
"tp": 3,
"fp": 0,
"fn_": 0,
"tn": 0,
@ -7945,7 +8367,7 @@
"f1": 1.0
},
"deserialization": {
"tp": 4,
"tp": 5,
"fp": 0,
"fn_": 0,
"tn": 0,
@ -8002,7 +8424,7 @@
"tp": 0,
"fp": 1,
"fn_": 0,
"tn": 215,
"tn": 231,
"precision": 0.0,
"recall": 1.0,
"f1": 0.0
@ -8016,8 +8438,17 @@
"recall": 1.0,
"f1": 1.0
},
"sql_injection": {
"tp": 1,
"fp": 0,
"fn_": 0,
"tn": 0,
"precision": 1.0,
"recall": 1.0,
"f1": 1.0
},
"sqli": {
"tp": 29,
"tp": 30,
"fp": 0,
"fn_": 0,
"tn": 0,
@ -8026,7 +8457,7 @@
"f1": 1.0
},
"ssrf": {
"tp": 26,
"tp": 28,
"fp": 0,
"fn_": 0,
"tn": 0,
@ -8046,31 +8477,31 @@
},
"by_confidence": {
">=High": {
"tp": 90,
"fp": 90,
"fn_": 126,
"tn": 126,
"precision": 0.5,
"recall": 0.4166666666666667,
"f1": 0.45454545454545453
"tp": 79,
"fp": 104,
"fn_": 146,
"tn": 128,
"precision": 0.43169398907103823,
"recall": 0.3511111111111111,
"f1": 0.3872549019607843
},
">=Low": {
"tp": 94,
"fp": 102,
"fn_": 122,
"tn": 114,
"precision": 0.47959183673469385,
"recall": 0.4351851851851852,
"f1": 0.4563106796116505
"tp": 81,
"fp": 116,
"fn_": 144,
"tn": 116,
"precision": 0.41116751269035534,
"recall": 0.36,
"f1": 0.3838862559241706
},
">=Medium": {
"tp": 94,
"fp": 102,
"fn_": 122,
"tn": 114,
"precision": 0.47959183673469385,
"recall": 0.4351851851851852,
"f1": 0.4563106796116505
"tp": 81,
"fp": 116,
"fn_": 144,
"tn": 116,
"precision": 0.41116751269035534,
"recall": 0.36,
"f1": 0.3838862559241706
}
}
}

View file

@ -191,7 +191,7 @@ struct BenchmarkResults {
// ── Scanning ─────────────────────────────────────────────────────────
fn scan_corpus_file(corpus_root: &Path, relative_path: &str) -> Vec<Diag> {
// `cve_corpus/*` cases live in a sibling of `corpus/` see
// `cve_corpus/*` cases live in a sibling of `corpus/`, see
// `tests/benchmark/cve_corpus/`.
let source = if relative_path.starts_with("cve_corpus/") {
corpus_root
@ -679,7 +679,7 @@ fn benchmark_evaluation() {
// on this corpus, so 5pp is generous enough to absorb honest
// FP↔TN trades while still catching a real regression in a
// vulnerability class. When you land a durable, measurable
// improvement, tighten these floors do not relax them to paper
// improvement, tighten these floors, do not relax them to paper
// over a regression.
let rule = &results.aggregate_rule_level;
assert!(
@ -790,7 +790,7 @@ fn score_rule_level_with_diags(
fn sha256_hex(data: &[u8]) -> String {
use std::io::Write;
// Simple SHA-256 via command avoids adding a crypto dependency.
// Simple SHA-256 via command, avoids adding a crypto dependency.
let mut child = std::process::Command::new("shasum")
.args(["-a", "256"])
.stdin(std::process::Stdio::piped())

View file

@ -3,7 +3,7 @@
//! Nyx's surface is a `clap` parser plus a handful of downstream validators
//! (`SeverityFilter::parse`, `Severity::from_str`, `Confidence::from_str`,
//! `apply_profile`). These tests lock in the user-visible contract that
//! bad input exits non-zero with a message that names the offending flag
//! bad input exits non-zero with a message that names the offending flag;
//! a scanner that silently accepts a typo'd severity and returns zero
//! findings is a footgun in CI.
//!
@ -268,7 +268,7 @@ fn scan_quiet_suppresses_preview_banner() {
.stderr(predicate::str::contains("Preview for C/C++").not());
}
/// JSON output format must not print the Preview banner either machine-
/// JSON output format must not print the Preview banner either, machine-
/// readable output has to stay clean on both stdout and stderr.
#[test]
fn scan_json_format_suppresses_preview_banner() {

View file

@ -179,7 +179,7 @@ pub fn validate_expectations(diags: &[Diag], fixture_dir: &Path) {
}
}
// Noise budget (optional omitted on tight safe-code fixtures)
// Noise budget (optional, omitted on tight safe-code fixtures)
if let Some(budget) = &exp.noise_budget {
assert_max_findings(diags, budget.max_total_findings, budget.max_high_findings);
}

View file

@ -3,8 +3,8 @@
//! Production defaults run the scanner with `worker_threads > 1`, and callers
//! embedding `nyx_scanner` (the forthcoming `serve` UI, CI wrappers, scripted
//! harnesses) may invoke `scan_no_index` from multiple threads in the same
//! process. Shared engine state label tables, framework-detection caches,
//! tree-sitter thread-local parsers, rayon globals, `once_cell` statics
//! process. Shared engine state (label tables, framework-detection caches,
//! tree-sitter thread-local parsers, rayon globals, `once_cell` statics)
//! must tolerate two simultaneous walks without races, panics, or diverging
//! outputs.
//!
@ -86,7 +86,7 @@ fn build_tree(root: &Path) {
}
/// Canonicalize a diag list for equality comparison. Finding output ordering
/// depends on rayon scheduling the individual fields must be identical but
/// depends on rayon scheduling, the individual fields must be identical but
/// the sequence is not. We sort by a stable composite key and stringify
/// (Diag itself doesn't derive Ord).
fn canonical_fingerprint(diags: &[Diag]) -> Vec<String> {
@ -104,7 +104,7 @@ fn two_concurrent_scans_produce_identical_findings() {
let root = tmp.path().to_path_buf();
build_tree(&root);
// Capture an initial single-threaded run so we have a reference point
// Capture an initial single-threaded run so we have a reference point;
// if the concurrent run produced a subset we want to know whether that
// matches a known-good baseline or diverges from it.
let baseline = scan_no_index(&root, &test_cfg()).expect("baseline scan must succeed");
@ -138,7 +138,7 @@ fn two_concurrent_scans_produce_identical_findings() {
);
}
/// Four concurrent scans over the same tree larger blast radius for
/// Four concurrent scans over the same tree, larger blast radius for
/// serialization bugs in shared caches. Runs on a small tree to keep
/// CI time reasonable.
#[test]

View file

@ -4,16 +4,16 @@
//! Three fixtures cover the documented transfer forms currently tractable
//! against the JS/Python abstract-suppression pipelines:
//!
//! * `cross_file_abstract_port_range` (Python) Identity transfer on an
//! * `cross_file_abstract_port_range` (Python), Identity transfer on an
//! integer-typed passthrough. The caller's literal `8080` crosses the
//! file boundary and SHELL_ESCAPE suppression fires on the bounded int.
//! * `cross_file_abstract_bounded_index` (Python) Clamped transfer
//! * `cross_file_abstract_bounded_index` (Python), Clamped transfer
//! derived from a baseline-invariant fact. The callee returns a
//! literal `42`; the per-parameter transfer attaches it as
//! `Clamped { 42, 42 }` and the caller sees a bounded integer
//! without the return-abstract channel alone carrying the fact
//! through summary resolution ambiguity.
//! * `cross_file_abstract_url_prefix_lock` (JS) String-prefix transfer
//! * `cross_file_abstract_url_prefix_lock` (JS), String-prefix transfer
//! across an Identity wrapper. The caller writes
//! `url = asIs('https://internal/...' + userPath)` and passes `url` to
//! `axios.get`. The CFG node's `string_prefix` is consumed by the
@ -21,7 +21,7 @@
//! prefix locks the host and SSRF suppression fires.
//!
//! Each fixture's `expectations.json` treats the cross-file SHELL/SSRF
//! sink as *forbidden* on the main file if cross-file abstract
//! sink as *forbidden* on the main file, if cross-file abstract
//! propagation regresses, the sink fires and the forbidden-finding
//! assertion trips.

View file

@ -6,7 +6,7 @@
//! Three fixtures cover distinct structural shapes of the summary
//! channel:
//!
//! * `cross_file_alias_mutating_helper` (Java) a void-returning
//! * `cross_file_alias_mutating_helper` (Java), a void-returning
//! helper that stores its second argument into a field of its first
//! argument. Without the points-to channel the cross-file summary
//! loses every taint edge (void return, no container-op in
@ -14,20 +14,20 @@
//! edge and the caller observes the field write through the argument
//! alias, producing a Runtime.exec finding.
//!
//! * `cross_file_alias_returned_alias` (JS) a passthrough helper
//! * `cross_file_alias_returned_alias` (JS), a passthrough helper
//! whose return aliases its first parameter. `param_to_return` with
//! `Identity` already covered the taint cap; the points-to channel
//! adds the heap-identity alias `Param(0) → Return` so the caller
//! threads the points-to set through the call. The existing
//! shell-exec sink must still fire a regression guard on the
//! shell-exec sink must still fire, a regression guard on the
//! return-alias channel.
//!
//! * `cross_file_alias_bounded_graph` (Python) a helper with a 20-
//! * `cross_file_alias_bounded_graph` (Python), a helper with a 20-
//! edge alias graph that intentionally overflows `MAX_ALIAS_EDGES`.
//! The assertion is that the scan *terminates* under the bounded
//! analysis and falls back to the conservative
//! `PointsToSummary::overflow` behaviour, not a specific finding
//! count overflow is an operational guarantee, not a precision one.
//! count, overflow is an operational guarantee, not a precision one.
mod common;
@ -76,7 +76,7 @@ fn cross_file_container_factory() {
}
/// Receiver-chain regression: tainted receiver flows through
/// `tainted.trim().toLowerCase()` — both zero-arg — and into
/// `tainted.trim().toLowerCase()`, both zero-arg, and into
/// `Runtime.exec`. Pins the existing receiver-fallback behaviour so
/// heap-aliasing changes do not regress it.
#[test]

View file

@ -4,7 +4,7 @@
//! The body-loading path is pure plumbing: the taint engine carries a
//! `cross_file_bodies` field on `SsaTaintTransfer` that the cross-file
//! inline path consumes. This test guards the *availability*
//! invariant if pass 1 stops populating `bodies_by_key`, the inline
//! invariant, if pass 1 stops populating `bodies_by_key`, the inline
//! path would silently fall back to summary resolution even when
//! cross-file bodies could have given context-sensitive precision.
//!
@ -19,7 +19,7 @@ use nyx_scanner::symbol::Lang;
use nyx_scanner::utils::config::{AnalysisMode, Config};
use std::path::Path;
/// Test-local config mirror of `tests/common/mod.rs::test_config`
/// Test-local config mirror of `tests/common/mod.rs::test_config`,
/// kept inline so this file does not need to pull in the shared module
/// (which `cargo test --test cross_file_body_loading_tests` would
/// require extra wiring for).
@ -39,7 +39,7 @@ fn test_config() -> Config {
/// Replay the pass-1 body-collection logic from `scan_filesystem` on a
/// handful of files and return the resulting `GlobalSummaries`.
///
/// This mirrors the fold-body of `scan_filesystem`'s pass-1 rayon loop
/// This mirrors the fold-body of `scan_filesystem`'s pass-1 rayon loop;
/// the production code uses the same `analyse_file_fused` entry point
/// and the same `insert` / `insert_ssa` / `insert_body` trio. Keeping
/// the test close to that shape catches drift between the fused pipeline
@ -72,7 +72,7 @@ fn cross_file_body_loading_smoke_python_two_files() {
let root = tmp.path();
// `a.py` defines a helper that takes one parameter, does a trivial
// string op, and returns. The body is intentionally small we only
// string op, and returns. The body is intentionally small, we only
// care that *any* eligible body is produced, not that it has
// interesting taint content.
let a_py = root.join("a.py");
@ -133,7 +133,7 @@ fn cross_file_body_loading_smoke_python_two_files() {
body.param_count
);
// Quick sanity on the SSA shape an eligible body must have at
// Quick sanity on the SSA shape, an eligible body must have at
// least one block. Zero blocks would mean we stored an empty stub,
// which would let the inline path silently do nothing on every
// inline attempt.
@ -146,7 +146,7 @@ fn cross_file_body_loading_smoke_python_two_files() {
#[test]
fn cross_file_body_loading_empty_without_callees() {
// A single file with no inter-procedural flow is still expected to
// produce a body for its one function that's what body loading
// produce a body for its one function, that's what body loading
// enables. The *empty* case this test guards is "bodies_by_key
// returns None when no bodies are loaded," which keeps the
// threaded-through `Option` explicit for inline consumers.

View file

@ -8,8 +8,8 @@
//! findings.
//!
//! This binary is split from `cross_file_context_tests.rs` because
//! Cargo compiles each `tests/*.rs` file into its own test binary
//! separate processes so the `NYX_CONTEXT_SENSITIVE` env flip here
//! Cargo compiles each `tests/*.rs` file into its own test binary
//! (separate processes), so the `NYX_CONTEXT_SENSITIVE` env flip here
//! does not race against the default-on tests running in parallel.
//!
//! The switch is read by `AnalysisOptions::current()` via the legacy

View file

@ -6,24 +6,24 @@
//! The four fixtures under `tests/fixtures/cross_file_context_*` cover
//! the documented precision wins and guardrails:
//!
//! * `cross_file_context_two_call_sites` (Python) two calls to the same
//! * `cross_file_context_two_call_sites` (Python), two calls to the same
//! cross-file helper, one tainted and one with a constant literal.
//! Asserts the tainted call still produces a finding.
//! * `cross_file_context_callback` (JS) cross-file helper invokes a
//! * `cross_file_context_callback` (JS), cross-file helper invokes a
//! caller-side function passed as a callback. Inline re-analysis of
//! the helper must resolve the callback binding and surface the
//! flow through `child_process.exec`.
//! * `cross_file_context_sanitizer` (JS) cross-file sanitizer applied
//! * `cross_file_context_sanitizer` (JS), cross-file sanitizer applied
//! before an HTML sink. Regression guard: cross-file inline must not
//! introduce a taint finding when the sanitiser is recognised.
//! * `cross_file_context_deep_chain` (Python) A -> B -> C chain with
//! * `cross_file_context_deep_chain` (Python), A -> B -> C chain with
//! the sink in C. k=1 means B->C resolves via summary; the end-to-end
//! finding must still surface so callers cannot lose recall on deep
//! chains.
//!
//! The `bodies_by_key_populated_for_cross_file_fixtures` test is a
//! direct `GlobalSummaries`-level assertion that pass 1 loaded cross-file
//! SSA bodies for each fixture i.e. the cross-file inline path has
//! SSA bodies for each fixture, i.e. the cross-file inline path has
//! something to consult. If this assertion flips to zero, cross-file
//! inline would silently fall back to summary resolution and every
//! expectations.json check above would be driven by the less precise
@ -65,7 +65,7 @@ fn test_config() -> Config {
/// Walk a fixture directory and replay the pass-1 body collection that
/// `scan_filesystem` does, returning the merged `GlobalSummaries`.
///
/// This is used purely for the availability assertion the actual
/// This is used purely for the availability assertion, the actual
/// scans under test go through the regular `scan_no_index` entry point.
fn pass1_bodies(root: &Path) -> GlobalSummaries {
let cfg = test_config();
@ -132,7 +132,7 @@ fn cross_file_context_sanitizer() {
}
/// Three-file deep chain (A -> B -> C) with the sink in C. The
/// end-to-end flow must still surface k=1 depth cap on inline does
/// end-to-end flow must still surface, k=1 depth cap on inline does
/// not drop recall because B -> C resolves via summary.
#[test]
fn cross_file_context_deep_chain() {

View file

@ -4,18 +4,18 @@
//! Three fixtures cover distinct structural shapes of the per-return-path
//! transform:
//!
//! * `cross_file_phi_validated_branch` (Python) a callee whose two
//! * `cross_file_phi_validated_branch` (Python), a callee whose two
//! return branches are both `Identity` on the value, differing only in
//! the predicate gate. The required SQLi finding confirms the
//! summary-application path does not regress on the common "union is
//! precise enough" case.
//! * `cross_file_phi_partial_sanitiser` (JS) the callee has two
//! * `cross_file_phi_partial_sanitiser` (JS), the callee has two
//! returns with *different* transforms (Identity vs
//! StripBits(HTML_ESCAPE)). The caller invokes the unsanitised branch,
//! so the XSS sink must still fire a regression guard against a
//! so the XSS sink must still fire, a regression guard against a
//! per-path application that over-eagerly attributes sanitation across
//! all branches.
//! * `cross_file_phi_both_branches_safe` (Go) both return paths run
//! * `cross_file_phi_both_branches_safe` (Go), both return paths run
//! the same sanitising validator. The SQL sink is on the forbidden
//! list: if the per-path decomposition regresses to "either branch
//! could be raw" the caller would pick up a false positive.

View file

@ -6,10 +6,10 @@
//! clear error instead of panicking, hanging, or producing nonsense
//! findings. These tests exercise both classes of corruption:
//!
//! 1. Truncation to zero bytes SQLite treats a zero-length file as a
//! 1. Truncation to zero bytes, SQLite treats a zero-length file as a
//! fresh empty DB. We expect the indexer to bootstrap the schema and
//! carry on.
//! 2. Arbitrary garbage in the header SQLite rejects this with
//! 2. Arbitrary garbage in the header, SQLite rejects this with
//! `SQLITE_NOTADB` during pragma/schema execution. We expect the
//! indexer to return a structured error, not a panic.
//!
@ -122,7 +122,7 @@ fn zero_truncated_db_rebuilds_on_init() {
let pool = Indexer::init(&db_path)
.expect("Indexer::init should bootstrap a schema into an empty file");
// After init, the DB is empty of prior state an indexed scan should
// After init, the DB is empty of prior state, an indexed scan should
// still run end-to-end but will effectively be acting like a cold
// rebuild. We don't re-call build_index here because the plan is to
// confirm the raw init path is resilient.
@ -143,14 +143,14 @@ fn zero_truncated_db_rebuilds_on_init() {
}
/// Clobber the SQLite magic header with garbage bytes. This is the
/// "actual corruption" case SQLite rejects it with `SQLITE_NOTADB` the
/// "actual corruption" case, SQLite rejects it with `SQLITE_NOTADB` the
/// first time pragma or SQL is executed, which surfaces as
/// `NyxError::Sql(_)` from `Indexer::init`.
#[test]
fn garbage_header_db_returns_structured_error() {
let (_project_name, db_path, _project, _db_dir) = build_indexed_project();
// Write 100 bytes of `0xFF` guaranteed not to match SQLite's header
// Write 100 bytes of `0xFF`, guaranteed not to match SQLite's header
// magic "SQLite format 3\0".
clobber_header(&db_path, 0xFF, 100);
@ -186,7 +186,7 @@ fn garbage_header_db_returns_structured_error() {
// NOTE: A mid-file corruption test (garbage at bytes 100..200, preserving
// SQLite magic) was attempted and is deliberately omitted. That shape
// triggers a slow corruption-detection path in SQLite where `Indexer::init`
// takes 150200 seconds before returning unsuitable for CI wall-clock
// takes 150–200 seconds before returning, unsuitable for CI wall-clock
// budgets. The two tests above already cover the "corrupt-on-arrival"
// cases that users actually hit (crash-truncated file, deliberate clobber).
// A follow-up should either short-circuit `PRAGMA integrity_check` up

View file

@ -2,7 +2,7 @@
//! [`nyx_scanner::taint::analyse_file`] must preserve distinct flows
//! that share a source but differ on validation status or intermediate
//! variables. Historically the dedup collapsed all `(body_id, sink,
//! source)` siblings, preferring the validated one so an unguarded
//! source)` siblings, preferring the validated one, so an unguarded
//! exploit on a sibling branch was silently dropped in favour of a
//! neighbouring guarded flow.
//!
@ -35,7 +35,7 @@ fn dedup_preserves_validated_and_unvalidated_flows() {
validate_expectations(&diags, &dir);
// Load-bearing assertion: the two flows live on distinct sink
// lines (6 and 8 in the source actual lines depend on the
// lines (6 and 8 in the source, actual lines depend on the
// fixture file format, so we only assert distinct sinks).
let taint: Vec<&nyx_scanner::commands::scan::Diag> = diags
.iter()
@ -58,7 +58,7 @@ fn dedup_preserves_validated_and_unvalidated_flows() {
.collect::<Vec<_>>(),
);
// The two findings must live on different source lines if the
// The two findings must live on different source lines, if the
// engine collapses them into one, the test will fail here even
// when the count assertion above coincidentally passes (e.g. if
// a future change started emitting one validated and one
@ -73,7 +73,7 @@ fn dedup_preserves_validated_and_unvalidated_flows() {
// Every taint finding must carry a stable `finding_id` that
// downstream formatters can reference. This is the plumbing that
// feeds alternative-path cross-linking verify it is non-empty
// feeds alternative-path cross-linking, verify it is non-empty
// for every taint finding so regressions in `analyse_file`'s
// post-dedup `make_finding_id` pass surface here.
for d in &taint {
@ -87,7 +87,7 @@ fn dedup_preserves_validated_and_unvalidated_flows() {
);
}
// At least one validated/unvalidated split must be present the
// At least one validated/unvalidated split must be present, the
// whole point of the fixture is that a guarded branch and an
// unguarded branch reach `exec(input)` and both must report.
// We do not require an exact split since future sanitization
@ -103,7 +103,7 @@ fn dedup_preserves_validated_and_unvalidated_flows() {
is not behind any allowlist. Found only validated findings.",
);
// `validated` may legitimately be empty if the engine does not yet
// recognise `isWhitelisted` as a predicate the fixture is still
// recognise `isWhitelisted` as a predicate, the fixture is still
// load-bearing because the `min_count: 2` in expectations.json
// asserts both findings surface regardless of which is classified
// as validated. Drop the assertion to avoid gating the regression

View file

@ -2,9 +2,9 @@
//!
//! The scanner's two-pass pipeline runs rayon `par_iter` over files in
//! both pass-1 (summary extraction) and pass-2 (rule evaluation), and
//! merges summaries via `try_reduce`. A latent ordering bug a
//! merges summaries via `try_reduce`. A latent ordering bug, a
//! shared mutable state hit unprotected from multiple threads, or a
//! `HashMap` iteration order leaking into a finding identity can
//! `HashMap` iteration order leaking into a finding identity, can
//! surface as a diagnostic that appears with 4 workers but not with 1.
//!
//! This test runs the same fixture under worker-thread counts of 1,
@ -14,7 +14,7 @@
//! assertion fires only on real output divergence.
//!
//! If this test ever flakes, prefer investigating the engine over
//! weakening the normaliser engine-level determinism across thread
//! weakening the normaliser, engine-level determinism across thread
//! counts is load-bearing for reproducible CI runs.
mod common;

View file

@ -26,7 +26,7 @@ use nyx_scanner::rank::{compute_attack_rank, rank_diags};
// ── Diag factories ─────────────────────────────────────────────────────
/// A converged taint finding that the points-based scorer will score
/// as `Confidence::High`. Used as the "clean" baseline any delta
/// as `Confidence::High`. Used as the "clean" baseline, any delta
/// against this must come from attached engine notes.
fn high_confidence_taint_diag(path: &str, line: u32) -> Diag {
Diag {
@ -204,7 +204,7 @@ fn rank_diags_sorts_converged_above_capped_at_same_severity() {
#[test]
fn rank_diags_preserves_severity_tier_under_bail() {
// High + Bail must still outrank Medium + clean at the same
// evidence-strength baseline this is the tier-boundary invariant
// evidence-strength baseline, this is the tier-boundary invariant
// that the -8 completeness magnitude is calibrated for.
let mut high_bailed = high_confidence_taint_diag("a.rs", 1);
attach_notes(
@ -421,7 +421,7 @@ fn sarif_omits_loss_direction_for_informational_only() {
fn every_engine_note_direction_is_documented() {
// Enumerate every EngineNote variant and assert its direction.
// The intent is that a contributor adding a new variant will cause
// this test to fail to compile (no match arm) a structural guard
// this test to fail to compile (no match arm), a structural guard
// against silent misclassification.
fn check(note: EngineNote, expected: LossDirection) {
assert_eq!(

View file

@ -2,7 +2,7 @@
//! test forces a specific cap-site to fire on a tiny fixture by
//! overriding the engine's safety cap, then asserts either that the
//! corresponding observability counter moved *or* that the note
//! propagated to a produced finding whichever is the more stable
//! propagated to a produced finding, whichever is the more stable
//! signal for that cap.
mod common;
@ -19,7 +19,7 @@ use std::path::Path;
use std::sync::Mutex;
/// Process-wide atomics for cap overrides mean tests that fiddle with
/// them must run serially cargo test defaults to parallel.
/// them must run serially, cargo test defaults to parallel.
static CAP_GUARD: Mutex<()> = Mutex::new(());
fn fixture(name: &str) -> std::path::PathBuf {
@ -32,7 +32,7 @@ fn fixture(name: &str) -> std::path::PathBuf {
fn worklist_cap_trips_observability_counter() {
let _guard = CAP_GUARD.lock().unwrap_or_else(|e| e.into_inner());
// Force a very tight worklist budget so every body with > 0 blocks
// trips the cap. The observability counter is the stable signal
// trips the cap. The observability counter is the stable signal;
// note attribution to a specific finding may be lost on bodies that
// capped *before* emitting their sink event.
reset_worklist_observability();
@ -59,7 +59,7 @@ fn origins_cap_trips_observability_on_multi_source_fixture() {
// Set origins to 1 and scan a fixture with multiple top-level
// sources flowing into the same sink. Any non-trivial taint flow
// will produce at least one tainted value whose origin list hit the
// cap detected by the post-hoc saturation scan at the end of
// cap, detected by the post-hoc saturation scan at the end of
// `run_ssa_taint_internal`.
reset_origins_observability();
set_max_origins_override(1);

View file

@ -0,0 +1,125 @@
//! Integration tests for the `Cap::DATA_EXFIL` detector class.
//!
//! Validates per-cap attribution at multi-gate call sites: a single `fetch`
//! call carries both an SSRF gate (URL flow) and a DATA_EXFIL gate (body /
//! headers / json flow), and a tainted body must not surface as SSRF and
//! vice versa. Also sanity-checks the SARIF output so the new finding
//! class produces a distinct rule id.
mod common;
use common::scan_fixture_dir;
use nyx_scanner::commands::scan::Diag;
use nyx_scanner::utils::config::AnalysisMode;
use std::path::PathBuf;
/// Path of the JavaScript fixture directory: `<crate root>/tests/fixtures/js`.
///
/// The crate root comes from `CARGO_MANIFEST_DIR`, resolved at compile time.
fn js_fixture_dir() -> PathBuf {
    let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    dir.push("tests");
    dir.push("fixtures");
    dir.push("js");
    dir
}
/// Scans the whole JS fixture directory in `AnalysisMode::Full` and returns
/// only the diagnostics whose `path` ends with `file`.
fn diags_for(file: &str) -> Vec<Diag> {
    let fixture_root = js_fixture_dir();
    scan_fixture_dir(&fixture_root, AnalysisMode::Full)
        .into_iter()
        .filter(|diag| diag.path.ends_with(file))
        .collect()
}
/// A fixed-URL `fetch` with a tainted body must be reported under the
/// data-exfiltration rule and never under the generic unsanitised-flow rule.
#[test]
fn fetch_body_data_exfil_emits_data_exfil_not_ssrf() {
    let diags = diags_for("fetch_body_data_exfil.js");

    // Number of findings whose rule id starts with `prefix`.
    let count_with_prefix =
        |prefix: &str| diags.iter().filter(|d| d.id.starts_with(prefix)).count();
    // Rule ids of every finding for this fixture; only evaluated when an assertion fails.
    let reported_ids = || diags.iter().map(|d| &d.id).collect::<Vec<_>>();

    let exfil = count_with_prefix("taint-data-exfiltration");
    let plain_taint = count_with_prefix("taint-unsanitised-flow");

    assert!(
        exfil >= 1,
        "expected at least one taint-data-exfiltration finding, got 0.\n\
         Diags: {:#?}",
        reported_ids(),
    );
    assert_eq!(
        plain_taint,
        0,
        "fixed-URL fetch with tainted body must NOT emit SSRF \
         (taint-unsanitised-flow), got {plain_taint}.\n\
         Diags: {:#?}",
        reported_ids(),
    );
}
/// A `fetch` whose URL is tainted must be reported under the unsanitised-flow
/// (SSRF) rule and never under the data-exfiltration rule.
#[test]
fn fetch_ssrf_url_tainted_emits_ssrf_not_data_exfil() {
    let diags = diags_for("fetch_ssrf_url_tainted.js");

    // Number of findings whose rule id starts with `prefix`.
    let count_with_prefix =
        |prefix: &str| diags.iter().filter(|d| d.id.starts_with(prefix)).count();
    // Rule ids of every finding for this fixture; only evaluated when an assertion fails.
    let reported_ids = || diags.iter().map(|d| &d.id).collect::<Vec<_>>();

    let ssrf = count_with_prefix("taint-unsanitised-flow");
    let exfil = count_with_prefix("taint-data-exfiltration");

    assert!(
        ssrf >= 1,
        "expected at least one taint-unsanitised-flow (SSRF) finding, got 0.\n\
         Diags: {:#?}",
        reported_ids(),
    );
    assert_eq!(
        exfil,
        0,
        "tainted-URL fetch must NOT emit DATA_EXFIL, got {exfil}.\n\
         Diags: {:#?}",
        reported_ids(),
    );
}
/// The SARIF built from the JS fixture scan must list both rule ids in the
/// driver's rule table and must contain at least one result for each of them.
#[test]
fn sarif_distinguishes_data_exfil_rule_id_from_ssrf() {
    use nyx_scanner::output::build_sarif;

    const EXFIL_RULE: &str = "taint-data-exfiltration";
    const SSRF_RULE: &str = "taint-unsanitised-flow";

    let dir = js_fixture_dir();
    let diags = scan_fixture_dir(&dir, AnalysisMode::Full);
    let sarif = build_sarif(&diags, &dir);
    let run = &sarif["runs"][0];

    let rules = run["tool"]["driver"]["rules"]
        .as_array()
        .expect("SARIF rules array");
    let rule_ids: Vec<&str> = rules.iter().filter_map(|r| r["id"].as_str()).collect();
    assert!(
        rule_ids.contains(&EXFIL_RULE),
        "SARIF rules must contain taint-data-exfiltration, got: {rule_ids:?}"
    );
    assert!(
        rule_ids.contains(&SSRF_RULE),
        "SARIF rules must contain taint-unsanitised-flow, got: {rule_ids:?}"
    );

    // Each finding should reference exactly one rule, and the cap-specific
    // class must not be folded back into the generic taint bucket.
    let results = run["results"].as_array().expect("SARIF results array");
    // Number of SARIF results whose `ruleId` equals `rule`.
    let results_for = |rule: &str| {
        results
            .iter()
            .filter(|r| r["ruleId"].as_str() == Some(rule))
            .count()
    };
    let exfil_results = results_for(EXFIL_RULE);
    let ssrf_results = results_for(SSRF_RULE);
    assert!(
        exfil_results >= 1,
        "expected >= 1 SARIF result with ruleId taint-data-exfiltration, got {exfil_results}",
    );
    assert!(
        ssrf_results >= 1,
        "expected >= 1 SARIF result with ruleId taint-unsanitised-flow, got {ssrf_results}",
    );
}

View file

@ -1,7 +1,7 @@
// Regression fixture: Rust async flow through `tokio::process::Command`.
//
// Per docs/language-maturity.md, Rust's Tokio process variants are not
// yet covered the Tokio async process APIs are a known gap. The
// yet covered, the Tokio async process APIs are a known gap. The
// fixture is checked in so that when Rust async-process coverage lands,
// the engine begins producing the intended finding and the
// `forbidden_findings` assertion forces whoever adds the coverage to

View file

@ -1,7 +1,7 @@
// Target: authorization happens inside `require_owner`, which
// delegates to `require_group_member` (a configured authorization
// check name). The handler in `cross_file_helper_handler.rs`
// delegates ownership validation to this helper cross-file helper
// delegates ownership validation to this helper, cross-file helper
// lifting should recognise the call as an auth check covering the
// supplied `row`.
struct Db;

View file

@ -2,7 +2,7 @@
// produces a `DatabaseConnection` via SSA `constructor_type` (through
// `peel_identity_suffix`, which strips `.unwrap()` before matching). The
// handler then calls `conn.execute(..)`, a callee name that appears in
// neither `mutation_indicator_names` nor `read_indicator_names` for Rust
// neither `mutation_indicator_names` nor `read_indicator_names` for Rust;
// name-based classification returns `None`, so the ownership gate
// already cannot flag the call. The type-map refinement should *still*
// leave the call unflagged (the type map produces `DbMutation`, but

View file

@ -16,7 +16,7 @@ pub async fn handle_list_peer_docs(req: Req, ctx: Ctx) -> Result<String, ()> {
let user = auth::require_auth(&req, &ctx).await?;
let doc_ids: Vec<i64> = vec![1, 2, 3];
// Pure in-memory bookkeeping no authorization decision here.
// Pure in-memory bookkeeping, no authorization decision here.
let mut counts: HashMap<i64, usize> = HashMap::new();
let mut seen: HashSet<i64> = HashSet::new();
for doc_id in &doc_ids {

View file

@ -1,5 +1,5 @@
// B4 regression guard: `format_target` does NOT auth-check
// `group_id` it just constructs a string from it. The helper-lift
// `group_id`, it just constructs a string from it. The helper-lift
// pass must not synthesise a covering AuthCheck on the handler's call
// site, so the subsequent `db.exec("INSERT INTO comments …", &[group_id])`
// MUST still flag.
@ -19,7 +19,7 @@ mod auth {
}
fn format_target(group_id: i64, suffix: &str) -> String {
// No auth check here pure formatting.
// No auth check here, pure formatting.
format!("group:{}{}", group_id, suffix)
}

View file

@ -41,7 +41,7 @@ pub async fn handle_delete_doc(req: Req, ctx: Ctx, doc_id: i64) -> Result<String
return json_err("cannot delete another user's doc", 403);
}
// By construction, the row belongs to `user` so any id read from it is authorized.
// By construction, the row belongs to `user`, so any id read from it is authorized.
let group_id = existing.get_i64("group_id");
realtime::publish_to_group(group_id, "doc_deleted");
Ok("ok".into())

View file

@ -31,7 +31,7 @@ pub async fn handle_update_doc(req: Req, ctx: Ctx, doc_id: i64) -> Result<String
);
let owner_id = existing.get_i64("user_id");
// Equality compared but no early exit the check has no effect.
// Equality compared but no early exit, the check has no effect.
if owner_id != user.id {
// missing return
println!("not your doc (but proceeding anyway)");

View file

@ -5,7 +5,7 @@ mod auth { pub async fn require_auth(_r: &super::Req, _c: &super::Ctx) -> Result
// The handler's `get_peer_ids(&db, user.id)` call below must not be
// flagged. `user` is bound from `auth::require_auth(..)` so `user.id`
// is the caller's own id the call is self-referential, not a foreign
// is the caller's own id, the call is self-referential, not a foreign
// scoped id. The library-style helper below is a pass-through so its
// body contains no DB sinks (the internal `user_id` → DB flow is a
// separate pattern covered by helper-summary lifting).

View file

@ -2,7 +2,7 @@
// against an ACL table (`group_members`) with a WHERE clause that pins
// the row to the current user (`gm.user_id = ?1` bound to `user.id`).
// Every returned row is membership-gated by construction, so downstream
// uses of the row's columns (`group_id` here) are authorized the
// uses of the row's columns (`group_id` here) are authorized, the
// `realtime::publish_to_group` call MUST NOT be flagged as missing an
// ownership check after B3.
struct Ctx;

View file

@ -1,7 +1,7 @@
// B3 regression guard: the SELECT JOINs through `audit_log` (NOT in
// the configured ACL list) and the WHERE clause pins on
// `al.user_id = ?1`. The audit-log row's user is the audit subject,
// not the doc owner so this query does NOT prove caller ownership
// not the doc owner, so this query does NOT prove caller ownership
// of the returned `doc_id`. The downstream realtime publish MUST
// still flag for a missing ownership check after B3.
struct Ctx;

View file

@ -1,7 +1,7 @@
// target: authorization happens inside `validate_target`, which
// internally calls `authz::require_membership` against the same
// `group_id` the handler subsequently mutates. The current rule cannot
// see this transitively B4 lifts per-function auth-check summaries
// see this transitively, B4 lifts per-function auth-check summaries
// (which positional params are auth-checked) so the handler-level call
// to `validate_target(&db, group_id, user.id)` is recognised as an
// auth check covering `group_id`. Result: `db.exec(..)` MUST NOT flag
@ -45,7 +45,7 @@ pub async fn handle_create_comment(
let user = auth::require_auth(&req, &ctx).await?;
let db = Db;
// Authorization happens inside validate_target helper-summary
// Authorization happens inside validate_target, helper-summary
// lifting propagates the per-param auth check so this covers
// `group_id`.
validate_target(&db, group_id, user.id).await?;

View file

@ -0,0 +1,31 @@
// FP guard for `js.auth.missing_ownership_check` — JS built-in
// container receivers must not be classified as data-layer sinks.
// See `tests/benchmark/corpus/typescript/auth/safe_local_collection_receiver.ts`
// for the full real-repo distillation.
type ElementsMap = Map<string, { id: string }>;
// Receiver is typed through the `ElementsMap` alias, which is a `Map`;
// `.get(id)` is a keyed lookup on that Map.
function fromAlias(elementsMap: ElementsMap, id: string) {
return elementsMap.get(id);
}
// Receiver is annotated directly as `Map<string, string>` (no alias);
// `.get(k)` is a keyed lookup on that Map.
function fromDirectGeneric(m: Map<string, string>, k: string) {
return m.get(k);
}
// Receiver uses the `T[]` array shorthand; `.find` runs a predicate that
// compares each element's `id` with `targetId`.
function fromArrayShorthand(arr: { id: string }[], targetId: string) {
return arr.find((x) => x.id === targetId);
}
// Receiver has no parameter annotation: `cache` is created in the body with
// `new Map<string, string>()`, then written and read with literal keys.
function fromLocalConstructor() {
const cache = new Map<string, string>();
cache.set("a", "x");
return cache.get("a");
}
// `Set<string>` receiver: adds `k` when it is not already present, then
// returns the set's size.
function fromSet(visited: Set<string>, k: string) {
if (!visited.has(k)) {
visited.add(k);
}
return visited.size;
}

View file

@ -0,0 +1,16 @@
{
"required_findings": [],
"forbidden_findings": [
{ "id_prefix": "js.auth.missing_ownership_check" }
],
"noise_budget": {
"max_total_findings": 1,
"max_high_findings": 0
},
"performance_expectations": {
"max_ms_no_index": 1000,
"max_ms_index_cold": 1500,
"max_ms_index_warm": 500,
"ci_mode": "lenient"
}
}

View file

@ -0,0 +1,16 @@
{
"required_findings": [],
"forbidden_findings": [
{ "id_prefix": "rs.auth.missing_ownership_check" }
],
"noise_budget": {
"max_total_findings": 2,
"max_high_findings": 0
},
"performance_expectations": {
"max_ms_no_index": 1000,
"max_ms_index_cold": 1500,
"max_ms_index_warm": 500,
"ci_mode": "lenient"
}
}

View file

@ -0,0 +1,93 @@
// Real-repo precision guard mirroring meilisearch's index-scheduler
// shape:
// crates/index-scheduler/src/scheduler/process_snapshot_creation.rs::remove_tasks
// (`unsafe fn remove_tasks(tasks: &[Task], dst: &std::path::Path,
// index_base_map_size: usize)` plus per-loop bitmap mutations on
// destructured heed `Database` handles), plus the LocalCollection
// receiver-type cluster
// (`crates/index-scheduler/src/scheduler/enterprise_edition/network.rs::balance_shards`,
// `unsharded: RoaringBitmap`).
//
// Both engine fixes must hold: the Rust `parameter` arm in
// `collect_param_names` (only descends into `pattern`, never `type`)
// and the Rust LocalCollection type-text classifier
// (`rust_type_to_local_collection`). Without either, this file would
// produce missing-ownership-check findings on internal helpers /
// in-memory bitmap mutations.
use std::collections::{BTreeSet, HashMap, HashSet};
// Local stand-in carrying the `RoaringBitmap` type name used in the file
// header. Every method is a stub: `new` returns the unit struct, and
// `insert` / `remove` / `contains` ignore their argument and return `true`.
struct RoaringBitmap;
impl RoaringBitmap {
fn new() -> Self { Self }
fn insert(&mut self, _x: u32) -> bool { true }
fn remove(&mut self, _x: u32) -> bool { true }
fn contains(&self, _x: u32) -> bool { true }
}
// Minimal task record; `uid` is the only field and is read by `remove_tasks`.
struct Task { uid: u32 }
// Stub database handle. `delete` ignores both arguments and always returns
// `Ok(())`; it exists only so the call shape appears in `remove_tasks`.
struct Database;
impl Database {
fn delete(&self, _w: &mut u32, _u: &u32) -> Result<(), ()> { Ok(()) }
}
// Holder for two `Database` handles; `remove_tasks` destructures it into
// bare locals before calling `delete` on each.
struct TaskQueue {
all_tasks: Database,
canceled_by: Database,
}
// Rust `parameter` arm: type-segment idents (`std`, `path`, `Path`)
// must NOT pollute `unit.params` and gate user-input-evidence open.
// Declared `unsafe` although the body performs no unsafe operation; the file
// header quotes the upstream signature that this function mirrors.
unsafe fn remove_tasks(
tasks: &[Task],
dst: &std::path::Path,
sz: usize,
) -> Result<(), ()> {
// `dst` and `sz` are only part of the signature shape; binding them here
// presumably silences unused-parameter warnings.
let _ = (dst, sz);
let mut wtxn = 0u32;
let task_queue = TaskQueue {
all_tasks: Database,
canceled_by: Database,
};
// Destructure so the `delete` receivers below are plain local identifiers.
let TaskQueue { all_tasks, canceled_by } = task_queue;
for task in tasks {
all_tasks.delete(&mut wtxn, &task.uid)?;
canceled_by.delete(&mut wtxn, &task.uid)?;
}
Ok(())
}
// LocalCollection typed param: `unsharded: RoaringBitmap` resolves to
// `TypeKind::LocalCollection`, so `unsharded.insert(docid)` /
// `unsharded.remove(docid)` classify as `SinkClass::InMemoryLocal`
// (non-auth-relevant).
fn balance_shards(mut unsharded: RoaringBitmap, docid: u32) {
// `unsharded` is taken by value, so both mutations act on a bitmap owned
// by this function.
unsharded.insert(docid);
unsharded.remove(docid);
}
// `&'a mut HashMap<...>` reference + lifetime: ref-stripping must
// reach the type head.
fn store_shard_docids<'a>(
new_shard_docids: &'a mut HashMap<String, u32>,
shard: String,
docid: u32,
) {
// Plain `HashMap::insert` on the borrowed map, keyed by `shard`.
new_shard_docids.insert(shard, docid);
}
// `&mut HashSet<u64>` receiver with an id-like parameter name (`user_id`):
// the value is inserted into and then removed from the in-memory set.
fn add_user_id(ids: &mut HashSet<u64>, user_id: u64) {
ids.insert(user_id);
ids.remove(&user_id);
}
// `&mut BTreeSet<u32>` receiver: records `item_id` in the in-memory set.
fn collect_seen(seen: &mut BTreeSet<u32>, item_id: u32) {
seen.insert(item_id);
}
// Receiver is a local built with `RoaringBitmap::new()` rather than a typed
// parameter; `task_id` is inserted and the bitmap is returned.
fn build_local_set(task_id: u32) -> RoaringBitmap {
let mut s = RoaringBitmap::new();
s.insert(task_id);
s
}

View file

@ -0,0 +1,41 @@
// FP guard / panic guard — CFG condition-text truncation must be UTF-8 safe.
//
// Reproduces the gogs scan crash where a CodeMirror Gherkin tokenizer ships a
// long localised regex inside a boolean sub-condition (`stream.match(/.../) &&
// other`). When `push_condition_node` textualises the sub-expression, the
// regex literal exceeds MAX_CONDITION_TEXT_LEN (256 bytes); naive byte-slice
// truncation panicked when byte 256 landed inside a multi-byte UTF-8
// character (here Gurmukhi `ਖ`, three bytes). Engine fix in
// `src/utils/snippet.rs::truncate_at_char_boundary`, applied at three CFG
// sites + two symex display sites.
//
// Invariant: scanning this file must terminate without panicking the rayon
// worker, regardless of where byte 256 lands.
function tokenLocalisedFeatureKeyword(stream, state) {
if (
!state.inKeywordLine &&
state.allowFeature &&
stream.match(/(機能|功能|フィーチャ|기능|โครงหลัก|ความสามารถ|ความต้องการทางธุรกิจ|ಹೆಚ್ಚಳ|గుణము|ಮುಹಾಂದರಾ|ਮੁਹਾਂਦਰਾ|ਨਕਸ਼ ਨੁਹਾਰ|ਖਾਸੀਅਤ|रूप लेख|وِیژگی|خاصية|תכונה|Функціонал|Функция|Функционалност|Функционал|Үзенчәлеклелек|Свойство|Особина|Мөмкинлек|Могућност|Λειτουργία|Δυνατότητα|Właściwość|Vlastnosť|Trajto|Tính năng|Savybė|Požiadavka|Požadavek|Potrzeba biznesowa|Özellik|Osobina|Ominaisuus|Omadus|Mogućnost|Mogucnost|Jellemző|Funzionalità|Funktionalitéit|Funktionalität|Funkcja|Funkcionalnost|Funkcionalitāte|Funkcia|Fungsi|Functionaliteit|Funcționalitate|Funcţionalitate|Functionalitate|Funcionalitat|Funcionalidade|Fonctionnalité|Fitur|Fīča|Feature|Eiginleiki|Egenskap|Egenskab|Característica|Caracteristica|Business Need|Aspekt|Arwedd|Ability):/)
) {
state.inKeywordLine = true;
return "keyword";
}
return null;
}
// Sanity: also exercise the let-match-guard truncation site
// (`emit_rust_match_guard_if`) by way of a JS analogue with a CFG-relevant
// boolean chain that wraps localised text into the second branch. The CFG
// builder still has to textualise both arms.
function classify(s) {
if (
s.length > 0 &&
s.indexOf("ਨਕਸ਼ ਨੁਹਾਰ ਖਾਸੀਅਤ रूप लेख وِیژگی خاصية תכונה Функціонал Функция Функционалност Функционал Үзенчәлеклелек Свойство Особина Мөмкинлек Могућност Λειτουργία Δυνατότητα") >= 0
) {
return "localised";
}
return "ascii";
}
module.exports = { tokenLocalisedFeatureKeyword, classify };

View file

@ -0,0 +1,14 @@
{
"required_findings": [],
"forbidden_findings": [],
"noise_budget": {
"max_total_findings": 0,
"max_high_findings": 0
},
"performance_expectations": {
"max_ms_no_index": 1000,
"max_ms_index_cold": 1500,
"max_ms_index_warm": 500,
"ci_mode": "lenient"
}
}

View file

@ -0,0 +1,51 @@
"""
FP guard for FastAPI / Flask route-level dependency-injection auth.
The `dependencies=[Depends(requires_access_dag(...))]` decorator
authorises the entire handler: every value the handler receives,
every row it fetches, and every operation downstream. The
`is_route_level` flag on the injected AuthCheck tells
`auth_check_covers_subject` to short-circuit `true`, suppressing
`py.auth.missing_ownership_check` on the body's ORM calls (`filter_by`,
`scalar`, ...) and on row-variable receivers (`dag.cleanup_runs(...)`).
A bare route with no `dependencies=` keyword is a real missing-ownership-
check case, and the engine must still flag it. The vulnerable
counterpart lives in
`tests/benchmark/corpus/python/auth/vuln_fastapi_route_no_dependencies.py`.
"""
from fastapi import Depends, FastAPI
router = FastAPI()
# Dependency factory: returns the inner `check` callable, whose body is a
# stub (`...`). `method` and `access_entity` are accepted but not used here.
def requires_access_dag(method: str, access_entity=None):
def check():
...
return check
# GET route registered with a route-level `dependencies=[Depends(...)]` guard.
# The body looks up a `DagRun` by the two path parameters and raises a 404
# `HTTPException` when `session.scalar` returns None.
@router.get(
"/{dag_id}/runs/{run_id}",
dependencies=[Depends(requires_access_dag(method="GET"))],
)
def get_dag_run(dag_id: str, run_id: str, session):
"""Path params + ORM call covered by route-level guard."""
dag_run = session.scalar(
select(DagRun).filter_by(dag_id=dag_id, run_id=run_id)
)
if dag_run is None:
raise HTTPException(404, "not found")
return dag_run
# DELETE route registered with a route-level `dependencies=[Depends(...)]`
# guard. The body fetches a `DagModel` row by `dag_id`, raises a 404
# `HTTPException` when none is found, then calls a method on the fetched row.
@router.delete(
"/{dag_id}",
dependencies=[Depends(requires_access_dag(method="DELETE"))],
)
def delete_dag(dag_id: str, session):
"""Row fetch + row-variable method call covered by route-level guard."""
dag = session.scalar(select(DagModel).where(DagModel.dag_id == dag_id))
if dag is None:
raise HTTPException(404, "not found")
dag.cleanup_runs(session=session)

View file

@ -0,0 +1,16 @@
{
"required_findings": [],
"forbidden_findings": [
{ "id_prefix": "py.auth.missing_ownership_check" }
],
"noise_budget": {
"max_total_findings": 2,
"max_high_findings": 0
},
"performance_expectations": {
"max_ms_no_index": 1500,
"max_ms_index_cold": 2000,
"max_ms_index_warm": 800,
"ci_mode": "lenient"
}
}

View file

@ -0,0 +1,40 @@
// Strapi-style ORM accessor: `<obj>.db.query(MODEL_UID).<orm_method>(...)`.
// MODEL_UID is a literal model identifier (not raw SQL); the trailing
// findOne/findMany/create/update/delete/count are intrinsically
// parameterised — the actual SQL is generated by the ORM and per-call
// values arrive through field-keyed object literals the driver escapes.
//
// FP-guard: cfg-unguarded-sink and taint-unsanitised-flow must NOT
// fire on this shape.
declare const strapi: any;
// `findOne` shape: the caller-supplied `whereParams` object is passed as the
// `where` field of the options literal; the argument to `query` is a string
// literal model UID.
async function getApiToken(whereParams: Record<string, unknown>) {
return strapi.db.query('admin::api-token').findOne({
select: ['id', 'name', 'lastUsedAt'],
populate: ['permissions'],
where: whereParams,
});
}
// `findMany` shape with a fully literal `where` filter; no parameters at all.
async function listTokens() {
return strapi.db.query('admin::api-token').findMany({
where: { type: 'read-only' },
});
}
// `create` shape: the caller-supplied `data` is passed through shorthand
// property syntax in the options literal.
async function createToken(data: unknown) {
return strapi.db.query('admin::api-token').create({ data });
}
// `update` shape: `id` is nested under `where` and `data` is passed alongside
// it, both as fields of the options literal.
async function updateToken(id: number, data: unknown) {
return strapi.db.query('admin::api-token').update({ where: { id }, data });
}
// `delete` shape: `id` is passed as a field of the `where` object literal.
async function deleteToken(id: number) {
return strapi.db.query('admin::api-token').delete({ where: { id } });
}
// `count` shape: no arguments are passed to the trailing ORM method.
async function countTokens() {
return strapi.db.query('admin::api-token').count();
}

View file

@ -0,0 +1,17 @@
{
"required_findings": [],
"forbidden_findings": [
{ "id_prefix": "cfg-unguarded-sink" },
{ "id_prefix": "taint-unsanitised-flow" }
],
"noise_budget": {
"max_total_findings": 3,
"max_high_findings": 0
},
"performance_expectations": {
"max_ms_no_index": 1000,
"max_ms_index_cold": 1500,
"max_ms_index_warm": 500,
"ci_mode": "lenient"
}
}

View file

@ -0,0 +1,13 @@
// DATA_EXFIL fixture: a fixed destination URL and an attacker-influenced
// body. SSRF must NOT fire (destination is hardcoded) but `Cap::DATA_EXFIL`
// must fire on the body field — request-bound bytes are leaving the process
// via the outbound request payload.
//
// Driven by `fetch_data_exfil_integration_tests.rs`.
function leakBody(req) {
// Request-derived value read from `req.body.message`.
var payload = req.body.message;
// Destination is the string literal '/endpoint'; only the `body` option
// carries the request-derived value.
fetch('/endpoint', {
method: 'POST',
body: payload,
});
}

View file

@ -0,0 +1,10 @@
// SSRF regression fixture: attacker-controlled destination URL. SSRF must
// fire on the URL flow (arg 0) and `Cap::DATA_EXFIL` must NOT fire — the two
// classes share the callee but cap attribution is per-position so a tainted
// URL never surfaces as data exfiltration.
//
// Driven by `fetch_data_exfil_integration_tests.rs`.
function proxy(req) {
// Request-derived value read from `req.query.target`.
var target = req.query.target;
// The request-derived value is the URL (argument 0); no options object is passed.
fetch(target);
}

View file

@ -2,7 +2,7 @@ use std::env;
use std::fs;
use std::process::Command;
/// Infrastructure provisioning tool Rust core.
/// Infrastructure provisioning tool, Rust core.
/// Reads infrastructure config from environment and executes provisioning commands.
struct InfraConfig {
@ -56,7 +56,7 @@ fn apply_terraform() {
.unwrap();
}
/// Destroys infrastructure reads target from env.
/// Destroys infrastructure, reads target from env.
/// VULN: env var flows into Command
fn destroy_cluster() {
let cluster = env::var("DESTROY_TARGET").unwrap();

View file

@ -1,5 +1,9 @@
import java.sql.*;
import java.security.SecureRandom;
import org.yaml.snakeyaml.Yaml;
import org.yaml.snakeyaml.LoaderOptions;
import org.yaml.snakeyaml.constructor.SafeConstructor;
import org.apache.commons.text.StringSubstitutor;
class Negative {
// Safe: parameterized query
@ -19,4 +23,17 @@ class Negative {
void safeLiteralQuery(Statement stmt) throws Exception {
stmt.executeQuery("SELECT COUNT(*) FROM users");
}
// Safe: SnakeYAML 2.0 / explicit SafeConstructor (CVE-2022-1471 fix shape).
// The `Yaml` instance is built around a `SafeConstructor` before `load` is called on `body`.
void safeSnakeyamlSafeConstructor(String body) {
LoaderOptions opts = new LoaderOptions();
Yaml yaml = new Yaml(new SafeConstructor(opts));
Object data = yaml.load(body);
}
// Safe: empty StringSubstitutor, no interpolator factory (CVE-2022-42889 fix shape).
// The substitutor comes from the no-argument constructor, not `createInterpolator()`.
String safeStringSubstitutorPassthrough(String input) {
StringSubstitutor s = new StringSubstitutor();
return s.replace(input);
}
}

View file

@ -1,6 +1,8 @@
import java.io.*;
import java.util.Random;
import java.security.MessageDigest;
import org.yaml.snakeyaml.Yaml;
import org.apache.commons.text.StringSubstitutor;
class Positive {
// java.deser.readobject
@ -45,4 +47,16 @@ class Positive {
void triggerGetWriterPrint(javax.servlet.http.HttpServletResponse resp) throws Exception {
resp.getWriter().println("<html>" + "data" + "</html>");
}
// java.deser.snakeyaml_unsafe_constructor (CVE-2022-1471 regression guard).
// Intentionally vulnerable shape: `new Yaml()` with no constructor argument, followed by `load`.
void triggerSnakeyamlUnsafeConstructor() throws Exception {
Yaml yaml = new Yaml();
Object data = yaml.load("payload");
}
// java.code_exec.text4shell_interpolator (CVE-2022-42889 regression guard).
// Intentionally vulnerable shape: substitutor obtained from `StringSubstitutor.createInterpolator()`.
String triggerText4ShellInterpolator(String input) {
StringSubstitutor s = StringSubstitutor.createInterpolator();
return s.replace(input);
}
}

View file

@ -42,6 +42,14 @@ def trigger_yaml(data):
def trigger_sql_concat(cursor, user):
cursor.execute("SELECT * FROM users WHERE name = '" + user + "'")
# py.sqli.execute_format (f-string variant)
# Intentionally vulnerable: `user` is interpolated into the SQL text by an
# f-string passed directly to `cursor.execute`.
def trigger_sql_fstring(cursor, user):
cursor.execute(f"SELECT * FROM users WHERE name = '{user}'")
# py.sqli.text_format
# Intentionally vulnerable: `user` is interpolated by an f-string into the
# argument of `text(...)`, which is then passed to `connection.execute`.
def trigger_sqlalchemy_text_fstring(connection, user):
connection.execute(text(f"SELECT * FROM users WHERE name = '{user}'"))
# py.crypto.md5
def trigger_md5(data):
hashlib.md5(data)

View file

@ -23,7 +23,7 @@ mod auth {
// Negative control: the handler validates ownership via
// `authz::require_group_member(...)?` before the realtime publish. Phase C
// should NOT emit `rs.auth.missing_ownership_check.taint` here the
// should NOT emit `rs.auth.missing_ownership_check.taint` here, the
// sanitizer clears `UNAUTHORIZED_ID` from the argument SSA values.
pub async fn handle_publish_checked(Path(group_id): Path<i64>) -> Result<&'static str, ()> {
let user = auth::current_user();

View file

@ -1,6 +1,6 @@
{
"description": "fetch({url: taintedUrl, body: fixed}) — destination-aware object-literal case. url is tainted, must fire.",
"tags": ["taint", "ssrf", "fetch", "destination-aware", "object-config"],
"description": "fetch({url: taintedUrl, body: fixed}) — destination-aware object-literal case. url is tainted (SSRF), body is fixed. SSRF must fire and the cross-boundary data-exfiltration class (Cap::DATA_EXFIL) must NOT fire — the two classes share the callee but cap attribution is per-position.",
"tags": ["taint", "ssrf", "fetch", "destination-aware", "object-config", "cap-attribution"],
"modes": ["full"],
"expected": [
{
@ -10,6 +10,12 @@
"line_range": [6, 14],
"evidence_contains": [],
"notes": "req.query.target → fetch({url: target, ...}) — tainted destination field under object-literal shape."
},
{
"rule_id": "taint-data-exfiltration",
"must_not_match": true,
"line_range": [6, 14],
"notes": "body is a fixed literal '{}' — DATA_EXFIL must NOT fire on this site (regression guard for per-cap attribution)."
}
]
}

Some files were not shown because too many files have changed in this diff Show more