mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-21 20:18:06 +02:00
Python fp and docs updates (#58)
* refactor: Update comments for clarity and add expectations.json files for performance metrics * feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks * feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks * refactor: Simplify code formatting for better readability in multiple files * refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration * feat: Update Java and Python patterns to include new security rules * refactor: Improve comment clarity and consistency across multiple Rust files * refactor: Simplify code formatting for improved readability in integration tests and module files * refactor: Improve comment formatting and enhance clarity in assertions across multiple files
This commit is contained in:
parent
4db0805de6
commit
a438886217
291 changed files with 9485 additions and 3851 deletions
|
|
@ -187,7 +187,7 @@ fn interval_join_clamped_widens_range() {
|
|||
#[test]
|
||||
fn interval_join_identity_vs_clamped_is_top() {
|
||||
// Different flow shapes cannot be combined into a single bounded
|
||||
// form — conservative fallback is Top.
|
||||
// form, conservative fallback is Top.
|
||||
let a = IntervalTransfer::Identity;
|
||||
let b = IntervalTransfer::Clamped { lo: 0, hi: 10 };
|
||||
assert_eq!(a.join(&b), IntervalTransfer::Top);
|
||||
|
|
@ -296,7 +296,7 @@ fn transfer_apply_combines_subdomains() {
|
|||
// Interval identity forwards the caller-known bound.
|
||||
assert_eq!(out.interval.lo, Some(8080));
|
||||
assert_eq!(out.interval.hi, Some(8080));
|
||||
// String literal-prefix overrides the caller-side input — the
|
||||
// String literal-prefix overrides the caller-side input, the
|
||||
// callee's structural fact wins.
|
||||
assert_eq!(out.string.prefix.as_deref(), Some("https://safe.com/"));
|
||||
// Bit subdomain is always Top on cross-file transfer by design.
|
||||
|
|
|
|||
|
|
@ -649,7 +649,7 @@ fn hashmap_local_noise_is_clean() {
|
|||
#[test]
|
||||
fn row_ownership_equality_is_clean() {
|
||||
// `if owner_id != user.id { return ... }` is a row-level
|
||||
// ownership check — both the row-fetching call and any downstream
|
||||
// ownership check, both the row-fetching call and any downstream
|
||||
// uses of the row's fields should be considered authorized.
|
||||
assert_absent(
|
||||
"row_ownership_equality.rs",
|
||||
|
|
@ -670,7 +670,7 @@ fn row_ownership_no_early_exit_flags() {
|
|||
#[test]
|
||||
fn helper_scoped_params_is_clean() {
|
||||
// A library helper whose internal work is `result.insert(..)`
|
||||
// on a locally-constructed HashSet is not a sink — the call is
|
||||
// on a locally-constructed HashSet is not a sink, the call is
|
||||
// classified as non-sink because the receiver is the locally-bound
|
||||
// collection.
|
||||
assert_absent("helper_scoped_params.rs", "rs.auth.missing_ownership_check");
|
||||
|
|
@ -688,7 +688,7 @@ fn self_scoped_user_is_clean() {
|
|||
fn true_positive_missing_check_flags() {
|
||||
// Positive control: an authenticated handler that deletes a doc
|
||||
// and publishes against a group without any ownership/membership
|
||||
// check — must still flag.
|
||||
// check, must still flag.
|
||||
assert_has(
|
||||
"true_positive_missing_check.rs",
|
||||
"rs.auth.missing_ownership_check",
|
||||
|
|
@ -763,7 +763,7 @@ fn db_connection_type_inferred_is_clean() {
|
|||
// inferred as a `DatabaseConnection` via SSA `constructor_type`
|
||||
// (through `peel_identity_suffix`). The handler logs the caller's
|
||||
// own id; no foreign scoped id reaches the sink, so the ownership
|
||||
// gate has nothing to flag — the type-facts refinement must not
|
||||
// gate has nothing to flag, the type-facts refinement must not
|
||||
// introduce a false positive here.
|
||||
assert_absent(
|
||||
"db_connection_type_inferred.rs",
|
||||
|
|
|
|||
|
|
@ -4,13 +4,13 @@ Current baseline (2026-04-29):
|
|||
|
||||
| Metric | File-level | Rule-level | CI floor |
|
||||
|-----------|------------|------------|----------|
|
||||
| Precision | 0.991 | 0.991 | 0.861 |
|
||||
| Recall | 0.995 | 0.995 | 0.944 |
|
||||
| F1 | 0.993 | 0.993 | 0.901 |
|
||||
| Precision | 0.996 | 0.996 | 0.861 |
|
||||
| Recall | 1.000 | 1.000 | 0.944 |
|
||||
| F1 | 0.998 | 0.998 | 0.901 |
|
||||
|
||||
Corpus: 433 cases across 10 languages, 432 evaluated (1 disabled). Per-run JSON lands in `tests/benchmark/results/` (`latest.json` plus dated snapshots). See `README.md` for what the scoring modes mean and how to run a subset.
|
||||
Corpus: 451 cases across 10 languages, 449 evaluated (no disabled). Per-run JSON lands in `tests/benchmark/results/` (`latest.json` plus dated snapshots). See `README.md` for what the scoring modes mean and how to run a subset.
|
||||
|
||||
The corpus is mostly synthetic 8-20 line fixtures, one vulnerability or one safe pattern per file. A smaller real-CVE replay set under `cve_corpus/` covers 18 published CVEs across all 10 languages. Both contribute to the headline numbers.
|
||||
The corpus is mostly synthetic 8-20 line fixtures, one vulnerability or one safe pattern per file. A smaller real-CVE replay set under `cve_corpus/` covers 20 published CVEs across all 10 languages. Both contribute to the headline numbers.
|
||||
|
||||
## Real CVE coverage
|
||||
|
||||
|
|
@ -20,14 +20,19 @@ Real disclosed CVEs reduced to minimal reproducers, vulnerable + patched pair pe
|
|||
|----------------|------------|----------------------------|----------------------|-----------------|----------|
|
||||
| CVE-2023-48022 | Python | Ray | Apache-2.0 | CMDI | detected |
|
||||
| CVE-2017-18342 | Python | PyYAML | MIT | Deserialization | detected |
|
||||
| CVE-2025-69662 | Python | geopandas | BSD-3-Clause | SQL Injection | detected |
|
||||
| CVE-2026-33626 | Python | LMDeploy | Apache-2.0 | SSRF | detected |
|
||||
| CVE-2019-14939 | JavaScript | mongo-express | MIT | code_exec | detected |
|
||||
| CVE-2025-64430 | JavaScript | Parse Server | Apache-2.0 | SSRF | detected |
|
||||
| CVE-2023-26159 | TypeScript | follow-redirects | MIT | SSRF | detected |
|
||||
| GHSA-4x48-cgf9-q33f | TypeScript | Novu | MIT | SSRF | detected |
|
||||
| CVE-2022-30323 | Go | hashicorp/go-getter | MPL-2.0 | CMDI | detected |
|
||||
| CVE-2023-3188 | Go | owncast | MIT | SSRF | detected |
|
||||
| CVE-2024-31450 | Go | owncast | MIT | path_traversal | detected |
|
||||
| CVE-2015-7501 | Java | Apache Commons Collections | Apache-2.0 | Deserialization | detected |
|
||||
| CVE-2017-12629 | Java | Apache Solr | Apache-2.0 | CMDI | detected |
|
||||
| CVE-2022-1471 | Java | SnakeYAML | Apache-2.0 | Deserialization | detected |
|
||||
| CVE-2022-42889 | Java | Apache Commons Text | Apache-2.0 | code_exec | detected |
|
||||
| CVE-2013-0156 | Ruby | Ruby on Rails | MIT | Deserialization | detected |
|
||||
| CVE-2020-8130 | Ruby | Rake | MIT | CMDI | detected |
|
||||
| CVE-2017-9841 | PHP | PHPUnit | BSD-3-Clause | code_exec | detected |
|
||||
|
|
@ -60,6 +65,9 @@ Most recent first. Metrics are rule-level on the corpus size at that point.
|
|||
|
||||
| Date | Change | Corpus | P | R | F1 |
|
||||
|------------|------------------------------------------------------------------------------|--------|-------|-------|-------|
|
||||
| 2026-04-29 | Java SnakeYAML + Text4Shell patterns; CVE-2022-1471 and CVE-2022-42889 detected | 449 | 0.996 | 1.000 | 0.998 |
|
||||
| 2026-04-29 | Indirect-validator branch narrowing (`const err = validate(x); if (err) throw …;`) + helper-summary all_validated propagation; Novu GHSA-4x48-cgf9-q33f detected | 445 | 0.991 | 1.000 | 0.995 |
|
||||
| 2026-04-29 | Python f-string SQLi pattern + bindparams sanitizer + HttpClient SSRF rules; CVE-2025-69662 (geopandas) and CVE-2026-33626 (LMDeploy) detected | 439 | 0.991 | 1.000 | 0.995 |
|
||||
| 2026-04-29 | Phantom-Param-aware field suppression: CVE-2023-3188 detected, FP guards hold | 432 | 0.995 | 1.000 | 0.998 |
|
||||
| 2026-04-28 | Ruby bare `Kernel#open` CMDI sink, exact-match sigil on label matchers | 428 | 0.995 | 1.000 | 0.998 |
|
||||
| 2026-04-28 | Go SSRF/FILE_IO sink expansion (`http.DefaultClient.*`, `os.Remove`/`WriteFile`) plus Decode-writeback container op | 426 | 0.995 | 1.000 | 0.998 |
|
||||
|
|
|
|||
62
tests/benchmark/corpus/go/safe/safe_test_helper_fatal.go
Normal file
62
tests/benchmark/corpus/go/safe/safe_test_helper_fatal.go
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
// go-safe-realrepo-006 — distilled from minio cmd/admin-handlers-users_test.go
|
||||
// (and the identical pattern across xl-storage_test.go, erasure-healing_test.go,
|
||||
// 49+34+12+11+9+7+7+5 findings on minio test files alone).
|
||||
//
|
||||
// `cfg-error-fallthrough` looks for `if err != nil { … }` whose body fails to
|
||||
// terminate. Test code idiomatically writes
|
||||
//
|
||||
// if err != nil { c.Fatalf("...", err) }
|
||||
// postSink(...)
|
||||
//
|
||||
// where `c.Fatalf` (a `*testing.T` method) calls `runtime.Goexit()` and the
|
||||
// `postSink` line is unreachable on the error path. The rule classified
|
||||
// this as fall-through because `Fatalf` looks like an ordinary call. Engine
|
||||
// fix: `src/cfg_analysis/error_handling.rs::call_never_returns` recognises
|
||||
// `Fatal*`, `Panic*`, `FailNow`, `os.Exit`, `runtime.Goexit`, `log.Fatal*`,
|
||||
// `panic`, etc. as terminators inside `terminates_on_all_paths`.
|
||||
|
||||
package safe
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"os"
|
||||
"testing"
|
||||
)
|
||||
|
||||
type clientHelper struct {
|
||||
bucket string
|
||||
}
|
||||
|
||||
func (c *clientHelper) MakeBucket(ctx context.Context, name string) error { return nil }
|
||||
func (c *clientHelper) PutObject(ctx context.Context, name string) error { return nil }
|
||||
|
||||
func setupBucket(t *testing.T, c *clientHelper, ctx context.Context) {
|
||||
if err := c.MakeBucket(ctx, c.bucket); err != nil {
|
||||
t.Fatalf("bucket creat error: %v", err)
|
||||
}
|
||||
if err := c.PutObject(ctx, "obj"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func runWithExit(c *clientHelper, ctx context.Context) {
|
||||
if err := c.MakeBucket(ctx, c.bucket); err != nil {
|
||||
log.Fatalf("init failed: %v", err)
|
||||
}
|
||||
c.PutObject(ctx, "obj")
|
||||
}
|
||||
|
||||
func runWithOsExit(c *clientHelper, ctx context.Context) {
|
||||
if err := c.MakeBucket(ctx, c.bucket); err != nil {
|
||||
os.Exit(1)
|
||||
}
|
||||
c.PutObject(ctx, "obj")
|
||||
}
|
||||
|
||||
func runWithPanic(c *clientHelper, ctx context.Context) {
|
||||
if err := c.MakeBucket(ctx, c.bucket); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
c.PutObject(ctx, "obj")
|
||||
}
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
// js-safe-realrepo-006 — distilled from gogs `public/plugins/codemirror-5.17.0/
|
||||
// mode/gherkin/gherkin.js` line 107. The CodeMirror Gherkin tokenizer ships
|
||||
// localised feature-keyword aliases as one large regex inside a boolean
|
||||
// sub-condition. The CFG builder textualises every sub-condition of a
|
||||
// boolean chain and truncates that text to MAX_CONDITION_TEXT_LEN (256
|
||||
// bytes) for diagnostics; naive byte-slice truncation panicked when byte
|
||||
// 256 landed inside a multi-byte UTF-8 character (here Gurmukhi `ਖ`,
|
||||
// 3-byte UTF-8). Engine fix:
|
||||
// `src/utils/snippet.rs::truncate_at_char_boundary`, applied at three CFG
|
||||
// sites and two symex display sites. Invariant: scanning this file must
|
||||
// terminate without panicking the rayon worker, regardless of where byte
|
||||
// 256 lands inside the regex.
|
||||
|
||||
function tokenLocalisedFeatureKeyword(stream, state) {
|
||||
if (
|
||||
!state.inKeywordLine &&
|
||||
state.allowFeature &&
|
||||
stream.match(/(機能|功能|フィーチャ|기능|โครงหลัก|ความสามารถ|ความต้องการทางธุรกิจ|ಹೆಚ್ಚಳ|గుణము|ಮುಹಾಂದರಾ|ਮੁਹਾਂਦਰਾ|ਨਕਸ਼ ਨੁਹਾਰ|ਖਾਸੀਅਤ|रूप लेख|وِیژگی|خاصية|תכונה|Функціонал|Функция|Функционалност|Функционал|Үзенчәлеклелек|Свойство|Особина|Мөмкинлек|Могућност|Λειτουργία|Δυνατότητα|Właściwość|Vlastnosť|Trajto|Tính năng|Savybė|Požiadavka|Požadavek|Potrzeba biznesowa|Özellik|Osobina|Ominaisuus|Omadus|Mogućnost|Mogucnost|Jellemző|Funzionalità|Funktionalitéit|Funktionalität|Funkcja|Funkcionalnost|Funkcionalitāte|Funkcia|Fungsi|Functionaliteit|Funcționalitate|Funcţionalitate|Functionalitate|Funcionalitat|Funcionalidade|Fonctionnalité|Fitur|Fīča|Feature|Eiginleiki|Egenskap|Egenskab|Característica|Caracteristica|Business Need|Aspekt|Arwedd|Ability):/)
|
||||
) {
|
||||
state.inKeywordLine = true;
|
||||
return "keyword";
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
module.exports = { tokenLocalisedFeatureKeyword };
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
"""
|
||||
Vulnerable counterpart to safe_fastapi_route_dependencies_auth.py: same
|
||||
shape but with NO `dependencies=[Depends(...)]` keyword arg on the route
|
||||
decorator. The FastAPI ownership-check rule must still fire — the
|
||||
recognizer must not blanket-suppress every FastAPI route, only those
|
||||
with an actual dependency-injected auth check.
|
||||
"""
|
||||
from fastapi import FastAPI
|
||||
|
||||
router = FastAPI()
|
||||
|
||||
|
||||
@router.delete("/{connection_id}")
|
||||
def delete_connection(connection_id: str, session):
|
||||
"""No auth — must still fire missing_ownership_check."""
|
||||
connection = session.scalar(select(Connection).filter_by(conn_id=connection_id))
|
||||
if connection is None:
|
||||
raise HTTPException(404, "not found")
|
||||
session.delete(connection)
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
"""
|
||||
Distilled from airflow `airflow-core/src/airflow/api_fastapi/core_api/routes/public/connections.py`:
|
||||
@connections_router.delete(
|
||||
"/{connection_id}",
|
||||
dependencies=[Depends(requires_access_connection(method="DELETE"))],
|
||||
)
|
||||
def delete_connection(connection_id: str, session: SessionDep):
|
||||
connection = session.scalar(select(Connection).filter_by(conn_id=connection_id))
|
||||
...
|
||||
session.delete(connection)
|
||||
|
||||
The route's `dependencies=[Depends(requires_access_*)]` declares the auth gate at
|
||||
the FastAPI level. The ownership-check rule must recognise the dependency-
|
||||
injected check and not flag the row-fetch / mutation as missing ownership.
|
||||
"""
|
||||
from fastapi import Depends, FastAPI
|
||||
|
||||
router = FastAPI()
|
||||
|
||||
|
||||
def requires_access_connection(method: str):
|
||||
def check():
|
||||
...
|
||||
return check
|
||||
|
||||
|
||||
@router.delete(
|
||||
"/{connection_id}",
|
||||
dependencies=[Depends(requires_access_connection(method="DELETE"))],
|
||||
)
|
||||
def delete_connection(connection_id: str, session):
|
||||
connection = session.scalar(select(Connection).filter_by(conn_id=connection_id))
|
||||
if connection is None:
|
||||
raise HTTPException(404, "not found")
|
||||
session.delete(connection)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/{connection_id}",
|
||||
dependencies=[Depends(requires_access_connection(method="GET"))],
|
||||
)
|
||||
def get_connection(connection_id: str, session):
|
||||
return session.scalar(select(Connection).filter_by(conn_id=connection_id))
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
"""
|
||||
Distilled from airflow `airflow-core/src/airflow/api_fastapi/core_api/routes/public/dag_run.py`:
|
||||
|
||||
@dag_run_router.post(
|
||||
"",
|
||||
dependencies=[Depends(requires_access_dag(method="POST", access_entity=DagAccessEntity.RUN))],
|
||||
)
|
||||
def trigger_dag_run(dag_id, body, dag_bag, user, session, request):
|
||||
dm = session.scalar(select(DagModel).where(DagModel.dag_id == dag_id))
|
||||
...
|
||||
dag = get_latest_version_of_dag(dag_bag, dag_id, session)
|
||||
dag_run = dag.create_dagrun(run_id=params["run_id"], ...)
|
||||
|
||||
The route-level `dependencies=[Depends(requires_access_dag(method="POST",
|
||||
access_entity=...))]` decorator authorizes the entire handler — the
|
||||
handler body's `dag.create_dagrun(...)` call (where `dag` is a row
|
||||
fetched using the auth-checked `dag_id`) must be covered too, even
|
||||
though the call's subject is the bare row variable rather than the
|
||||
original id.
|
||||
|
||||
Before the route-level fix, `auth_check_covers_subject` walked
|
||||
`check.subjects` (empty for decorator-level checks whose inner call
|
||||
carries no per-arg ValueRef) and never matched. After the fix,
|
||||
`is_route_level=true` short-circuits coverage to true for any
|
||||
non-login-guard route-level check, suppressing both the row-fetch
|
||||
ownership flag and the downstream method-call ownership flag.
|
||||
"""
|
||||
|
||||
from fastapi import Depends, FastAPI
|
||||
|
||||
router = FastAPI()
|
||||
|
||||
|
||||
def requires_access_dag(method: str, access_entity=None):
|
||||
def check():
|
||||
...
|
||||
return check
|
||||
|
||||
|
||||
def get_latest_version_of_dag(dag_bag, dag_id, session):
|
||||
return dag_bag.get(dag_id)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/{dag_id}/runs/{run_id}",
|
||||
dependencies=[Depends(requires_access_dag(method="GET"))],
|
||||
)
|
||||
def get_dag_run(dag_id: str, run_id: str, session):
|
||||
"""
|
||||
Route-level guard authorizes the entire handler. The
|
||||
`filter_by(dag_id=dag_id, run_id=run_id)` ORM call must NOT trip
|
||||
`py.auth.missing_ownership_check` even though the per-arg subjects
|
||||
are id-shaped — the route-level decorator covers them.
|
||||
"""
|
||||
dag_run = session.scalar(
|
||||
select(DagRun).filter_by(dag_id=dag_id, run_id=run_id)
|
||||
)
|
||||
if dag_run is None:
|
||||
raise HTTPException(404, "not found")
|
||||
return dag_run
|
||||
|
||||
|
||||
@router.delete(
|
||||
"/{dag_id}",
|
||||
dependencies=[Depends(requires_access_dag(method="DELETE"))],
|
||||
)
|
||||
def delete_dag(dag_id: str, session):
|
||||
"""
|
||||
Same shape, DELETE method. The row fetch and row-variable
|
||||
method call must also be fully covered by the route-level guard.
|
||||
`dag` is fetched using the auth-checked `dag_id`; without the
|
||||
`is_route_level` short-circuit, the per-name walk would mismatch
|
||||
`dag.<method>` (subject is the row var) against the check's
|
||||
empty subjects vec.
|
||||
"""
|
||||
dag = session.scalar(select(DagModel).where(DagModel.dag_id == dag_id))
|
||||
if dag is None:
|
||||
raise HTTPException(404, "not found")
|
||||
dag.cleanup_runs(session=session)
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
"""
|
||||
Distilled from airflow `tests/unit/models/test_backfill.py` and
|
||||
`providers/google/tests/unit/google/cloud/hooks/test_dlp.py`: pytest test
|
||||
methods that take a SQLAlchemy `session` fixture by name and call
|
||||
`session.commit()` / `session.add(...)` / `session.scalar(...)`.
|
||||
|
||||
Bare `session.<sqlalchemy_verb>` was previously classified as auth Session
|
||||
context, which triggered `unit_has_user_input_evidence` even though the
|
||||
test function takes no user input — the `session` fixture is the
|
||||
SQLAlchemy ORM Session, not the auth/HTTP session. After the engine
|
||||
classifier narrowing, only `session.<identity_field>` (`session.user`,
|
||||
`session.user_id`, ...) is treated as auth context; SQLAlchemy verbs
|
||||
do not contribute user-input evidence on their own.
|
||||
"""
|
||||
|
||||
|
||||
def test_reverse_and_depends_on_past_fails(dep_on_past, dag_maker, session):
|
||||
with dag_maker() as dag:
|
||||
pass
|
||||
session.commit()
|
||||
b = _create_backfill(
|
||||
dag_id=dag.dag_id,
|
||||
from_date="2021-01-01",
|
||||
to_date="2021-01-05",
|
||||
)
|
||||
if dep_on_past:
|
||||
assert b is None
|
||||
|
||||
|
||||
def test_create_deidentify_template_with_org_id(self, get_conn, mock_project_id):
|
||||
get_conn.return_value.create_deidentify_template.return_value = {}
|
||||
result = self.hook.create_deidentify_template(organization_id="ORG_ID")
|
||||
assert result == {}
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
// produces a `DatabaseConnection` via SSA `constructor_type` (through
|
||||
// `peel_identity_suffix`, which strips `.unwrap()` before matching). The
|
||||
// handler then calls `conn.execute(..)`, a callee name that appears in
|
||||
// neither `mutation_indicator_names` nor `read_indicator_names` for Rust —
|
||||
// neither `mutation_indicator_names` nor `read_indicator_names` for Rust,
|
||||
// name-based classification returns `None`, so the ownership gate
|
||||
// already cannot flag the call. The type-map refinement should *still*
|
||||
// leave the call unflagged (the type map produces `DbMutation`, but
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ pub async fn handle_list_peer_docs(req: Req, ctx: Ctx) -> Result<String, ()> {
|
|||
let user = auth::require_auth(&req, &ctx).await?;
|
||||
let doc_ids: Vec<i64> = vec![1, 2, 3];
|
||||
|
||||
// Pure in-memory bookkeeping — no authorization decision here.
|
||||
// Pure in-memory bookkeeping, no authorization decision here.
|
||||
let mut counts: HashMap<i64, usize> = HashMap::new();
|
||||
let mut seen: HashSet<i64> = HashSet::new();
|
||||
for doc_id in &doc_ids {
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
// the row by id first to obtain the resource it needs to authorize, then
|
||||
// calls a named authorization function on the fetched row. This is the
|
||||
// canonical pattern in Lemmy's Actix handlers (and most row-level Rails /
|
||||
// Django authz code) — the authorization check appears textually after the
|
||||
// Django authz code), the authorization check appears textually after the
|
||||
// fetch but is the first thing the function does on the row.
|
||||
|
||||
use std::result::Result;
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ pub async fn handle_delete_doc(req: Req, ctx: Ctx, doc_id: i64) -> Result<String
|
|||
return json_err("cannot delete another user's doc", 403);
|
||||
}
|
||||
|
||||
// By construction, the row belongs to `user` — so any id read from it is authorized.
|
||||
// By construction, the row belongs to `user`, so any id read from it is authorized.
|
||||
let group_id = existing.get_i64("group_id");
|
||||
realtime::publish_to_group(group_id, "doc_deleted");
|
||||
Ok("ok".into())
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ pub async fn handle_update_doc(req: Req, ctx: Ctx, doc_id: i64) -> Result<String
|
|||
);
|
||||
let owner_id = existing.get_i64("user_id");
|
||||
|
||||
// Equality compared but no early exit — the check has no effect.
|
||||
// Equality compared but no early exit, the check has no effect.
|
||||
if owner_id != user.id {
|
||||
// missing return
|
||||
println!("not your doc (but proceeding anyway)");
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
// Phase 6 D05: an Axum `Json<UpdateDoc>` extractor whose `doc_id`
|
||||
// field is declared as `i64`. The DTO field-level taint analysis
|
||||
// proves the value reaching `db.exec` is numeric and exempts
|
||||
// `dto.doc_id` from the auth subject classifier — the rule must NOT
|
||||
// `dto.doc_id` from the auth subject classifier, the rule must NOT
|
||||
// fire because numeric DTO fields cannot bypass ownership.
|
||||
use axum::extract::Json;
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,70 @@
|
|||
// Function-parameter type annotations naming an in-memory container
|
||||
// (`RoaringBitmap`, `HashMap<K, V>`, `HashSet<T>`, ...) classify the
|
||||
// receiver as `TypeKind::LocalCollection`, which the auth analyser
|
||||
// maps to `SinkClass::InMemoryLocal` (always non-auth-relevant).
|
||||
// Without this, the verb-name dispatch (`is_mutation: insert/remove`)
|
||||
// classified `unsharded.insert(docid)` /
|
||||
// `task_ids.insert(task_id)` as `DbMutation` and fired
|
||||
// `missing_ownership_check` whenever the function had at least one
|
||||
// id-shaped parameter to pass `unit_has_user_input_evidence`.
|
||||
//
|
||||
// Cluster surfaced from
|
||||
// meilisearch/index-scheduler/src/scheduler/enterprise_edition/network.rs::balance_shards
|
||||
// (`unsharded: RoaringBitmap` typed parameter) and same-pattern
|
||||
// helpers across the index-scheduler.
|
||||
|
||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||
|
||||
struct RoaringBitmap;
|
||||
impl RoaringBitmap {
|
||||
fn new() -> Self { Self }
|
||||
fn insert(&mut self, _x: u32) -> bool { true }
|
||||
fn remove(&mut self, _x: u32) -> bool { true }
|
||||
fn contains(&self, _x: u32) -> bool { true }
|
||||
}
|
||||
|
||||
// 1. Bare-typed RoaringBitmap parameter, function has id-like param
|
||||
// `docid` so user-input-evidence fires; the receiver type proves
|
||||
// the operation is in-memory bookkeeping.
|
||||
fn balance_shards(mut unsharded: RoaringBitmap, docid: u32) {
|
||||
unsharded.insert(docid);
|
||||
unsharded.remove(docid);
|
||||
}
|
||||
|
||||
// 2. `&mut RoaringBitmap` reference, ref-stripping must reach the
|
||||
// underlying type head.
|
||||
fn process_docids(docids: &mut RoaringBitmap, docid: u32) {
|
||||
docids.insert(docid);
|
||||
docids.remove(docid);
|
||||
let _ = docids.contains(docid);
|
||||
}
|
||||
|
||||
// 3. Lifetime-annotated reference: `&'a mut HashMap<...>`.
|
||||
// Module-path prefix would also be dropped; head matches `HashMap`.
|
||||
fn store_shard_docids<'a>(
|
||||
new_shard_docids: &'a mut HashMap<String, u32>,
|
||||
shard: String,
|
||||
docid: u32,
|
||||
) {
|
||||
new_shard_docids.insert(shard, docid);
|
||||
}
|
||||
|
||||
// 4. Std-collection HashSet typed param.
|
||||
fn add_user_id(ids: &mut HashSet<u64>, user_id: u64) {
|
||||
ids.insert(user_id);
|
||||
ids.remove(&user_id);
|
||||
}
|
||||
|
||||
// 5. Local var bound from constructor, already covered, but pinned
|
||||
// here as a regression guard for the `RoaringBitmap::new()`
|
||||
// constructor entry.
|
||||
fn build_local_set(task_id: u32) -> RoaringBitmap {
|
||||
let mut s = RoaringBitmap::new();
|
||||
s.insert(task_id);
|
||||
s
|
||||
}
|
||||
|
||||
// 6. BTreeSet typed param.
|
||||
fn collect_seen(seen: &mut BTreeSet<u32>, item_id: u32) {
|
||||
seen.insert(item_id);
|
||||
}
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
// Real-repo motivation (lemmy `LocalUserView` extractor).
|
||||
//
|
||||
// Lemmy's authenticated-actor extractor type is named `LocalUserView`
|
||||
// — every route handler signature is
|
||||
// and every route handler signature is
|
||||
// `pub async fn handler(.., local_user_view: LocalUserView)`. The
|
||||
// previous exact-name list in `is_self_actor_type_text`
|
||||
// (`CurrentUser`, `SessionUser`, `AuthUser`, `AdminUser`,
|
||||
|
|
@ -44,7 +44,7 @@ pub async fn write_self_note(
|
|||
pool: &mut Pool,
|
||||
local_user_view: LocalUserView,
|
||||
) -> Result<(), ()> {
|
||||
// Login predicate on the actor itself — subject is the actor.
|
||||
// Login predicate on the actor itself, subject is the actor.
|
||||
// No additional ownership check needed because the subject is the
|
||||
// caller's own row.
|
||||
let _ = is_admin(&local_user_view);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,78 @@
|
|||
// Internal helper whose parameter list contains type-segment idents
|
||||
// that lowercase-match the framework-request-name allow-list (`path`,
|
||||
// `request`, `ctx`, `body`). Before the
|
||||
// `collect_param_names` Rust-parameter arm, the recursive default arm
|
||||
// pulled `std`, `path`, `Path` out of `dst: &std::path::Path` and
|
||||
// pushed them into `unit.params`; `path` then matched the
|
||||
// framework-name list and gated `unit_has_user_input_evidence` open,
|
||||
// firing `missing_ownership_check` at every id-shaped operation in
|
||||
// the body.
|
||||
//
|
||||
// Cluster surfaced from
|
||||
// meilisearch/index-scheduler/src/scheduler/process_snapshot_creation.rs::remove_tasks
|
||||
// (`unsafe fn remove_tasks(tasks: &[Task], dst: &std::path::Path,
|
||||
// index_base_map_size: usize)`). None of the actual params (`tasks`,
|
||||
// `dst`, `sz`) match the user-input-evidence heuristic, so the rule
|
||||
// must NOT fire on the internal task-cleanup loop.
|
||||
|
||||
struct Task {
|
||||
uid: u32,
|
||||
}
|
||||
|
||||
struct Database;
|
||||
|
||||
impl Database {
|
||||
fn delete(&self, _w: &mut u32, _u: &u32) -> Result<(), ()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
struct TaskQueue {
|
||||
all_tasks: Database,
|
||||
canceled_by: Database,
|
||||
}
|
||||
|
||||
fn remove_tasks(
|
||||
tasks: &[Task],
|
||||
dst: &std::path::Path,
|
||||
sz: usize,
|
||||
) -> Result<(), ()> {
|
||||
let _ = (dst, sz);
|
||||
let mut wtxn = 0u32;
|
||||
let task_queue = TaskQueue {
|
||||
all_tasks: Database,
|
||||
canceled_by: Database,
|
||||
};
|
||||
let TaskQueue {
|
||||
all_tasks,
|
||||
canceled_by,
|
||||
} = task_queue;
|
||||
for task in tasks {
|
||||
all_tasks.delete(&mut wtxn, &task.uid)?;
|
||||
canceled_by.delete(&mut wtxn, &task.uid)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Same shape with a typed wrapper whose tail segment lowercases to
|
||||
// `path` (`PathBuf` → `pathbuf` does NOT match, but `Path` does).
|
||||
// Confirms the Rust `parameter` arm in `collect_param_names` keeps
|
||||
// `Path` out of `unit.params` even when wrapped in a generic.
|
||||
|
||||
struct Wrapper<T>(T);
|
||||
struct PathHandle;
|
||||
struct Item {
|
||||
uid: u32,
|
||||
}
|
||||
struct Repo;
|
||||
impl Repo {
|
||||
fn delete(&self, _u: &u32) {}
|
||||
}
|
||||
|
||||
fn cleanup_internal(out: Wrapper<PathHandle>, items: &[Item]) {
|
||||
let _ = out;
|
||||
let repo = Repo;
|
||||
for item in items {
|
||||
repo.delete(&item.uid);
|
||||
}
|
||||
}
|
||||
|
|
@ -4,7 +4,7 @@
|
|||
// (the call body wraps onto the next line for readability). Before
|
||||
// the line-counting fix, `row_population_data` recorded the
|
||||
// `let_declaration`'s start row while `op.line` saw the inner call's
|
||||
// start row — they differed by one and the row-fetch exemption
|
||||
// start row, they differed by one and the row-fetch exemption
|
||||
// missed. Recording the **call**'s start line aligns the two and
|
||||
// the exemption fires for the multi-line shape too.
|
||||
|
||||
|
|
@ -52,7 +52,7 @@ pub async fn lock_comment(
|
|||
let comment_id = req.comment_id;
|
||||
let local_instance_id = local_user_view.person.instance_id;
|
||||
|
||||
// Multi-line let — the let_declaration starts on this line, but
|
||||
// Multi-line let, the let_declaration starts on this line, but
|
||||
// the inner `CommentView::read(..)` call starts on the next line.
|
||||
// `op.line` for the read sink is the call's line, not the let's.
|
||||
let orig_comment =
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
// `let community = Community::read(pool, req.community_id)` records
|
||||
// `community → [req.community_id]` in `row_population_data`. An auth
|
||||
// check `check_community_user_action(&user, &community, ..)` then
|
||||
// authorises the row — and any **downstream** operation that re-uses
|
||||
// authorises the row, and any **downstream** operation that re-uses
|
||||
// `req.community_id` (a later mutation by the same id, or a related
|
||||
// view fetched by the same id) is materially covered by that check.
|
||||
//
|
||||
|
|
@ -71,7 +71,7 @@ pub async fn transfer_community(
|
|||
pool: &mut Pool,
|
||||
local_user_view: LocalUserView,
|
||||
) -> Result<(), ()> {
|
||||
// Row fetch — `community` is populated from `req.community_id`.
|
||||
// Row fetch, `community` is populated from `req.community_id`.
|
||||
let community = Community::read(pool, req.community_id)?;
|
||||
|
||||
// Authorisation check on the fetched row. Subject = `community`
|
||||
|
|
@ -84,7 +84,7 @@ pub async fn transfer_community(
|
|||
// the row that was fetched with this id).
|
||||
CommunityActions::delete_mods_for_community(pool, req.community_id)?;
|
||||
|
||||
// Local alias of the same request field — `var_alias_chain`
|
||||
// Local alias of the same request field, `var_alias_chain`
|
||||
// records `community_id → "req.community_id"` so the reverse-walk
|
||||
// also covers downstream sinks that pass the bare alias. Before
|
||||
// the alias-chain fix, the next read fired
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
// Phase 5 typed-extractor exclusion: an Axum-style `Path<i64>`
|
||||
// parameter is a framework-validated numeric extractor. The runtime
|
||||
// guarantees a numeric value, so even though `project_id` reaches a
|
||||
// SQL helper, the rule must NOT fire — the value cannot carry an
|
||||
// SQL helper, the rule must NOT fire: the value cannot carry an
|
||||
// injection payload nor bypass ownership.
|
||||
use axum::extract::Path;
|
||||
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ mod serde_json {
|
|||
}
|
||||
|
||||
// Real-repo shape from website/src/handlers/social.rs:
|
||||
// `realtime::publish_to_user(&ctx.env, &user.email, ...)` — publish
|
||||
// `realtime::publish_to_user(&ctx.env, &user.email, ...)` publishes
|
||||
// to the authed user's OWN channel keyed by their email. The
|
||||
// `email` / `username` / `handle` fields of a self-actor binding
|
||||
// reference the actor's own identity, just like `id` / `user_id`,
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ mod auth { pub async fn require_auth(_r: &super::Req, _c: &super::Ctx) -> Result
|
|||
|
||||
// The handler's `get_peer_ids(&db, user.id)` call below must not be
|
||||
// flagged. `user` is bound from `auth::require_auth(..)` so `user.id`
|
||||
// is the caller's own id — the call is self-referential, not a foreign
|
||||
// is the caller's own id; the call is self-referential, not a foreign
|
||||
// scoped id. The library-style helper below is a pass-through so its
|
||||
// body contains no DB sinks (the internal `user_id` → DB flow is a
|
||||
// separate pattern covered by helper-summary lifting).
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
// against an ACL table (`group_members`) with a WHERE clause that pins
|
||||
// the row to the current user (`gm.user_id = ?1` bound to `user.id`).
|
||||
// Every returned row is membership-gated by construction, so downstream
|
||||
// uses of the row's columns (`group_id` here) are authorized — the
|
||||
// uses of the row's columns (`group_id` here) are authorized; the
|
||||
// `realtime::publish_to_group` call MUST NOT be flagged as missing an
|
||||
// ownership check after B3.
|
||||
struct Ctx;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
// target: authorization happens inside `validate_target`, which
|
||||
// internally calls `authz::require_membership` against the same
|
||||
// `group_id` the handler subsequently mutates. The current rule cannot
|
||||
// see this transitively — B4 lifts per-function auth-check summaries
|
||||
// see this transitively; B4 lifts per-function auth-check summaries
|
||||
// (which positional params are auth-checked) so the handler-level call
|
||||
// to `validate_target(&db, group_id, user.id)` is recognised as an
|
||||
// auth check covering `group_id`. Result: `db.exec(..)` MUST NOT flag
|
||||
|
|
@ -45,7 +45,7 @@ pub async fn handle_create_comment(
|
|||
let user = auth::require_auth(&req, &ctx).await?;
|
||||
let db = Db;
|
||||
|
||||
// Authorization happens inside validate_target — helper-summary
|
||||
// Authorization happens inside validate_target; helper-summary
|
||||
// lifting propagates the per-param auth check so this covers
|
||||
// `group_id`.
|
||||
validate_target(&db, group_id, user.id).await?;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
// Phase 6 D06 (negative): same DTO shape as
|
||||
// `safe_dto_int_field_axum.rs` but the flow uses the `doc_id` field
|
||||
// whose declared type is `String`. Phase 6 must NOT exempt the
|
||||
// member-access subject — String DTO fields can carry an injection
|
||||
// member-access subject: String DTO fields can carry an injection
|
||||
// payload, so the auth rule must continue to fire.
|
||||
use axum::extract::Json;
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,28 @@
|
|||
// Vulnerable counterpart to `safe_local_collection_param_types.rs`
|
||||
// and `safe_param_type_segment_idents.rs`. Proves the LocalCollection
|
||||
// receiver-type override and the Rust `parameter` arm in
|
||||
// `collect_param_names` don't blanket-suppress real handlers that mix
|
||||
// in-memory containers with persistent-store calls (`db.update`).
|
||||
// Scoped identifier (`req.target_user_id`) flows into a real DB
|
||||
// mutation with no preceding ownership check; the rule must still fire.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
struct DocumentRequest {
|
||||
target_user_id: u64,
|
||||
new_owner: u64,
|
||||
}
|
||||
|
||||
struct DbConnection;
|
||||
impl DbConnection {
|
||||
fn update_owner(&self, _doc_id: u64, _owner: u64) {}
|
||||
}
|
||||
|
||||
// `cache: &mut HashMap<u64, String>` is a local container; its
|
||||
// mutations are non-auth-relevant. But `db.update_owner` is a
|
||||
// real persistent-store write, classified as `DbMutation`, and the
|
||||
// handler still has no auth check on `req.target_user_id`.
|
||||
async fn change_owner(req: DocumentRequest, cache: &mut HashMap<u64, String>, db: DbConnection) {
|
||||
cache.remove(&req.target_user_id); // local container op, OK
|
||||
db.update_owner(req.target_user_id, req.new_owner); // <-- IDOR sink
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Vulnerable counterpart to `row_fetch_then_authorize.rs` — the row is
|
||||
// Vulnerable counterpart to `row_fetch_then_authorize.rs`: the row is
|
||||
// fetched by user-supplied id but no authorization function names it.
|
||||
// The row-fetch exemption must NOT fire here; the rule should still
|
||||
// flag the read as missing an ownership/membership check.
|
||||
|
|
|
|||
|
|
@ -33,12 +33,12 @@ pub async fn transfer_community(
|
|||
req: TransferCommunity,
|
||||
pool: &mut Pool,
|
||||
) -> Result<(), ()> {
|
||||
// Row fetch — populates `community → [req.community_id]` — but
|
||||
// Row fetch populates `community → [req.community_id]`, but
|
||||
// no `check_*_action(&user, &community, ..)` follows.
|
||||
let _community = Community::read(pool, req.community_id)?;
|
||||
|
||||
// Mutation by id with no preceding ownership/membership check.
|
||||
// This is the genuine IDOR — must flag.
|
||||
// This is the genuine IDOR; the rule must flag it.
|
||||
CommunityActions::delete_mods_for_community(pool, req.community_id)?;
|
||||
|
||||
Ok(())
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ use rusqlite::Connection;
|
|||
|
||||
fn main() {
|
||||
let user_id = env::var("USER_ID").unwrap();
|
||||
// Rejecting shell metacharacters does NOT make SQL injection safe —
|
||||
// Rejecting shell metacharacters does NOT make SQL injection safe;
|
||||
// the metachar gate only covers shell-family sinks.
|
||||
if user_id.contains(";") || user_id.contains("|") {
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
// rs-path-006: Negative-case guard for PathFact.
|
||||
//
|
||||
// No sanitiser and no narrowing — PathFact stays Top on every axis, so
|
||||
// No sanitiser and no narrowing: PathFact stays Top on every axis, so
|
||||
// the FILE_IO sink MUST fire. This fixture guards against PathFact
|
||||
// over-suppression sneaking into `is_path_safe_for_sink`.
|
||||
use std::env;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,96 @@
|
|||
// Real-repo shape from excalidraw's element manipulation libraries
|
||||
// (`packages/element/src/binding.ts`, `frame.ts`, `duplicate.ts`,
|
||||
// `DebugCanvas.tsx`). In a pure data-manipulation function whose
|
||||
// receiver is a JS built-in collection (`Map`, `Set`, `WeakMap`,
|
||||
// `WeakSet`, `Array`) — either declared inline (`new Map()`),
|
||||
// annotated directly (`m: Map<K, V>`), or aliased via a same-file
|
||||
// `type X = Map<K, V>` — the call site is a container operation,
|
||||
// not a data-layer read/mutation, and `js.auth.missing_ownership_check`
|
||||
// must not flag.
|
||||
//
|
||||
// Closes the excalidraw FP cluster (66 → ~9 on
|
||||
// `js.auth.missing_ownership_check`). The fix lives at the deepest
|
||||
// representable layer: SSA `TypeFacts::constructor_type` recognises
|
||||
// `new Map()` / `new Set()` constructors as
|
||||
// `TypeKind::LocalCollection`; `cfg::params::ts_type_to_local_collection`
|
||||
// extends `classify_param_type_ts` so explicitly-typed params resolve
|
||||
// to `LocalCollection` independent of NestJS decorator presence;
|
||||
// `cfg::dto::collect_type_alias_local_collections` populates a
|
||||
// per-file `TYPE_ALIAS_LC` set so same-file `type X = Map<...>`
|
||||
// aliases also resolve. The auth analyser already exempts
|
||||
// `LocalCollection`-typed receivers via
|
||||
// `auth_analysis::sink_class_for_type → InMemoryLocal`.
|
||||
|
||||
type ElementsMap = Map<string, { id: string; frameId?: string }>;
|
||||
type IdMap = Map<string, string>;
|
||||
type GroupSet = Set<string>;
|
||||
type ElementArray = readonly { id: string }[];
|
||||
|
||||
interface BindingFix {
|
||||
elementId: string;
|
||||
}
|
||||
|
||||
// ── 1. Direct Map<...> annotation on a parameter ────────────────────
|
||||
function lookupBinding(
|
||||
binding: BindingFix,
|
||||
origIdToDuplicateId: Map<string, string>,
|
||||
): string | undefined {
|
||||
return origIdToDuplicateId.get(binding.elementId);
|
||||
}
|
||||
|
||||
// ── 2. Same-file `type X = Map<...>` alias ─────────────────────────
|
||||
function debugRender(elementsMap: ElementsMap, id: string) {
|
||||
const bindable = elementsMap.get(id);
|
||||
if (!bindable) return null;
|
||||
return bindable;
|
||||
}
|
||||
|
||||
// ── 3. Set / WeakMap / WeakSet annotation ──────────────────────────
|
||||
function trackVisited(visited: Set<string>, key: string) {
|
||||
if (!visited.has(key)) {
|
||||
visited.add(key);
|
||||
}
|
||||
return visited.size;
|
||||
}
|
||||
|
||||
function rememberElement(
|
||||
cache: WeakMap<object, string>,
|
||||
obj: object,
|
||||
v: string,
|
||||
) {
|
||||
cache.set(obj, v);
|
||||
return cache.get(obj);
|
||||
}
|
||||
|
||||
// ── 4. Array generics (`T[]`, `Array<T>`, `ReadonlyArray<T>`) ──────
|
||||
function findItemArr(arr: { id: string }[], targetId: string) {
|
||||
return arr.find((x) => x.id === targetId);
|
||||
}
|
||||
|
||||
function findItemReadonly(arr: ElementArray, targetId: string) {
|
||||
return arr.find((x) => x.id === targetId);
|
||||
}
|
||||
|
||||
function findItemGeneric(arr: Array<string>, v: string) {
|
||||
return arr.find((x) => x === v);
|
||||
}
|
||||
|
||||
// ── 5. Local `new Map()` / `new Set()` constructors ────────────────
|
||||
function buildIndex(items: { id: string; v: string }[]) {
|
||||
const idx = new Map<string, string>();
|
||||
for (const it of items) {
|
||||
idx.set(it.id, it.v);
|
||||
}
|
||||
return idx.get(items[0]?.id ?? "");
|
||||
}
|
||||
|
||||
// ── 6. Second same-file alias (`IdMap` aliases `Map` directly) ─────
|
||||
function aliasOfAlias(m: IdMap, k: string) {
|
||||
return m.get(k);
|
||||
}
|
||||
|
||||
// ── 7. Set with `add` / `has` (mutation-side) ──────────────────────
|
||||
function trackGroup(groups: GroupSet, g: string) {
|
||||
groups.add(g);
|
||||
return groups.has(g);
|
||||
}
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
// Vulnerable counterpart to `safe_local_collection_receiver.ts`.
|
||||
//
|
||||
// Pinned to prove the LocalCollection-receiver fix does NOT
|
||||
// blanket-suppress missing-ownership findings on real DB / API
|
||||
// receivers that happen to share method names (`get`, `find`, `set`)
|
||||
// with JS built-in collections. When the receiver type is a real
|
||||
// `Prisma` / `Repository` / `db` chain — not a tracked Map / Set /
|
||||
// Array — the auth analyser must still fire.
|
||||
|
||||
interface PrismaClient {
|
||||
user: {
|
||||
findUnique(args: { where: { id: string } }): Promise<{ id: string } | null>;
|
||||
update(args: { where: { id: string }; data: object }): Promise<void>;
|
||||
};
|
||||
}
|
||||
|
||||
declare const prisma: PrismaClient;
|
||||
|
||||
// User passes an attacker-controlled id. No prior auth check; receiver
|
||||
// is a Prisma client (NOT a Map / Set / Array), so the missing-ownership
|
||||
// rule must fire on `prisma.user.findUnique`.
|
||||
export async function dangerousFetch(targetUserId: string) {
|
||||
return prisma.user.findUnique({ where: { id: targetUserId } });
|
||||
}
|
||||
|
||||
export async function dangerousMutate(targetUserId: string, data: object) {
|
||||
return prisma.user.update({ where: { id: targetUserId }, data });
|
||||
}
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
// Helper-summary all_validated propagation (precision regression
|
||||
// guard). The helper performs an indirect-validator check on
|
||||
// `child.webhookUrl` and throws on failure; callers passing tainted
|
||||
// `child` should NOT see the helper's `param_to_sink` summary refire
|
||||
// because the validator inside the helper proved the path safe.
|
||||
//
|
||||
// Pinned by tests/lib::helper_with_validator_does_not_propagate_to_caller_via_summary.
|
||||
|
||||
import express, { Request, Response } from 'express';
|
||||
import axios from 'axios';
|
||||
|
||||
interface IWebhookFilterPart {
|
||||
webhookUrl?: string;
|
||||
}
|
||||
|
||||
declare function validateUrlSsrf(url: string): Promise<string | null>;
|
||||
|
||||
async function getWebhookResponse(child: IWebhookFilterPart) {
|
||||
const ssrfError = await validateUrlSsrf(child.webhookUrl);
|
||||
if (ssrfError) {
|
||||
throw new Error('blocked');
|
||||
}
|
||||
return await axios.post(child.webhookUrl, {});
|
||||
}
|
||||
|
||||
const app = express();
|
||||
app.use(express.json());
|
||||
|
||||
app.post('/run', async (req: Request, res: Response) => {
|
||||
const child: IWebhookFilterPart = req.body.filter;
|
||||
const r = await getWebhookResponse(child);
|
||||
res.json({ r });
|
||||
});
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
// Indirect-validator branch narrowing (precision regression guard).
|
||||
// Pattern: `const err = validateXxx(input); if (err) throw …;` —
|
||||
// the validator's input is treated as validated on the success
|
||||
// branch, so the downstream sink does not refire.
|
||||
//
|
||||
// Pinned by tests/lib::indirect_validator_narrowing_marks_arg_validated.
|
||||
|
||||
import express, { Request, Response } from 'express';
|
||||
import axios from 'axios';
|
||||
|
||||
declare function validateUrlSsrf(url: string): Promise<string | null>;
|
||||
|
||||
const app = express();
|
||||
|
||||
app.get('/proxy', async (req: Request, res: Response) => {
|
||||
const target = req.query.url as string;
|
||||
const ssrfError = await validateUrlSsrf(target);
|
||||
if (ssrfError) {
|
||||
throw new Error('blocked');
|
||||
}
|
||||
const response = await axios.get(target);
|
||||
res.send(response.data);
|
||||
});
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
// Strapi-style ORM accessor chain — `<obj>.db.query(MODEL_UID).<orm_method>(...)`.
|
||||
// MODEL_UID is a literal model identifier, not raw SQL; the trailing
|
||||
// findOne/findMany/create/update/delete/count are intrinsically parameterised
|
||||
// by the ORM (per-call values arrive through field-keyed object literals
|
||||
// that the driver escapes). Should NOT fire as a SQL-injection sink.
|
||||
|
||||
declare const strapi: any;
|
||||
|
||||
async function getApiToken(whereParams: Record<string, unknown>) {
|
||||
const token = await strapi.db.query('admin::api-token').findOne({
|
||||
select: ['id', 'name'],
|
||||
where: whereParams,
|
||||
});
|
||||
return token;
|
||||
}
|
||||
|
||||
async function listTokens() {
|
||||
return strapi.db.query('admin::api-token').findMany({
|
||||
where: { type: 'read-only' },
|
||||
});
|
||||
}
|
||||
|
||||
async function createToken(data: unknown) {
|
||||
return strapi.db.query('admin::api-token').create({ data });
|
||||
}
|
||||
|
||||
async function updateToken(id: number, data: unknown) {
|
||||
return strapi.db.query('admin::api-token').update({ where: { id }, data });
|
||||
}
|
||||
|
||||
async function deleteToken(id: number) {
|
||||
return strapi.db.query('admin::api-token').delete({ where: { id } });
|
||||
}
|
||||
|
||||
async function countTokens() {
|
||||
return strapi.db.query('admin::api-token').count();
|
||||
}
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
// Vulnerable counterpart — bare `connection.query(...)` and chained
|
||||
// `db.query(...).then(...)` whose arg 0 is concatenated with attacker
|
||||
// input. Both must still fire as SQL_QUERY sinks: the chain has no
|
||||
// ORM-method outer call (`.then` is a Promise method, not an ORM
|
||||
// accessor), and arg 0 is not a string literal in the second case.
|
||||
|
||||
import express, { Request, Response } from 'express';
|
||||
|
||||
declare const connection: any;
|
||||
declare const db: any;
|
||||
|
||||
const app = express();
|
||||
|
||||
app.get('/user', (req: Request, res: Response) => {
|
||||
const name = req.query.name as string;
|
||||
// bare SQL — real SQLi sink, no chain
|
||||
connection.query(`SELECT * FROM users WHERE name = '${name}'`);
|
||||
});
|
||||
|
||||
app.get('/by-id', async (req: Request, res: Response) => {
|
||||
const id = req.query.id as string;
|
||||
// chained `.then` is a Promise method, not an ORM accessor; arg 0 is
|
||||
// also a binary_expression (not a string literal) so the ORM-shape
|
||||
// recogniser refuses to suppress.
|
||||
db.query("SELECT * FROM users WHERE id = " + id).then((rows: any) => res.json(rows[0]));
|
||||
});
|
||||
37
tests/benchmark/cve_corpus/java/CVE-2022-1471/patched.java
Normal file
37
tests/benchmark/cve_corpus/java/CVE-2022-1471/patched.java
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
// Nyx CVE benchmark fixture (patched counterpart).
|
||||
//
|
||||
// CVE: CVE-2022-1471
|
||||
// Project: SnakeYAML (snakeyaml/snakeyaml)
|
||||
// License: Apache-2.0
|
||||
// (https://github.com/snakeyaml/snakeyaml/blob/master/LICENSE.txt)
|
||||
// Advisory: https://github.com/advisories/GHSA-mjmj-j48q-9wg2
|
||||
//
|
||||
// Patched variant: the parser is constructed with `SafeConstructor`,
|
||||
// which restricts the YAML tag handler set to primitives + standard
|
||||
// collections. SnakeYAML 2.0 ships with `SafeConstructor` as the
|
||||
// default; pre-2.0 consumers patched their own call sites to pass
|
||||
// `SafeConstructor` explicitly (the form below).
|
||||
//
|
||||
// Patched-fix simplification: the upstream remediation also covers
|
||||
// callers that need richer types via custom `Constructor` subclasses
|
||||
// with declared safe types — those are out of scope for this fixture.
|
||||
|
||||
import javax.servlet.http.HttpServlet;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
import org.yaml.snakeyaml.LoaderOptions;
|
||||
import org.yaml.snakeyaml.Yaml;
|
||||
import org.yaml.snakeyaml.constructor.SafeConstructor;
|
||||
|
||||
public class YamlConfigServlet extends HttpServlet {
|
||||
@Override
|
||||
protected void doPost(HttpServletRequest req, HttpServletResponse res) throws Exception {
|
||||
String body = req.getReader().readLine();
|
||||
// Patched: SafeConstructor forbids arbitrary class tags;
|
||||
// any non-primitive `!!…` payload throws ConstructorException.
|
||||
Yaml yaml = new Yaml(new SafeConstructor(new LoaderOptions()));
|
||||
Object loaded = yaml.load(body);
|
||||
res.setHeader("X-Yaml-Class", loaded.getClass().getName());
|
||||
res.setStatus(HttpServletResponse.SC_OK);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
// Nyx CVE benchmark fixture.
|
||||
//
|
||||
// CVE: CVE-2022-1471
|
||||
// Project: SnakeYAML (snakeyaml/snakeyaml; consumed via any app
|
||||
// that constructs `new Yaml()` and calls `.load()` on
|
||||
// attacker-controlled bytes)
|
||||
// License: Apache-2.0
|
||||
// (https://github.com/snakeyaml/snakeyaml/blob/master/LICENSE.txt)
|
||||
// Advisory: https://github.com/advisories/GHSA-mjmj-j48q-9wg2
|
||||
// https://nvd.nist.gov/vuln/detail/CVE-2022-1471
|
||||
// Vulnerable: SnakeYAML <= 1.33; the default `Constructor` accepts
|
||||
// arbitrary tags (`!!javax.script.ScriptEngineManager`,
|
||||
// `!!java.net.URLClassLoader`, etc.) and instantiates any
|
||||
// class via reflection, reaching RCE on consumers that
|
||||
// feed network input straight into Yaml.load().
|
||||
//
|
||||
// Verbatim load-bearing lines: the unsafe `new Yaml()` construction
|
||||
// and the `yaml.load(body)` call mirror the call-site shape called
|
||||
// out in the advisory's "vulnerable code" example. The patched fix
|
||||
// (next file) shows the SnakeYAML 2.0 fix pattern of explicitly
|
||||
// passing `new SafeConstructor(new LoaderOptions())`.
|
||||
//
|
||||
// Trims: imports trimmed to just SnakeYAML and Servlet API; no
|
||||
// helper / logging code.
|
||||
|
||||
import javax.servlet.http.HttpServlet;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
import org.yaml.snakeyaml.Yaml;
|
||||
|
||||
public class YamlConfigServlet extends HttpServlet {
|
||||
@Override
|
||||
protected void doPost(HttpServletRequest req, HttpServletResponse res) throws Exception {
|
||||
String body = req.getReader().readLine();
|
||||
// Vulnerable: default Constructor allows arbitrary class
|
||||
// instantiation via YAML tag handlers — `body` may contain
|
||||
// `!!javax.script.ScriptEngineManager` and friends.
|
||||
Yaml yaml = new Yaml();
|
||||
Object loaded = yaml.load(body);
|
||||
res.setHeader("X-Yaml-Class", loaded.getClass().getName());
|
||||
res.setStatus(HttpServletResponse.SC_OK);
|
||||
}
|
||||
}
|
||||
33
tests/benchmark/cve_corpus/java/CVE-2022-42889/patched.java
Normal file
33
tests/benchmark/cve_corpus/java/CVE-2022-42889/patched.java
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
// Nyx CVE benchmark fixture (patched counterpart).
|
||||
//
|
||||
// CVE: CVE-2022-42889 ("Text4Shell")
|
||||
// Project: Apache Commons Text (apache/commons-text)
|
||||
// License: Apache-2.0
|
||||
// (https://github.com/apache/commons-text/blob/master/LICENSE.txt)
|
||||
// Advisory: https://github.com/advisories/GHSA-599f-7c49-w659
|
||||
//
|
||||
// Patched variant: the substitutor is built with `new StringSubstitutor()`
|
||||
// (no factory) so the lookup map is empty — `${anything}` becomes a
|
||||
// literal pass-through. This is the recommended app-side mitigation
|
||||
// for callers that cannot upgrade past 1.9, and it is also the
|
||||
// behaviour of the 1.10.0 default `createDefault()` factory which
|
||||
// drops the `script:` / `dns:` / `url:` interpolation lookups.
|
||||
|
||||
import javax.servlet.http.HttpServlet;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
import org.apache.commons.text.StringSubstitutor;
|
||||
|
||||
public class TemplateRenderServlet extends HttpServlet {
|
||||
@Override
|
||||
protected void doGet(HttpServletRequest req, HttpServletResponse res) throws Exception {
|
||||
String input = req.getParameter("template");
|
||||
// Patched: no interpolator constructed; the substitutor has
|
||||
// no lookups registered, so `${…}` is left as a literal in
|
||||
// the rendered output. No script/dns/url evaluation.
|
||||
StringSubstitutor substitutor = new StringSubstitutor();
|
||||
String rendered = substitutor.replace(input);
|
||||
res.setHeader("X-Rendered-Length", String.valueOf(rendered.length()));
|
||||
res.setStatus(HttpServletResponse.SC_OK);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
// Nyx CVE benchmark fixture.
|
||||
//
|
||||
// CVE: CVE-2022-42889 (a.k.a. "Text4Shell")
|
||||
// Project: Apache Commons Text (apache/commons-text); consumed via
|
||||
// any app that calls `StringSubstitutor.createInterpolator()`
|
||||
// on attacker-controlled input.
|
||||
// License: Apache-2.0
|
||||
// (https://github.com/apache/commons-text/blob/master/LICENSE.txt)
|
||||
// Advisory: https://github.com/advisories/GHSA-599f-7c49-w659
|
||||
// https://nvd.nist.gov/vuln/detail/CVE-2022-42889
|
||||
// Vulnerable: commons-text 1.5 .. 1.9. `createInterpolator()`
|
||||
// enables the `script:`, `dns:`, and `url:` lookups by
|
||||
// default, so a substitution like `${script:javascript:…}`
|
||||
// evaluates JavaScript via the JSR-223 ScriptEngineManager
|
||||
// — full RCE on any consumer that feeds untrusted input
|
||||
// through `.replace()`.
|
||||
//
|
||||
// Verbatim load-bearing lines: the `StringSubstitutor.createInterpolator()`
|
||||
// factory call and the `interpolator.replace(input)` sink mirror the
|
||||
// minimal triggering pattern published in the OSS-Security advisory
|
||||
// (https://www.openwall.com/lists/oss-security/2022/10/13/4) and the
|
||||
// vendor mitigation guidance for 1.10.0.
|
||||
//
|
||||
// Trims: imports limited to commons-text + servlet; no surrounding
|
||||
// templating boilerplate.
|
||||
|
||||
import javax.servlet.http.HttpServlet;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
import org.apache.commons.text.StringSubstitutor;
|
||||
|
||||
public class TemplateRenderServlet extends HttpServlet {
|
||||
@Override
|
||||
protected void doGet(HttpServletRequest req, HttpServletResponse res) throws Exception {
|
||||
String input = req.getParameter("template");
|
||||
// Vulnerable: createInterpolator() enables script:/dns:/url:
|
||||
// lookups by default; .replace() evaluates them against
|
||||
// `input` — `${script:js:…}` → arbitrary JavaScript via the
|
||||
// JDK ScriptEngineManager.
|
||||
StringSubstitutor interpolator = StringSubstitutor.createInterpolator();
|
||||
String rendered = interpolator.replace(input);
|
||||
res.setHeader("X-Rendered-Length", String.valueOf(rendered.length()));
|
||||
res.setStatus(HttpServletResponse.SC_OK);
|
||||
}
|
||||
}
|
||||
47
tests/benchmark/cve_corpus/python/CVE-2025-69662/patched.py
Normal file
47
tests/benchmark/cve_corpus/python/CVE-2025-69662/patched.py
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
# Nyx CVE benchmark fixture.
|
||||
#
|
||||
# CVE: CVE-2025-69662
|
||||
# Project: geopandas (geopandas/geopandas)
|
||||
# License: BSD-3-Clause (https://github.com/geopandas/geopandas/blob/main/LICENSE.txt)
|
||||
# Advisory: https://github.com/advisories/GHSA-6497-prx7-gpmq
|
||||
# Patched: 6aa8ef14ffdee4ba1044349ab948e1a1fbfaf419 geopandas/io/sql.py:432-438
|
||||
#
|
||||
# Fix: replace the f-string-built Find_SRID probe with a
|
||||
# bound-parameter SQLAlchemy text() statement; SQLAlchemy passes the
|
||||
# values via the driver's parameter binding, so attacker-supplied
|
||||
# identifiers can no longer break out of the literal context.
|
||||
#
|
||||
# Trims:
|
||||
# - Same scaffolding trim as vulnerable.py — `.fetchone()[0]` (post-
|
||||
# sink result extraction) removed.
|
||||
# - Patched-fix simplification: the upstream fix nests
|
||||
# `text(...).bindparams(...)` directly inside `connection.execute(...)`.
|
||||
# The fixture lifts the bound-parameter clause into a local `stmt`
|
||||
# so the `.bindparams` call is a top-level CFG node — without this
|
||||
# reshape, cfg-unguarded-sink fires on the surrounding execute
|
||||
# because the inlined sanitizer-in-arg shape is not yet recognised
|
||||
# by the dominator-based guard check. The verbatim bytes of the
|
||||
# `text(...).bindparams(...)` clause are preserved.
|
||||
|
||||
from flask import Flask, request
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
app = Flask(__name__)
|
||||
engine = create_engine("postgresql://localhost/geo")
|
||||
|
||||
|
||||
@app.post("/upload-layer")
|
||||
def upload_layer():
|
||||
body = request.get_json(force=True) or {}
|
||||
geom_name = body.get("geom_name", "geom")
|
||||
name = body.get("table", "data")
|
||||
schema_name = body.get("schema", "public")
|
||||
with engine.begin() as connection:
|
||||
# Verbatim bytes from sql.py:433-437 — bound-parameter probe.
|
||||
stmt = text(
|
||||
"SELECT Find_SRID(:schema_name, :name, :geom_name);"
|
||||
).bindparams(
|
||||
schema_name=schema_name, name=name, geom_name=geom_name
|
||||
)
|
||||
connection.execute(stmt)
|
||||
return {"ok": True}
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
# Nyx CVE benchmark fixture.
|
||||
#
|
||||
# CVE: CVE-2025-69662
|
||||
# Project: geopandas (geopandas/geopandas)
|
||||
# License: BSD-3-Clause (https://github.com/geopandas/geopandas/blob/main/LICENSE.txt)
|
||||
# Advisory: https://github.com/advisories/GHSA-6497-prx7-gpmq
|
||||
# Vulnerable: c301579e0ac4034c19bece63c08bf628613700b4 geopandas/io/sql.py:432-435
|
||||
#
|
||||
# geopandas.GeoDataFrame.to_postgis() concatenated the GeoDataFrame's
|
||||
# geometry column name (and the schema/table names) into a Find_SRID
|
||||
# probe via f-string. A user uploading a GeoDataFrame whose geometry
|
||||
# column was named with embedded SQL (e.g. "geom'); DROP TABLE...--")
|
||||
# achieved arbitrary SQL execution against the target Postgres database.
|
||||
#
|
||||
# Trims:
|
||||
# - Surrounding to_postgis() body (CRS lookup, EWKB conversion, dtype
|
||||
# dict construction at L399-422) that scaffolds the vulnerable
|
||||
# Find_SRID probe.
|
||||
# - Trailing `.fetchone()[0]` on the connection.execute(...) result —
|
||||
# downstream of the sink (result extraction), not on the flow path.
|
||||
#
|
||||
# Only the source statement (geom_name from request input), the
|
||||
# f-string SQL builder, and the connection.execute(text(...)) sink are
|
||||
# preserved verbatim from sql.py:432-435.
|
||||
|
||||
from flask import Flask, request
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
app = Flask(__name__)
|
||||
engine = create_engine("postgresql://localhost/geo")
|
||||
|
||||
|
||||
@app.post("/upload-layer")
|
||||
def upload_layer():
|
||||
body = request.get_json(force=True) or {}
|
||||
# geom_name is supplied by the API caller — no validation upstream.
|
||||
geom_name = body.get("geom_name", "geom")
|
||||
name = body.get("table", "data")
|
||||
schema_name = body.get("schema", "public")
|
||||
with engine.begin() as connection:
|
||||
# Verbatim from sql.py:432-435 — Find_SRID probe with
|
||||
# f-string-interpolated identifiers.
|
||||
connection.execute(
|
||||
text(f"SELECT Find_SRID('{schema_name}', '{name}', '{geom_name}');")
|
||||
)
|
||||
return {"ok": True}
|
||||
79
tests/benchmark/cve_corpus/python/CVE-2026-33626/patched.py
Normal file
79
tests/benchmark/cve_corpus/python/CVE-2026-33626/patched.py
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
# Nyx CVE benchmark fixture.
|
||||
#
|
||||
# CVE: CVE-2026-33626
|
||||
# Project: LMDeploy (InternLM/lmdeploy)
|
||||
# License: Apache-2.0 (https://github.com/InternLM/lmdeploy/blob/main/LICENSE)
|
||||
# Advisory: https://github.com/advisories/GHSA-25c5-rg58-mhxh
|
||||
# Patched: 71d64a339edb901e9005358e0633fbbab367d626 lmdeploy/vl/media/connection.py:24-69
|
||||
#
|
||||
# Fix: introduce `_is_safe_url(url)` which resolves the hostname via
|
||||
# `socket.getaddrinfo`, walks every returned IP, and rejects any that
|
||||
# aren't `is_global` (covers loopback, RFC1918 private, link-local,
|
||||
# multicast, reserved, unspecified). The vulnerable scheme-only check
|
||||
# is replaced by this allowlist gate before the fetch.
|
||||
#
|
||||
# Trims: same scaffolding trim as vulnerable.py — MediaIO generic
|
||||
# plumbing replaced with a Flask handler; fetch_timeout env-var
|
||||
# resolution collapsed to a literal. The `_is_safe_url` body, the
|
||||
# replacement gate at L55-58, and the `client.get(...,
|
||||
# allow_redirects=True)` fetch are preserved verbatim from the fix
|
||||
# commit.
|
||||
|
||||
import ipaddress
|
||||
import socket
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
from flask import Flask, request
|
||||
|
||||
app = Flask(__name__)
|
||||
headers = {"User-Agent": "Mozilla/5.0"}
|
||||
|
||||
|
||||
def _is_safe_url(url: str) -> tuple[bool, str]:  # -> (is_safe, reason); reason is a human-readable message in both outcomes
|
||||
"""Check if the URL is safe to fetch (not internal/private)."""
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
if parsed.scheme not in ('http', 'https'):  # scheme allowlist: only http/https pass
|
||||
return False, f'Unsupported scheme: {parsed.scheme}'
|
||||
|
||||
hostname = parsed.hostname
|
||||
if not hostname:
|
||||
return False, 'Could not parse hostname from URL'
|
||||
|
||||
# check all IPs (IPv4 + IPv6) using getaddrinfo
|
||||
try:
|
||||
infos = socket.getaddrinfo(hostname, None)  # port is None: only the resolved addresses are inspected below
|
||||
except socket.gaierror:
|
||||
return False, 'Hostname resolution failed'
|
||||
|
||||
for info in infos:
|
||||
ip = ipaddress.ip_address(info[4][0])  # info[4] is the sockaddr tuple; its first item is the address string
|
||||
# block any IP that is not globally routable
|
||||
if not ip.is_global:  # a single non-global address among the results is enough to reject
|
||||
return False, f'Blocked non-global IP detected: {ip}'
|
||||
|
||||
return True, 'URL is safe'  # reached only when every resolved address is global
|
||||
except Exception as e:  # fail closed: any other Exception is reported as unsafe instead of propagating
|
||||
return False, f'URL validation failed: {str(e)}'
|
||||
|
||||
|
||||
@app.post("/load-image")
|
||||
def load_image():  # POST handler: gates the JSON "url" field through _is_safe_url, fetches it, returns the body size
|
||||
body = request.get_json(force=True) or {}  # a falsy parse result (e.g. JSON null) falls back to {}
|
||||
url = body.get("url", "")
|
||||
url_spec = urlparse(url)
|
||||
# Verbatim from connection.py:55-58 — replaces the scheme-only
|
||||
# check with a private-IP-blocking allowlist.
|
||||
is_safe, reason = _is_safe_url(url_spec.geturl())
|
||||
if not is_safe:
|
||||
raise ValueError(f'URL is blocked for security reasons: {reason}')  # NOTE(review): no ValueError handler is visible here; presumably surfaces as a server error, confirm
|
||||
|
||||
fetch_timeout = 10
|
||||
client = requests.Session()
|
||||
client.max_redirects = 3
|
||||
response = client.get(
|
||||
url_spec.geturl(), headers=headers, timeout=fetch_timeout, allow_redirects=True  # NOTE(review): redirect targets are not passed through _is_safe_url in this block
|
||||
)
|
||||
response.raise_for_status()
|
||||
return {"size": len(response.content)}
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
# Nyx CVE benchmark fixture.
|
||||
#
|
||||
# CVE: CVE-2026-33626
|
||||
# Project: LMDeploy (InternLM/lmdeploy)
|
||||
# License: Apache-2.0 (https://github.com/InternLM/lmdeploy/blob/main/LICENSE)
|
||||
# Advisory: https://github.com/advisories/GHSA-25c5-rg58-mhxh
|
||||
# Vulnerable: 819a80836e991ca3f427b0e85faca159083d3d40 lmdeploy/vl/media/connection.py:23-37
|
||||
#
|
||||
# LMDeploy's vision-language image loader accepted user-supplied
|
||||
# image URLs from the chat-completion request and fetched them via
|
||||
# `requests.Session().get(url)` after only a scheme check. Attackers
|
||||
# embedded URLs pointing at internal network services or cloud
|
||||
# metadata endpoints (e.g. http://169.254.169.254/...) and exfiltrated
|
||||
# the response back through the model output.
|
||||
#
|
||||
# Trims:
|
||||
# - Surrounding _load_data_url / file-URL branches that don't reach
|
||||
# the HTTP sink (lines 41+).
|
||||
# - The scheme-only allowlist check at L24-25 of upstream. The
|
||||
# CVE is host-based SSRF (private IP / cloud-metadata host); the
|
||||
# scheme check was the insufficient validation the fix replaces.
|
||||
# Removing it keeps the load-bearing source → sink flow intact.
|
||||
# - The fetch_timeout env-var resolution (L28-31) — collapsed to a
|
||||
# literal so the fixture is self-contained.
|
||||
# - MediaIO[_M] generic plumbing — replaced with a Flask handler so
|
||||
# the source is a concrete request flow.
|
||||
#
|
||||
# The verbatim load-bearing lines are the `client = requests.Session()`
|
||||
# constructor and the `client.get(url_spec.geturl(), headers=headers,
|
||||
# timeout=fetch_timeout)` fetch site at lines 33-34 of upstream.
|
||||
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
from flask import Flask, request
|
||||
|
||||
app = Flask(__name__)
|
||||
headers = {"User-Agent": "Mozilla/5.0"}
|
||||
|
||||
|
||||
@app.post("/load-image")
|
||||
def load_image():  # POST handler: fetches the JSON "url" field with no host validation and returns the body size
|
||||
body = request.get_json(force=True) or {}  # source: request body; a falsy parse result falls back to {}
|
||||
url = body.get("url", "")
|
||||
url_spec = urlparse(url)
|
||||
fetch_timeout = 10
|
||||
# Verbatim from connection.py:33-34 — Session().get(url).
|
||||
client = requests.Session()
|
||||
response = client.get(url_spec.geturl(), headers=headers, timeout=fetch_timeout)  # sink: the request-supplied URL is fetched as-is
|
||||
response.raise_for_status()
|
||||
return {"size": len(response.content)}
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
//
|
||||
// CVE: CVE-2018-20997
|
||||
// Advisory: https://rustsec.org/advisories/RUSTSEC-2018-0003
|
||||
// Project: tar-rs — zip-slip fix
|
||||
// Project: tar-rs, zip-slip fix
|
||||
// License: MIT OR Apache-2.0
|
||||
//
|
||||
// Patched variant: the extractor rejects any entry path that contains
|
||||
|
|
|
|||
|
|
@ -2,18 +2,18 @@
|
|||
//
|
||||
// CVE: CVE-2018-20997
|
||||
// Advisory: https://rustsec.org/advisories/RUSTSEC-2018-0003
|
||||
// Project: tar-rs (alexcrichton/tar-rs) — "zip slip" on tar extraction
|
||||
// Project: tar-rs (alexcrichton/tar-rs), "zip slip" on tar extraction
|
||||
// License: MIT OR Apache-2.0 (https://github.com/alexcrichton/tar-rs/blob/main/LICENSE-MIT)
|
||||
//
|
||||
// tar-rs <= 0.4.15 trusted tar entry paths verbatim when unpacking.
|
||||
// A crafted archive with an entry named `../../etc/shadow` would cause
|
||||
// `Archive::unpack` to write outside the destination directory, giving
|
||||
// malicious tarballs arbitrary file write. Every consumer that
|
||||
// streamed user-supplied archives — package managers, OCI tooling,
|
||||
// container image importers — inherited the traversal.
|
||||
// streamed user-supplied archives (package managers, OCI tooling,
|
||||
// container image importers) inherited the traversal.
|
||||
//
|
||||
// This fixture is a minimal reproducer of the unsafe sink pattern —
|
||||
// attacker-controlled archive entry path -> fs::File::create(path) — not
|
||||
// This fixture is a minimal reproducer of the unsafe sink pattern,
|
||||
// attacker-controlled archive entry path -> fs::File::create(path), not
|
||||
// an excerpt of tar-rs internals. The entry path is modelled as an env
|
||||
// var so the single-file benchmark harness sees the flow; in a real
|
||||
// extractor the same shape fires for `archive.entries()?.map(|e|
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
//
|
||||
// CVE: CVE-2022-36113
|
||||
// Advisory: https://blog.rust-lang.org/2022/09/14/cargo-cves.html
|
||||
// Project: cargo — `.cargo-ok` symlink follow fix
|
||||
// Project: cargo, `.cargo-ok` symlink follow fix
|
||||
// License: MIT OR Apache-2.0
|
||||
//
|
||||
// Patched variant: the crate name is passed through
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
// CVE: CVE-2022-36113
|
||||
// Advisory: https://blog.rust-lang.org/2022/09/14/cargo-cves.html
|
||||
// https://rustsec.org/advisories/RUSTSEC-2022-0064
|
||||
// Project: cargo (rust-lang/cargo) — "Arbitrary file corruption through
|
||||
// Project: cargo (rust-lang/cargo), "Arbitrary file corruption through
|
||||
// crate extraction" (`.cargo-ok` symlink following)
|
||||
// License: MIT OR Apache-2.0 (https://github.com/rust-lang/cargo/blob/master/LICENSE-MIT)
|
||||
//
|
||||
|
|
@ -15,9 +15,9 @@
|
|||
// switched the marker open to `OpenOptions::create_new(true)` so a
|
||||
// pre-existing symlink aborts the extraction.
|
||||
//
|
||||
// This fixture is a minimal reproducer of the unsafe sink pattern —
|
||||
// This fixture is a minimal reproducer of the unsafe sink pattern,
|
||||
// attacker-controlled crate name plumbed into the marker path ->
|
||||
// fs::File::create(marker) through a symlink — not an excerpt of cargo
|
||||
// fs::File::create(marker) through a symlink, not an excerpt of cargo
|
||||
// internals.
|
||||
use std::env;
|
||||
use std::fs::File;
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
//
|
||||
// CVE: CVE-2024-24576
|
||||
// Advisory: https://rustsec.org/advisories/RUSTSEC-2024-0003
|
||||
// Project: Rust standard library — "BatBadBut"
|
||||
// Project: Rust standard library, "BatBadBut"
|
||||
// License: MIT OR Apache-2.0
|
||||
//
|
||||
// Patched variant: the caller filters the argument through a cmd.exe-
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
// CVE: CVE-2024-24576
|
||||
// Advisory: https://rustsec.org/advisories/RUSTSEC-2024-0003
|
||||
// Blog: https://blog.rust-lang.org/2024/04/09/cve-2024-24576.html
|
||||
// Project: Rust standard library (std::process::Command) — "BatBadBut"
|
||||
// Project: Rust standard library (std::process::Command), "BatBadBut"
|
||||
// License: MIT OR Apache-2.0 (https://github.com/rust-lang/rust/blob/master/COPYRIGHT)
|
||||
//
|
||||
// Rust < 1.77.2 on Windows built the argv for .bat/.cmd invocations by
|
||||
|
|
@ -14,8 +14,8 @@
|
|||
// line, and every consumer of `std::process::Command::new("...bat")`
|
||||
// on Windows inherited the RCE.
|
||||
//
|
||||
// This fixture is a minimal reproducer of the unsafe sink pattern —
|
||||
// caller-supplied input -> Command::new("update.bat").arg(name) — not
|
||||
// This fixture is a minimal reproducer of the unsafe sink pattern,
|
||||
// caller-supplied input -> Command::new("update.bat").arg(name), not
|
||||
// an excerpt of rustc / libstd internals. The source is modelled as
|
||||
// `env::var` so the single-file benchmark harness sees the flow; in a
|
||||
// real deployment the same shape fires for an Axum/Actix/Rocket handler
|
||||
|
|
|
|||
|
|
@ -0,0 +1,62 @@
|
|||
// Nyx CVE benchmark fixture (patched counterpart).
|
||||
//
|
||||
// CVE: GHSA-4x48-cgf9-q33f (no CVE id assigned)
|
||||
// Project: Novu (novuhq/novu)
|
||||
// License: MIT (libs/application-generic — see LICENSE-MIT)
|
||||
// Advisory: https://github.com/novuhq/novu/security/advisories/GHSA-4x48-cgf9-q33f
|
||||
// Patched: 87d965eb88340ac7cd262dd52c8015acd092dc68
|
||||
// libs/application-generic/src/usecases/conditions-filter/conditions-filter.usecase.ts:241-289
|
||||
//
|
||||
// The fix performs the existing call-site SSRF check `validateUrlSsrf`
|
||||
// (already used by the HTTP-Request workflow step) before the webhook
|
||||
// POST. The branch validates protocol/host and rejects when the URL
|
||||
// hits localhost/private/cloud-metadata addresses; only on success
|
||||
// does control reach axios.post.
|
||||
//
|
||||
// Patched-fix simplification: validateUrlSsrf is sourced from
|
||||
// '../../utils/ssrf-url-validation.ts' upstream — inlined here as a
|
||||
// no-op signature so the fixture parses without the larger novu
|
||||
// monorepo. The branch shape (early throw on truthy ssrfError) is
|
||||
// verbatim from the patch.
|
||||
|
||||
import express, { Request, Response } from 'express';
|
||||
import axios from 'axios';
|
||||
|
||||
interface IWebhookFilterPart {  // the only field of the filter part this fixture reads
|
||||
webhookUrl?: string;  // optional; getWebhookResponse returns undefined when it is missing or empty
|
||||
}
|
||||
|
||||
declare function validateUrlSsrf(url: string): Promise<string | null>;  // ambient signature only, no body in this file; the caller treats a truthy result as the rejection message
|
||||
|
||||
async function getWebhookResponse(  // POSTs an empty payload to child.webhookUrl after the SSRF check; resolves to the response body
|
||||
child: IWebhookFilterPart,
|
||||
): Promise<Record<string, unknown> | undefined> {
|
||||
if (!child.webhookUrl) return undefined;  // no URL configured: nothing to call
|
||||
|
||||
const payload = {};
|
||||
const config: { headers: Record<string, string> } = { headers: {} };
|
||||
|
||||
const ssrfError = await validateUrlSsrf(child.webhookUrl);  // truthy result means the URL was rejected
|
||||
|
||||
if (ssrfError) {
|
||||
throw new Error(  // the Error message is a JSON string that carries the validator's message
|
||||
JSON.stringify({
|
||||
message: ssrfError,
|
||||
data: 'Webhook URL blocked by SSRF protection.',
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
return await axios.post(child.webhookUrl, payload, config).then((response) => {  // reached only when validateUrlSsrf returned a falsy value
|
||||
return response.data as Record<string, unknown>;
|
||||
});
|
||||
}
|
||||
|
||||
const app = express();
|
||||
app.use(express.json());  // parse JSON request bodies so req.body is populated
|
||||
|
||||
app.post('/conditions-filter/run', async (req: Request, res: Response) => {  // runs the webhook for the filter part sent in the request body
|
||||
const child: IWebhookFilterPart = req.body.filter;  // type annotation only; no runtime shape check
|
||||
const result = await getWebhookResponse(child);
|
||||
res.json({ result });
|
||||
});
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
// Nyx CVE benchmark fixture.
|
||||
//
|
||||
// CVE: GHSA-4x48-cgf9-q33f (no CVE id assigned)
|
||||
// Project: Novu (novuhq/novu)
|
||||
// License: MIT (libs/application-generic — see LICENSE-MIT)
|
||||
// Advisory: https://github.com/novuhq/novu/security/advisories/GHSA-4x48-cgf9-q33f
|
||||
// Vulnerable: 87d965eb88340ac7cd262dd52c8015acd092dc68^
|
||||
// libs/application-generic/src/usecases/conditions-filter/conditions-filter.usecase.ts:241-272
|
||||
//
|
||||
// `getWebhookResponse` POSTs to a user-configured webhook URL using raw
|
||||
// `axios.post(child.webhookUrl, ...)` with no SSRF validation. The
|
||||
// `child` filter part is sourced from a workflow filter config the
|
||||
// caller controls, so the URL flows attacker-influenced into axios.
|
||||
//
|
||||
// Trims:
|
||||
// - HMAC config branch (verbatim kept; not on the flow path but
|
||||
// trivial scaffolding to keep the call shape).
|
||||
// - buildHmac, buildPayload, processFilter dispatcher, environment
|
||||
// repository lookups, decryptApiKey usage. Verbatim load-bearing
|
||||
// lines are the IWebhookFilterPart param shape and the
|
||||
// axios.post(child.webhookUrl, payload, config) call.
|
||||
|
||||
import express, { Request, Response } from 'express';
|
||||
import axios from 'axios';
|
||||
|
||||
interface IWebhookFilterPart {  // the only field of the filter part this fixture reads
|
||||
webhookUrl?: string;  // optional; getWebhookResponse returns undefined when it is missing or empty
|
||||
}
|
||||
|
||||
async function getWebhookResponse(  // POSTs an empty payload to child.webhookUrl with no URL validation; resolves to the response body
|
||||
child: IWebhookFilterPart,
|
||||
): Promise<Record<string, unknown> | undefined> {
|
||||
if (!child.webhookUrl) return undefined;  // no URL configured: nothing to call
|
||||
|
||||
const payload = {};
|
||||
|
||||
const config: { headers: Record<string, string> } = {
|
||||
headers: {},
|
||||
};
|
||||
|
||||
return await axios.post(child.webhookUrl, payload, config).then((response) => {  // child.webhookUrl is used as-is
|
||||
return response.data as Record<string, unknown>;
|
||||
});
|
||||
}
|
||||
|
||||
const app = express();
|
||||
app.use(express.json());  // parse JSON request bodies so req.body is populated
|
||||
|
||||
app.post('/conditions-filter/run', async (req: Request, res: Response) => {  // runs the webhook for the filter part sent in the request body
|
||||
const child: IWebhookFilterPart = req.body.filter;  // source: request-controlled filter part; type annotation only, no runtime check
|
||||
const result = await getWebhookResponse(child);  // the URL inside child flows into getWebhookResponse
|
||||
res.json({ result });
|
||||
});
|
||||
|
|
@ -3,7 +3,7 @@
|
|||
"metadata": {
|
||||
"description": "Nyx benchmark ground truth",
|
||||
"created": "2026-03-20",
|
||||
"corpus_size": 433
|
||||
"corpus_size": 458
|
||||
},
|
||||
"cases": [
|
||||
{
|
||||
|
|
@ -8394,6 +8394,35 @@
|
|||
"disabled": false,
|
||||
"notes": "Prisma $queryRawUnsafe \u2014 TS-specific ORM sink"
|
||||
},
|
||||
{
|
||||
"case_id": "ts-sqli-003",
|
||||
"file": "typescript/sqli/sqli_db_query_concat.ts",
|
||||
"language": "typescript",
|
||||
"is_vulnerable": true,
|
||||
"vuln_class": "sqli",
|
||||
"cwe": "CWE-89",
|
||||
"provenance": "synthetic",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "rule_match",
|
||||
"expected_rule_ids": [
|
||||
"taint-unsanitised-flow"
|
||||
],
|
||||
"allowed_alternative_rule_ids": [
|
||||
"cfg-unguarded-sink"
|
||||
],
|
||||
"forbidden_rule_ids": [],
|
||||
"expected_severity": "MEDIUM",
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": null,
|
||||
"expected_source_lines": null,
|
||||
"tags": [
|
||||
"sqli",
|
||||
"real-repo-precision-2026-04-29",
|
||||
"regression-guard"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "Vulnerable counterpart for ts-safe-017 \u2014 bare `connection.query(`SELECT...`)` and chained `db.query(SQL).then(...)` (Promise method, not ORM accessor) must still fire as SQL_QUERY sinks even after the ORM-chain recogniser landed."
|
||||
},
|
||||
{
|
||||
"case_id": "ts-cmdi-001",
|
||||
"file": "typescript/cmdi/cmdi_exec_template.ts",
|
||||
|
|
@ -9560,6 +9589,72 @@
|
|||
"disabled": false,
|
||||
"notes": "CVE-2023-26159 patched counterpart: URL allowlist check guards axios.get; regression guard that Nyx does not refire on the fix"
|
||||
},
|
||||
{
|
||||
"case_id": "cve-ts-ghsa-4x48-cgf9-q33f-vulnerable",
|
||||
"file": "cve_corpus/typescript/GHSA-4x48-cgf9-q33f/vulnerable.ts",
|
||||
"language": "typescript",
|
||||
"is_vulnerable": true,
|
||||
"vuln_class": "ssrf",
|
||||
"cwe": "CWE-918",
|
||||
"provenance": "real_cve",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "rule_match",
|
||||
"expected_rule_ids": [
|
||||
"taint-unsanitised-flow"
|
||||
],
|
||||
"allowed_alternative_rule_ids": [],
|
||||
"forbidden_rule_ids": [],
|
||||
"expected_severity": "MEDIUM",
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [
|
||||
[
|
||||
51,
|
||||
51
|
||||
]
|
||||
],
|
||||
"expected_source_lines": [
|
||||
[
|
||||
50,
|
||||
50
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
"cve",
|
||||
"novu",
|
||||
"ssrf",
|
||||
"vulnerable"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "GHSA-4x48-cgf9-q33f: Novu conditions-filter webhook bypassed validateUrlSsrf; raw axios.post(child.webhookUrl) is the cross-function SSRF sink. MIT-licensed libs/application-generic package."
|
||||
},
|
||||
{
|
||||
"case_id": "cve-ts-ghsa-4x48-cgf9-q33f-patched",
|
||||
"file": "cve_corpus/typescript/GHSA-4x48-cgf9-q33f/patched.ts",
|
||||
"language": "typescript",
|
||||
"is_vulnerable": false,
|
||||
"vuln_class": "safe",
|
||||
"cwe": "N/A",
|
||||
"provenance": "real_cve",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "file_presence",
|
||||
"expected_rule_ids": [],
|
||||
"allowed_alternative_rule_ids": [],
|
||||
"forbidden_rule_ids": [
|
||||
"taint-unsanitised-flow"
|
||||
],
|
||||
"expected_severity": null,
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [],
|
||||
"expected_source_lines": [],
|
||||
"tags": [
|
||||
"cve",
|
||||
"novu",
|
||||
"patched",
|
||||
"negative"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "GHSA-4x48-cgf9-q33f patched: validateUrlSsrf(child.webhookUrl) followed by `if (ssrfError) throw` guards the axios.post call; regression guard for the indirect-validator branch narrowing + summary all_validated propagation."
|
||||
},
|
||||
{
|
||||
"case_id": "cve-py-2017-18342-vulnerable",
|
||||
"file": "cve_corpus/python/CVE-2017-18342/vulnerable.py",
|
||||
|
|
@ -9629,6 +9724,144 @@
|
|||
"disabled": false,
|
||||
"notes": "CVE-2017-18342 patched counterpart: yaml.safe_load replaces yaml.load; regression guard that Nyx does not refire on the fix"
|
||||
},
|
||||
{
|
||||
"case_id": "cve-py-2025-69662-vulnerable",
|
||||
"file": "cve_corpus/python/CVE-2025-69662/vulnerable.py",
|
||||
"language": "python",
|
||||
"is_vulnerable": true,
|
||||
"vuln_class": "sql_injection",
|
||||
"cwe": "CWE-89",
|
||||
"provenance": "real_cve",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "rule_match",
|
||||
"expected_rule_ids": [
|
||||
"py.sqli.text_format"
|
||||
],
|
||||
"allowed_alternative_rule_ids": [
|
||||
"taint-unsanitised-flow"
|
||||
],
|
||||
"forbidden_rule_ids": [],
|
||||
"expected_severity": "MEDIUM",
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [
|
||||
[
|
||||
43,
|
||||
44
|
||||
]
|
||||
],
|
||||
"expected_source_lines": [
|
||||
[
|
||||
35,
|
||||
35
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
"cve",
|
||||
"geopandas",
|
||||
"sql_injection",
|
||||
"flask",
|
||||
"sqlalchemy"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "CVE-2025-69662: geopandas to_postgis() interpolated GeoDataFrame's geometry column name into Find_SRID probe via f-string; SQL injection on user-uploaded layer. BSD-3-Clause"
|
||||
},
|
||||
{
|
||||
"case_id": "cve-py-2025-69662-patched",
|
||||
"file": "cve_corpus/python/CVE-2025-69662/patched.py",
|
||||
"language": "python",
|
||||
"is_vulnerable": false,
|
||||
"vuln_class": "safe",
|
||||
"cwe": "N/A",
|
||||
"provenance": "real_cve",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "file_presence",
|
||||
"expected_rule_ids": [],
|
||||
"allowed_alternative_rule_ids": [],
|
||||
"forbidden_rule_ids": [
|
||||
"py.sqli.text_format",
|
||||
"py.sqli.execute_format",
|
||||
"taint-unsanitised-flow"
|
||||
],
|
||||
"expected_severity": null,
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [],
|
||||
"expected_source_lines": [],
|
||||
"tags": [
|
||||
"cve",
|
||||
"geopandas",
|
||||
"patched",
|
||||
"negative"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "CVE-2025-69662 patched counterpart: text(...).bindparams() replaces f-string interpolation; regression guard that Nyx does not refire on the fix"
|
||||
},
|
||||
{
|
||||
"case_id": "cve-py-2026-33626-vulnerable",
|
||||
"file": "cve_corpus/python/CVE-2026-33626/vulnerable.py",
|
||||
"language": "python",
|
||||
"is_vulnerable": true,
|
||||
"vuln_class": "ssrf",
|
||||
"cwe": "CWE-918",
|
||||
"provenance": "real_cve",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "rule_match",
|
||||
"expected_rule_ids": [
|
||||
"taint-unsanitised-flow"
|
||||
],
|
||||
"allowed_alternative_rule_ids": [],
|
||||
"forbidden_rule_ids": [],
|
||||
"expected_severity": "MEDIUM",
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [
|
||||
[
|
||||
49,
|
||||
49
|
||||
]
|
||||
],
|
||||
"expected_source_lines": [
|
||||
[
|
||||
43,
|
||||
43
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
"cve",
|
||||
"lmdeploy",
|
||||
"ssrf",
|
||||
"flask",
|
||||
"requests"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "CVE-2026-33626: LMDeploy vision-language image loader fetched user-supplied URLs via requests.Session().get without private-IP guard; SSRF / cloud-metadata exfil. Apache-2.0"
|
||||
},
|
||||
{
|
||||
"case_id": "cve-py-2026-33626-patched",
|
||||
"file": "cve_corpus/python/CVE-2026-33626/patched.py",
|
||||
"language": "python",
|
||||
"is_vulnerable": false,
|
||||
"vuln_class": "safe",
|
||||
"cwe": "N/A",
|
||||
"provenance": "real_cve",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "file_presence",
|
||||
"expected_rule_ids": [],
|
||||
"allowed_alternative_rule_ids": [],
|
||||
"forbidden_rule_ids": [
|
||||
"taint-unsanitised-flow"
|
||||
],
|
||||
"expected_severity": null,
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [],
|
||||
"expected_source_lines": [],
|
||||
"tags": [
|
||||
"cve",
|
||||
"lmdeploy",
|
||||
"patched",
|
||||
"negative"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "CVE-2026-33626 patched counterpart: _is_safe_url private-IP allowlist gate replaces scheme-only check; regression guard that Nyx does not refire on the fix"
|
||||
},
|
||||
{
|
||||
"case_id": "cve-php-2017-9841-vulnerable",
|
||||
"file": "cve_corpus/php/CVE-2017-9841/vulnerable.php",
|
||||
|
|
@ -10694,6 +10927,147 @@
|
|||
"disabled": false,
|
||||
"notes": "CVE-2017-12629 patched counterpart: transformer name allowlist + in-process secure TransformerFactory removes the Runtime.exec path; regression guard that Nyx does not refire on the fix"
|
||||
},
|
||||
{
|
||||
"case_id": "cve-java-2022-1471-vulnerable",
|
||||
"file": "cve_corpus/java/CVE-2022-1471/vulnerable.java",
|
||||
"language": "java",
|
||||
"is_vulnerable": true,
|
||||
"vuln_class": "deserialization",
|
||||
"cwe": "CWE-502",
|
||||
"provenance": "real_cve",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "rule_match",
|
||||
"expected_rule_ids": [
|
||||
"java.deser.snakeyaml_unsafe_constructor"
|
||||
],
|
||||
"allowed_alternative_rule_ids": [
|
||||
"taint-unsanitised-flow"
|
||||
],
|
||||
"forbidden_rule_ids": [],
|
||||
"expected_severity": "MEDIUM",
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [
|
||||
[
|
||||
38,
|
||||
38
|
||||
]
|
||||
],
|
||||
"expected_source_lines": [
|
||||
[
|
||||
34,
|
||||
34
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
"cve",
|
||||
"snakeyaml",
|
||||
"deserialization",
|
||||
"servlet"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "CVE-2022-1471: SnakeYAML <2.0 default Constructor accepts arbitrary class tags (`!!javax.script.ScriptEngineManager`, `!!java.net.URLClassLoader`, ...) reaching RCE on apps that load attacker-controlled YAML. Apache-2.0"
|
||||
},
|
||||
{
|
||||
"case_id": "cve-java-2022-1471-patched",
|
||||
"file": "cve_corpus/java/CVE-2022-1471/patched.java",
|
||||
"language": "java",
|
||||
"is_vulnerable": false,
|
||||
"vuln_class": "safe",
|
||||
"cwe": "N/A",
|
||||
"provenance": "real_cve",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "file_presence",
|
||||
"expected_rule_ids": [],
|
||||
"allowed_alternative_rule_ids": [],
|
||||
"forbidden_rule_ids": [
|
||||
"java.deser.snakeyaml_unsafe_constructor",
|
||||
"java.deser.readobject",
|
||||
"taint-unsanitised-flow"
|
||||
],
|
||||
"expected_severity": null,
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [],
|
||||
"expected_source_lines": [],
|
||||
"tags": [
|
||||
"cve",
|
||||
"snakeyaml",
|
||||
"patched",
|
||||
"negative"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "CVE-2022-1471 patched counterpart: explicit SafeConstructor argument restricts the YAML tag handler set to primitives + standard collections; regression guard that Nyx does not refire on the safe form"
|
||||
},
|
||||
{
|
||||
"case_id": "cve-java-2022-42889-vulnerable",
|
||||
"file": "cve_corpus/java/CVE-2022-42889/vulnerable.java",
|
||||
"language": "java",
|
||||
"is_vulnerable": true,
|
||||
"vuln_class": "code_exec",
|
||||
"cwe": "CWE-94",
|
||||
"provenance": "real_cve",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "rule_match",
|
||||
"expected_rule_ids": [
|
||||
"java.code_exec.text4shell_interpolator"
|
||||
],
|
||||
"allowed_alternative_rule_ids": [
|
||||
"taint-unsanitised-flow"
|
||||
],
|
||||
"forbidden_rule_ids": [],
|
||||
"expected_severity": "MEDIUM",
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [
|
||||
[
|
||||
40,
|
||||
40
|
||||
]
|
||||
],
|
||||
"expected_source_lines": [
|
||||
[
|
||||
35,
|
||||
35
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
"cve",
|
||||
"commons-text",
|
||||
"text4shell",
|
||||
"code-exec",
|
||||
"servlet"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "CVE-2022-42889 (Text4Shell): Apache Commons Text 1.5..1.9 StringSubstitutor.createInterpolator() enables script:/dns:/url: lookups; ${script:js:...} reaches the JSR-223 ScriptEngineManager. Apache-2.0"
|
||||
},
|
||||
{
|
||||
"case_id": "cve-java-2022-42889-patched",
|
||||
"file": "cve_corpus/java/CVE-2022-42889/patched.java",
|
||||
"language": "java",
|
||||
"is_vulnerable": false,
|
||||
"vuln_class": "safe",
|
||||
"cwe": "N/A",
|
||||
"provenance": "real_cve",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "file_presence",
|
||||
"expected_rule_ids": [],
|
||||
"allowed_alternative_rule_ids": [],
|
||||
"forbidden_rule_ids": [
|
||||
"java.code_exec.text4shell_interpolator",
|
||||
"taint-unsanitised-flow"
|
||||
],
|
||||
"expected_severity": null,
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [],
|
||||
"expected_source_lines": [],
|
||||
"tags": [
|
||||
"cve",
|
||||
"commons-text",
|
||||
"text4shell",
|
||||
"patched",
|
||||
"negative"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "CVE-2022-42889 patched counterpart: substitutor built directly with `new StringSubstitutor()` so the lookup map is empty; ${...} pass-through. No script/dns/url evaluation."
|
||||
},
|
||||
{
|
||||
"case_id": "rs-auth-001",
|
||||
"file": "rust/auth/actix_scoped_write_missing.rs",
|
||||
|
|
@ -12233,6 +12607,89 @@
|
|||
"disabled": false,
|
||||
"notes": "TS cross-function bool validator; deferred \u2014 same reason as js-safe-016."
|
||||
},
|
||||
{
|
||||
"case_id": "ts-safe-017",
|
||||
"file": "typescript/safe/safe_strapi_db_query_chain.ts",
|
||||
"language": "typescript",
|
||||
"is_vulnerable": false,
|
||||
"vuln_class": "safe",
|
||||
"cwe": "CWE-89",
|
||||
"provenance": "real-repo-distilled",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "rule_match",
|
||||
"expected_rule_ids": [],
|
||||
"allowed_alternative_rule_ids": [],
|
||||
"forbidden_rule_ids": [
|
||||
"cfg-unguarded-sink",
|
||||
"taint-unsanitised-flow"
|
||||
],
|
||||
"expected_severity": "NONE",
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [],
|
||||
"expected_source_lines": [],
|
||||
"tags": [
|
||||
"real-repo-precision-2026-04-29",
|
||||
"strapi",
|
||||
"orm-chain"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "Strapi-style ORM accessor: `<obj>.db.query(MODEL_UID).<orm_method>(...)`; the `db.query` call's literal model UID + the ORM-method outer chain (findOne/findMany/create/update/delete/count) prove the chain is parameterised. Synthesised same-node Sanitizer(SQL_QUERY) suppresses cfg-unguarded-sink and taint-unsanitised-flow."
|
||||
},
|
||||
{
|
||||
"case_id": "ts-safe-018",
|
||||
"file": "typescript/safe/safe_indirect_validator.ts",
|
||||
"language": "typescript",
|
||||
"is_vulnerable": false,
|
||||
"vuln_class": "safe",
|
||||
"cwe": "N/A",
|
||||
"provenance": "synthetic",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "file_presence",
|
||||
"expected_rule_ids": [],
|
||||
"allowed_alternative_rule_ids": [],
|
||||
"forbidden_rule_ids": [
|
||||
"taint-unsanitised-flow",
|
||||
"cfg-unguarded-sink"
|
||||
],
|
||||
"expected_severity": null,
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [],
|
||||
"expected_source_lines": [],
|
||||
"tags": [
|
||||
"indirect-validator",
|
||||
"ssrf",
|
||||
"negative"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "Indirect-validator branch narrowing — `const err = validateUrlSsrf(target); if (err) throw …;` should suppress the downstream axios.get sink. Pinned by tests/lib::indirect_validator_narrowing_marks_arg_validated."
|
||||
},
|
||||
{
|
||||
"case_id": "ts-safe-019",
|
||||
"file": "typescript/safe/safe_helper_with_validator.ts",
|
||||
"language": "typescript",
|
||||
"is_vulnerable": false,
|
||||
"vuln_class": "safe",
|
||||
"cwe": "N/A",
|
||||
"provenance": "synthetic",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "file_presence",
|
||||
"expected_rule_ids": [],
|
||||
"allowed_alternative_rule_ids": [],
|
||||
"forbidden_rule_ids": [
|
||||
"taint-unsanitised-flow"
|
||||
],
|
||||
"expected_severity": null,
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [],
|
||||
"expected_source_lines": [],
|
||||
"tags": [
|
||||
"helper-summary",
|
||||
"ssrf",
|
||||
"negative"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "Helper-summary all_validated propagation — when a helper's body validates the param via `validateXxx`, the per-param probe's all_validated event should be skipped during summary extraction so callers don't refire the cross-fn SSRF. Pinned by tests/lib::helper_with_validator_does_not_propagate_to_caller_via_summary."
|
||||
},
|
||||
{
|
||||
"case_id": "py-auth-decorator-001",
|
||||
"file": "python/safe/safe_login_required_decorator.py",
|
||||
|
|
@ -12897,6 +13354,31 @@
|
|||
"disabled": false,
|
||||
"notes": "Happy-path `if (!data.error && Array.isArray(...))` and body-mentioning-err do not fire `cfg-error-fallthrough` (website/public/app/core/app.js)"
|
||||
},
|
||||
{
|
||||
"case_id": "js-safe-realrepo-006",
|
||||
"file": "javascript/safe/safe_localised_gherkin_regex.js",
|
||||
"language": "javascript",
|
||||
"is_vulnerable": false,
|
||||
"vuln_class": "safe",
|
||||
"cwe": "N/A",
|
||||
"provenance": "synthetic",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "rule_match",
|
||||
"expected_rule_ids": [],
|
||||
"allowed_alternative_rule_ids": [],
|
||||
"forbidden_rule_ids": [],
|
||||
"expected_severity": null,
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [],
|
||||
"expected_source_lines": [],
|
||||
"tags": [
|
||||
"panic-guard",
|
||||
"negative",
|
||||
"real-repo-precision-2026-04-29"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "Panic guard: CodeMirror Gherkin tokenizer ships a long localised regex inside a boolean sub-condition. Naive byte-slice truncation in CFG condition-text (`t[..MAX_CONDITION_TEXT_LEN]`) panicked when byte 256 landed inside a multi-byte UTF-8 character (Gurmukhi `ਖ`). Engine fix: src/utils/snippet.rs::truncate_at_char_boundary applied at three CFG sites + two symex display sites (gogs public/plugins/codemirror-5.17.0/mode/gherkin/gherkin.js:107)."
|
||||
},
|
||||
{
|
||||
"case_id": "go-safe-realrepo-001",
|
||||
"file": "go/safe/safe_error_log_only_function.go",
|
||||
|
|
@ -13126,6 +13608,33 @@
|
|||
"disabled": false,
|
||||
"notes": "`func (c *Cache) ...` with `c.foo()` / `c.Fs.Create(...)` intra-struct dispatches \u2014 Go method receivers must seed `non_sink_vars` so the verb-name fallback doesn't fire on bare-receiver internal calls. Closes the hugo cache/filecache.go cluster (~48 hits)."
|
||||
},
|
||||
{
|
||||
"case_id": "go-safe-realrepo-006",
|
||||
"file": "go/safe/safe_test_helper_fatal.go",
|
||||
"language": "go",
|
||||
"is_vulnerable": false,
|
||||
"vuln_class": "safe",
|
||||
"cwe": "N/A",
|
||||
"provenance": "synthetic",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "rule_match",
|
||||
"expected_rule_ids": [],
|
||||
"allowed_alternative_rule_ids": [],
|
||||
"forbidden_rule_ids": [
|
||||
"cfg-error-fallthrough"
|
||||
],
|
||||
"expected_severity": null,
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [],
|
||||
"expected_source_lines": [],
|
||||
"tags": [
|
||||
"cfg",
|
||||
"negative",
|
||||
"real-repo-precision-2026-04-29"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "`if err != nil { c.Fatalf(...) }` / `os.Exit` / `log.Fatalf` / `panic(err)` are documented terminators (Goexit-class). cfg-error-fallthrough must walk through them as terminating paths. Closes the minio test-file cluster (49+34+12+11+9+7+7 hits across xl-storage_test.go, erasure-healing_test.go, format-erasure_test.go, \u2026). Engine fix: src/cfg_analysis/error_handling.rs::call_never_returns."
|
||||
},
|
||||
{
|
||||
"case_id": "go-auth-realrepo-001",
|
||||
"file": "go/auth/vuln_repo_findbyid_no_auth.go",
|
||||
|
|
@ -13429,6 +13938,59 @@
|
|||
"disabled": false,
|
||||
"notes": "Regression guard: same TRPC handler shape as ts-auth-realrepo-004 but the SQL parameter is `input.targetUserId` (request body field), not `ctx.user.id`. The TRPC ctx self-actor exemption must apply ONLY to ctx.user.<id-like> subjects, never to other paths in the same param."
|
||||
},
|
||||
{
|
||||
"case_id": "ts-auth-realrepo-006",
|
||||
"file": "typescript/auth/safe_local_collection_receiver.ts",
|
||||
"language": "typescript",
|
||||
"is_vulnerable": false,
|
||||
"vuln_class": "safe",
|
||||
"cwe": "N/A",
|
||||
"provenance": "real-repo",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "rule_match",
|
||||
"expected_rule_ids": [],
|
||||
"allowed_alternative_rule_ids": [],
|
||||
"forbidden_rule_ids": [
|
||||
"js.auth.missing_ownership_check"
|
||||
],
|
||||
"expected_severity": null,
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [],
|
||||
"expected_source_lines": [],
|
||||
"tags": [
|
||||
"auth",
|
||||
"negative",
|
||||
"real-repo-precision-2026-04-29"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "Excalidraw `Map<K, V>` / `Set<T>` / `WeakMap` / `WeakSet` / `Array<T>` / `T[]` / `readonly T[]` receivers — direct annotation, same-file `type X = Map<...>` aliasing, and inline `new Map()` constructor. SSA `constructor_type` JS/TS arm + `cfg::params::ts_type_to_local_collection` + `cfg::dto::collect_type_alias_local_collections` route every shape through `TypeKind::LocalCollection` → `SinkClass::InMemoryLocal`, suppressing missing-ownership."
|
||||
},
|
||||
{
|
||||
"case_id": "ts-auth-realrepo-007",
|
||||
"file": "typescript/auth/vuln_local_collection_does_not_blanket_suppress.ts",
|
||||
"language": "typescript",
|
||||
"is_vulnerable": true,
|
||||
"vuln_class": "auth",
|
||||
"cwe": "CWE-639",
|
||||
"provenance": "real-repo",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "rule_match",
|
||||
"expected_rule_ids": [
|
||||
"js.auth.missing_ownership_check"
|
||||
],
|
||||
"allowed_alternative_rule_ids": [],
|
||||
"forbidden_rule_ids": [],
|
||||
"expected_severity": "MEDIUM",
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [],
|
||||
"expected_source_lines": [],
|
||||
"tags": [
|
||||
"auth",
|
||||
"real-repo-precision-2026-04-29"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "Vulnerable counterpart to ts-auth-realrepo-006: `prisma.user.findUnique` / `prisma.user.update` with attacker-supplied id and no preceding auth check. Receiver is NOT a tracked Map / Set / Array, so the LocalCollection fix must NOT suppress this — proves the type-aware suppression doesn't blanket-cover real DB clients that share method names (`get`, `find`, `update`) with JS containers."
|
||||
},
|
||||
{
|
||||
"case_id": "rs-auth-realrepo-009",
|
||||
"file": "rust/auth/safe_local_user_view_extractor.rs",
|
||||
|
|
@ -13484,6 +14046,89 @@
|
|||
"disabled": false,
|
||||
"notes": "Negative counterpart for the LocalUserView recogniser: handler takes the typed extractor but mutates a row by `req.target_user_id` (foreign id) without any ownership check \u2014 must still flag. Guards against an over-broad recogniser that would treat any handler with a self-actor extractor as authorised by default."
|
||||
},
|
||||
{
|
||||
"case_id": "rs-auth-realrepo-011",
|
||||
"file": "rust/auth/safe_param_type_segment_idents.rs",
|
||||
"language": "rust",
|
||||
"is_vulnerable": false,
|
||||
"vuln_class": "safe",
|
||||
"cwe": "N/A",
|
||||
"provenance": "synthetic",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "rule_match",
|
||||
"expected_rule_ids": [],
|
||||
"allowed_alternative_rule_ids": [],
|
||||
"forbidden_rule_ids": [
|
||||
"rs.auth.missing_ownership_check"
|
||||
],
|
||||
"expected_severity": null,
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [],
|
||||
"expected_source_lines": [],
|
||||
"tags": [
|
||||
"auth",
|
||||
"negative",
|
||||
"real-repo-precision-2026-04-29",
|
||||
"noise-budget-zero"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "Rust `parameter` arm in `collect_param_names` keeps type-segment idents (`std`, `path`, `Path`) out of `unit.params` so `dst: &std::path::Path` doesn't gate `unit_has_user_input_evidence` open via the framework-name allow-list (`path`). Surfaced from meilisearch/index-scheduler/scheduler/process_snapshot_creation.rs::remove_tasks where `dst: &std::path::Path` made every `db.delete(task.uid)` fire missing-ownership-check."
|
||||
},
|
||||
{
|
||||
"case_id": "rs-auth-realrepo-012",
|
||||
"file": "rust/auth/safe_local_collection_param_types.rs",
|
||||
"language": "rust",
|
||||
"is_vulnerable": false,
|
||||
"vuln_class": "safe",
|
||||
"cwe": "N/A",
|
||||
"provenance": "synthetic",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "rule_match",
|
||||
"expected_rule_ids": [],
|
||||
"allowed_alternative_rule_ids": [],
|
||||
"forbidden_rule_ids": [
|
||||
"rs.auth.missing_ownership_check"
|
||||
],
|
||||
"expected_severity": null,
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [],
|
||||
"expected_source_lines": [],
|
||||
"tags": [
|
||||
"auth",
|
||||
"negative",
|
||||
"real-repo-precision-2026-04-29",
|
||||
"noise-budget-zero"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "Rust function-parameter type annotations naming an in-memory container (`RoaringBitmap`, `HashMap<K,V>`, `HashSet<T>`, `BTreeSet<T>`) classify the receiver as `TypeKind::LocalCollection` \u2192 `SinkClass::InMemoryLocal`, suppressing the verb-name dispatch's DbMutation classification. Surfaced from meilisearch/index-scheduler/scheduler/enterprise_edition/network.rs::balance_shards (`unsharded: RoaringBitmap`). Mirrors the JS/TS `ts_type_to_local_collection` fix from 2026-04-29."
|
||||
},
|
||||
{
|
||||
"case_id": "rs-auth-realrepo-013",
|
||||
"file": "rust/auth/unsafe_handler_local_collection_does_not_blanket_suppress.rs",
|
||||
"language": "rust",
|
||||
"is_vulnerable": true,
|
||||
"vuln_class": "auth",
|
||||
"cwe": "CWE-285",
|
||||
"provenance": "synthetic",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "rule_match",
|
||||
"expected_rule_ids": [
|
||||
"rs.auth.missing_ownership_check"
|
||||
],
|
||||
"allowed_alternative_rule_ids": [],
|
||||
"forbidden_rule_ids": [],
|
||||
"expected_severity": "High",
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [],
|
||||
"expected_source_lines": [],
|
||||
"tags": [
|
||||
"auth",
|
||||
"positive",
|
||||
"real-repo-precision-2026-04-29"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "Negative-counterpart guard for the LocalCollection / parameter-name fixes: handler takes a HashMap typed param (in-memory bookkeeping) but ALSO calls `db.update_owner(req.target_user_id, ...)` (real DbMutation). The cache mutation must not blanket-suppress the persistent-store mutation \u2014 the rule must still fire on `db.update_owner`."
|
||||
},
|
||||
{
|
||||
"case_id": "ruby-safe-ar-query-shapes-001",
|
||||
"file": "ruby/safe/safe_active_record_query_shapes.rb",
|
||||
|
|
@ -13715,6 +14360,120 @@
|
|||
],
|
||||
"disabled": false,
|
||||
"notes": "Concatenated SQL passed to em.createQuery(...) \u2014 receiver-chain walk sees binary_expression at arg 0, refuses to synthesise sanitizer, structural sink fires. Regression guard for the JPA parameterised-execute fix."
|
||||
},
|
||||
{
|
||||
"case_id": "py-auth-realrepo-005",
|
||||
"file": "python/safe/safe_fastapi_route_dependencies_auth.py",
|
||||
"language": "python",
|
||||
"is_vulnerable": false,
|
||||
"vuln_class": "safe",
|
||||
"cwe": "N/A",
|
||||
"provenance": "real-repo",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "rule_match",
|
||||
"expected_rule_ids": [],
|
||||
"allowed_alternative_rule_ids": [],
|
||||
"forbidden_rule_ids": [
|
||||
"py.auth.missing_ownership_check"
|
||||
],
|
||||
"expected_severity": null,
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [],
|
||||
"expected_source_lines": [],
|
||||
"tags": [
|
||||
"auth",
|
||||
"fastapi",
|
||||
"real-repo-precision-2026-04-29"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "Distilled from airflow api_fastapi/core_api/routes/public/connections.py: FastAPI route decorator carries `dependencies=[Depends(requires_access_connection(method=\"DELETE\"))]`; the Flask extractor's new `dependencies=` kwarg walker plus inject_middleware_auth subject synthesis recognises the auth gate."
|
||||
},
|
||||
{
|
||||
"case_id": "py-auth-realrepo-007",
|
||||
"file": "python/safe/safe_fastapi_route_level_row_fetch.py",
|
||||
"language": "python",
|
||||
"is_vulnerable": false,
|
||||
"vuln_class": "safe",
|
||||
"cwe": "N/A",
|
||||
"provenance": "real-repo",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "rule_match",
|
||||
"expected_rule_ids": [],
|
||||
"allowed_alternative_rule_ids": [],
|
||||
"forbidden_rule_ids": [
|
||||
"py.auth.missing_ownership_check"
|
||||
],
|
||||
"expected_severity": null,
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [],
|
||||
"expected_source_lines": [],
|
||||
"tags": [
|
||||
"auth",
|
||||
"fastapi",
|
||||
"real-repo-precision-2026-04-29"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "Distilled from airflow api_fastapi/core_api/routes/public/dag_run.py: FastAPI route decorator carries `dependencies=[Depends(requires_access_dag(method=\"GET\"))]`; the route-level guard must cover not only direct path-param subjects (filter_by(dag_id=dag_id)) but also row-variable receivers (`dag.cleanup_runs(...)` after `dag = session.scalar(select(DagModel)...)`). Pinned by the `is_route_level` short-circuit in `auth_check_covers_subject` plus the kind-aware `function_params_route_handler` that includes id-like Python typed params (`dag_id: str`) in `unit.params`."
|
||||
},
|
||||
{
|
||||
"case_id": "py-auth-realrepo-006",
|
||||
"file": "python/safe/safe_pytest_sqlalchemy_session.py",
|
||||
"language": "python",
|
||||
"is_vulnerable": false,
|
||||
"vuln_class": "safe",
|
||||
"cwe": "N/A",
|
||||
"provenance": "real-repo",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "rule_match",
|
||||
"expected_rule_ids": [],
|
||||
"allowed_alternative_rule_ids": [],
|
||||
"forbidden_rule_ids": [
|
||||
"py.auth.missing_ownership_check",
|
||||
"py.auth.token_override_without_validation"
|
||||
],
|
||||
"expected_severity": null,
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [],
|
||||
"expected_source_lines": [],
|
||||
"tags": [
|
||||
"auth",
|
||||
"pytest",
|
||||
"real-repo-precision-2026-04-29"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "Distilled from airflow tests/unit/models/test_backfill.py: pytest test methods with SQLAlchemy `session` fixture and `session.commit()` calls. Bare `session.<sqlalchemy_verb>` no longer counts as auth Session evidence; only `session.<identity_field>` (user/user_id/...) does."
|
||||
},
|
||||
{
|
||||
"case_id": "py-auth-realrepo-007",
|
||||
"file": "python/auth/vuln_fastapi_route_no_dependencies.py",
|
||||
"language": "python",
|
||||
"is_vulnerable": true,
|
||||
"vuln_class": "auth",
|
||||
"cwe": "CWE-862",
|
||||
"provenance": "real-repo",
|
||||
"equivalence_tier": "exact",
|
||||
"match_mode": "rule_match",
|
||||
"expected_rule_ids": [
|
||||
"py.auth.missing_ownership_check"
|
||||
],
|
||||
"allowed_alternative_rule_ids": [],
|
||||
"forbidden_rule_ids": [],
|
||||
"expected_severity": "HIGH",
|
||||
"expected_category": "Security",
|
||||
"expected_sink_lines": [
|
||||
[
|
||||
15,
|
||||
15
|
||||
]
|
||||
],
|
||||
"expected_source_lines": [],
|
||||
"tags": [
|
||||
"auth",
|
||||
"fastapi",
|
||||
"real-repo-precision-2026-04-29"
|
||||
],
|
||||
"disabled": false,
|
||||
"notes": "Vulnerable counterpart to py-auth-realrepo-005: same FastAPI route shape but no `dependencies=[Depends(...)]` keyword arg. Regression guard: the dependency-injection recogniser must not blanket-suppress every FastAPI route."
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"benchmark_version": "1.0",
|
||||
"timestamp": "2026-04-29T05:42:03Z",
|
||||
"timestamp": "2026-04-29T21:50:34Z",
|
||||
"scanner_version": "0.5.0",
|
||||
"scanner_config": {
|
||||
"analysis_mode": "Full",
|
||||
|
|
@ -9,9 +9,9 @@
|
|||
"state_analysis_enabled": true,
|
||||
"worker_threads": 1
|
||||
},
|
||||
"ground_truth_hash": "sha256:3e034f1fc5c7bb7838f1fb2c63de5ca5a36aacfdf5d66cf25f30bff99f25f1cf",
|
||||
"corpus_size": 433,
|
||||
"cases_run": 432,
|
||||
"ground_truth_hash": "sha256:5b391d654f88673e5a200af875d513cf83812af747739395e8315768b8983ce3",
|
||||
"corpus_size": 458,
|
||||
"cases_run": 457,
|
||||
"cases_skipped": 1,
|
||||
"outcomes": [
|
||||
{
|
||||
|
|
@ -1306,6 +1306,74 @@
|
|||
"security_finding_count": 2,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "cve-java-2022-1471-patched",
|
||||
"file": "cve_corpus/java/CVE-2022-1471/patched.java",
|
||||
"language": "java",
|
||||
"vuln_class": "safe",
|
||||
"is_vulnerable": false,
|
||||
"outcome_file_level": "TN",
|
||||
"outcome_rule_level": "TN",
|
||||
"outcome_location_level": null,
|
||||
"matched_rule_ids": [],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [],
|
||||
"security_finding_count": 0,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "cve-java-2022-1471-vulnerable",
|
||||
"file": "cve_corpus/java/CVE-2022-1471/vulnerable.java",
|
||||
"language": "java",
|
||||
"vuln_class": "deserialization",
|
||||
"is_vulnerable": true,
|
||||
"outcome_file_level": "TP",
|
||||
"outcome_rule_level": "TP",
|
||||
"outcome_location_level": "TP",
|
||||
"matched_rule_ids": [
|
||||
"java.deser.snakeyaml_unsafe_constructor"
|
||||
],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [
|
||||
"java.deser.snakeyaml_unsafe_constructor"
|
||||
],
|
||||
"security_finding_count": 1,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "cve-java-2022-42889-patched",
|
||||
"file": "cve_corpus/java/CVE-2022-42889/patched.java",
|
||||
"language": "java",
|
||||
"vuln_class": "safe",
|
||||
"is_vulnerable": false,
|
||||
"outcome_file_level": "TN",
|
||||
"outcome_rule_level": "TN",
|
||||
"outcome_location_level": null,
|
||||
"matched_rule_ids": [],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [],
|
||||
"security_finding_count": 0,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "cve-java-2022-42889-vulnerable",
|
||||
"file": "cve_corpus/java/CVE-2022-42889/vulnerable.java",
|
||||
"language": "java",
|
||||
"vuln_class": "code_exec",
|
||||
"is_vulnerable": true,
|
||||
"outcome_file_level": "TP",
|
||||
"outcome_rule_level": "TP",
|
||||
"outcome_location_level": "TP",
|
||||
"matched_rule_ids": [
|
||||
"java.code_exec.text4shell_interpolator"
|
||||
],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [
|
||||
"java.code_exec.text4shell_interpolator"
|
||||
],
|
||||
"security_finding_count": 1,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "cve-js-2019-14939-patched",
|
||||
"file": "cve_corpus/javascript/CVE-2019-14939/patched.js",
|
||||
|
|
@ -1520,6 +1588,76 @@
|
|||
"security_finding_count": 2,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "cve-py-2025-69662-patched",
|
||||
"file": "cve_corpus/python/CVE-2025-69662/patched.py",
|
||||
"language": "python",
|
||||
"vuln_class": "safe",
|
||||
"is_vulnerable": false,
|
||||
"outcome_file_level": "TN",
|
||||
"outcome_rule_level": "TN",
|
||||
"outcome_location_level": null,
|
||||
"matched_rule_ids": [],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [],
|
||||
"security_finding_count": 0,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "cve-py-2025-69662-vulnerable",
|
||||
"file": "cve_corpus/python/CVE-2025-69662/vulnerable.py",
|
||||
"language": "python",
|
||||
"vuln_class": "sql_injection",
|
||||
"is_vulnerable": true,
|
||||
"outcome_file_level": "TP",
|
||||
"outcome_rule_level": "TP",
|
||||
"outcome_location_level": "TP",
|
||||
"matched_rule_ids": [
|
||||
"taint-unsanitised-flow (source 35:12)",
|
||||
"py.sqli.text_format"
|
||||
],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [
|
||||
"taint-unsanitised-flow (source 35:12)",
|
||||
"py.sqli.text_format"
|
||||
],
|
||||
"security_finding_count": 2,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "cve-py-2026-33626-patched",
|
||||
"file": "cve_corpus/python/CVE-2026-33626/patched.py",
|
||||
"language": "python",
|
||||
"vuln_class": "safe",
|
||||
"is_vulnerable": false,
|
||||
"outcome_file_level": "TN",
|
||||
"outcome_rule_level": "TN",
|
||||
"outcome_location_level": null,
|
||||
"matched_rule_ids": [],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [],
|
||||
"security_finding_count": 0,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "cve-py-2026-33626-vulnerable",
|
||||
"file": "cve_corpus/python/CVE-2026-33626/vulnerable.py",
|
||||
"language": "python",
|
||||
"vuln_class": "ssrf",
|
||||
"is_vulnerable": true,
|
||||
"outcome_file_level": "TP",
|
||||
"outcome_rule_level": "TP",
|
||||
"outcome_location_level": "TP",
|
||||
"matched_rule_ids": [
|
||||
"taint-unsanitised-flow (source 43:12)"
|
||||
],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [
|
||||
"taint-unsanitised-flow (source 43:12)"
|
||||
],
|
||||
"security_finding_count": 1,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "cve-rb-2013-0156-patched",
|
||||
"file": "cve_corpus/ruby/CVE-2013-0156/patched.rb",
|
||||
|
|
@ -1737,6 +1875,40 @@
|
|||
"security_finding_count": 1,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "cve-ts-ghsa-4x48-cgf9-q33f-patched",
|
||||
"file": "cve_corpus/typescript/GHSA-4x48-cgf9-q33f/patched.ts",
|
||||
"language": "typescript",
|
||||
"vuln_class": "safe",
|
||||
"is_vulnerable": false,
|
||||
"outcome_file_level": "TN",
|
||||
"outcome_rule_level": "TN",
|
||||
"outcome_location_level": null,
|
||||
"matched_rule_ids": [],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [],
|
||||
"security_finding_count": 0,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "cve-ts-ghsa-4x48-cgf9-q33f-vulnerable",
|
||||
"file": "cve_corpus/typescript/GHSA-4x48-cgf9-q33f/vulnerable.ts",
|
||||
"language": "typescript",
|
||||
"vuln_class": "ssrf",
|
||||
"is_vulnerable": true,
|
||||
"outcome_file_level": "TP",
|
||||
"outcome_rule_level": "TP",
|
||||
"outcome_location_level": "TP",
|
||||
"matched_rule_ids": [
|
||||
"taint-unsanitised-flow (source 50:5)"
|
||||
],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [
|
||||
"taint-unsanitised-flow (source 50:5)"
|
||||
],
|
||||
"security_finding_count": 1,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "go-auth-realrepo-001",
|
||||
"file": "go/auth/vuln_repo_findbyid_no_auth.go",
|
||||
|
|
@ -2371,6 +2543,21 @@
|
|||
"security_finding_count": 0,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "go-safe-realrepo-006",
|
||||
"file": "go/safe/safe_test_helper_fatal.go",
|
||||
"language": "go",
|
||||
"vuln_class": "safe",
|
||||
"is_vulnerable": false,
|
||||
"outcome_file_level": "TN",
|
||||
"outcome_rule_level": "TN",
|
||||
"outcome_location_level": null,
|
||||
"matched_rule_ids": [],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [],
|
||||
"security_finding_count": 0,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "go-sqli-001",
|
||||
"file": "go/sqli/sqli_concat.go",
|
||||
|
|
@ -3590,6 +3777,21 @@
|
|||
"security_finding_count": 0,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "js-safe-realrepo-006",
|
||||
"file": "javascript/safe/safe_localised_gherkin_regex.js",
|
||||
"language": "javascript",
|
||||
"vuln_class": "safe",
|
||||
"is_vulnerable": false,
|
||||
"outcome_file_level": "TN",
|
||||
"outcome_rule_level": "TN",
|
||||
"outcome_location_level": null,
|
||||
"matched_rule_ids": [],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [],
|
||||
"security_finding_count": 0,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "js-sqli-001",
|
||||
"file": "javascript/sqli/sqli_concat.js",
|
||||
|
|
@ -4497,6 +4699,70 @@
|
|||
"security_finding_count": 1,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "py-auth-realrepo-005",
|
||||
"file": "python/safe/safe_fastapi_route_dependencies_auth.py",
|
||||
"language": "python",
|
||||
"vuln_class": "safe",
|
||||
"is_vulnerable": false,
|
||||
"outcome_file_level": "TN",
|
||||
"outcome_rule_level": "TN",
|
||||
"outcome_location_level": null,
|
||||
"matched_rule_ids": [],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [],
|
||||
"security_finding_count": 0,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "py-auth-realrepo-006",
|
||||
"file": "python/safe/safe_pytest_sqlalchemy_session.py",
|
||||
"language": "python",
|
||||
"vuln_class": "safe",
|
||||
"is_vulnerable": false,
|
||||
"outcome_file_level": "TN",
|
||||
"outcome_rule_level": "TN",
|
||||
"outcome_location_level": null,
|
||||
"matched_rule_ids": [],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [],
|
||||
"security_finding_count": 0,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "py-auth-realrepo-007",
|
||||
"file": "python/safe/safe_fastapi_route_level_row_fetch.py",
|
||||
"language": "python",
|
||||
"vuln_class": "safe",
|
||||
"is_vulnerable": false,
|
||||
"outcome_file_level": "TN",
|
||||
"outcome_rule_level": "TN",
|
||||
"outcome_location_level": null,
|
||||
"matched_rule_ids": [],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [],
|
||||
"security_finding_count": 0,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "py-auth-realrepo-007",
|
||||
"file": "python/auth/vuln_fastapi_route_no_dependencies.py",
|
||||
"language": "python",
|
||||
"vuln_class": "auth",
|
||||
"is_vulnerable": true,
|
||||
"outcome_file_level": "TP",
|
||||
"outcome_rule_level": "TP",
|
||||
"outcome_location_level": "TP",
|
||||
"matched_rule_ids": [
|
||||
"py.auth.missing_ownership_check"
|
||||
],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [
|
||||
"py.auth.missing_ownership_check"
|
||||
],
|
||||
"security_finding_count": 1,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "py-cmdi-001",
|
||||
"file": "python/cmdi/cmdi_direct.py",
|
||||
|
|
@ -5630,6 +5896,55 @@
|
|||
"security_finding_count": 1,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "rs-auth-realrepo-011",
|
||||
"file": "rust/auth/safe_param_type_segment_idents.rs",
|
||||
"language": "rust",
|
||||
"vuln_class": "safe",
|
||||
"is_vulnerable": false,
|
||||
"outcome_file_level": "TN",
|
||||
"outcome_rule_level": "TN",
|
||||
"outcome_location_level": null,
|
||||
"matched_rule_ids": [],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [],
|
||||
"security_finding_count": 0,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "rs-auth-realrepo-012",
|
||||
"file": "rust/auth/safe_local_collection_param_types.rs",
|
||||
"language": "rust",
|
||||
"vuln_class": "safe",
|
||||
"is_vulnerable": false,
|
||||
"outcome_file_level": "TN",
|
||||
"outcome_rule_level": "TN",
|
||||
"outcome_location_level": null,
|
||||
"matched_rule_ids": [],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [],
|
||||
"security_finding_count": 0,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "rs-auth-realrepo-013",
|
||||
"file": "rust/auth/unsafe_handler_local_collection_does_not_blanket_suppress.rs",
|
||||
"language": "rust",
|
||||
"vuln_class": "auth",
|
||||
"is_vulnerable": true,
|
||||
"outcome_file_level": "TP",
|
||||
"outcome_rule_level": "TP",
|
||||
"outcome_location_level": null,
|
||||
"matched_rule_ids": [
|
||||
"rs.auth.missing_ownership_check"
|
||||
],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [
|
||||
"rs.auth.missing_ownership_check"
|
||||
],
|
||||
"security_finding_count": 1,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "rs-auth-typed-extractors-001",
|
||||
"file": "rust/auth/safe_typed_path_int_extractor.rs",
|
||||
|
|
@ -7043,6 +7358,42 @@
|
|||
"security_finding_count": 1,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "ts-auth-realrepo-006",
|
||||
"file": "typescript/auth/safe_local_collection_receiver.ts",
|
||||
"language": "typescript",
|
||||
"vuln_class": "safe",
|
||||
"is_vulnerable": false,
|
||||
"outcome_file_level": "TN",
|
||||
"outcome_rule_level": "TN",
|
||||
"outcome_location_level": null,
|
||||
"matched_rule_ids": [],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [],
|
||||
"security_finding_count": 0,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "ts-auth-realrepo-007",
|
||||
"file": "typescript/auth/vuln_local_collection_does_not_blanket_suppress.ts",
|
||||
"language": "typescript",
|
||||
"vuln_class": "auth",
|
||||
"is_vulnerable": true,
|
||||
"outcome_file_level": "TP",
|
||||
"outcome_rule_level": "TP",
|
||||
"outcome_location_level": null,
|
||||
"matched_rule_ids": [
|
||||
"js.auth.missing_ownership_check",
|
||||
"js.auth.missing_ownership_check"
|
||||
],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [
|
||||
"js.auth.missing_ownership_check",
|
||||
"js.auth.missing_ownership_check"
|
||||
],
|
||||
"security_finding_count": 2,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "ts-cmdi-001",
|
||||
"file": "typescript/cmdi/cmdi_exec_template.ts",
|
||||
|
|
@ -7493,6 +7844,53 @@
|
|||
"security_finding_count": 0,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "ts-safe-017",
|
||||
"file": "typescript/safe/safe_strapi_db_query_chain.ts",
|
||||
"language": "typescript",
|
||||
"vuln_class": "safe",
|
||||
"is_vulnerable": false,
|
||||
"outcome_file_level": "TN",
|
||||
"outcome_rule_level": "TN",
|
||||
"outcome_location_level": null,
|
||||
"matched_rule_ids": [],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [
|
||||
"ts.quality.any_annotation"
|
||||
],
|
||||
"security_finding_count": 0,
|
||||
"non_security_finding_count": 1
|
||||
},
|
||||
{
|
||||
"case_id": "ts-safe-018",
|
||||
"file": "typescript/safe/safe_indirect_validator.ts",
|
||||
"language": "typescript",
|
||||
"vuln_class": "safe",
|
||||
"is_vulnerable": false,
|
||||
"outcome_file_level": "TN",
|
||||
"outcome_rule_level": "TN",
|
||||
"outcome_location_level": null,
|
||||
"matched_rule_ids": [],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [],
|
||||
"security_finding_count": 0,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "ts-safe-019",
|
||||
"file": "typescript/safe/safe_helper_with_validator.ts",
|
||||
"language": "typescript",
|
||||
"vuln_class": "safe",
|
||||
"is_vulnerable": false,
|
||||
"outcome_file_level": "TN",
|
||||
"outcome_rule_level": "TN",
|
||||
"outcome_location_level": null,
|
||||
"matched_rule_ids": [],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [],
|
||||
"security_finding_count": 0,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "ts-secrets-001",
|
||||
"file": "typescript/secrets/fallback_secret.ts",
|
||||
|
|
@ -7552,6 +7950,30 @@
|
|||
"security_finding_count": 2,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "ts-sqli-003",
|
||||
"file": "typescript/sqli/sqli_db_query_concat.ts",
|
||||
"language": "typescript",
|
||||
"vuln_class": "sqli",
|
||||
"is_vulnerable": true,
|
||||
"outcome_file_level": "TP",
|
||||
"outcome_rule_level": "TP",
|
||||
"outcome_location_level": null,
|
||||
"matched_rule_ids": [
|
||||
"taint-unsanitised-flow (source 15:5)",
|
||||
"taint-unsanitised-flow (source 21:5)"
|
||||
],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [
|
||||
"ts.quality.any_annotation",
|
||||
"ts.quality.any_annotation",
|
||||
"taint-unsanitised-flow (source 15:5)",
|
||||
"taint-unsanitised-flow (source 21:5)",
|
||||
"ts.quality.any_annotation"
|
||||
],
|
||||
"security_finding_count": 2,
|
||||
"non_security_finding_count": 3
|
||||
},
|
||||
{
|
||||
"case_id": "ts-ssrf-001",
|
||||
"file": "typescript/ssrf/ssrf_axios_user_url.ts",
|
||||
|
|
@ -7771,22 +8193,22 @@
|
|||
}
|
||||
],
|
||||
"aggregate_file_level": {
|
||||
"tp": 216,
|
||||
"tp": 225,
|
||||
"fp": 1,
|
||||
"fn_": 0,
|
||||
"tn": 215,
|
||||
"precision": 0.9953917050691244,
|
||||
"tn": 231,
|
||||
"precision": 0.995575221238938,
|
||||
"recall": 1.0,
|
||||
"f1": 0.997690531177829
|
||||
"f1": 0.9977827050997783
|
||||
},
|
||||
"aggregate_rule_level": {
|
||||
"tp": 216,
|
||||
"tp": 225,
|
||||
"fp": 1,
|
||||
"fn_": 0,
|
||||
"tn": 215,
|
||||
"precision": 0.9953917050691244,
|
||||
"tn": 231,
|
||||
"precision": 0.995575221238938,
|
||||
"recall": 1.0,
|
||||
"f1": 0.997690531177829
|
||||
"f1": 0.9977827050997783
|
||||
},
|
||||
"by_language": {
|
||||
"c": {
|
||||
|
|
@ -7811,16 +8233,16 @@
|
|||
"tp": 25,
|
||||
"fp": 1,
|
||||
"fn_": 0,
|
||||
"tn": 27,
|
||||
"tn": 28,
|
||||
"precision": 0.9615384615384616,
|
||||
"recall": 1.0,
|
||||
"f1": 0.9803921568627451
|
||||
},
|
||||
"java": {
|
||||
"tp": 17,
|
||||
"tp": 19,
|
||||
"fp": 0,
|
||||
"fn_": 0,
|
||||
"tn": 18,
|
||||
"tn": 20,
|
||||
"precision": 1.0,
|
||||
"recall": 1.0,
|
||||
"f1": 1.0
|
||||
|
|
@ -7829,7 +8251,7 @@
|
|||
"tp": 19,
|
||||
"fp": 0,
|
||||
"fn_": 0,
|
||||
"tn": 23,
|
||||
"tn": 24,
|
||||
"precision": 1.0,
|
||||
"recall": 1.0,
|
||||
"f1": 1.0
|
||||
|
|
@ -7844,10 +8266,10 @@
|
|||
"f1": 1.0
|
||||
},
|
||||
"python": {
|
||||
"tp": 23,
|
||||
"tp": 26,
|
||||
"fp": 0,
|
||||
"fn_": 0,
|
||||
"tn": 23,
|
||||
"tn": 28,
|
||||
"precision": 1.0,
|
||||
"recall": 1.0,
|
||||
"f1": 1.0
|
||||
|
|
@ -7862,19 +8284,19 @@
|
|||
"f1": 1.0
|
||||
},
|
||||
"rust": {
|
||||
"tp": 33,
|
||||
"tp": 34,
|
||||
"fp": 0,
|
||||
"fn_": 0,
|
||||
"tn": 37,
|
||||
"tn": 39,
|
||||
"precision": 1.0,
|
||||
"recall": 1.0,
|
||||
"f1": 1.0
|
||||
},
|
||||
"typescript": {
|
||||
"tp": 29,
|
||||
"tp": 32,
|
||||
"fp": 0,
|
||||
"fn_": 0,
|
||||
"tn": 18,
|
||||
"tn": 23,
|
||||
"precision": 1.0,
|
||||
"recall": 1.0,
|
||||
"f1": 1.0
|
||||
|
|
@ -7882,7 +8304,7 @@
|
|||
},
|
||||
"by_vuln_class": {
|
||||
"auth": {
|
||||
"tp": 13,
|
||||
"tp": 16,
|
||||
"fp": 0,
|
||||
"fn_": 0,
|
||||
"tn": 0,
|
||||
|
|
@ -7909,7 +8331,7 @@
|
|||
"f1": 1.0
|
||||
},
|
||||
"code_exec": {
|
||||
"tp": 2,
|
||||
"tp": 3,
|
||||
"fp": 0,
|
||||
"fn_": 0,
|
||||
"tn": 0,
|
||||
|
|
@ -7945,7 +8367,7 @@
|
|||
"f1": 1.0
|
||||
},
|
||||
"deserialization": {
|
||||
"tp": 4,
|
||||
"tp": 5,
|
||||
"fp": 0,
|
||||
"fn_": 0,
|
||||
"tn": 0,
|
||||
|
|
@ -8002,7 +8424,7 @@
|
|||
"tp": 0,
|
||||
"fp": 1,
|
||||
"fn_": 0,
|
||||
"tn": 215,
|
||||
"tn": 231,
|
||||
"precision": 0.0,
|
||||
"recall": 1.0,
|
||||
"f1": 0.0
|
||||
|
|
@ -8016,8 +8438,17 @@
|
|||
"recall": 1.0,
|
||||
"f1": 1.0
|
||||
},
|
||||
"sql_injection": {
|
||||
"tp": 1,
|
||||
"fp": 0,
|
||||
"fn_": 0,
|
||||
"tn": 0,
|
||||
"precision": 1.0,
|
||||
"recall": 1.0,
|
||||
"f1": 1.0
|
||||
},
|
||||
"sqli": {
|
||||
"tp": 29,
|
||||
"tp": 30,
|
||||
"fp": 0,
|
||||
"fn_": 0,
|
||||
"tn": 0,
|
||||
|
|
@ -8026,7 +8457,7 @@
|
|||
"f1": 1.0
|
||||
},
|
||||
"ssrf": {
|
||||
"tp": 26,
|
||||
"tp": 28,
|
||||
"fp": 0,
|
||||
"fn_": 0,
|
||||
"tn": 0,
|
||||
|
|
@ -8046,31 +8477,31 @@
|
|||
},
|
||||
"by_confidence": {
|
||||
">=High": {
|
||||
"tp": 90,
|
||||
"fp": 90,
|
||||
"fn_": 126,
|
||||
"tn": 126,
|
||||
"precision": 0.5,
|
||||
"recall": 0.4166666666666667,
|
||||
"f1": 0.45454545454545453
|
||||
"tp": 79,
|
||||
"fp": 104,
|
||||
"fn_": 146,
|
||||
"tn": 128,
|
||||
"precision": 0.43169398907103823,
|
||||
"recall": 0.3511111111111111,
|
||||
"f1": 0.3872549019607843
|
||||
},
|
||||
">=Low": {
|
||||
"tp": 94,
|
||||
"fp": 102,
|
||||
"fn_": 122,
|
||||
"tn": 114,
|
||||
"precision": 0.47959183673469385,
|
||||
"recall": 0.4351851851851852,
|
||||
"f1": 0.4563106796116505
|
||||
"tp": 81,
|
||||
"fp": 116,
|
||||
"fn_": 144,
|
||||
"tn": 116,
|
||||
"precision": 0.41116751269035534,
|
||||
"recall": 0.36,
|
||||
"f1": 0.3838862559241706
|
||||
},
|
||||
">=Medium": {
|
||||
"tp": 94,
|
||||
"fp": 102,
|
||||
"fn_": 122,
|
||||
"tn": 114,
|
||||
"precision": 0.47959183673469385,
|
||||
"recall": 0.4351851851851852,
|
||||
"f1": 0.4563106796116505
|
||||
"tp": 81,
|
||||
"fp": 116,
|
||||
"fn_": 144,
|
||||
"tn": 116,
|
||||
"precision": 0.41116751269035534,
|
||||
"recall": 0.36,
|
||||
"f1": 0.3838862559241706
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -191,7 +191,7 @@ struct BenchmarkResults {
|
|||
// ── Scanning ─────────────────────────────────────────────────────────
|
||||
|
||||
fn scan_corpus_file(corpus_root: &Path, relative_path: &str) -> Vec<Diag> {
|
||||
// `cve_corpus/*` cases live in a sibling of `corpus/` — see
|
||||
// `cve_corpus/*` cases live in a sibling of `corpus/`, see
|
||||
// `tests/benchmark/cve_corpus/`.
|
||||
let source = if relative_path.starts_with("cve_corpus/") {
|
||||
corpus_root
|
||||
|
|
@ -679,7 +679,7 @@ fn benchmark_evaluation() {
|
|||
// on this corpus, so 5pp is generous enough to absorb honest
|
||||
// FP↔TN trades while still catching a real regression in a
|
||||
// vulnerability class. When you land a durable, measurable
|
||||
// improvement, tighten these floors — do not relax them to paper
|
||||
// improvement, tighten these floors, do not relax them to paper
|
||||
// over a regression.
|
||||
let rule = &results.aggregate_rule_level;
|
||||
assert!(
|
||||
|
|
@ -790,7 +790,7 @@ fn score_rule_level_with_diags(
|
|||
|
||||
fn sha256_hex(data: &[u8]) -> String {
|
||||
use std::io::Write;
|
||||
// Simple SHA-256 via command — avoids adding a crypto dependency.
|
||||
// Simple SHA-256 via command, avoids adding a crypto dependency.
|
||||
let mut child = std::process::Command::new("shasum")
|
||||
.args(["-a", "256"])
|
||||
.stdin(std::process::Stdio::piped())
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
//! Nyx's surface is a `clap` parser plus a handful of downstream validators
|
||||
//! (`SeverityFilter::parse`, `Severity::from_str`, `Confidence::from_str`,
|
||||
//! `apply_profile`). These tests lock in the user-visible contract that
|
||||
//! bad input exits non-zero with a message that names the offending flag —
|
||||
//! bad input exits non-zero with a message that names the offending flag;
|
||||
//! a scanner that silently accepts a typo'd severity and returns zero
|
||||
//! findings is a footgun in CI.
|
||||
//!
|
||||
|
|
@ -268,7 +268,7 @@ fn scan_quiet_suppresses_preview_banner() {
|
|||
.stderr(predicate::str::contains("Preview for C/C++").not());
|
||||
}
|
||||
|
||||
/// JSON output format must not print the Preview banner either — machine-
|
||||
/// JSON output format must not print the Preview banner either, machine-
|
||||
/// readable output has to stay clean on both stdout and stderr.
|
||||
#[test]
|
||||
fn scan_json_format_suppresses_preview_banner() {
|
||||
|
|
|
|||
|
|
@ -179,7 +179,7 @@ pub fn validate_expectations(diags: &[Diag], fixture_dir: &Path) {
|
|||
}
|
||||
}
|
||||
|
||||
// Noise budget (optional — omitted on tight safe-code fixtures)
|
||||
// Noise budget (optional, omitted on tight safe-code fixtures)
|
||||
if let Some(budget) = &exp.noise_budget {
|
||||
assert_max_findings(diags, budget.max_total_findings, budget.max_high_findings);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,8 +3,8 @@
|
|||
//! Production defaults run the scanner with `worker_threads > 1`, and callers
|
||||
//! embedding `nyx_scanner` (the forthcoming `serve` UI, CI wrappers, scripted
|
||||
//! harnesses) may invoke `scan_no_index` from multiple threads in the same
|
||||
//! process. Shared engine state — label tables, framework-detection caches,
|
||||
//! tree-sitter thread-local parsers, rayon globals, `once_cell` statics —
|
||||
//! process. Shared engine state (label tables, framework-detection caches,
|
||||
//! tree-sitter thread-local parsers, rayon globals, `once_cell` statics)
|
||||
//! must tolerate two simultaneous walks without races, panics, or diverging
|
||||
//! outputs.
|
||||
//!
|
||||
|
|
@ -86,7 +86,7 @@ fn build_tree(root: &Path) {
|
|||
}
|
||||
|
||||
/// Canonicalize a diag list for equality comparison. Finding output ordering
|
||||
/// depends on rayon scheduling — the individual fields must be identical but
|
||||
/// depends on rayon scheduling, the individual fields must be identical but
|
||||
/// the sequence is not. We sort by a stable composite key and stringify
|
||||
/// (Diag itself doesn't derive Ord).
|
||||
fn canonical_fingerprint(diags: &[Diag]) -> Vec<String> {
|
||||
|
|
@ -104,7 +104,7 @@ fn two_concurrent_scans_produce_identical_findings() {
|
|||
let root = tmp.path().to_path_buf();
|
||||
build_tree(&root);
|
||||
|
||||
// Capture an initial single-threaded run so we have a reference point —
|
||||
// Capture an initial single-threaded run so we have a reference point;
|
||||
// if the concurrent run produced a subset we want to know whether that
|
||||
// matches a known-good baseline or diverges from it.
|
||||
let baseline = scan_no_index(&root, &test_cfg()).expect("baseline scan must succeed");
|
||||
|
|
@ -138,7 +138,7 @@ fn two_concurrent_scans_produce_identical_findings() {
|
|||
);
|
||||
}
|
||||
|
||||
/// Four concurrent scans over the same tree — larger blast radius for
|
||||
/// Four concurrent scans over the same tree, larger blast radius for
|
||||
/// serialization bugs in shared caches. Runs on a small tree to keep
|
||||
/// CI time reasonable.
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -4,16 +4,16 @@
|
|||
//! Three fixtures cover the documented transfer forms currently tractable
|
||||
//! against the JS/Python abstract-suppression pipelines:
|
||||
//!
|
||||
//! * `cross_file_abstract_port_range` (Python) — Identity transfer on an
|
||||
//! * `cross_file_abstract_port_range` (Python), Identity transfer on an
|
||||
//! integer-typed passthrough. The caller's literal `8080` crosses the
|
||||
//! file boundary and SHELL_ESCAPE suppression fires on the bounded int.
|
||||
//! * `cross_file_abstract_bounded_index` (Python) — Clamped transfer
|
||||
//! * `cross_file_abstract_bounded_index` (Python), Clamped transfer
|
||||
//! derived from a baseline-invariant fact. The callee returns a
|
||||
//! literal `42`; the per-parameter transfer attaches it as
|
||||
//! `Clamped { 42, 42 }` and the caller sees a bounded integer
|
||||
//! without the return-abstract channel alone carrying the fact
|
||||
//! through summary resolution ambiguity.
|
||||
//! * `cross_file_abstract_url_prefix_lock` (JS) — String-prefix transfer
|
||||
//! * `cross_file_abstract_url_prefix_lock` (JS), String-prefix transfer
|
||||
//! across an Identity wrapper. The caller writes
|
||||
//! `url = asIs('https://internal/...' + userPath)` and passes `url` to
|
||||
//! `axios.get`. The CFG node's `string_prefix` is consumed by the
|
||||
|
|
@ -21,7 +21,7 @@
|
|||
//! prefix locks the host and SSRF suppression fires.
|
||||
//!
|
||||
//! Each fixture's `expectations.json` treats the cross-file SHELL/SSRF
|
||||
//! sink as *forbidden* on the main file — if cross-file abstract
|
||||
//! sink as *forbidden* on the main file, if cross-file abstract
|
||||
//! propagation regresses, the sink fires and the forbidden-finding
|
||||
//! assertion trips.
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
//! Three fixtures cover distinct structural shapes of the summary
|
||||
//! channel:
|
||||
//!
|
||||
//! * `cross_file_alias_mutating_helper` (Java) — a void-returning
|
||||
//! * `cross_file_alias_mutating_helper` (Java), a void-returning
|
||||
//! helper that stores its second argument into a field of its first
|
||||
//! argument. Without the points-to channel the cross-file summary
|
||||
//! loses every taint edge (void return, no container-op in
|
||||
|
|
@ -14,20 +14,20 @@
|
|||
//! edge and the caller observes the field write through the argument
|
||||
//! alias, producing a Runtime.exec finding.
|
||||
//!
|
||||
//! * `cross_file_alias_returned_alias` (JS) — a passthrough helper
|
||||
//! * `cross_file_alias_returned_alias` (JS), a passthrough helper
|
||||
//! whose return aliases its first parameter. `param_to_return` with
|
||||
//! `Identity` already covered the taint cap; the points-to channel
|
||||
//! adds the heap-identity alias `Param(0) → Return` so the caller
|
||||
//! threads the points-to set through the call. The existing
|
||||
//! shell-exec sink must still fire — a regression guard on the
|
||||
//! shell-exec sink must still fire, a regression guard on the
|
||||
//! return-alias channel.
|
||||
//!
|
||||
//! * `cross_file_alias_bounded_graph` (Python) — a helper with a 20-
|
||||
//! * `cross_file_alias_bounded_graph` (Python), a helper with a 20-
|
||||
//! edge alias graph that intentionally overflows `MAX_ALIAS_EDGES`.
|
||||
//! The assertion is that the scan *terminates* under the bounded
|
||||
//! analysis and falls back to the conservative
|
||||
//! `PointsToSummary::overflow` behaviour, not a specific finding
|
||||
//! count — overflow is an operational guarantee, not a precision one.
|
||||
//! count, overflow is an operational guarantee, not a precision one.
|
||||
|
||||
mod common;
|
||||
|
||||
|
|
@ -76,7 +76,7 @@ fn cross_file_container_factory() {
|
|||
}
|
||||
|
||||
/// Receiver-chain regression: tainted receiver flows through
|
||||
/// `tainted.trim().toLowerCase()` — both zero-arg — and into
|
||||
/// `tainted.trim().toLowerCase()`, both zero-arg, and into
|
||||
/// `Runtime.exec`. Pins the existing receiver-fallback behaviour so
|
||||
/// heap-aliasing changes do not regress it.
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
//! The body-loading path is pure plumbing: the taint engine carries a
|
||||
//! `cross_file_bodies` field on `SsaTaintTransfer` that the cross-file
|
||||
//! inline path consumes. This test guards the *availability*
|
||||
//! invariant — if pass 1 stops populating `bodies_by_key`, the inline
|
||||
//! invariant, if pass 1 stops populating `bodies_by_key`, the inline
|
||||
//! path would silently fall back to summary resolution even when
|
||||
//! cross-file bodies could have given context-sensitive precision.
|
||||
//!
|
||||
|
|
@ -19,7 +19,7 @@ use nyx_scanner::symbol::Lang;
|
|||
use nyx_scanner::utils::config::{AnalysisMode, Config};
|
||||
use std::path::Path;
|
||||
|
||||
/// Test-local config mirror of `tests/common/mod.rs::test_config` —
|
||||
/// Test-local config mirror of `tests/common/mod.rs::test_config`,
|
||||
/// kept inline so this file does not need to pull in the shared module
|
||||
/// (which `cargo test --test cross_file_body_loading_tests` would
|
||||
/// require extra wiring for).
|
||||
|
|
@ -39,7 +39,7 @@ fn test_config() -> Config {
|
|||
/// Replay the pass-1 body-collection logic from `scan_filesystem` on a
|
||||
/// handful of files and return the resulting `GlobalSummaries`.
|
||||
///
|
||||
/// This mirrors the fold-body of `scan_filesystem`'s pass-1 rayon loop —
|
||||
/// This mirrors the fold-body of `scan_filesystem`'s pass-1 rayon loop;
|
||||
/// the production code uses the same `analyse_file_fused` entry point
|
||||
/// and the same `insert` / `insert_ssa` / `insert_body` trio. Keeping
|
||||
/// the test close to that shape catches drift between the fused pipeline
|
||||
|
|
@ -72,7 +72,7 @@ fn cross_file_body_loading_smoke_python_two_files() {
|
|||
let root = tmp.path();
|
||||
|
||||
// `a.py` defines a helper that takes one parameter, does a trivial
|
||||
// string op, and returns. The body is intentionally small — we only
|
||||
// string op, and returns. The body is intentionally small, we only
|
||||
// care that *any* eligible body is produced, not that it has
|
||||
// interesting taint content.
|
||||
let a_py = root.join("a.py");
|
||||
|
|
@ -133,7 +133,7 @@ fn cross_file_body_loading_smoke_python_two_files() {
|
|||
body.param_count
|
||||
);
|
||||
|
||||
// Quick sanity on the SSA shape — an eligible body must have at
|
||||
// Quick sanity on the SSA shape, an eligible body must have at
|
||||
// least one block. Zero blocks would mean we stored an empty stub,
|
||||
// which would let the inline path silently do nothing on every
|
||||
// inline attempt.
|
||||
|
|
@ -146,7 +146,7 @@ fn cross_file_body_loading_smoke_python_two_files() {
|
|||
#[test]
|
||||
fn cross_file_body_loading_empty_without_callees() {
|
||||
// A single file with no inter-procedural flow is still expected to
|
||||
// produce a body for its one function — that's what body loading
|
||||
// produce a body for its one function, that's what body loading
|
||||
// enables. The *empty* case this test guards is "bodies_by_key
|
||||
// returns None when no bodies are loaded," which keeps the
|
||||
// threaded-through `Option` explicit for inline consumers.
|
||||
|
|
|
|||
|
|
@ -8,8 +8,8 @@
|
|||
//! findings.
|
||||
//!
|
||||
//! This binary is split from `cross_file_context_tests.rs` because
|
||||
//! Cargo compiles each `tests/*.rs` file into its own test binary —
|
||||
//! separate processes — so the `NYX_CONTEXT_SENSITIVE` env flip here
|
||||
//! Cargo compiles each `tests/*.rs` file into its own test binary
|
||||
//! (separate processes), so the `NYX_CONTEXT_SENSITIVE` env flip here
|
||||
//! does not race against the default-on tests running in parallel.
|
||||
//!
|
||||
//! The switch is read by `AnalysisOptions::current()` via the legacy
|
||||
|
|
|
|||
|
|
@ -6,24 +6,24 @@
|
|||
//! The four fixtures under `tests/fixtures/cross_file_context_*` cover
|
||||
//! the documented precision wins and guardrails:
|
||||
//!
|
||||
//! * `cross_file_context_two_call_sites` (Python) — two calls to the same
|
||||
//! * `cross_file_context_two_call_sites` (Python), two calls to the same
|
||||
//! cross-file helper, one tainted and one with a constant literal.
|
||||
//! Asserts the tainted call still produces a finding.
|
||||
//! * `cross_file_context_callback` (JS) — cross-file helper invokes a
|
||||
//! * `cross_file_context_callback` (JS), cross-file helper invokes a
|
||||
//! caller-side function passed as a callback. Inline re-analysis of
|
||||
//! the helper must resolve the callback binding and surface the
|
||||
//! flow through `child_process.exec`.
|
||||
//! * `cross_file_context_sanitizer` (JS) — cross-file sanitizer applied
|
||||
//! * `cross_file_context_sanitizer` (JS), cross-file sanitizer applied
|
||||
//! before an HTML sink. Regression guard: cross-file inline must not
|
||||
//! introduce a taint finding when the sanitiser is recognised.
|
||||
//! * `cross_file_context_deep_chain` (Python) — A -> B -> C chain with
|
||||
//! * `cross_file_context_deep_chain` (Python), A -> B -> C chain with
|
||||
//! the sink in C. k=1 means B->C resolves via summary; the end-to-end
|
||||
//! finding must still surface so callers cannot lose recall on deep
|
||||
//! chains.
|
||||
//!
|
||||
//! The `bodies_by_key_populated_for_cross_file_fixtures` test is a
|
||||
//! direct `GlobalSummaries`-level assertion that pass 1 loaded cross-file
|
||||
//! SSA bodies for each fixture — i.e. the cross-file inline path has
|
||||
//! SSA bodies for each fixture, i.e. the cross-file inline path has
|
||||
//! something to consult. If this assertion flips to zero, cross-file
|
||||
//! inline would silently fall back to summary resolution and every
|
||||
//! expectations.json check above would be driven by the less precise
|
||||
|
|
@ -65,7 +65,7 @@ fn test_config() -> Config {
|
|||
/// Walk a fixture directory and replay the pass-1 body collection that
|
||||
/// `scan_filesystem` does, returning the merged `GlobalSummaries`.
|
||||
///
|
||||
/// This is used purely for the availability assertion — the actual
|
||||
/// This is used purely for the availability assertion, the actual
|
||||
/// scans under test go through the regular `scan_no_index` entry point.
|
||||
fn pass1_bodies(root: &Path) -> GlobalSummaries {
|
||||
let cfg = test_config();
|
||||
|
|
@ -132,7 +132,7 @@ fn cross_file_context_sanitizer() {
|
|||
}
|
||||
|
||||
/// Three-file deep chain (A -> B -> C) with the sink in C. The
|
||||
/// end-to-end flow must still surface — k=1 depth cap on inline does
|
||||
/// end-to-end flow must still surface, k=1 depth cap on inline does
|
||||
/// not drop recall because B -> C resolves via summary.
|
||||
#[test]
|
||||
fn cross_file_context_deep_chain() {
|
||||
|
|
|
|||
|
|
@ -4,18 +4,18 @@
|
|||
//! Three fixtures cover distinct structural shapes of the per-return-path
|
||||
//! transform:
|
||||
//!
|
||||
//! * `cross_file_phi_validated_branch` (Python) — a callee whose two
|
||||
//! * `cross_file_phi_validated_branch` (Python), a callee whose two
|
||||
//! return branches are both `Identity` on the value, differing only in
|
||||
//! the predicate gate. The required SQLi finding confirms the
|
||||
//! summary-application path does not regress on the common "union is
|
||||
//! precise enough" case.
|
||||
//! * `cross_file_phi_partial_sanitiser` (JS) — the callee has two
|
||||
//! * `cross_file_phi_partial_sanitiser` (JS), the callee has two
|
||||
//! returns with *different* transforms (Identity vs
|
||||
//! StripBits(HTML_ESCAPE)). The caller invokes the unsanitised branch,
|
||||
//! so the XSS sink must still fire — a regression guard against a
|
||||
//! so the XSS sink must still fire, a regression guard against a
|
||||
//! per-path application that over-eagerly attributes sanitation across
|
||||
//! all branches.
|
||||
//! * `cross_file_phi_both_branches_safe` (Go) — both return paths run
|
||||
//! * `cross_file_phi_both_branches_safe` (Go), both return paths run
|
||||
//! the same sanitising validator. The SQL sink is on the forbidden
|
||||
//! list: if the per-path decomposition regresses to "either branch
|
||||
//! could be raw" the caller would pick up a false positive.
|
||||
|
|
|
|||
|
|
@ -6,10 +6,10 @@
|
|||
//! clear error instead of panicking, hanging, or producing nonsense
|
||||
//! findings. These tests exercise both classes of corruption:
|
||||
//!
|
||||
//! 1. Truncation to zero bytes — SQLite treats a zero-length file as a
|
||||
//! 1. Truncation to zero bytes, SQLite treats a zero-length file as a
|
||||
//! fresh empty DB. We expect the indexer to bootstrap the schema and
|
||||
//! carry on.
|
||||
//! 2. Arbitrary garbage in the header — SQLite rejects this with
|
||||
//! 2. Arbitrary garbage in the header, SQLite rejects this with
|
||||
//! `SQLITE_NOTADB` during pragma/schema execution. We expect the
|
||||
//! indexer to return a structured error, not a panic.
|
||||
//!
|
||||
|
|
@ -122,7 +122,7 @@ fn zero_truncated_db_rebuilds_on_init() {
|
|||
let pool = Indexer::init(&db_path)
|
||||
.expect("Indexer::init should bootstrap a schema into an empty file");
|
||||
|
||||
// After init, the DB is empty of prior state — an indexed scan should
|
||||
// After init, the DB is empty of prior state, an indexed scan should
|
||||
// still run end-to-end but will effectively be acting like a cold
|
||||
// rebuild. We don't re-call build_index here because the plan is to
|
||||
// confirm the raw init path is resilient.
|
||||
|
|
@ -143,14 +143,14 @@ fn zero_truncated_db_rebuilds_on_init() {
|
|||
}
|
||||
|
||||
/// Clobber the SQLite magic header with garbage bytes. This is the
|
||||
/// "actual corruption" case — SQLite rejects it with `SQLITE_NOTADB` the
|
||||
/// "actual corruption" case, SQLite rejects it with `SQLITE_NOTADB` the
|
||||
/// first time pragma or SQL is executed, which surfaces as
|
||||
/// `NyxError::Sql(_)` from `Indexer::init`.
|
||||
#[test]
|
||||
fn garbage_header_db_returns_structured_error() {
|
||||
let (_project_name, db_path, _project, _db_dir) = build_indexed_project();
|
||||
|
||||
// Write 100 bytes of `0xFF` — guaranteed not to match SQLite's header
|
||||
// Write 100 bytes of `0xFF`, guaranteed not to match SQLite's header
|
||||
// magic "SQLite format 3\0".
|
||||
clobber_header(&db_path, 0xFF, 100);
|
||||
|
||||
|
|
@ -186,7 +186,7 @@ fn garbage_header_db_returns_structured_error() {
|
|||
// NOTE: A mid-file corruption test (garbage at bytes 100..200, preserving
|
||||
// SQLite magic) was attempted and is deliberately omitted. That shape
|
||||
// triggers a slow corruption-detection path in SQLite where `Indexer::init`
|
||||
// takes 150–200 seconds before returning — unsuitable for CI wall-clock
|
||||
// takes 150–200 seconds before returning, unsuitable for CI wall-clock
|
||||
// budgets. The two tests above already cover the "corrupt-on-arrival"
|
||||
// cases that users actually hit (crash-truncated file, deliberate clobber).
|
||||
// A follow-up should either short-circuit `PRAGMA integrity_check` up
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
//! [`nyx_scanner::taint::analyse_file`] must preserve distinct flows
|
||||
//! that share a source but differ on validation status or intermediate
|
||||
//! variables. Historically the dedup collapsed all `(body_id, sink,
|
||||
//! source)` siblings, preferring the validated one — so an unguarded
|
||||
//! source)` siblings, preferring the validated one, so an unguarded
|
||||
//! exploit on a sibling branch was silently dropped in favour of a
|
||||
//! neighbouring guarded flow.
|
||||
//!
|
||||
|
|
@ -35,7 +35,7 @@ fn dedup_preserves_validated_and_unvalidated_flows() {
|
|||
validate_expectations(&diags, &dir);
|
||||
|
||||
// Load-bearing assertion: the two flows live on distinct sink
|
||||
// lines (6 and 8 in the source — actual lines depend on the
|
||||
// lines (6 and 8 in the source, actual lines depend on the
|
||||
// fixture file format, so we only assert distinct sinks).
|
||||
let taint: Vec<&nyx_scanner::commands::scan::Diag> = diags
|
||||
.iter()
|
||||
|
|
@ -58,7 +58,7 @@ fn dedup_preserves_validated_and_unvalidated_flows() {
|
|||
.collect::<Vec<_>>(),
|
||||
);
|
||||
|
||||
// The two findings must live on different source lines — if the
|
||||
// The two findings must live on different source lines, if the
|
||||
// engine collapses them into one, the test will fail here even
|
||||
// when the count assertion above coincidentally passes (e.g. if
|
||||
// a future change started emitting one validated and one
|
||||
|
|
@ -73,7 +73,7 @@ fn dedup_preserves_validated_and_unvalidated_flows() {
|
|||
|
||||
// Every taint finding must carry a stable `finding_id` that
|
||||
// downstream formatters can reference. This is the plumbing that
|
||||
// feeds alternative-path cross-linking — verify it is non-empty
|
||||
// feeds alternative-path cross-linking, verify it is non-empty
|
||||
// for every taint finding so regressions in `analyse_file`'s
|
||||
// post-dedup `make_finding_id` pass surface here.
|
||||
for d in &taint {
|
||||
|
|
@ -87,7 +87,7 @@ fn dedup_preserves_validated_and_unvalidated_flows() {
|
|||
);
|
||||
}
|
||||
|
||||
// At least one validated/unvalidated split must be present — the
|
||||
// At least one validated/unvalidated split must be present, the
|
||||
// whole point of the fixture is that a guarded branch and an
|
||||
// unguarded branch reach `exec(input)` and both must report.
|
||||
// We do not require an exact split since future sanitization
|
||||
|
|
@ -103,7 +103,7 @@ fn dedup_preserves_validated_and_unvalidated_flows() {
|
|||
is not behind any allowlist. Found only validated findings.",
|
||||
);
|
||||
// `validated` may legitimately be empty if the engine does not yet
|
||||
// recognise `isWhitelisted` as a predicate — the fixture is still
|
||||
// recognise `isWhitelisted` as a predicate, the fixture is still
|
||||
// load-bearing because the `min_count: 2` in expectations.json
|
||||
// asserts both findings surface regardless of which is classified
|
||||
// as validated. Drop the assertion to avoid gating the regression
|
||||
|
|
|
|||
|
|
@ -2,9 +2,9 @@
|
|||
//!
|
||||
//! The scanner's two-pass pipeline runs rayon `par_iter` over files in
|
||||
//! both pass-1 (summary extraction) and pass-2 (rule evaluation), and
|
||||
//! merges summaries via `try_reduce`. A latent ordering bug — a
|
||||
//! merges summaries via `try_reduce`. A latent ordering bug, a
|
||||
//! shared mutable state hit unprotected from multiple threads, or a
|
||||
//! `HashMap` iteration order leaking into a finding identity — can
|
||||
//! `HashMap` iteration order leaking into a finding identity, can
|
||||
//! surface as a diagnostic that appears with 4 workers but not with 1.
|
||||
//!
|
||||
//! This test runs the same fixture under worker-thread counts of 1,
|
||||
|
|
@ -14,7 +14,7 @@
|
|||
//! assertion fires only on real output divergence.
|
||||
//!
|
||||
//! If this test ever flakes, prefer investigating the engine over
|
||||
//! weakening the normaliser — engine-level determinism across thread
|
||||
//! weakening the normaliser, engine-level determinism across thread
|
||||
//! counts is load-bearing for reproducible CI runs.
|
||||
mod common;
|
||||
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ use nyx_scanner::rank::{compute_attack_rank, rank_diags};
|
|||
// ── Diag factories ─────────────────────────────────────────────────────
|
||||
|
||||
/// A converged taint finding that the points-based scorer will score
|
||||
/// as `Confidence::High`. Used as the "clean" baseline — any delta
|
||||
/// as `Confidence::High`. Used as the "clean" baseline, any delta
|
||||
/// against this must come from attached engine notes.
|
||||
fn high_confidence_taint_diag(path: &str, line: u32) -> Diag {
|
||||
Diag {
|
||||
|
|
@ -204,7 +204,7 @@ fn rank_diags_sorts_converged_above_capped_at_same_severity() {
|
|||
#[test]
|
||||
fn rank_diags_preserves_severity_tier_under_bail() {
|
||||
// High + Bail must still outrank Medium + clean at the same
|
||||
// evidence-strength baseline — this is the tier-boundary invariant
|
||||
// evidence-strength baseline, this is the tier-boundary invariant
|
||||
// that the -8 completeness magnitude is calibrated for.
|
||||
let mut high_bailed = high_confidence_taint_diag("a.rs", 1);
|
||||
attach_notes(
|
||||
|
|
@ -421,7 +421,7 @@ fn sarif_omits_loss_direction_for_informational_only() {
|
|||
fn every_engine_note_direction_is_documented() {
|
||||
// Enumerate every EngineNote variant and assert its direction.
|
||||
// The intent is that a contributor adding a new variant will cause
|
||||
// this test to fail to compile (no match arm) — a structural guard
|
||||
// this test to fail to compile (no match arm), a structural guard
|
||||
// against silent misclassification.
|
||||
fn check(note: EngineNote, expected: LossDirection) {
|
||||
assert_eq!(
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
//! test forces a specific cap-site to fire on a tiny fixture by
|
||||
//! overriding the engine's safety cap, then asserts either that the
|
||||
//! corresponding observability counter moved *or* that the note
|
||||
//! propagated to a produced finding — whichever is the more stable
|
||||
//! propagated to a produced finding, whichever is the more stable
|
||||
//! signal for that cap.
|
||||
|
||||
mod common;
|
||||
|
|
@ -19,7 +19,7 @@ use std::path::Path;
|
|||
use std::sync::Mutex;
|
||||
|
||||
/// Process-wide atomics for cap overrides mean tests that fiddle with
|
||||
/// them must run serially — cargo test defaults to parallel.
|
||||
/// them must run serially, cargo test defaults to parallel.
|
||||
static CAP_GUARD: Mutex<()> = Mutex::new(());
|
||||
|
||||
fn fixture(name: &str) -> std::path::PathBuf {
|
||||
|
|
@ -32,7 +32,7 @@ fn fixture(name: &str) -> std::path::PathBuf {
|
|||
fn worklist_cap_trips_observability_counter() {
|
||||
let _guard = CAP_GUARD.lock().unwrap_or_else(|e| e.into_inner());
|
||||
// Force a very tight worklist budget so every body with > 0 blocks
|
||||
// trips the cap. The observability counter is the stable signal —
|
||||
// trips the cap. The observability counter is the stable signal;
|
||||
// note attribution to a specific finding may be lost on bodies that
|
||||
// capped *before* emitting their sink event.
|
||||
reset_worklist_observability();
|
||||
|
|
@ -59,7 +59,7 @@ fn origins_cap_trips_observability_on_multi_source_fixture() {
|
|||
// Set origins to 1 and scan a fixture with multiple top-level
|
||||
// sources flowing into the same sink. Any non-trivial taint flow
|
||||
// will produce at least one tainted value whose origin list hit the
|
||||
// cap — detected by the post-hoc saturation scan at the end of
|
||||
// cap, detected by the post-hoc saturation scan at the end of
|
||||
// `run_ssa_taint_internal`.
|
||||
reset_origins_observability();
|
||||
set_max_origins_override(1);
|
||||
|
|
|
|||
125
tests/fetch_data_exfil_integration_tests.rs
Normal file
125
tests/fetch_data_exfil_integration_tests.rs
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
//! Integration tests for the `Cap::DATA_EXFIL` detector class.
|
||||
//!
|
||||
//! Validates per-cap attribution at multi-gate call sites: a single `fetch`
|
||||
//! call carries both an SSRF gate (URL flow) and a DATA_EXFIL gate (body /
|
||||
//! headers / json flow), and a tainted body must not surface as SSRF and
|
||||
//! vice versa. Also sanity-checks the SARIF output so the new finding
|
||||
//! class produces a distinct rule id.
|
||||
|
||||
mod common;
|
||||
|
||||
use common::scan_fixture_dir;
|
||||
use nyx_scanner::commands::scan::Diag;
|
||||
use nyx_scanner::utils::config::AnalysisMode;
|
||||
use std::path::PathBuf;
|
||||
|
||||
fn js_fixture_dir() -> PathBuf {
|
||||
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.join("tests")
|
||||
.join("fixtures")
|
||||
.join("js")
|
||||
}
|
||||
|
||||
fn diags_for(file: &str) -> Vec<Diag> {
|
||||
let dir = js_fixture_dir();
|
||||
let all = scan_fixture_dir(&dir, AnalysisMode::Full);
|
||||
all.into_iter().filter(|d| d.path.ends_with(file)).collect()
|
||||
}
|
||||
|
||||
#[test]
fn fetch_body_data_exfil_emits_data_exfil_not_ssrf() {
    // Fixture: hardcoded destination, request-derived body. Only the
    // data-exfiltration class may be reported for it.
    let diags = diags_for("fetch_body_data_exfil.js");

    // Rule ids are matched by prefix, exactly as the assertions below word them.
    let count_with_prefix =
        |prefix: &str| diags.iter().filter(|d| d.id.starts_with(prefix)).count();
    // Only evaluated when an assertion fails, to print every id that was seen.
    let seen_ids = || diags.iter().map(|d| &d.id).collect::<Vec<_>>();

    let exfil = count_with_prefix("taint-data-exfiltration");
    let plain_taint = count_with_prefix("taint-unsanitised-flow");

    assert!(
        exfil >= 1,
        "expected at least one taint-data-exfiltration finding, got 0.\n\
         Diags: {:#?}",
        seen_ids(),
    );
    assert_eq!(
        plain_taint,
        0,
        "fixed-URL fetch with tainted body must NOT emit SSRF \
         (taint-unsanitised-flow), got {plain_taint}.\n\
         Diags: {:#?}",
        seen_ids(),
    );
}
|
||||
|
||||
#[test]
fn fetch_ssrf_url_tainted_emits_ssrf_not_data_exfil() {
    // Fixture: request-derived destination URL, no body. The generic taint
    // (SSRF) rule must fire and the data-exfiltration rule must stay silent.
    let diags = diags_for("fetch_ssrf_url_tainted.js");

    // Tally both rule families in a single pass over the diagnostics.
    let mut ssrf = 0usize;
    let mut exfil = 0usize;
    for diag in &diags {
        if diag.id.starts_with("taint-unsanitised-flow") {
            ssrf += 1;
        }
        if diag.id.starts_with("taint-data-exfiltration") {
            exfil += 1;
        }
    }

    assert!(
        ssrf >= 1,
        "expected at least one taint-unsanitised-flow (SSRF) finding, got 0.\n\
         Diags: {:#?}",
        diags.iter().map(|d| &d.id).collect::<Vec<_>>(),
    );
    assert_eq!(
        exfil,
        0,
        "tainted-URL fetch must NOT emit DATA_EXFIL, got {exfil}.\n\
         Diags: {:#?}",
        diags.iter().map(|d| &d.id).collect::<Vec<_>>(),
    );
}
|
||||
|
||||
#[test]
fn sarif_distinguishes_data_exfil_rule_id_from_ssrf() {
    use nyx_scanner::output::build_sarif;

    const EXFIL_RULE: &str = "taint-data-exfiltration";
    const SSRF_RULE: &str = "taint-unsanitised-flow";

    // Scan the whole JS fixture directory and render the findings as SARIF.
    let fixture_root = js_fixture_dir();
    let diags = scan_fixture_dir(&fixture_root, AnalysisMode::Full);
    let sarif = build_sarif(&diags, &fixture_root);
    let run = &sarif["runs"][0];

    // Both rule ids must be declared in the tool driver's rule table.
    let rule_ids: Vec<&str> = run["tool"]["driver"]["rules"]
        .as_array()
        .expect("SARIF rules array")
        .iter()
        .filter_map(|rule| rule["id"].as_str())
        .collect();

    assert!(
        rule_ids.contains(&EXFIL_RULE),
        "SARIF rules must contain taint-data-exfiltration, got: {rule_ids:?}"
    );
    assert!(
        rule_ids.contains(&SSRF_RULE),
        "SARIF rules must contain taint-unsanitised-flow, got: {rule_ids:?}"
    );

    // Each result carries a single `ruleId`; the cap-specific class must
    // show up under its own id rather than inside the generic taint bucket.
    let results = run["results"].as_array().expect("SARIF results array");
    let results_for = |rule_id: &str| {
        results
            .iter()
            .filter(|result| result["ruleId"].as_str() == Some(rule_id))
            .count()
    };
    let exfil_results = results_for(EXFIL_RULE);
    let ssrf_results = results_for(SSRF_RULE);

    assert!(
        exfil_results >= 1,
        "expected >= 1 SARIF result with ruleId taint-data-exfiltration, got {exfil_results}",
    );
    assert!(
        ssrf_results >= 1,
        "expected >= 1 SARIF result with ruleId taint-unsanitised-flow, got {ssrf_results}",
    );
}
|
||||
2
tests/fixtures/async_rust/main.rs
vendored
2
tests/fixtures/async_rust/main.rs
vendored
|
|
@ -1,7 +1,7 @@
|
|||
// Regression fixture: Rust async flow through `tokio::process::Command`.
|
||||
//
|
||||
// Per docs/language-maturity.md, Rust's Tokio process variants are not
|
||||
// yet covered — the Tokio async process APIs are a known gap. The
|
||||
// yet covered, the Tokio async process APIs are a known gap. The
|
||||
// fixture is checked in so that when Rust async-process coverage lands,
|
||||
// the engine begins producing the intended finding and the
|
||||
// `forbidden_findings` assertion forces whoever adds the coverage to
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
// Target: authorization happens inside `require_owner`, which
|
||||
// delegates to `require_group_member` (a configured authorization
|
||||
// check name). The handler in `cross_file_helper_handler.rs`
|
||||
// delegates ownership validation to this helper — cross-file helper
|
||||
// delegates ownership validation to this helper, cross-file helper
|
||||
// lifting should recognise the call as an auth check covering the
|
||||
// supplied `row`.
|
||||
struct Db;
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
// produces a `DatabaseConnection` via SSA `constructor_type` (through
|
||||
// `peel_identity_suffix`, which strips `.unwrap()` before matching). The
|
||||
// handler then calls `conn.execute(..)`, a callee name that appears in
|
||||
// neither `mutation_indicator_names` nor `read_indicator_names` for Rust —
|
||||
// neither `mutation_indicator_names` nor `read_indicator_names` for Rust;
|
||||
// name-based classification returns `None`, so the ownership gate
|
||||
// already cannot flag the call. The type-map refinement should *still*
|
||||
// leave the call unflagged (the type map produces `DbMutation`, but
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ pub async fn handle_list_peer_docs(req: Req, ctx: Ctx) -> Result<String, ()> {
|
|||
let user = auth::require_auth(&req, &ctx).await?;
|
||||
let doc_ids: Vec<i64> = vec![1, 2, 3];
|
||||
|
||||
// Pure in-memory bookkeeping — no authorization decision here.
|
||||
// Pure in-memory bookkeeping, no authorization decision here.
|
||||
let mut counts: HashMap<i64, usize> = HashMap::new();
|
||||
let mut seen: HashSet<i64> = HashSet::new();
|
||||
for doc_id in &doc_ids {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
// B4 regression guard: `format_target` does NOT auth-check
|
||||
// `group_id` — it just constructs a string from it. The helper-lift
|
||||
// `group_id`, it just constructs a string from it. The helper-lift
|
||||
// pass must not synthesise a covering AuthCheck on the handler's call
|
||||
// site, so the subsequent `db.exec("INSERT INTO comments …", &[group_id])`
|
||||
// MUST still flag.
|
||||
|
|
@ -19,7 +19,7 @@ mod auth {
|
|||
}
|
||||
|
||||
fn format_target(group_id: i64, suffix: &str) -> String {
|
||||
// No auth check here — pure formatting.
|
||||
// No auth check here, pure formatting.
|
||||
format!("group:{}{}", group_id, suffix)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ pub async fn handle_delete_doc(req: Req, ctx: Ctx, doc_id: i64) -> Result<String
|
|||
return json_err("cannot delete another user's doc", 403);
|
||||
}
|
||||
|
||||
// By construction, the row belongs to `user` — so any id read from it is authorized.
|
||||
// By construction, the row belongs to `user`, so any id read from it is authorized.
|
||||
let group_id = existing.get_i64("group_id");
|
||||
realtime::publish_to_group(group_id, "doc_deleted");
|
||||
Ok("ok".into())
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ pub async fn handle_update_doc(req: Req, ctx: Ctx, doc_id: i64) -> Result<String
|
|||
);
|
||||
let owner_id = existing.get_i64("user_id");
|
||||
|
||||
// Equality compared but no early exit — the check has no effect.
|
||||
// Equality compared but no early exit, the check has no effect.
|
||||
if owner_id != user.id {
|
||||
// missing return
|
||||
println!("not your doc (but proceeding anyway)");
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ mod auth { pub async fn require_auth(_r: &super::Req, _c: &super::Ctx) -> Result
|
|||
|
||||
// The handler's `get_peer_ids(&db, user.id)` call below must not be
|
||||
// flagged. `user` is bound from `auth::require_auth(..)` so `user.id`
|
||||
// is the caller's own id — the call is self-referential, not a foreign
|
||||
// is the caller's own id, the call is self-referential, not a foreign
|
||||
// scoped id. The library-style helper below is a pass-through so its
|
||||
// body contains no DB sinks (the internal `user_id` → DB flow is a
|
||||
// separate pattern covered by helper-summary lifting).
|
||||
|
|
|
|||
2
tests/fixtures/auth_analysis/sql_join_acl.rs
vendored
2
tests/fixtures/auth_analysis/sql_join_acl.rs
vendored
|
|
@ -2,7 +2,7 @@
|
|||
// against an ACL table (`group_members`) with a WHERE clause that pins
|
||||
// the row to the current user (`gm.user_id = ?1` bound to `user.id`).
|
||||
// Every returned row is membership-gated by construction, so downstream
|
||||
// uses of the row's columns (`group_id` here) are authorized — the
|
||||
// uses of the row's columns (`group_id` here) are authorized, the
|
||||
// `realtime::publish_to_group` call MUST NOT be flagged as missing an
|
||||
// ownership check after B3.
|
||||
struct Ctx;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
// B3 regression guard: the SELECT JOINs through `audit_log` (NOT in
|
||||
// the configured ACL list) and the WHERE clause pins on
|
||||
// `al.user_id = ?1`. The audit-log row's user is the audit subject,
|
||||
// not the doc owner — so this query does NOT prove caller ownership
|
||||
// not the doc owner, so this query does NOT prove caller ownership
|
||||
// of the returned `doc_id`. The downstream realtime publish MUST
|
||||
// still flag for a missing ownership check after B3.
|
||||
struct Ctx;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
// target: authorization happens inside `validate_target`, which
|
||||
// internally calls `authz::require_membership` against the same
|
||||
// `group_id` the handler subsequently mutates. The current rule cannot
|
||||
// see this transitively — B4 lifts per-function auth-check summaries
|
||||
// see this transitively, B4 lifts per-function auth-check summaries
|
||||
// (which positional params are auth-checked) so the handler-level call
|
||||
// to `validate_target(&db, group_id, user.id)` is recognised as an
|
||||
// auth check covering `group_id`. Result: `db.exec(..)` MUST NOT flag
|
||||
|
|
@ -45,7 +45,7 @@ pub async fn handle_create_comment(
|
|||
let user = auth::require_auth(&req, &ctx).await?;
|
||||
let db = Db;
|
||||
|
||||
// Authorization happens inside validate_target — helper-summary
|
||||
// Authorization happens inside validate_target, helper-summary
|
||||
// lifting propagates the per-param auth check so this covers
|
||||
// `group_id`.
|
||||
validate_target(&db, group_id, user.id).await?;
|
||||
|
|
|
|||
31
tests/fixtures/fp_guards/auth_local_collection_receiver/App.ts
vendored
Normal file
31
tests/fixtures/fp_guards/auth_local_collection_receiver/App.ts
vendored
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
// FP guard for `js.auth.missing_ownership_check` — JS built-in
|
||||
// container receivers must not be classified as data-layer sinks.
|
||||
// See `tests/benchmark/corpus/typescript/auth/safe_local_collection_receiver.ts`
|
||||
// for the full real-repo distillation.
|
||||
|
||||
type ElementsMap = Map<string, { id: string }>;
|
||||
|
||||
// Receiver is typed through the `ElementsMap` alias, which resolves to a
// built-in `Map`; `.get(id)` is therefore an in-memory lookup.
function fromAlias(elementsMap: ElementsMap, id: string) {
  return elementsMap.get(id);
}
|
||||
|
||||
// Receiver is annotated directly with the generic built-in `Map<string, string>`
// (no alias in between); `.get(k)` is a plain map lookup.
function fromDirectGeneric(m: Map<string, string>, k: string) {
  return m.get(k);
}
|
||||
|
||||
// Receiver uses the `T[]` array shorthand; `.find(...)` walks the array in
// memory and compares each element's `id` with `targetId`.
function fromArrayShorthand(arr: { id: string }[], targetId: string) {
  return arr.find((x) => x.id === targetId);
}
|
||||
|
||||
// Receiver is a `Map` constructed inside the function body, so its type is
// known from the `new Map<...>()` expression rather than from a parameter
// annotation. Both `.set` and `.get` act on that local instance.
function fromLocalConstructor() {
  const cache = new Map<string, string>();
  cache.set("a", "x");
  return cache.get("a");
}
|
||||
|
||||
// Receiver is a built-in `Set<string>`: membership test, conditional insert,
// then a read of `.size`. Returns the set's size after the optional insert.
function fromSet(visited: Set<string>, k: string) {
  if (!visited.has(k)) {
    visited.add(k);
  }
  return visited.size;
}
|
||||
16
tests/fixtures/fp_guards/auth_local_collection_receiver/expectations.json
vendored
Normal file
16
tests/fixtures/fp_guards/auth_local_collection_receiver/expectations.json
vendored
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
{
|
||||
"required_findings": [],
|
||||
"forbidden_findings": [
|
||||
{ "id_prefix": "js.auth.missing_ownership_check" }
|
||||
],
|
||||
"noise_budget": {
|
||||
"max_total_findings": 1,
|
||||
"max_high_findings": 0
|
||||
},
|
||||
"performance_expectations": {
|
||||
"max_ms_no_index": 1000,
|
||||
"max_ms_index_cold": 1500,
|
||||
"max_ms_index_warm": 500,
|
||||
"ci_mode": "lenient"
|
||||
}
|
||||
}
|
||||
16
tests/fixtures/fp_guards/auth_rust_param_typed_local_collection/expectations.json
vendored
Normal file
16
tests/fixtures/fp_guards/auth_rust_param_typed_local_collection/expectations.json
vendored
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
{
|
||||
"required_findings": [],
|
||||
"forbidden_findings": [
|
||||
{ "id_prefix": "rs.auth.missing_ownership_check" }
|
||||
],
|
||||
"noise_budget": {
|
||||
"max_total_findings": 2,
|
||||
"max_high_findings": 0
|
||||
},
|
||||
"performance_expectations": {
|
||||
"max_ms_no_index": 1000,
|
||||
"max_ms_index_cold": 1500,
|
||||
"max_ms_index_warm": 500,
|
||||
"ci_mode": "lenient"
|
||||
}
|
||||
}
|
||||
93
tests/fixtures/fp_guards/auth_rust_param_typed_local_collection/snapshot.rs
vendored
Normal file
93
tests/fixtures/fp_guards/auth_rust_param_typed_local_collection/snapshot.rs
vendored
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
// Real-repo precision guard mirroring meilisearch's index-scheduler
|
||||
// shape:
|
||||
// crates/index-scheduler/src/scheduler/process_snapshot_creation.rs::remove_tasks
|
||||
// (`unsafe fn remove_tasks(tasks: &[Task], dst: &std::path::Path,
|
||||
// index_base_map_size: usize)` plus per-loop bitmap mutations on
|
||||
// destructured heed `Database` handles), plus the LocalCollection
|
||||
// receiver-type cluster
|
||||
// (`crates/index-scheduler/src/scheduler/enterprise_edition/network.rs::balance_shards`,
|
||||
// `unsharded: RoaringBitmap`).
|
||||
//
|
||||
// Both engine fixes must hold: the Rust `parameter` arm in
|
||||
// `collect_param_names` (only descends into `pattern`, never `type`)
|
||||
// and the Rust LocalCollection type-text classifier
|
||||
// (`rust_type_to_local_collection`). Without either, this file would
|
||||
// produce missing-ownership-check findings on internal helpers /
|
||||
// in-memory bitmap mutations.
|
||||
|
||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||
|
||||
// Local stand-in named after the bitmap type mentioned in the file header.
// It is a unit struct: every method ignores its argument and returns a
// constant, so only the receiver type and the method names carry meaning.
struct RoaringBitmap;
impl RoaringBitmap {
    fn new() -> Self { Self }
    // The three methods below always report `true`; no state is stored.
    fn insert(&mut self, _x: u32) -> bool { true }
    fn remove(&mut self, _x: u32) -> bool { true }
    fn contains(&self, _x: u32) -> bool { true }
}
|
||||
|
||||
// Minimal task record; `uid` is the only field and is read by `remove_tasks`.
struct Task { uid: u32 }
|
||||
|
||||
// Stand-in for the `Database` handles mentioned in the file header. `delete`
// ignores both arguments and always returns `Ok(())`.
struct Database;
impl Database {
    fn delete(&self, _w: &mut u32, _u: &u32) -> Result<(), ()> { Ok(()) }
}
|
||||
|
||||
// Holder for the two database handles that `remove_tasks` destructures
// before deleting from each of them.
struct TaskQueue {
    all_tasks: Database,
    canceled_by: Database,
}
|
||||
|
||||
// Rust `parameter` arm: type-segment idents (`std`, `path`, `Path`)
|
||||
// must NOT pollute `unit.params` and gate user-input-evidence open.
|
||||
// Deletes every task's `uid` from both handles of a locally built
// `TaskQueue`, propagating the first error via `?`.
// NOTE(review): nothing in this body needs `unsafe`; the qualifier
// presumably mirrors the upstream signature quoted in the file header.
unsafe fn remove_tasks(
    tasks: &[Task],
    dst: &std::path::Path,
    sz: usize,
) -> Result<(), ()> {
    // `dst` and `sz` exist only for their parameter types; bind them so they
    // are not reported as unused.
    let _ = (dst, sz);
    // Plain integer passed as the first argument of `Database::delete`.
    let mut wtxn = 0u32;
    let task_queue = TaskQueue {
        all_tasks: Database,
        canceled_by: Database,
    };
    // Destructure so the `delete` receivers are bare local bindings.
    let TaskQueue { all_tasks, canceled_by } = task_queue;
    for task in tasks {
        all_tasks.delete(&mut wtxn, &task.uid)?;
        canceled_by.delete(&mut wtxn, &task.uid)?;
    }
    Ok(())
}
|
||||
|
||||
// LocalCollection typed param: `unsharded: RoaringBitmap` resolves to
|
||||
// `TypeKind::LocalCollection`, so `unsharded.insert(docid)` /
|
||||
// `unsharded.remove(docid)` classify as `SinkClass::InMemoryLocal`
|
||||
// (non-auth-relevant).
|
||||
// By-value `RoaringBitmap` parameter: inserts `docid`, then removes it again.
// Both calls act on the local bitmap and their `bool` results are discarded.
fn balance_shards(mut unsharded: RoaringBitmap, docid: u32) {
    unsharded.insert(docid);
    unsharded.remove(docid);
}
|
||||
|
||||
// `&'a mut HashMap<...>` reference + lifetime: ref-stripping must
|
||||
// reach the type head.
|
||||
// The receiver type is written as `&'a mut HashMap<String, u32>`, i.e. the
// collection name sits behind both a reference and an explicit lifetime.
// The single `insert` stores `docid` under `shard` in the caller's map.
fn store_shard_docids<'a>(
    new_shard_docids: &'a mut HashMap<String, u32>,
    shard: String,
    docid: u32,
) {
    new_shard_docids.insert(shard, docid);
}
|
||||
|
||||
// `&mut HashSet<u64>` receiver with an id-named argument (`user_id`):
// the id is inserted into the caller's set and then removed again.
fn add_user_id(ids: &mut HashSet<u64>, user_id: u64) {
    ids.insert(user_id);
    ids.remove(&user_id);
}
|
||||
|
||||
// `&mut BTreeSet<u32>` receiver: records `item_id` in the caller's set.
fn collect_seen(seen: &mut BTreeSet<u32>, item_id: u32) {
    seen.insert(item_id);
}
|
||||
|
||||
// The receiver is created in the body through `RoaringBitmap::new()`, so its
// type comes from the constructor call rather than a parameter annotation.
// Inserts `task_id` and returns the bitmap.
fn build_local_set(task_id: u32) -> RoaringBitmap {
    let mut s = RoaringBitmap::new();
    s.insert(task_id);
    s
}
|
||||
41
tests/fixtures/fp_guards/cfg_utf8_long_condition/App.js
vendored
Normal file
41
tests/fixtures/fp_guards/cfg_utf8_long_condition/App.js
vendored
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
// FP guard / panic guard — CFG condition-text truncation must be UTF-8 safe.
|
||||
//
|
||||
// Reproduces the gogs scan crash where a CodeMirror Gherkin tokenizer ships a
|
||||
// long localised regex inside a boolean sub-condition (`stream.match(/.../) &&
|
||||
// other`). When `push_condition_node` textualises the sub-expression, the
|
||||
// regex literal exceeds MAX_CONDITION_TEXT_LEN (256 bytes); naive byte-slice
|
||||
// truncation panicked when byte 256 landed inside a multi-byte UTF-8
|
||||
// character (here Gurmukhi `ਖ`, three bytes). Engine fix in
|
||||
// `src/utils/snippet.rs::truncate_at_char_boundary`, applied at three CFG
|
||||
// sites + two symex display sites.
|
||||
//
|
||||
// Invariant: scanning this file must terminate without panicking the rayon
|
||||
// worker, regardless of where byte 256 lands.
|
||||
|
||||
// Returns "keyword" (and sets `state.inKeywordLine`) when the tokenizer is
// not already on a keyword line, features are allowed, and the stream matches
// one of the localised "Feature" keywords followed by `:`. Returns null
// otherwise. The regex literal is deliberately long and multi-script; do not
// reflow or shorten it.
function tokenLocalisedFeatureKeyword(stream, state) {
  if (
    !state.inKeywordLine &&
    state.allowFeature &&
    stream.match(/(機能|功能|フィーチャ|기능|โครงหลัก|ความสามารถ|ความต้องการทางธุรกิจ|ಹೆಚ್ಚಳ|గుణము|ಮುಹಾಂದರಾ|ਮੁਹਾਂਦਰਾ|ਨਕਸ਼ ਨੁਹਾਰ|ਖਾਸੀਅਤ|रूप लेख|وِیژگی|خاصية|תכונה|Функціонал|Функция|Функционалност|Функционал|Үзенчәлеклелек|Свойство|Особина|Мөмкинлек|Могућност|Λειτουργία|Δυνατότητα|Właściwość|Vlastnosť|Trajto|Tính năng|Savybė|Požiadavka|Požadavek|Potrzeba biznesowa|Özellik|Osobina|Ominaisuus|Omadus|Mogućnost|Mogucnost|Jellemző|Funzionalità|Funktionalitéit|Funktionalität|Funkcja|Funkcionalnost|Funkcionalitāte|Funkcia|Fungsi|Functionaliteit|Funcționalitate|Funcţionalitate|Functionalitate|Funcionalitat|Funcionalidade|Fonctionnalité|Fitur|Fīča|Feature|Eiginleiki|Egenskap|Egenskab|Característica|Caracteristica|Business Need|Aspekt|Arwedd|Ability):/)
  ) {
    state.inKeywordLine = true;
    return "keyword";
  }
  return null;
}
|
||||
|
||||
// Sanity: also exercise the let-match-guard truncation site
|
||||
// (`emit_rust_match_guard_if`) by way of a JS analogue with a CFG-relevant
|
||||
// boolean chain that wraps localised text into the second branch. The CFG
|
||||
// builder still has to textualise both arms.
|
||||
// Returns "localised" when `s` is non-empty and contains the long multi-script
// string literal below, otherwise "ascii". The literal sits in the second
// operand of the `&&` chain; keep it byte-for-byte as is.
function classify(s) {
  if (
    s.length > 0 &&
    s.indexOf("ਨਕਸ਼ ਨੁਹਾਰ ਖਾਸੀਅਤ रूप लेख وِیژگی خاصية תכונה Функціонал Функция Функционалност Функционал Үзенчәлеклелек Свойство Особина Мөмкинлек Могућност Λειτουργία Δυνατότητα") >= 0
  ) {
    return "localised";
  }
  return "ascii";
}
|
||||
|
||||
module.exports = { tokenLocalisedFeatureKeyword, classify };
|
||||
14
tests/fixtures/fp_guards/cfg_utf8_long_condition/expectations.json
vendored
Normal file
14
tests/fixtures/fp_guards/cfg_utf8_long_condition/expectations.json
vendored
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
{
|
||||
"required_findings": [],
|
||||
"forbidden_findings": [],
|
||||
"noise_budget": {
|
||||
"max_total_findings": 0,
|
||||
"max_high_findings": 0
|
||||
},
|
||||
"performance_expectations": {
|
||||
"max_ms_no_index": 1000,
|
||||
"max_ms_index_cold": 1500,
|
||||
"max_ms_index_warm": 500,
|
||||
"ci_mode": "lenient"
|
||||
}
|
||||
}
|
||||
51
tests/fixtures/fp_guards/framework_fastapi_route_level_auth/App.py
vendored
Normal file
51
tests/fixtures/fp_guards/framework_fastapi_route_level_auth/App.py
vendored
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
"""
|
||||
FP guard for FastAPI / Flask route-level dependency-injection auth.
|
||||
|
||||
The `dependencies=[Depends(requires_access_dag(...))]` decorator
|
||||
authorises the entire handler — every value the handler receives,
|
||||
every row it fetches, and every operation downstream. The
|
||||
`is_route_level` flag on the injected AuthCheck tells
|
||||
`auth_check_covers_subject` to short-circuit `true`, suppressing
|
||||
`py.auth.missing_ownership_check` on the body's ORM calls (`filter_by`,
|
||||
`scalar`, …) and on row-variable receivers (`dag.cleanup_runs(...)`).
|
||||
|
||||
A bare route with no `dependencies=` keyword is a real ownership-
|
||||
check gap, not an FP — the engine must still flag it.  The vulnerable
|
||||
counterpart lives in
|
||||
`tests/benchmark/corpus/python/auth/vuln_fastapi_route_no_dependencies.py`.
|
||||
"""
|
||||
from fastapi import Depends, FastAPI
|
||||
|
||||
router = FastAPI()
|
||||
|
||||
|
||||
# Stub dependency factory: both arguments are ignored and the returned
# `check` callable does nothing. Only the call shape inside
# `dependencies=[Depends(...)]` matters to this fixture.
def requires_access_dag(method: str, access_entity=None):
    def check():
        ...
    return check
|
||||
|
||||
|
||||
# NOTE: `select`, `DagRun` and `HTTPException` are not imported in this
# fixture; only `Depends` and `FastAPI` are.
@router.get(
    "/{dag_id}/runs/{run_id}",
    # Route-level guard attached through FastAPI's `dependencies=` keyword.
    dependencies=[Depends(requires_access_dag(method="GET"))],
)
def get_dag_run(dag_id: str, run_id: str, session):
    """Path params + ORM call covered by route-level guard."""
    # Both path parameters flow straight into the ORM filter.
    dag_run = session.scalar(
        select(DagRun).filter_by(dag_id=dag_id, run_id=run_id)
    )
    if dag_run is None:
        raise HTTPException(404, "not found")
    return dag_run
|
||||
|
||||
|
||||
# NOTE: `select`, `DagModel` and `HTTPException` are not imported in this
# fixture; only `Depends` and `FastAPI` are.
@router.delete(
    "/{dag_id}",
    # Route-level guard attached through FastAPI's `dependencies=` keyword.
    dependencies=[Depends(requires_access_dag(method="DELETE"))],
)
def delete_dag(dag_id: str, session):
    """Row fetch + row-variable method call covered by route-level guard."""
    dag = session.scalar(select(DagModel).where(DagModel.dag_id == dag_id))
    if dag is None:
        raise HTTPException(404, "not found")
    # Method call on the fetched row itself (the row-variable receiver case).
    dag.cleanup_runs(session=session)
|
||||
16
tests/fixtures/fp_guards/framework_fastapi_route_level_auth/expectations.json
vendored
Normal file
16
tests/fixtures/fp_guards/framework_fastapi_route_level_auth/expectations.json
vendored
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
{
|
||||
"required_findings": [],
|
||||
"forbidden_findings": [
|
||||
{ "id_prefix": "py.auth.missing_ownership_check" }
|
||||
],
|
||||
"noise_budget": {
|
||||
"max_total_findings": 2,
|
||||
"max_high_findings": 0
|
||||
},
|
||||
"performance_expectations": {
|
||||
"max_ms_no_index": 1500,
|
||||
"max_ms_index_cold": 2000,
|
||||
"max_ms_index_warm": 800,
|
||||
"ci_mode": "lenient"
|
||||
}
|
||||
}
|
||||
40
tests/fixtures/fp_guards/framework_strapi_db_query_chain/App.ts
vendored
Normal file
40
tests/fixtures/fp_guards/framework_strapi_db_query_chain/App.ts
vendored
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
// Strapi-style ORM accessor: `<obj>.db.query(MODEL_UID).<orm_method>(...)`.
|
||||
// MODEL_UID is a literal model identifier (not raw SQL); the trailing
|
||||
// findOne/findMany/create/update/delete/count are intrinsically
|
||||
// parameterised — the actual SQL is generated by the ORM and per-call
|
||||
// values arrive through field-keyed object literals the driver escapes.
|
||||
//
|
||||
// FP-guard: cfg-unguarded-sink and taint-unsanitised-flow must NOT
|
||||
// fire on this shape.
|
||||
|
||||
declare const strapi: any;
|
||||
|
||||
// `findOne` on the model accessor. The caller-supplied `whereParams` is
// passed as the `where` field of the options object, never as query text.
async function getApiToken(whereParams: Record<string, unknown>) {
  return strapi.db.query('admin::api-token').findOne({
    select: ['id', 'name', 'lastUsedAt'],
    populate: ['permissions'],
    where: whereParams,
  });
}
|
||||
|
||||
// `findMany` with a fully literal `where` clause; takes no caller input.
async function listTokens() {
  return strapi.db.query('admin::api-token').findMany({
    where: { type: 'read-only' },
  });
}
|
||||
|
||||
// `create` with the caller-supplied `data` passed via object shorthand.
async function createToken(data: unknown) {
  return strapi.db.query('admin::api-token').create({ data });
}
|
||||
|
||||
// `update` keyed by `id` through a field-keyed `where` object, with the
// caller-supplied `data` as the new values.
async function updateToken(id: number, data: unknown) {
  return strapi.db.query('admin::api-token').update({ where: { id }, data });
}
|
||||
|
||||
// `delete` keyed by `id` through a field-keyed `where` object.
async function deleteToken(id: number) {
  return strapi.db.query('admin::api-token').delete({ where: { id } });
}
|
||||
|
||||
// `count` with no arguments; the only string involved is the model UID literal.
async function countTokens() {
  return strapi.db.query('admin::api-token').count();
}
|
||||
17
tests/fixtures/fp_guards/framework_strapi_db_query_chain/expectations.json
vendored
Normal file
17
tests/fixtures/fp_guards/framework_strapi_db_query_chain/expectations.json
vendored
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
{
|
||||
"required_findings": [],
|
||||
"forbidden_findings": [
|
||||
{ "id_prefix": "cfg-unguarded-sink" },
|
||||
{ "id_prefix": "taint-unsanitised-flow" }
|
||||
],
|
||||
"noise_budget": {
|
||||
"max_total_findings": 3,
|
||||
"max_high_findings": 0
|
||||
},
|
||||
"performance_expectations": {
|
||||
"max_ms_no_index": 1000,
|
||||
"max_ms_index_cold": 1500,
|
||||
"max_ms_index_warm": 500,
|
||||
"ci_mode": "lenient"
|
||||
}
|
||||
}
|
||||
13
tests/fixtures/js/fetch_body_data_exfil.js
vendored
Normal file
13
tests/fixtures/js/fetch_body_data_exfil.js
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
// DATA_EXFIL fixture: a fixed destination URL and an attacker-influenced
|
||||
// body. SSRF must NOT fire (destination is hardcoded) but `Cap::DATA_EXFIL`
|
||||
// must fire on the body field — request-bound bytes are leaving the process
|
||||
// via the outbound request payload.
|
||||
//
|
||||
// Driven by `fetch_data_exfil_integration_tests.rs`.
|
||||
// Sends a request-derived value to a fixed endpoint: the URL argument is a
// string literal, while `body` carries `req.body.message`.
function leakBody(req) {
  var payload = req.body.message;
  fetch('/endpoint', {
    method: 'POST',
    body: payload,
  });
}
|
||||
10
tests/fixtures/js/fetch_ssrf_url_tainted.js
vendored
Normal file
10
tests/fixtures/js/fetch_ssrf_url_tainted.js
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
// SSRF regression fixture: attacker-controlled destination URL. SSRF must
|
||||
// fire on the URL flow (arg 0) and `Cap::DATA_EXFIL` must NOT fire — the two
|
||||
// classes share the callee but cap attribution is per-position so a tainted
|
||||
// URL never surfaces as data exfiltration.
|
||||
//
|
||||
// Driven by `fetch_data_exfil_integration_tests.rs`.
|
||||
// Fetches a request-derived URL: `req.query.target` is passed as the first
// (and only) argument of `fetch`; no options object or body is supplied.
function proxy(req) {
  var target = req.query.target;
  fetch(target);
}
|
||||
4
tests/fixtures/mixed_project/config.rs
vendored
4
tests/fixtures/mixed_project/config.rs
vendored
|
|
@ -2,7 +2,7 @@ use std::env;
|
|||
use std::fs;
|
||||
use std::process::Command;
|
||||
|
||||
/// Infrastructure provisioning tool — Rust core.
|
||||
/// Infrastructure provisioning tool, Rust core.
|
||||
/// Reads infrastructure config from environment and executes provisioning commands.
|
||||
|
||||
struct InfraConfig {
|
||||
|
|
@ -56,7 +56,7 @@ fn apply_terraform() {
|
|||
.unwrap();
|
||||
}
|
||||
|
||||
/// Destroys infrastructure — reads target from env.
|
||||
/// Destroys infrastructure, reads target from env.
|
||||
/// VULN: env var flows into Command
|
||||
fn destroy_cluster() {
|
||||
let cluster = env::var("DESTROY_TARGET").unwrap();
|
||||
|
|
|
|||
17
tests/fixtures/patterns/java/negative.java
vendored
17
tests/fixtures/patterns/java/negative.java
vendored
|
|
@ -1,5 +1,9 @@
|
|||
import java.sql.*;
|
||||
import java.security.SecureRandom;
|
||||
import org.yaml.snakeyaml.Yaml;
|
||||
import org.yaml.snakeyaml.LoaderOptions;
|
||||
import org.yaml.snakeyaml.constructor.SafeConstructor;
|
||||
import org.apache.commons.text.StringSubstitutor;
|
||||
|
||||
class Negative {
|
||||
// Safe: parameterized query
|
||||
|
|
@ -19,4 +23,17 @@ class Negative {
|
|||
void safeLiteralQuery(Statement stmt) throws Exception {
|
||||
stmt.executeQuery("SELECT COUNT(*) FROM users");
|
||||
}
|
||||
|
||||
// Safe: SnakeYAML 2.0 / explicit SafeConstructor — CVE-2022-1471 fix shape.
|
||||
void safeSnakeyamlSafeConstructor(String body) {
|
||||
LoaderOptions opts = new LoaderOptions();
|
||||
Yaml yaml = new Yaml(new SafeConstructor(opts));
|
||||
Object data = yaml.load(body);
|
||||
}
|
||||
|
||||
// Safe: empty StringSubstitutor — no interpolator factory — CVE-2022-42889 fix shape.
|
||||
String safeStringSubstitutorPassthrough(String input) {
|
||||
StringSubstitutor s = new StringSubstitutor();
|
||||
return s.replace(input);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
14
tests/fixtures/patterns/java/positive.java
vendored
14
tests/fixtures/patterns/java/positive.java
vendored
|
|
@ -1,6 +1,8 @@
|
|||
import java.io.*;
|
||||
import java.util.Random;
|
||||
import java.security.MessageDigest;
|
||||
import org.yaml.snakeyaml.Yaml;
|
||||
import org.apache.commons.text.StringSubstitutor;
|
||||
|
||||
class Positive {
|
||||
// java.deser.readobject
|
||||
|
|
@ -45,4 +47,16 @@ class Positive {
|
|||
void triggerGetWriterPrint(javax.servlet.http.HttpServletResponse resp) throws Exception {
|
||||
resp.getWriter().println("<html>" + "data" + "</html>");
|
||||
}
|
||||
|
||||
// java.deser.snakeyaml_unsafe_constructor — CVE-2022-1471 regression guard.
|
||||
void triggerSnakeyamlUnsafeConstructor() throws Exception {
|
||||
Yaml yaml = new Yaml();
|
||||
Object data = yaml.load("payload");
|
||||
}
|
||||
|
||||
// java.code_exec.text4shell_interpolator — CVE-2022-42889 regression guard.
|
||||
String triggerText4ShellInterpolator(String input) {
|
||||
StringSubstitutor s = StringSubstitutor.createInterpolator();
|
||||
return s.replace(input);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
8
tests/fixtures/patterns/python/positive.py
vendored
8
tests/fixtures/patterns/python/positive.py
vendored
|
|
@ -42,6 +42,14 @@ def trigger_yaml(data):
|
|||
def trigger_sql_concat(cursor, user):
|
||||
cursor.execute("SELECT * FROM users WHERE name = '" + user + "'")
|
||||
|
||||
# py.sqli.execute_format (f-string variant)
|
||||
def trigger_sql_fstring(cursor, user):
|
||||
cursor.execute(f"SELECT * FROM users WHERE name = '{user}'")
|
||||
|
||||
# py.sqli.text_format
|
||||
def trigger_sqlalchemy_text_fstring(connection, user):
|
||||
connection.execute(text(f"SELECT * FROM users WHERE name = '{user}'"))
|
||||
|
||||
# py.crypto.md5
|
||||
def trigger_md5(data):
|
||||
hashlib.md5(data)
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ mod auth {
|
|||
|
||||
// Negative control: the handler validates ownership via
|
||||
// `authz::require_group_member(...)?` before the realtime publish. Phase C
|
||||
// should NOT emit `rs.auth.missing_ownership_check.taint` here — the
|
||||
// should NOT emit `rs.auth.missing_ownership_check.taint` here, the
|
||||
// sanitizer clears `UNAUTHORIZED_ID` from the argument SSA values.
|
||||
pub async fn handle_publish_checked(Path(group_id): Path<i64>) -> Result<&'static str, ()> {
|
||||
let user = auth::current_user();
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"description": "fetch({url: taintedUrl, body: fixed}) — destination-aware object-literal case. url is tainted, must fire.",
|
||||
"tags": ["taint", "ssrf", "fetch", "destination-aware", "object-config"],
|
||||
"description": "fetch({url: taintedUrl, body: fixed}) — destination-aware object-literal case. url is tainted (SSRF), body is fixed. SSRF must fire and the cross-boundary data-exfiltration class (Cap::DATA_EXFIL) must NOT fire — the two classes share the callee but cap attribution is per-position.",
|
||||
"tags": ["taint", "ssrf", "fetch", "destination-aware", "object-config", "cap-attribution"],
|
||||
"modes": ["full"],
|
||||
"expected": [
|
||||
{
|
||||
|
|
@ -10,6 +10,12 @@
|
|||
"line_range": [6, 14],
|
||||
"evidence_contains": [],
|
||||
"notes": "req.query.target → fetch({url: target, ...}) — tainted destination field under object-literal shape."
|
||||
},
|
||||
{
|
||||
"rule_id": "taint-data-exfiltration",
|
||||
"must_not_match": true,
|
||||
"line_range": [6, 14],
|
||||
"notes": "body is a fixed literal '{}' — DATA_EXFIL must NOT fire on this site (regression guard for per-cap attribution)."
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue