mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-15 20:05:13 +02:00
Python FP and docs updates (#58)
* refactor: Update comments for clarity and add expectations.json files for performance metrics * feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks * feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks * refactor: Simplify code formatting for better readability in multiple files * refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration * feat: Update Java and Python patterns to include new security rules * refactor: Improve comment clarity and consistency across multiple Rust files * refactor: Simplify code formatting for improved readability in integration tests and module files * refactor: Improve comment formatting and enhance clarity in assertions across multiple files
This commit is contained in:
parent
4db0805de6
commit
a438886217
291 changed files with 9485 additions and 3851 deletions
47
tests/benchmark/cve_corpus/python/CVE-2025-69662/patched.py
Normal file
47
tests/benchmark/cve_corpus/python/CVE-2025-69662/patched.py
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
# Nyx CVE benchmark fixture.
|
||||
#
|
||||
# CVE: CVE-2025-69662
|
||||
# Project: geopandas (geopandas/geopandas)
|
||||
# License: BSD-3-Clause (https://github.com/geopandas/geopandas/blob/main/LICENSE.txt)
|
||||
# Advisory: https://github.com/advisories/GHSA-6497-prx7-gpmq
|
||||
# Patched: 6aa8ef14ffdee4ba1044349ab948e1a1fbfaf419 geopandas/io/sql.py:432-438
|
||||
#
|
||||
# Fix: replace the f-string-built Find_SRID probe with a
|
||||
# bound-parameter SQLAlchemy text() statement; SQLAlchemy passes the
|
||||
# values via the driver's parameter binding, so attacker-supplied
|
||||
# identifiers can no longer break out of the literal context.
|
||||
#
|
||||
# Trims:
|
||||
# - Same scaffolding trim as vulnerable.py — `.fetchone()[0]` (post-
|
||||
# sink result extraction) removed.
|
||||
# - Patched-fix simplification: the upstream fix nests
|
||||
# `text(...).bindparams(...)` directly inside `connection.execute(...)`.
|
||||
# The fixture lifts the bound-parameter clause into a local `stmt`
|
||||
# so the `.bindparams` call is a top-level CFG node — without this
|
||||
# reshape, cfg-unguarded-sink fires on the surrounding execute
|
||||
# because the inlined sanitizer-in-arg shape is not yet recognised
|
||||
# by the dominator-based guard check. The verbatim bytes of the
|
||||
# `text(...).bindparams(...)` clause are preserved.
|
||||
|
||||
from flask import Flask, request
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
# Flask application object; the route decorator below registers the handler on it.
app = Flask(__name__)
|
||||
# SQLAlchemy engine for a hard-coded local Postgres URL; the handler obtains
# its connection from it via engine.begin().
engine = create_engine("postgresql://localhost/geo")
|
||||
|
||||
|
||||
# Patched fixture handler for POST /upload-layer.
# Reads three identifiers from the JSON request body and runs the Find_SRID
# probe with those values attached as bound parameters instead of being
# formatted into the SQL string.
# NOTE: the statement shape (local `stmt`, then `connection.execute(stmt)`) is
# deliberate for the benchmark; see the fixture header before reshaping it.
@app.post("/upload-layer")
|
||||
def upload_layer():
|
||||
# `or {}` substitutes an empty dict when the parsed body is falsy, so the
# .get() calls below always have a mapping to read from.
body = request.get_json(force=True) or {}
|
||||
# Request-supplied values; each falls back to a default when its key is absent.
geom_name = body.get("geom_name", "geom")
|
||||
name = body.get("table", "data")
|
||||
schema_name = body.get("schema", "public")
|
||||
with engine.begin() as connection:
|
||||
# Verbatim bytes from sql.py:433-437 — bound-parameter probe.
|
||||
# The SQL text contains only :named placeholders; values are supplied by bindparams().
stmt = text(
|
||||
"SELECT Find_SRID(:schema_name, :name, :geom_name);"
|
||||
).bindparams(
|
||||
schema_name=schema_name, name=name, geom_name=geom_name
|
||||
)
|
||||
# The result of execute() is not read (the upstream `.fetchone()[0]` was trimmed).
connection.execute(stmt)
|
||||
# Fixed response payload; it does not depend on the query result.
return {"ok": True}
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
# Nyx CVE benchmark fixture.
|
||||
#
|
||||
# CVE: CVE-2025-69662
|
||||
# Project: geopandas (geopandas/geopandas)
|
||||
# License: BSD-3-Clause (https://github.com/geopandas/geopandas/blob/main/LICENSE.txt)
|
||||
# Advisory: https://github.com/advisories/GHSA-6497-prx7-gpmq
|
||||
# Vulnerable: c301579e0ac4034c19bece63c08bf628613700b4 geopandas/io/sql.py:432-435
|
||||
#
|
||||
# geopandas.GeoDataFrame.to_postgis() concatenated the GeoDataFrame's
|
||||
# geometry column name (and the schema/table names) into a Find_SRID
|
||||
# probe via f-string. A user uploading a GeoDataFrame whose geometry
|
||||
# column was named with embedded SQL (e.g. "geom'); DROP TABLE...--")
|
||||
# achieved arbitrary SQL execution against the target Postgres database.
|
||||
#
|
||||
# Trims:
|
||||
# - Surrounding to_postgis() body (CRS lookup, EWKB conversion, dtype
|
||||
# dict construction at L399-422) that scaffolds the vulnerable
|
||||
# Find_SRID probe.
|
||||
# - Trailing `.fetchone()[0]` on the connection.execute(...) result —
|
||||
# downstream of the sink (result extraction), not on the flow path.
|
||||
#
|
||||
# Only the source statement (geom_name from request input), the
|
||||
# f-string SQL builder, and the connection.execute(text(...)) sink are
|
||||
# preserved verbatim from sql.py:432-435.
|
||||
|
||||
from flask import Flask, request
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
# Flask application object; the route decorator below registers the handler on it.
app = Flask(__name__)
|
||||
# SQLAlchemy engine for a hard-coded local Postgres URL; the handler obtains
# its connection from it via engine.begin().
engine = create_engine("postgresql://localhost/geo")
|
||||
|
||||
|
||||
# Vulnerable fixture handler for POST /upload-layer.
# Reads three identifiers from the JSON request body and interpolates them
# directly into the Find_SRID SQL text with an f-string before executing it.
# NOTE: this is the benchmark's intentionally vulnerable case (SQL injection);
# do NOT parameterize or sanitize it here — the patched variant lives in patched.py.
@app.post("/upload-layer")
|
||||
def upload_layer():
|
||||
# `or {}` substitutes an empty dict when the parsed body is falsy, so the
# .get() calls below always have a mapping to read from.
body = request.get_json(force=True) or {}
|
||||
# geom_name is supplied by the API caller — no validation upstream.
|
||||
geom_name = body.get("geom_name", "geom")
|
||||
# `name` and `schema_name` are read from the same request body, also unvalidated.
name = body.get("table", "data")
|
||||
schema_name = body.get("schema", "public")
|
||||
with engine.begin() as connection:
|
||||
# Verbatim from sql.py:432-435 — Find_SRID probe with
|
||||
# f-string-interpolated identifiers.
|
||||
# Sink: the request values land inside single-quoted SQL literals with no escaping.
connection.execute(
|
||||
text(f"SELECT Find_SRID('{schema_name}', '{name}', '{geom_name}');")
|
||||
)
|
||||
# Fixed response payload; the query result is not read.
return {"ok": True}
|
||||
79
tests/benchmark/cve_corpus/python/CVE-2026-33626/patched.py
Normal file
79
tests/benchmark/cve_corpus/python/CVE-2026-33626/patched.py
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
# Nyx CVE benchmark fixture.
|
||||
#
|
||||
# CVE: CVE-2026-33626
|
||||
# Project: LMDeploy (InternLM/lmdeploy)
|
||||
# License: Apache-2.0 (https://github.com/InternLM/lmdeploy/blob/main/LICENSE)
|
||||
# Advisory: https://github.com/advisories/GHSA-25c5-rg58-mhxh
|
||||
# Patched: 71d64a339edb901e9005358e0633fbbab367d626 lmdeploy/vl/media/connection.py:24-69
|
||||
#
|
||||
# Fix: introduce `_is_safe_url(url)` which resolves the hostname via
|
||||
# `socket.getaddrinfo`, walks every returned IP, and rejects any that
|
||||
# aren't `is_global` (covers loopback, RFC1918 private, link-local,
|
||||
# multicast, reserved, unspecified). The vulnerable scheme-only check
|
||||
# is replaced by this allowlist gate before the fetch.
|
||||
#
|
||||
# Trims: same scaffolding trim as vulnerable.py — MediaIO generic
|
||||
# plumbing replaced with a Flask handler; fetch_timeout env-var
|
||||
# resolution collapsed to a literal. The `_is_safe_url` body, the
|
||||
# replacement gate at L55-58, and the `client.get(...,
|
||||
# allow_redirects=True)` fetch are preserved verbatim from the fix
|
||||
# commit.
|
||||
|
||||
import ipaddress
|
||||
import socket
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
from flask import Flask, request
|
||||
|
||||
# Flask application object; the route decorator below registers the handler on it.
app = Flask(__name__)
|
||||
# Request headers sent with the outbound fetch in load_image().
headers = {"User-Agent": "Mozilla/5.0"}
|
||||
|
||||
|
||||
# Validate a URL before it is fetched.
# Returns an (is_safe, reason) pair. The result is (True, 'URL is safe') only
# when the scheme is http or https, a hostname can be parsed, the hostname
# resolves, and every resolved address is globally routable. Every other
# outcome is (False, <reason string>); this function does not raise.
def _is_safe_url(url: str) -> tuple[bool, str]:
|
||||
"""Check if the URL is safe to fetch (not internal/private)."""
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
# Only plain web schemes are accepted; anything else is rejected up front.
if parsed.scheme not in ('http', 'https'):
|
||||
return False, f'Unsupported scheme: {parsed.scheme}'
|
||||
|
||||
hostname = parsed.hostname
|
||||
# A missing/empty hostname cannot be resolved, so treat it as unsafe.
if not hostname:
|
||||
return False, 'Could not parse hostname from URL'
|
||||
|
||||
# check all IPs (IPv4 + IPv6) using getaddrinfo
|
||||
try:
|
||||
infos = socket.getaddrinfo(hostname, None)
|
||||
except socket.gaierror:
|
||||
return False, 'Hostname resolution failed'
|
||||
|
||||
# One non-global address among the results is enough to reject the URL.
for info in infos:
|
||||
# info[4] is the socket address tuple; its first element is the address string.
ip = ipaddress.ip_address(info[4][0])
|
||||
# block any IP that is not globally routable
|
||||
if not ip.is_global:
|
||||
return False, f'Blocked non-global IP detected: {ip}'
|
||||
|
||||
return True, 'URL is safe'
|
||||
# Catch-all: any other failure (e.g. ip_address() rejecting the address
# string) is reported as "unsafe" instead of propagating.
except Exception as e:
|
||||
return False, f'URL validation failed: {str(e)}'
|
||||
|
||||
|
||||
# Patched fixture handler for POST /load-image.
# Reads "url" from the JSON request body, rejects it unless _is_safe_url()
# approves it, then fetches it with a requests Session and returns the size of
# the response body.
# Raises ValueError when the URL is blocked; raise_for_status() raises on an
# HTTP error status.
# NOTE(review): validation resolves the hostname separately from the fetch, and
# redirects are followed (allow_redirects=True, max_redirects=3) with no
# re-validation visible in this block — confirm this matches the upstream fix
# and is acceptable for the fixture.
@app.post("/load-image")
|
||||
def load_image():
|
||||
# `or {}` substitutes an empty dict when the parsed body is falsy.
body = request.get_json(force=True) or {}
|
||||
# Caller-controlled URL; defaults to an empty string when the key is absent.
url = body.get("url", "")
|
||||
url_spec = urlparse(url)
|
||||
# Verbatim from connection.py:55-58 — replaces the scheme-only
|
||||
# check with a private-IP-blocking allowlist.
|
||||
is_safe, reason = _is_safe_url(url_spec.geturl())
|
||||
if not is_safe:
|
||||
raise ValueError(f'URL is blocked for security reasons: {reason}')
|
||||
|
||||
# Literal timeout; the fixture header says the upstream env-var lookup was collapsed to this.
fetch_timeout = 10
|
||||
client = requests.Session()
|
||||
# Cap the number of redirects the session will follow.
client.max_redirects = 3
|
||||
response = client.get(
|
||||
url_spec.geturl(), headers=headers, timeout=fetch_timeout, allow_redirects=True
|
||||
)
|
||||
response.raise_for_status()
|
||||
# Only the byte length of the fetched content is returned to the caller.
return {"size": len(response.content)}
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
# Nyx CVE benchmark fixture.
|
||||
#
|
||||
# CVE: CVE-2026-33626
|
||||
# Project: LMDeploy (InternLM/lmdeploy)
|
||||
# License: Apache-2.0 (https://github.com/InternLM/lmdeploy/blob/main/LICENSE)
|
||||
# Advisory: https://github.com/advisories/GHSA-25c5-rg58-mhxh
|
||||
# Vulnerable: 819a80836e991ca3f427b0e85faca159083d3d40 lmdeploy/vl/media/connection.py:23-37
|
||||
#
|
||||
# LMDeploy's vision-language image loader accepted user-supplied
|
||||
# image URLs from the chat-completion request and fetched them via
|
||||
# `requests.Session().get(url)` after only a scheme check. Attackers
|
||||
# embedded URLs pointing at internal network services or cloud
|
||||
# metadata endpoints (e.g. http://169.254.169.254/...) and exfiltrated
|
||||
# the response back through the model output.
|
||||
#
|
||||
# Trims:
|
||||
# - Surrounding _load_data_url / file-URL branches that don't reach
|
||||
# the HTTP sink (lines 41+).
|
||||
# - The scheme-only allowlist check at L24-25 of upstream. The
|
||||
# CVE is host-based SSRF (private IP / cloud-metadata host); the
|
||||
# scheme check was the insufficient validation the fix replaces.
|
||||
# Removing it keeps the load-bearing source → sink flow intact.
|
||||
# - The fetch_timeout env-var resolution (L28-31) — collapsed to a
|
||||
# literal so the fixture is self-contained.
|
||||
# - MediaIO[_M] generic plumbing — replaced with a Flask handler so
|
||||
# the source is a concrete request flow.
|
||||
#
|
||||
# The verbatim load-bearing lines are the `client = requests.Session()`
|
||||
# constructor and the `client.get(url_spec.geturl(), headers=headers,
|
||||
# timeout=fetch_timeout)` fetch site at lines 33-34 of upstream.
|
||||
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
from flask import Flask, request
|
||||
|
||||
# Flask application object; the route decorator below registers the handler on it.
app = Flask(__name__)
|
||||
# Request headers sent with the outbound fetch in load_image().
headers = {"User-Agent": "Mozilla/5.0"}
|
||||
|
||||
|
||||
# Vulnerable fixture handler for POST /load-image.
# Reads "url" from the JSON request body and fetches it with a requests
# Session without any host or scheme validation, then returns the size of the
# response body. raise_for_status() raises on an HTTP error status.
# NOTE: this is the benchmark's intentionally vulnerable case (SSRF); do NOT add
# validation here — the patched variant lives in patched.py.
@app.post("/load-image")
|
||||
def load_image():
|
||||
# `or {}` substitutes an empty dict when the parsed body is falsy.
body = request.get_json(force=True) or {}
|
||||
# Caller-controlled URL; defaults to an empty string when the key is absent.
url = body.get("url", "")
|
||||
url_spec = urlparse(url)
|
||||
# Literal timeout; the fixture header says the upstream env-var lookup was collapsed to this.
fetch_timeout = 10
|
||||
# Verbatim from connection.py:33-34 — Session().get(url).
|
||||
client = requests.Session()
|
||||
# Sink: the request-supplied URL is fetched as-is.
response = client.get(url_spec.geturl(), headers=headers, timeout=fetch_timeout)
|
||||
response.raise_for_status()
|
||||
# Only the byte length of the fetched content is returned to the caller.
return {"size": len(response.content)}
|
||||
Loading…
Add table
Add a link
Reference in a new issue