Performance and precision pass (#64)

This commit is contained in:
Eli Peter 2026-05-04 19:58:04 -04:00 committed by GitHub
parent c7c5e0f3a1
commit fb698d2c27
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
97 changed files with 9932 additions and 517 deletions

View file

@ -0,0 +1,30 @@
# Nyx CVE benchmark fixture.
#
# CVE: CVE-2023-6568
# Project: MLflow (mlflow/mlflow)
# License: Apache-2.0 (https://github.com/mlflow/mlflow/blob/master/LICENSE.txt)
# Advisory: https://nvd.nist.gov/vuln/detail/CVE-2023-6568
# Patched: 28ff3f94994941e038f2172c6484b65dc4db6ca1 mlflow/server/auth/__init__.py:744-770
#
# The fix replaces the f-string interpolation of the attacker-controlled
# `content_type` header with a static error message. No tainted value
# reaches `make_response`, so the reflected-XSS sink is silent.
from flask import request, make_response
def catch_mlflow_exception(fn):
    """Pass-through decorator: hand back ``fn`` exactly as received.

    Presumably a stand-in for MLflow's real decorator of the same name,
    kept so the handler below can retain its decorator line.
    """
    return fn
@catch_mlflow_exception
def create_user():
    """Patched ``create_user`` handler: reject unknown content types safely.

    Reads the ``Content-Type`` request header. A JSON request gets the
    ``{"user": "ok"}`` response; anything else gets a 400 whose body is a
    fixed string, so the header value is never echoed back.
    """
    content_type = request.headers.get("Content-Type")
    if content_type == "application/json":
        return make_response({"user": "ok"})

    # Static text only -- the header value above is deliberately not
    # interpolated into the response.
    return make_response(
        "Invalid content type. Must be one of: "
        "application/x-www-form-urlencoded, application/json",
        400,
    )

View file

@ -0,0 +1,45 @@
# Nyx CVE benchmark fixture.
#
# CVE: CVE-2023-6568
# Project: MLflow (mlflow/mlflow)
# License: Apache-2.0 (https://github.com/mlflow/mlflow/blob/master/LICENSE.txt)
# Advisory: https://nvd.nist.gov/vuln/detail/CVE-2023-6568
# Vulnerable: 28ff3f94994941e038f2172c6484b65dc4db6ca1~1 mlflow/server/auth/__init__.py:744-766
#
# Reflected Cross-Site Scripting in MLflow's auth server `create_user`
# handler. When a request arrived with an unrecognised `Content-Type`
# header, the handler reflected the attacker-controlled header value
# into a Flask response via an f-string and `make_response(...)`.
# Because `make_response` returns the response unmodified (no escaping)
# and Werkzeug serves the bytes back to the browser as text/html, the
# header reflection becomes XSS in the browser.
#
# Trims:
# - imports / module-level setup (config, store, blueprints L1-30) —
# scaffolding only.
# - non-`create_user` handlers (`get_user`, `update_user_password`,
# `update_user_admin`, all later in the file) — same `make_response`
# call shape but with non-tainted inputs; not the disclosed sink.
# - `flash` / `alert` paths inside `create_user` (form-urlencoded and
# application/json branches) — those branches do not produce the
# reflected XSS; only the `else` branch does.
#
# Verbatim load-bearing lines: `content_type = request.headers.get(
# "Content-Type")` (source) and `return make_response(f"Invalid content
# type: '{content_type}'", 400)` (sink) are byte-for-byte from
# mlflow/server/auth/__init__.py at the pre-fix SHA.
from flask import request, make_response
def catch_mlflow_exception(fn):
    """Pass-through decorator: hand back ``fn`` exactly as received.

    Presumably a stand-in for MLflow's real decorator of the same name,
    kept so the handler below can retain its decorator line.
    """
    return fn
@catch_mlflow_exception
def create_user():
    """Pre-fix ``create_user`` handler; intentionally vulnerable.

    Reads the ``Content-Type`` request header. A JSON request gets the
    ``{"user": "ok"}`` response; anything else gets a 400 whose body
    embeds the raw header value.
    """
    # Source: value taken straight from the request headers.
    content_type = request.headers.get("Content-Type")
    if content_type == "application/json":
        return make_response({"user": "ok"})
    else:
        # Sink: the header value is interpolated into the response body
        # unescaped. This is the behaviour the fixture exists to exhibit;
        # do not "fix" it here.
        return make_response(f"Invalid content type: '{content_type}'", 400)

View file

@ -0,0 +1,26 @@
# Nyx CVE benchmark fixture.
#
# CVE: CVE-2024-21513
# Project: LangChain Experimental (langchain-ai/langchain)
# License: MIT (https://github.com/langchain-ai/langchain/blob/master/LICENSE)
# Advisory: https://nvd.nist.gov/vuln/detail/CVE-2024-21513
# Patched: 7b13292e3544b2f5f2bfb8a27a062ea2b0c34561
# libs/experimental/langchain_experimental/sql/vector_sql.py:79-83
#
# The fix removes the `_try_eval` helper entirely and returns the raw
# `db._execute(...)` result without invoking `eval(...)` at all. No
# `eval` sink remains, so `py.code_exec.eval` is silent.
from typing import Any, Dict, List, Union
class SQLDatabase:
    """Minimal stub exposing only the ``_execute`` method the fixture calls."""

    def _execute(self, cmd: str, fetch: str = "all") -> Any:
        """Placeholder with no body; always returns ``None``."""
def get_result_from_sqldb(
    db: SQLDatabase, cmd: str
) -> Union[str, List[Dict[str, Any]], Dict[str, Any]]:
    """Run ``cmd`` on ``db`` with ``fetch="all"`` and return the result as-is.

    The value from ``db._execute`` is handed back without any further
    processing.
    """
    return db._execute(cmd, fetch="all")  # type: ignore

View file

@ -0,0 +1,56 @@
# Nyx CVE benchmark fixture.
#
# CVE: CVE-2024-21513
# Project: LangChain Experimental (langchain-ai/langchain)
# License: MIT (https://github.com/langchain-ai/langchain/blob/master/LICENSE)
# Advisory: https://nvd.nist.gov/vuln/detail/CVE-2024-21513
# Vulnerable: 7b13292e3544b2f5f2bfb8a27a062ea2b0c34561~1
# libs/experimental/langchain_experimental/sql/vector_sql.py:79-98
#
# `langchain_experimental.sql.vector_sql.VectorSQLDatabaseChain` ran
# every value returned from a SQL query through Python's built-in
# `eval(...)` so that string-shaped numbers / lists were converted into
# Python objects. An attacker who could control the database content
# (for example by writing into a vector store backing the chain) could
# return a value such as `__import__("os").system("rm -rf /")` and the
# chain would `eval` it, achieving arbitrary code execution on the
# server hosting the chain.
#
# Trims:
# - imports / non-load-bearing module decls (L1-30 of upstream).
# - `parse(self, text: str)` output-parser method (L70-77) and the
# `VectorSQLDatabaseChain` class body (L101-200) — neither is on
# the disclosed source→sink path.
# - SQLAlchemy / SQLDatabase type hints simplified to `Any` to avoid
# pulling the upstream type chain into the fixture.
#
# Verbatim load-bearing lines: the `_try_eval` helper definition and
# the two dict / list comprehensions inside `get_result_from_sqldb`
# that call `_try_eval(v)` on each query-result value are
# byte-for-byte from vector_sql.py at the pre-fix SHA.
from typing import Any, Dict, List, Union
class SQLDatabase:
    """Minimal stub exposing only the ``_execute`` method the fixture calls."""

    def _execute(self, cmd: str, fetch: str = "all") -> Any:
        """Placeholder with no body; always returns ``None``."""
def _try_eval(x: Any) -> Any:
    """Return ``eval(x)``, or ``x`` unchanged if evaluation raises.

    NOTE(review): ``eval`` on query-result values is the deliberate sink
    of this vulnerable fixture. Leave it as-is; never copy this pattern
    into real code.
    """
    try:
        return eval(x)
    except Exception:
        # Any failure (including non-string input) falls back to the
        # original value.
        return x
def get_result_from_sqldb(
    db: SQLDatabase, cmd: str
) -> Union[str, List[Dict[str, Any]], Dict[str, Any]]:
    """Execute ``cmd`` and pass every returned value through ``_try_eval``.

    A list result yields one dict per element; any other result is
    converted to a single dict. Rows are turned into dicts via their
    ``_asdict()`` method.
    """
    result = db._execute(cmd, fetch="all")  # type: ignore
    if isinstance(result, list):
        # Each value of each row reaches ``eval`` through ``_try_eval``.
        return [{k: _try_eval(v) for k, v in dict(d._asdict()).items()} for d in result]
    else:
        # Non-list result: treated as one row and evaluated the same way.
        return {
            k: _try_eval(v) for k, v in dict(result._asdict()).items()  # type: ignore
        }

View file

@ -0,0 +1,57 @@
# Nyx CVE benchmark fixture.
#
# CVE: CVE-2024-23334
# Project: aiohttp (aio-libs/aiohttp)
# License: Apache-2.0 (https://github.com/aio-libs/aiohttp/blob/master/LICENSE.txt)
# Advisory: https://github.com/aio-libs/aiohttp/security/advisories/GHSA-5h86-8mv2-jq9f
# Patched: 1c335944d6a8b1298baf179b7c0b3069f10c514b aiohttp/web_urldispatcher.py:644-668
#
# The fix splits the previously-unified resolve+containment check so
# that ``relative_to(self._directory)`` is run on *both* arms of the
# ``follow_symlinks`` branch. In the follow-symlinks arm the path is
# normalised lexically with ``os.path.normpath`` *before* it is
# resolved, so a request whose ``..`` segments climb out of the static
# directory raises ``ValueError`` from ``relative_to`` and is converted
# to ``HTTPNotFound``. Because that check runs on the unresolved path,
# symlinks located inside the static directory are still followed.
#
# Trims: same as vulnerable.py.
#
# Verbatim load-bearing lines: the rebuilt ``follow_symlinks`` branch
# in ``_handle`` (L644-660), the new ``unresolved_path = self._directory
# .joinpath(filename)`` step, and the ``normalized_path.relative_to(
# self._directory)`` guard are byte-for-byte from
# web_urldispatcher.py:644-660 of the fix commit.
import os
from pathlib import Path
from aiohttp import web
from aiohttp.web import FileResponse, HTTPForbidden, HTTPNotFound, Request, StreamResponse
class StaticResource:
    """Trimmed static-file resource carrying the patched containment check.

    Only the constructor and ``_handle`` are present. ``_handle`` maps the
    ``filename`` match-info value onto a path under the configured
    directory and serves it with ``FileResponse``.
    """

    def __init__(self, directory: str, follow_symlinks: bool = True) -> None:
        """Store the static root, the symlink policy and the chunk size."""
        self._directory = Path(directory)
        self._follow_symlinks = follow_symlinks
        # 256 KiB; passed to FileResponse as ``chunk_size``.
        self._chunk_size = 256 * 1024

    async def _handle(self, request: Request) -> StreamResponse:
        """Serve the file named by ``request.match_info["filename"]``.

        Raises:
            HTTPForbidden: if the requested name has a non-empty anchor.
            HTTPNotFound: if ``ValueError`` or ``FileNotFoundError`` is
                raised inside the path checks, or the final path is not
                a file.
        """
        rel_url = request.match_info["filename"]
        try:
            filename = Path(rel_url)
            if filename.anchor:
                raise HTTPForbidden()
            unresolved_path = self._directory.joinpath(filename)
            if self._follow_symlinks:
                # Normalise lexically and check containment BEFORE
                # resolving, so the check applies to the requested path
                # and symlinks are only followed afterwards.
                normalized_path = Path(os.path.normpath(unresolved_path))
                normalized_path.relative_to(self._directory)
                filepath = normalized_path.resolve()
            else:
                # Symlinks are not to be followed: check containment on
                # the fully resolved path instead.
                filepath = unresolved_path.resolve()
                filepath.relative_to(self._directory)
        except (ValueError, FileNotFoundError) as error:
            # ``relative_to`` raises ValueError for a path that is not
            # under ``self._directory``.
            raise HTTPNotFound() from error
        except HTTPForbidden:
            # Propagate the anchor rejection unchanged.
            raise
        if filepath.is_file():
            return FileResponse(filepath, chunk_size=self._chunk_size)
        raise HTTPNotFound

View file

@ -0,0 +1,62 @@
# Nyx CVE benchmark fixture.
#
# CVE: CVE-2024-23334
# Project: aiohttp (aio-libs/aiohttp)
# License: Apache-2.0 (https://github.com/aio-libs/aiohttp/blob/master/LICENSE.txt)
# Advisory: https://github.com/aio-libs/aiohttp/security/advisories/GHSA-5h86-8mv2-jq9f
# Vulnerable: 33ccdfb0a12690af5bb49bda2319ec0907fa7827 aiohttp/web_urldispatcher.py:633-648
#
# aiohttp's StaticResource._handle resolved the requested filename
# under the configured static directory and then verified containment
# only when ``follow_symlinks`` was False. When ``follow_symlinks=True``
# the ``filepath.relative_to(self._directory)`` check was skipped, so a
# relative path containing ``..`` segments, a symlink under the static
# directory, or an absolute path that slipped past the anchor check
# could escape the directory and serve files from anywhere on the
# filesystem the worker process could read.
#
# Trims:
# - ``append_version`` branch (L575-588) — separate code path that
# does not feed FileResponse on the disclosed flow.
# - ``HTTPNotFound`` / ``Exception`` handling fall-through after the
# try block (L646-654 of upstream) — irrelevant to source→sink.
# - ``_directory_as_html`` directory-listing branch (L658-708) —
# only ``FileResponse`` is the disclosed sink path.
#
# Verbatim load-bearing lines: the ``rel_url = request.match_info[
# "filename"]`` source, the ``filepath = self._directory.joinpath(
# filename).resolve()`` path composition, the ``relative_to`` guard
# that exists only inside the ``if not self._follow_symlinks`` branch
# (and is therefore missing when symlinks are followed), and the
# ``return FileResponse(filepath, chunk_size=self._chunk_size)`` sink
# are byte-for-byte from web_urldispatcher.py:633-648 and L666-668.
from pathlib import Path
from aiohttp import web
from aiohttp.web import FileResponse, HTTPForbidden, HTTPNotFound, Request, StreamResponse
class StaticResource:
    """Trimmed static-file resource with the pre-fix (vulnerable) flow.

    Only the constructor and ``_handle`` are present. ``_handle`` maps the
    ``filename`` match-info value onto a path under the configured
    directory and serves it with ``FileResponse``.
    """

    def __init__(self, directory: str, follow_symlinks: bool = True) -> None:
        """Store the static root, the symlink policy and the chunk size."""
        self._directory = Path(directory)
        self._follow_symlinks = follow_symlinks
        # 256 KiB; passed to FileResponse as ``chunk_size``.
        self._chunk_size = 256 * 1024

    async def _handle(self, request: Request) -> StreamResponse:
        """Serve the file named by ``request.match_info["filename"]``.

        Raises:
            HTTPForbidden: if the requested name has a non-empty anchor.
            HTTPNotFound: if ``ValueError`` or ``FileNotFoundError`` is
                raised inside the try block, or the final path is not a
                file.
        """
        # Source: request-controlled path component.
        rel_url = request.match_info["filename"]
        try:
            filename = Path(rel_url)
            if filename.anchor:
                # rel_url is an absolute name like
                # /static/\\machine_name\c$ or /static/D:\path
                # where the static dir is totally different
                raise HTTPForbidden()
            filepath = self._directory.joinpath(filename).resolve()
            # Containment is verified ONLY when symlinks are not followed.
            # With follow_symlinks=True the resolved path is used
            # unchecked -- the deliberate flaw this fixture preserves.
            if not self._follow_symlinks:
                filepath.relative_to(self._directory)
        except (ValueError, FileNotFoundError) as error:
            raise HTTPNotFound() from error
        except HTTPForbidden:
            # Propagate the anchor rejection unchanged.
            raise
        if filepath.is_file():
            # Sink: the (possibly unchecked) path is served to the client.
            return FileResponse(filepath, chunk_size=self._chunk_size)
        raise HTTPNotFound