mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-15 20:05:13 +02:00
Performance and precision pass (#64)
This commit is contained in:
parent
c7c5e0f3a1
commit
fb698d2c27
97 changed files with 9932 additions and 517 deletions
30
tests/benchmark/cve_corpus/python/CVE-2023-6568/patched.py
Normal file
30
tests/benchmark/cve_corpus/python/CVE-2023-6568/patched.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
# Nyx CVE benchmark fixture.
|
||||
#
|
||||
# CVE: CVE-2023-6568
|
||||
# Project: MLflow (mlflow/mlflow)
|
||||
# License: Apache-2.0 (https://github.com/mlflow/mlflow/blob/master/LICENSE.txt)
|
||||
# Advisory: https://nvd.nist.gov/vuln/detail/CVE-2023-6568
|
||||
# Patched: 28ff3f94994941e038f2172c6484b65dc4db6ca1 mlflow/server/auth/__init__.py:744-770
|
||||
#
|
||||
# The fix replaces the f-string interpolation of the attacker-controlled
|
||||
# `content_type` header with a static error message. No tainted value
|
||||
# reaches `make_response`, so the reflected-XSS sink is silent.
|
||||
|
||||
from flask import request, make_response
|
||||
|
||||
|
||||
# Pass-through decorator: returns `fn` unchanged, so the decorated handler
# runs exactly as written.
# NOTE(review): presumably a stub for the upstream MLflow decorator of the
# same name -- confirm against upstream.
def catch_mlflow_exception(fn):
|
||||
return fn
|
||||
|
||||
|
||||
# Handler: returns a {"user": "ok"} response when the Content-Type header
# equals "application/json" exactly; every other value gets a 400 response
# carrying a fixed message.
@catch_mlflow_exception
|
||||
def create_user():
|
||||
# Value comes straight from the request headers (caller-controlled).
content_type = request.headers.get("Content-Type")
|
||||
if content_type == "application/json":
|
||||
return make_response({"user": "ok"})
|
||||
else:
|
||||
# The message is assembled from string literals only; `content_type`
# is never interpolated into the response body.
message = (
|
||||
"Invalid content type. Must be one of: "
|
||||
"application/x-www-form-urlencoded, application/json"
|
||||
)
|
||||
return make_response(message, 400)
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
# Nyx CVE benchmark fixture.
|
||||
#
|
||||
# CVE: CVE-2023-6568
|
||||
# Project: MLflow (mlflow/mlflow)
|
||||
# License: Apache-2.0 (https://github.com/mlflow/mlflow/blob/master/LICENSE.txt)
|
||||
# Advisory: https://nvd.nist.gov/vuln/detail/CVE-2023-6568
|
||||
# Vulnerable: 28ff3f94994941e038f2172c6484b65dc4db6ca1~1 mlflow/server/auth/__init__.py:744-766
|
||||
#
|
||||
# Reflected Cross-Site Scripting in MLflow's auth server `create_user`
|
||||
# handler. When a request arrived with an unrecognised `Content-Type`
|
||||
# header, the handler reflected the attacker-controlled header value
|
||||
# into a Flask response via an f-string and `make_response(...)`.
|
||||
# Because `make_response` returns the response unmodified (no escaping)
|
||||
# and Werkzeug serves the bytes back to the browser as text/html, the
|
||||
# header reflection becomes XSS in the browser.
|
||||
#
|
||||
# Trims:
|
||||
# - imports / module-level setup (config, store, blueprints L1-30) —
|
||||
# scaffolding only.
|
||||
# - non-`create_user` handlers (`get_user`, `update_user_password`,
|
||||
# `update_user_admin`, all later in the file) — same `make_response`
|
||||
# call shape but with non-tainted inputs; not the disclosed sink.
|
||||
# - `flash` / `alert` paths inside `create_user` (form-urlencoded and
|
||||
# application/json branches) — those branches do not produce the
|
||||
# reflected XSS; only the `else` branch does.
|
||||
#
|
||||
# Verbatim load-bearing lines: `content_type = request.headers.get(
|
||||
# "Content-Type")` (source) and `return make_response(f"Invalid content
|
||||
# type: '{content_type}'", 400)` (sink) are byte-for-byte from
|
||||
# mlflow/server/auth/__init__.py at the pre-fix SHA.
|
||||
|
||||
from flask import request, make_response
|
||||
|
||||
|
||||
# Pass-through decorator: returns `fn` unchanged, so the decorated handler
# runs exactly as written.
# NOTE(review): presumably a stub for the upstream MLflow decorator of the
# same name -- confirm against upstream.
def catch_mlflow_exception(fn):
|
||||
return fn
|
||||
|
||||
|
||||
# Handler: returns a {"user": "ok"} response when the Content-Type header
# equals "application/json" exactly; every other value gets a 400 response
# whose body embeds the header value.
@catch_mlflow_exception
|
||||
def create_user():
|
||||
# Value comes straight from the request headers (caller-controlled).
content_type = request.headers.get("Content-Type")
|
||||
if content_type == "application/json":
|
||||
return make_response({"user": "ok"})
|
||||
else:
|
||||
# INTENTIONALLY VULNERABLE (benchmark fixture): the header value is
# interpolated into the response body with no escaping. Do not "fix"
# this line -- it is the pattern the fixture exists to exercise.
return make_response(f"Invalid content type: '{content_type}'", 400)
|
||||
26
tests/benchmark/cve_corpus/python/CVE-2024-21513/patched.py
Normal file
26
tests/benchmark/cve_corpus/python/CVE-2024-21513/patched.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
# Nyx CVE benchmark fixture.
|
||||
#
|
||||
# CVE: CVE-2024-21513
|
||||
# Project: LangChain Experimental (langchain-ai/langchain)
|
||||
# License: MIT (https://github.com/langchain-ai/langchain/blob/master/LICENSE)
|
||||
# Advisory: https://nvd.nist.gov/vuln/detail/CVE-2024-21513
|
||||
# Patched: 7b13292e3544b2f5f2bfb8a27a062ea2b0c34561
|
||||
# libs/experimental/langchain_experimental/sql/vector_sql.py:79-83
|
||||
#
|
||||
# The fix removes the `_try_eval` helper entirely and returns the raw
|
||||
# `db._execute(...)` result without invoking `eval(...)` at all. No
|
||||
# `eval` sink remains, so `py.code_exec.eval` is silent.
|
||||
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
|
||||
# Placeholder class exposing only the `_execute` signature; the method body
# is `...`, so nothing is implemented here.
class SQLDatabase:
|
||||
def _execute(self, cmd: str, fetch: str = "all") -> Any:
|
||||
...
|
||||
|
||||
|
||||
# Runs `cmd` through `db._execute` with fetch="all" and returns the result
# untouched -- no per-value post-processing and no `eval` call.
def get_result_from_sqldb(
|
||||
db: SQLDatabase, cmd: str
|
||||
) -> Union[str, List[Dict[str, Any]], Dict[str, Any]]:
|
||||
result = db._execute(cmd, fetch="all") # type: ignore
|
||||
return result
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
# Nyx CVE benchmark fixture.
|
||||
#
|
||||
# CVE: CVE-2024-21513
|
||||
# Project: LangChain Experimental (langchain-ai/langchain)
|
||||
# License: MIT (https://github.com/langchain-ai/langchain/blob/master/LICENSE)
|
||||
# Advisory: https://nvd.nist.gov/vuln/detail/CVE-2024-21513
|
||||
# Vulnerable: 7b13292e3544b2f5f2bfb8a27a062ea2b0c34561~1
|
||||
# libs/experimental/langchain_experimental/sql/vector_sql.py:79-98
|
||||
#
|
||||
# `langchain_experimental.sql.vector_sql.VectorSQLDatabaseChain` ran
|
||||
# every value returned from a SQL query through Python's built-in
|
||||
# `eval(...)` so that string-shaped numbers / lists were converted into
|
||||
# Python objects. An attacker who could control the database content
|
||||
# (for example by writing into a vector store backing the chain) could
|
||||
# return a value such as `__import__("os").system("rm -rf /")` and the
|
||||
# chain would `eval` it, achieving arbitrary code execution on the
|
||||
# server hosting the chain.
|
||||
#
|
||||
# Trims:
|
||||
# - imports / non-load-bearing module decls (L1-30 of upstream).
|
||||
# - `parse(self, text: str)` output-parser method (L70-77) and the
|
||||
# `VectorSQLDatabaseChain` class body (L101-200) — neither is on
|
||||
# the disclosed source→sink path.
|
||||
# - SQLAlchemy / SQLDatabase type hints simplified to `Any` to avoid
|
||||
# pulling the upstream type chain into the fixture.
|
||||
#
|
||||
# Verbatim load-bearing lines: the `_try_eval` helper definition and
|
||||
# the two dict / list comprehensions inside `get_result_from_sqldb`
|
||||
# that call `_try_eval(v)` on each query-result value are
|
||||
# byte-for-byte from vector_sql.py at the pre-fix SHA.
|
||||
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
|
||||
# Placeholder class exposing only the `_execute` signature; the method body
# is `...`, so nothing is implemented here.
class SQLDatabase:
|
||||
def _execute(self, cmd: str, fetch: str = "all") -> Any:
|
||||
...
|
||||
|
||||
|
||||
# Returns `eval(x)`; if evaluation raises any `Exception`, returns `x`
# unchanged instead.
# SECURITY: INTENTIONALLY VULNERABLE (benchmark fixture). `eval` executes
# whatever expression it is handed, and the caller in this file feeds it
# values taken from query results. Do not reuse this helper and do not
# "fix" it here -- it is the sink the fixture exists to exercise.
def _try_eval(x: Any) -> Any:
|
||||
try:
|
||||
return eval(x)
|
||||
except Exception:
|
||||
return x
|
||||
|
||||
|
||||
# Runs `cmd` through `db._execute` with fetch="all", then passes every value
# of the result through `_try_eval`:
#   - list result  -> a list with one dict per element, built from
#                     `d._asdict()`;
#   - anything else -> a single dict built from `result._asdict()`.
# NOTE(review): elements are assumed to expose `_asdict()` (row /
# namedtuple-like objects) -- confirm against the real `_execute`.
def get_result_from_sqldb(
|
||||
db: SQLDatabase, cmd: str
|
||||
) -> Union[str, List[Dict[str, Any]], Dict[str, Any]]:
|
||||
result = db._execute(cmd, fetch="all") # type: ignore
|
||||
if isinstance(result, list):
|
||||
return [{k: _try_eval(v) for k, v in dict(d._asdict()).items()} for d in result]
|
||||
else:
|
||||
return {
|
||||
k: _try_eval(v) for k, v in dict(result._asdict()).items() # type: ignore
|
||||
}
|
||||
57
tests/benchmark/cve_corpus/python/CVE-2024-23334/patched.py
Normal file
57
tests/benchmark/cve_corpus/python/CVE-2024-23334/patched.py
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
# Nyx CVE benchmark fixture.
|
||||
#
|
||||
# CVE: CVE-2024-23334
|
||||
# Project: aiohttp (aio-libs/aiohttp)
|
||||
# License: Apache-2.0 (https://github.com/aio-libs/aiohttp/blob/master/LICENSE.txt)
|
||||
# Advisory: https://github.com/aio-libs/aiohttp/security/advisories/GHSA-5h86-8mv2-jq9f
|
||||
# Patched: 1c335944d6a8b1298baf179b7c0b3069f10c514b aiohttp/web_urldispatcher.py:644-668
|
||||
#
|
||||
# The fix splits the previously-unified resolve+containment check so
|
||||
# that ``relative_to(self._directory)`` is run on *both* arms of the
|
||||
# ``follow_symlinks`` branch. In the follow-symlinks arm the path is
|
||||
# normalised pre-resolve so a ``..`` traversal that leaves the static
|
||||
# directory raises ``ValueError`` from ``relative_to`` and is converted
|
||||
# to ``HTTPNotFound``; symlinks are resolved only after that check.
|
||||
#
|
||||
# Trims: same as vulnerable.py.
|
||||
#
|
||||
# Verbatim load-bearing lines: the rebuilt ``follow_symlinks`` branch
|
||||
# in ``_handle`` (L644-660), the new ``unresolved_path = self._directory
|
||||
# .joinpath(filename)`` step, and the ``normalized_path.relative_to(
|
||||
# self._directory)`` guard are byte-for-byte from
|
||||
# web_urldispatcher.py:644-660 of the fix commit.
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from aiohttp import web
|
||||
from aiohttp.web import FileResponse, HTTPForbidden, HTTPNotFound, Request, StreamResponse
|
||||
|
||||
|
||||
# Serves files from a configured directory. `_handle` maps the request's
# "filename" match-info entry to a path under `self._directory` and returns a
# `FileResponse` for it, checking containment on both `follow_symlinks` arms.
class StaticResource:
|
||||
def __init__(self, directory: str, follow_symlinks: bool = True) -> None:
|
||||
self._directory = Path(directory)
|
||||
self._follow_symlinks = follow_symlinks
|
||||
# 256 KiB; handed to FileResponse as its chunk_size.
self._chunk_size = 256 * 1024
|
||||
|
||||
# Returns a FileResponse for the requested file.
# Raises HTTPForbidden when the requested name has an anchor, and
# HTTPNotFound when the containment check or resolution fails or the
# final path is not a regular file.
async def _handle(self, request: Request) -> StreamResponse:
|
||||
# Request-supplied path fragment (caller-controlled).
rel_url = request.match_info["filename"]
|
||||
try:
|
||||
filename = Path(rel_url)
|
||||
# A non-empty anchor means the name is absolute / drive-qualified.
if filename.anchor:
|
||||
raise HTTPForbidden()
|
||||
unresolved_path = self._directory.joinpath(filename)
|
||||
if self._follow_symlinks:
|
||||
# Containment is checked on the normalised but *unresolved* path:
# `relative_to` raises ValueError if it is not under the static
# directory. Symlinks are resolved only after this check.
normalized_path = Path(os.path.normpath(unresolved_path))
|
||||
normalized_path.relative_to(self._directory)
|
||||
filepath = normalized_path.resolve()
|
||||
else:
|
||||
# Here containment is checked on the fully resolved path.
filepath = unresolved_path.resolve()
|
||||
filepath.relative_to(self._directory)
|
||||
except (ValueError, FileNotFoundError) as error:
|
||||
raise HTTPNotFound() from error
|
||||
# Let the HTTPForbidden raised above propagate unchanged.
except HTTPForbidden:
|
||||
raise
|
||||
if filepath.is_file():
|
||||
return FileResponse(filepath, chunk_size=self._chunk_size)
|
||||
raise HTTPNotFound
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
# Nyx CVE benchmark fixture.
|
||||
#
|
||||
# CVE: CVE-2024-23334
|
||||
# Project: aiohttp (aio-libs/aiohttp)
|
||||
# License: Apache-2.0 (https://github.com/aio-libs/aiohttp/blob/master/LICENSE.txt)
|
||||
# Advisory: https://github.com/aio-libs/aiohttp/security/advisories/GHSA-5h86-8mv2-jq9f
|
||||
# Vulnerable: 33ccdfb0a12690af5bb49bda2319ec0907fa7827 aiohttp/web_urldispatcher.py:633-648
|
||||
#
|
||||
# aiohttp's StaticResource._handle resolved the requested filename
|
||||
# under the configured static directory and then verified containment
|
||||
# only when ``follow_symlinks`` was False. When ``follow_symlinks=True``
|
||||
# the ``filepath.relative_to(self._directory)`` check was skipped, so a
|
||||
# resolved path that left the static directory (via ``..`` segments or
|
||||
# a symlink) was still accepted, letting a request serve files from
|
||||
# anywhere on the filesystem the worker process could read.
|
||||
#
|
||||
# Trims:
|
||||
# - ``append_version`` branch (L575-588) — separate code path that
|
||||
# does not feed FileResponse on the disclosed flow.
|
||||
# - ``HTTPNotFound`` / ``Exception`` handling fall-through after the
|
||||
# try block (L646-654 of upstream) — irrelevant to source→sink.
|
||||
# - ``_directory_as_html`` directory-listing branch (L658-708) —
|
||||
# only ``FileResponse`` is the disclosed sink path.
|
||||
#
|
||||
# Verbatim load-bearing lines: the ``rel_url = request.match_info[
|
||||
# "filename"]`` source, the ``filepath = self._directory.joinpath(
|
||||
# filename).resolve()`` path composition, the ``relative_to`` guard
|
||||
# present only inside the ``if not self._follow_symlinks`` branch, and the
|
||||
# ``return FileResponse(filepath, chunk_size=self._chunk_size)`` sink
|
||||
# are byte-for-byte from web_urldispatcher.py:633-648 and L666-668.
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from aiohttp import web
|
||||
from aiohttp.web import FileResponse, HTTPForbidden, HTTPNotFound, Request, StreamResponse
|
||||
|
||||
|
||||
# Serves files from a configured directory. `_handle` maps the request's
# "filename" match-info entry to a path under `self._directory` and returns a
# `FileResponse` for it.
# INTENTIONALLY VULNERABLE (benchmark fixture): the containment check runs
# only when `follow_symlinks` is False. Do not "fix" it here.
class StaticResource:
|
||||
def __init__(self, directory: str, follow_symlinks: bool = True) -> None:
|
||||
self._directory = Path(directory)
|
||||
self._follow_symlinks = follow_symlinks
|
||||
# 256 KiB; handed to FileResponse as its chunk_size.
self._chunk_size = 256 * 1024
|
||||
|
||||
# Returns a FileResponse for the requested file.
# Raises HTTPForbidden when the requested name has an anchor, and
# HTTPNotFound when resolution / the containment check fails or the final
# path is not a regular file.
async def _handle(self, request: Request) -> StreamResponse:
|
||||
# Request-supplied path fragment (caller-controlled).
rel_url = request.match_info["filename"]
|
||||
try:
|
||||
filename = Path(rel_url)
|
||||
if filename.anchor:
|
||||
# rel_url is an absolute name like
|
||||
# /static/\\machine_name\c$ or /static/D:\path
|
||||
# where the static dir is totally different
|
||||
raise HTTPForbidden()
|
||||
filepath = self._directory.joinpath(filename).resolve()
|
||||
# `relative_to` raises ValueError when `filepath` is not under the
# static directory -- but it is only reached when follow_symlinks is
# False, so the True case performs no containment check at all.
if not self._follow_symlinks:
|
||||
filepath.relative_to(self._directory)
|
||||
except (ValueError, FileNotFoundError) as error:
|
||||
raise HTTPNotFound() from error
|
||||
# Let the HTTPForbidden raised above propagate unchanged.
except HTTPForbidden:
|
||||
raise
|
||||
if filepath.is_file():
|
||||
return FileResponse(filepath, chunk_size=self._chunk_size)
|
||||
raise HTTPNotFound
|
||||
Loading…
Add table
Add a link
Reference in a new issue