Python FP and docs updates (#58)

* refactor: Update comments for clarity and add expectations.json files for performance metrics

* feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks

* feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks

* refactor: Simplify code formatting for better readability in multiple files

* refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration

* feat: Update Java and Python patterns to include new security rules

* refactor: Improve comment clarity and consistency across multiple Rust files

* refactor: Simplify code formatting for improved readability in integration tests and module files

* refactor: Improve comment formatting and enhance clarity in assertions across multiple files
This commit is contained in:
Eli Peter 2026-04-29 19:53:34 -04:00 committed by GitHub
parent 4db0805de6
commit a438886217
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
291 changed files with 9485 additions and 3851 deletions

View file

@ -0,0 +1,19 @@
"""
Vulnerable counterpart to safe_fastapi_route_dependencies_auth.py: same
shape but with NO `dependencies=[Depends(...)]` keyword arg on the route
decorator. The FastAPI ownership-check rule must still fire: the
recognizer must not blanket-suppress every FastAPI route, only those
with an actual dependency-injected auth check.
"""
from fastapi import FastAPI
router = FastAPI()
@router.delete("/{connection_id}")
def delete_connection(connection_id: str, session):
    """No auth — must still fire missing_ownership_check.

    Fetches the ``Connection`` row whose ``conn_id`` equals
    ``connection_id`` via ``session.scalar`` and deletes it through
    ``session.delete``.

    Raises:
        HTTPException: ``HTTPException(404, "not found")`` when the lookup
            returns ``None``.
    """
    # The decorator deliberately carries no `dependencies=[Depends(...)]`
    # keyword: this is the unguarded shape the rule is expected to flag.
    connection = session.scalar(select(Connection).filter_by(conn_id=connection_id))
    if connection is None:
        raise HTTPException(404, "not found")
    session.delete(connection)

View file

@ -0,0 +1,43 @@
"""
Distilled from airflow `airflow-core/src/airflow/api_fastapi/core_api/routes/public/connections.py`:
@connections_router.delete(
"/{connection_id}",
dependencies=[Depends(requires_access_connection(method="DELETE"))],
)
def delete_connection(connection_id: str, session: SessionDep):
connection = session.scalar(select(Connection).filter_by(conn_id=connection_id))
...
session.delete(connection)
The route's `dependencies=[Depends(requires_access_*)]` declares the auth gate at
the FastAPI level. The ownership-check rule must recognise the dependency-
injected check and not flag the row-fetch / mutation as missing ownership.
"""
from fastapi import Depends, FastAPI
router = FastAPI()
def requires_access_connection(method: str):
    """Build a stub access-check callable for the connection routes.

    Args:
        method: HTTP method the check is declared for. The stub does not
            read it; it only needs to appear in the call expression.

    Returns:
        A fresh zero-argument callable whose body is a no-op, intended to
        be wrapped in ``Depends(...)`` on a route decorator.
    """
    def check():
        ...

    return check
@router.delete(
    "/{connection_id}",
    dependencies=[Depends(requires_access_connection(method="DELETE"))],
)
def delete_connection(connection_id: str, session):
    """Delete the connection identified by ``connection_id``.

    The access check is declared on the route decorator through
    ``dependencies=[Depends(...)]``, so the handler body itself performs no
    ownership check; the row fetch and the delete below are expected to be
    treated as covered by that route-level dependency.

    Raises:
        HTTPException: ``HTTPException(404, "not found")`` when the lookup
            returns ``None``.
    """
    connection = session.scalar(select(Connection).filter_by(conn_id=connection_id))
    if connection is None:
        raise HTTPException(404, "not found")
    session.delete(connection)
@router.get(
    "/{connection_id}",
    dependencies=[Depends(requires_access_connection(method="GET"))],
)
def get_connection(connection_id: str, session):
    """Return the connection row whose ``conn_id`` equals ``connection_id``.

    Read-only counterpart of the DELETE route: the access check is again
    declared on the decorator via ``dependencies=[Depends(...)]``. The
    result of ``session.scalar`` is returned as-is, with no not-found
    handling in this handler.
    """
    return session.scalar(select(Connection).filter_by(conn_id=connection_id))

View file

@ -0,0 +1,79 @@
"""
Distilled from airflow `airflow-core/src/airflow/api_fastapi/core_api/routes/public/dag_run.py`:
@dag_run_router.post(
"",
dependencies=[Depends(requires_access_dag(method="POST", access_entity=DagAccessEntity.RUN))],
)
def trigger_dag_run(dag_id, body, dag_bag, user, session, request):
dm = session.scalar(select(DagModel).where(DagModel.dag_id == dag_id))
...
dag = get_latest_version_of_dag(dag_bag, dag_id, session)
dag_run = dag.create_dagrun(run_id=params["run_id"], ...)
The route-level `dependencies=[Depends(requires_access_dag(method="POST",
access_entity=...))]` decorator authorizes the entire handler, so the
handler body's `dag.create_dagrun(...)` call (where `dag` is a row
fetched using the auth-checked `dag_id`) must be covered too, even
though the call's subject is the bare row variable rather than the
original id.
Before the route-level fix, `auth_check_covers_subject` walked
`check.subjects` (empty for decorator-level checks whose inner call
carries no per-arg ValueRef) and never matched. After the fix,
`is_route_level=true` short-circuits coverage to true for any
non-login-guard route-level check, suppressing both the row-fetch
ownership flag and the downstream method-call ownership flag.
"""
from fastapi import Depends, FastAPI
router = FastAPI()
def requires_access_dag(method: str, access_entity=None):
    """Build a stub access-check callable for the DAG routes.

    Args:
        method: HTTP method the check is declared for; not read by the stub.
        access_entity: Optional entity qualifier; not read by the stub.

    Returns:
        A fresh zero-argument callable whose body is a no-op, intended to
        be wrapped in ``Depends(...)`` on a route decorator.
    """
    def check():
        ...

    return check
def get_latest_version_of_dag(dag_bag, dag_id, session):
    """Look up ``dag_id`` in ``dag_bag`` and return the result.

    Stub helper: it simply delegates to ``dag_bag.get(dag_id)``. The
    ``session`` parameter is accepted but not used here.
    """
    return dag_bag.get(dag_id)
@router.get(
    "/{dag_id}/runs/{run_id}",
    dependencies=[Depends(requires_access_dag(method="GET"))],
)
def get_dag_run(dag_id: str, run_id: str, session):
    """
    Route-level guard authorizes the entire handler. The
    `filter_by(dag_id=dag_id, run_id=run_id)` ORM call must NOT trip
    `py.auth.missing_ownership_check` even though the per-arg subjects
    are id-shaped: the route-level decorator covers them.

    Returns the fetched row; raises `HTTPException(404, "not found")`
    when `session.scalar` returns `None`.
    """
    dag_run = session.scalar(
        select(DagRun).filter_by(dag_id=dag_id, run_id=run_id)
    )
    if dag_run is None:
        raise HTTPException(404, "not found")
    return dag_run
@router.delete(
    "/{dag_id}",
    dependencies=[Depends(requires_access_dag(method="DELETE"))],
)
def delete_dag(dag_id: str, session):
    """
    Same shape, DELETE method. The row fetch and row-variable
    method call must also be fully covered by the route-level guard.
    `dag` is fetched using the auth-checked `dag_id`; without the
    `is_route_level` short-circuit, the per-name walk would mismatch
    `dag.<method>` (subject is the row var) against the check's
    empty subjects vec.

    Raises `HTTPException(404, "not found")` when `session.scalar`
    returns `None`.
    """
    dag = session.scalar(select(DagModel).where(DagModel.dag_id == dag_id))
    if dag is None:
        raise HTTPException(404, "not found")
    # Method call on the fetched row variable rather than on the original id.
    dag.cleanup_runs(session=session)

View file

@ -0,0 +1,33 @@
"""
Distilled from airflow `tests/unit/models/test_backfill.py` and
`providers/google/tests/unit/google/cloud/hooks/test_dlp.py`: pytest test
methods that take a SQLAlchemy `session` fixture by name and call
`session.commit()` / `session.add(...)` / `session.scalar(...)`.
Bare `session.<sqlalchemy_verb>` was previously classified as auth Session
context, which triggered `unit_has_user_input_evidence` even though the
test function takes no user input: the `session` fixture is the
SQLAlchemy ORM Session, not the auth/HTTP session. After the engine
classifier narrowing, only `session.<identity_field>` (`session.user`,
`session.user_id`, ...) is treated as auth context; SQLAlchemy verbs
do not contribute user-input evidence on their own.
"""
def test_reverse_and_depends_on_past_fails(dep_on_past, dag_maker, session):
    """Fixture-style test that calls a bare ``session.commit()``.

    Builds an empty DAG inside the ``dag_maker()`` context, commits the
    ``session`` fixture, creates a backfill for that DAG over a fixed date
    range, and, when ``dep_on_past`` is truthy, asserts that the backfill
    result is ``None``. No user input is read anywhere in the function.
    """
    with dag_maker() as dag:
        pass
    # Bare SQLAlchemy-style verb on the `session` fixture; no identity field
    # such as `session.user` is touched.
    session.commit()
    b = _create_backfill(
        dag_id=dag.dag_id,
        from_date="2021-01-01",
        to_date="2021-01-05",
    )
    if dep_on_past:
        assert b is None
def test_create_deidentify_template_with_org_id(self, get_conn, mock_project_id):
    """Check that the hook returns the stubbed (empty) template.

    Configures ``get_conn`` so that the connection's
    ``create_deidentify_template`` returns ``{}``, calls the hook with
    ``organization_id="ORG_ID"``, and asserts the result equals ``{}``.
    ``mock_project_id`` is accepted but not read in the body.
    """
    get_conn.return_value.create_deidentify_template.return_value = {}
    result = self.hook.create_deidentify_template(organization_id="ORG_ID")
    assert result == {}