Python FP and docs updates (#58)

* refactor: Update comments for clarity and add expectations.json files for performance metrics

* feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks

* feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks

* refactor: Simplify code formatting for better readability in multiple files

* refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration

* feat: Update Java and Python patterns to include new security rules

* refactor: Improve comment clarity and consistency across multiple Rust files

* refactor: Simplify code formatting for improved readability in integration tests and module files

* refactor: Improve comment formatting and enhance clarity in assertions across multiple files
This commit is contained in:
Eli Peter 2026-04-29 19:53:34 -04:00 committed by GitHub
parent 4db0805de6
commit a438886217
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
291 changed files with 9485 additions and 3851 deletions

View file

@ -106,6 +106,19 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sanitizer(Cap::URL_ENCODE),
case_sensitive: false,
},
// SQLAlchemy bound-parameter sanitizer. Values passed as keyword
// arguments to `text("…:name…").bindparams(name=value)` are bound
// by the driver, so injection cannot break out of the literal
// context. The accompanying SQL-string check (py.sqli.text_format)
// already flags the `text(f"…")` shape at construction, so this
// sanitizer only clears flow when the SQL is a literal and the
// values reach the engine via bindparams. Recognises both the
// method form (`text(…).bindparams(…)`) and the bare call form
// (`bindparams(…)`).
LabelRule {
matchers: &["bindparams", ".bindparams"],
label: DataLabel::Sanitizer(Cap::SQL_QUERY),
case_sensitive: false,
},
// Path canonicalization
LabelRule {
matchers: &["os.path.abspath", "os.path.normpath"],
@ -119,7 +132,7 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::CODE_EXEC),
case_sensitive: false,
},
// Jinja2 / string.Template tainted template string enables SSTI
// Jinja2 / string.Template, tainted template string enables SSTI
LabelRule {
matchers: &["Template"],
label: DataLabel::Sink(Cap::HTML_ESCAPE),
@ -141,7 +154,7 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::HTML_ESCAPE),
case_sensitive: false,
},
// Flask Markup bypasses auto-escaping
// Flask Markup, bypasses auto-escaping
LabelRule {
matchers: &["Markup"],
label: DataLabel::Sink(Cap::HTML_ESCAPE),
@ -216,7 +229,7 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: false,
},
// aiohttp HTTP client SSRF sinks
// aiohttp HTTP client, SSRF sinks
LabelRule {
matchers: &[
"aiohttp.get",
@ -228,6 +241,30 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: false,
},
// Type-qualified SSRF sinks: when the receiver is tracked as
// TypeKind::HttpClient (e.g. `client = requests.Session()`,
// `client = httpx.Client()`, or `s = aiohttp.ClientSession()`),
// resolve_type_qualified_labels() constructs `"HttpClient.<method>"`
// call texts so the receiver's variable name is no longer load-bearing. Matches
// the existing Rust HttpClient.<method> sink set so both languages
// stay in step on the type-aware SSRF model. Motivated by the
// upstream LMDeploy CVE-2026-33626 shape:
// client = requests.Session()
// response = client.get(url, ...)
LabelRule {
matchers: &[
"HttpClient.get",
"HttpClient.post",
"HttpClient.put",
"HttpClient.delete",
"HttpClient.patch",
"HttpClient.head",
"HttpClient.request",
"HttpClient.send",
],
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: false,
},
LabelRule {
matchers: &[
"pickle.loads",
@ -256,7 +293,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
dangerous_kwargs: &[],
activation: GateActivation::ValueMatch,
},
// subprocess.run(cmd, shell=True) multi-kwarg gate using the new
// subprocess.run(cmd, shell=True), multi-kwarg gate using the new
// presence-aware mechanism. Payload is arg 1 (after receiver offset
// applied by the CFG layer when the call is modelled method-style).
SinkGate {
@ -361,7 +398,7 @@ pub fn framework_rules(ctx: &FrameworkContext) -> Vec<RuntimeLabelRule> {
let mut rules = Vec::new();
if ctx.has(DetectedFramework::Django) {
// QuerySet.extra() raw SQL injection risk.
// QuerySet.extra(), raw SQL injection risk.
// Framework-conditional because `extra` is too generic as a static matcher.
rules.push(RuntimeLabelRule {
matchers: vec!["extra".into()],