[pitboss/grind] deferred session-0007 (20260520T233019Z-6958)

This commit is contained in:
pitboss 2026-05-21 01:36:46 -05:00
parent 9a0529e8f8
commit bb8484bb28
19 changed files with 934 additions and 0 deletions

View file

@ -37,6 +37,20 @@ fn source_imports_go_http(file_bytes: &[u8]) -> bool {
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
}
/// Reports whether the file contains a call to one of the recognised Go
/// URL-encoding / escaping helpers.
///
/// This is a plain byte-substring scan over the whole of `file_bytes`; it
/// does not check that the encoded result is what reaches the header setter.
fn value_routed_through_encoder(file_bytes: &[u8]) -> bool {
    // Call prefixes, including the opening parenthesis.
    // NOTE(review): HTML/JS escapers are listed alongside URL encoders --
    // confirm they neutralise CR/LF for header values.
    const ENCODER_PREFIXES: &[&[u8]] = &[
        b"url.QueryEscape(",
        b"url.PathEscape(",
        b"template.HTMLEscapeString(",
        b"template.JSEscapeString(",
    ];
    for &prefix in ENCODER_PREFIXES {
        let present = file_bytes
            .windows(prefix.len())
            .any(|window| window == prefix);
        if present {
            return true;
        }
    }
    false
}
impl FrameworkAdapter for HeaderGoAdapter {
fn name(&self) -> &'static str {
ADAPTER_NAME
@ -52,6 +66,9 @@ impl FrameworkAdapter for HeaderGoAdapter {
_ast: tree_sitter::Node<'_>,
file_bytes: &[u8],
) -> Option<FrameworkBinding> {
if value_routed_through_encoder(file_bytes) {
return None;
}
let matches_call = super::any_callee_matches(summary, callee_is_header_setter);
let matches_source = source_imports_go_http(file_bytes);
if matches_call && matches_source {
@ -107,4 +124,22 @@ mod tests {
.detect(&summary, tree.root_node(), src)
.is_none());
}
// A Go handler whose header value goes through `url.QueryEscape` must not
// produce a binding.
#[test]
fn skips_when_value_url_encoded() {
    let fixture: &[u8] = b"package x\nimport (\"net/http\"; \"net/url\")\n\
        func Run(w http.ResponseWriter, v string) { w.Header().Set(\"X-Token\", url.QueryEscape(v)) }\n";
    let callees = vec![
        crate::summary::CalleeSite::bare("Set"),
        crate::summary::CalleeSite::bare("QueryEscape"),
    ];
    let func = FuncSummary {
        name: "Run".into(),
        callees,
        ..Default::default()
    };
    let parsed = parse_go(fixture);
    let binding = HeaderGoAdapter.detect(&func, parsed.root_node(), fixture);
    assert!(binding.is_none());
}
}

View file

@ -33,6 +33,27 @@ fn source_imports_servlet(file_bytes: &[u8]) -> bool {
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
}
/// Reports whether the file contains a call to one of the recognised Java
/// URL-encoding / HTML-escaping helpers.
///
/// The check is a byte-substring scan over the whole of `file_bytes`; it
/// does not verify that the encoded result is the value handed to the
/// header setter.
fn value_routed_through_encoder(file_bytes: &[u8]) -> bool {
    // The last entry is a bare prefix (no parenthesis), so it matches every
    // `StringEscapeUtils.escape*` method name.
    // NOTE(review): HTML/XML escapers are listed alongside URL encoders --
    // confirm they neutralise CR/LF for header values.
    const ENCODER_PREFIXES: &[&[u8]] = &[
        b"URLEncoder.encode(",
        b"Encode.forHtml(",
        b"Encode.forHtmlAttribute(",
        b"Encode.forUri(",
        b"Encode.forUriComponent(",
        b"escapeHtml(",
        b"escapeHtml4(",
        b"escapeXml(",
        b"StringEscapeUtils.escape",
    ];
    let contains = |needle: &[u8]| {
        file_bytes
            .windows(needle.len())
            .any(|chunk| chunk == needle)
    };
    ENCODER_PREFIXES.iter().any(|&needle| contains(needle))
}
impl FrameworkAdapter for HeaderJavaAdapter {
fn name(&self) -> &'static str {
ADAPTER_NAME
@ -48,6 +69,9 @@ impl FrameworkAdapter for HeaderJavaAdapter {
_ast: tree_sitter::Node<'_>,
file_bytes: &[u8],
) -> Option<FrameworkBinding> {
if value_routed_through_encoder(file_bytes) {
return None;
}
let matches_call = super::any_callee_matches(summary, callee_is_header_setter);
let matches_source = source_imports_servlet(file_bytes);
if matches_call && matches_source {
@ -103,4 +127,24 @@ mod tests {
.detect(&summary, tree.root_node(), src)
.is_none());
}
// A servlet method that runs the value through `URLEncoder.encode` before
// `setHeader` must not produce a binding.
#[test]
fn skips_when_value_url_encoded() {
    let fixture: &[u8] = b"import javax.servlet.http.HttpServletResponse;\n\
        import java.net.URLEncoder;\n\
        class C { void run(HttpServletResponse r, String v) throws Exception { \
        String safe = URLEncoder.encode(v, \"UTF-8\"); r.setHeader(\"X-Token\", safe); } }\n";
    let callees = vec![
        crate::summary::CalleeSite::bare("setHeader"),
        crate::summary::CalleeSite::bare("encode"),
    ];
    let func = FuncSummary {
        name: "run".into(),
        callees,
        ..Default::default()
    };
    let parsed = parse_java(fixture);
    let binding = HeaderJavaAdapter.detect(&func, parsed.root_node(), fixture);
    assert!(binding.is_none());
}
}

View file

@ -45,6 +45,20 @@ fn source_uses_node_http(file_bytes: &[u8]) -> bool {
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
}
/// Reports whether the file contains a call to one of the recognised
/// Node / browser URL-encoding functions.
///
/// Implemented as a byte-substring scan over the whole of `file_bytes`; the
/// data flow from the encoder to the header setter is not checked.
fn value_routed_through_encoder(file_bytes: &[u8]) -> bool {
    /// Plain sub-slice search.
    fn occurs_in(haystack: &[u8], needle: &[u8]) -> bool {
        haystack.windows(needle.len()).any(|w| w == needle)
    }

    // Call prefixes, including the opening parenthesis.
    const ENCODER_PREFIXES: &[&[u8]] = &[
        b"encodeURIComponent(",
        b"encodeURI(",
        b"querystring.escape(",
        b"qs.escape(",
    ];
    ENCODER_PREFIXES
        .iter()
        .any(|&prefix| occurs_in(file_bytes, prefix))
}
impl FrameworkAdapter for HeaderJsAdapter {
fn name(&self) -> &'static str {
ADAPTER_NAME
@ -60,6 +74,9 @@ impl FrameworkAdapter for HeaderJsAdapter {
_ast: tree_sitter::Node<'_>,
file_bytes: &[u8],
) -> Option<FrameworkBinding> {
if value_routed_through_encoder(file_bytes) {
return None;
}
let matches_call = super::any_callee_matches(summary, callee_is_header_setter);
let matches_source = source_uses_node_http(file_bytes);
if matches_call && matches_source {
@ -115,4 +132,22 @@ mod tests {
.detect(&summary, tree.root_node(), src)
.is_none());
}
// A Node handler that wraps the header value in `encodeURIComponent` must
// not produce a binding.
#[test]
fn skips_when_value_url_encoded() {
    let fixture: &[u8] = b"const http = require('http');\n\
        function run(res, value) { res.setHeader('Set-Cookie', encodeURIComponent(value)); }\n";
    let callees = vec![
        crate::summary::CalleeSite::bare("setHeader"),
        crate::summary::CalleeSite::bare("encodeURIComponent"),
    ];
    let func = FuncSummary {
        name: "run".into(),
        callees,
        ..Default::default()
    };
    let parsed = parse_js(fixture);
    let binding = HeaderJsAdapter.detect(&func, parsed.root_node(), fixture);
    assert!(binding.is_none());
}
}

View file

@ -37,6 +37,20 @@ fn source_uses_php_response(file_bytes: &[u8]) -> bool {
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
}
/// Reports whether the file contains a call to one of the recognised PHP
/// URL-encoding / HTML-escaping functions.
///
/// This is a byte-substring scan over the whole of `file_bytes`; it does not
/// confirm that the encoded value is the one passed to the header setter.
fn value_routed_through_encoder(file_bytes: &[u8]) -> bool {
    // `urlencode(` also matches inside `rawurlencode(`; both are kept so the
    // recognised set is explicit.
    // NOTE(review): HTML escapers are listed alongside URL encoders --
    // confirm they neutralise CR/LF for header values.
    const ENCODER_PREFIXES: &[&[u8]] = &[
        b"urlencode(",
        b"rawurlencode(",
        b"htmlspecialchars(",
        b"htmlentities(",
    ];
    for &prefix in ENCODER_PREFIXES {
        if file_bytes.windows(prefix.len()).any(|window| window == prefix) {
            return true;
        }
    }
    false
}
impl FrameworkAdapter for HeaderPhpAdapter {
fn name(&self) -> &'static str {
ADAPTER_NAME
@ -52,6 +66,9 @@ impl FrameworkAdapter for HeaderPhpAdapter {
_ast: tree_sitter::Node<'_>,
file_bytes: &[u8],
) -> Option<FrameworkBinding> {
if value_routed_through_encoder(file_bytes) {
return None;
}
let matches_call = super::any_callee_matches(summary, callee_is_header_setter);
let matches_source = source_uses_php_response(file_bytes);
if matches_call && matches_source {
@ -106,4 +123,22 @@ mod tests {
.detect(&summary, tree.root_node(), src)
.is_none());
}
// A PHP `header()` call whose value is wrapped in `urlencode` must not
// produce a binding.
#[test]
fn skips_when_value_url_encoded() {
    let fixture: &[u8] =
        b"<?php\nfunction run($v) { header('Set-Cookie: ' . urlencode($v)); }\n";
    let callees = vec![
        crate::summary::CalleeSite::bare("header"),
        crate::summary::CalleeSite::bare("urlencode"),
    ];
    let func = FuncSummary {
        name: "run".into(),
        callees,
        ..Default::default()
    };
    let parsed = parse_php(fixture);
    let binding = HeaderPhpAdapter.detect(&func, parsed.root_node(), fixture);
    assert!(binding.is_none());
}
}

View file

@ -39,6 +39,27 @@ fn source_imports_python_web(file_bytes: &[u8]) -> bool {
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
}
/// Reports whether the file contains a call to one of the recognised Python
/// URL-encoding / HTML-escaping helpers.
///
/// The check is a byte-substring scan over the whole of `file_bytes`; it
/// does not verify that the encoded result is the value assigned to the
/// header.
fn value_routed_through_encoder(file_bytes: &[u8]) -> bool {
    // Some entries are substrings of others (e.g. `quote_plus(` is contained
    // in `urllib.parse.quote_plus(`); the longer spellings are kept so the
    // recognised set is explicit. A bare `quote(` is not in the list.
    // NOTE(review): HTML escapers are listed alongside URL encoders --
    // confirm they neutralise CR/LF for header values.
    const ENCODER_PREFIXES: &[&[u8]] = &[
        b"urllib.parse.quote(",
        b"parse.quote(",
        b"urllib.parse.quote_plus(",
        b"parse.quote_plus(",
        b"quote_plus(",
        b"werkzeug.urls.url_quote(",
        b"url_quote(",
        b"urlencode(",
        b"html.escape(",
        b"markupsafe.escape(",
        b"escape_html(",
    ];
    let contains = |needle: &[u8]| {
        file_bytes
            .windows(needle.len())
            .any(|chunk| chunk == needle)
    };
    ENCODER_PREFIXES.iter().any(|&needle| contains(needle))
}
impl FrameworkAdapter for HeaderPythonAdapter {
fn name(&self) -> &'static str {
ADAPTER_NAME
@ -54,6 +75,9 @@ impl FrameworkAdapter for HeaderPythonAdapter {
_ast: tree_sitter::Node<'_>,
file_bytes: &[u8],
) -> Option<FrameworkBinding> {
if value_routed_through_encoder(file_bytes) {
return None;
}
let matches_call = super::any_callee_matches(summary, callee_is_header_setter);
let matches_source = source_imports_python_web(file_bytes);
if matches_call && matches_source {
@ -109,4 +133,24 @@ mod tests {
.detect(&summary, tree.root_node(), src)
.is_none());
}
// A Flask handler that assigns a `quote_plus`-encoded value to a response
// header must not produce a binding.
#[test]
fn skips_when_value_url_encoded() {
    // The fixture imports `quote_plus`, the function it actually calls, so
    // the embedded Python is a coherent program rather than one that
    // references an unimported name.
    let src: &[u8] = b"from flask import make_response\n\
        from urllib.parse import quote_plus\n\
        def run(value):\n resp = make_response('hi')\n \
        resp.headers['Set-Cookie'] = quote_plus(value)\n return resp\n";
    let tree = parse_python(src);
    let summary = FuncSummary {
        name: "run".into(),
        callees: vec![
            crate::summary::CalleeSite::bare("__setitem__"),
            crate::summary::CalleeSite::bare("quote_plus"),
        ],
        ..Default::default()
    };
    assert!(HeaderPythonAdapter
        .detect(&summary, tree.root_node(), src)
        .is_none());
}
}

View file

@ -38,6 +38,23 @@ fn source_uses_ruby_web(file_bytes: &[u8]) -> bool {
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
}
/// Reports whether the file contains a call to one of the recognised Ruby
/// URL-encoding / HTML-escaping helpers.
///
/// Implemented as a byte-substring scan over the whole of `file_bytes`; the
/// data flow from the encoder to the header setter is not checked.
fn value_routed_through_encoder(file_bytes: &[u8]) -> bool {
    /// Plain sub-slice search.
    fn occurs_in(haystack: &[u8], needle: &[u8]) -> bool {
        haystack.windows(needle.len()).any(|w| w == needle)
    }

    // Call prefixes, including the opening parenthesis.
    // NOTE(review): HTML escapers are listed alongside URL encoders --
    // confirm they neutralise CR/LF for header values.
    const ENCODER_PREFIXES: &[&[u8]] = &[
        b"URI.encode_www_form_component(",
        b"encode_www_form_component(",
        b"CGI.escape(",
        b"CGI.escapeHTML(",
        b"ERB::Util.url_encode(",
        b"ERB::Util.h(",
        b"Rack::Utils.escape(",
    ];
    ENCODER_PREFIXES
        .iter()
        .any(|&prefix| occurs_in(file_bytes, prefix))
}
impl FrameworkAdapter for HeaderRubyAdapter {
fn name(&self) -> &'static str {
ADAPTER_NAME
@ -53,6 +70,9 @@ impl FrameworkAdapter for HeaderRubyAdapter {
_ast: tree_sitter::Node<'_>,
file_bytes: &[u8],
) -> Option<FrameworkBinding> {
if value_routed_through_encoder(file_bytes) {
return None;
}
let matches_call = super::any_callee_matches(summary, callee_is_header_setter);
let matches_source = source_uses_ruby_web(file_bytes);
if matches_call && matches_source {
@ -108,4 +128,23 @@ mod tests {
.detect(&summary, tree.root_node(), src)
.is_none());
}
// A Rack handler whose header value goes through
// `URI.encode_www_form_component` must not produce a binding.
#[test]
fn skips_when_value_url_encoded() {
    let fixture: &[u8] = b"require 'rack'\nrequire 'uri'\n\
        def run(value)\n response = Rack::Response.new\n \
        response.set_header('Set-Cookie', URI.encode_www_form_component(value))\nend\n";
    let callees = vec![
        crate::summary::CalleeSite::bare("set_header"),
        crate::summary::CalleeSite::bare("encode_www_form_component"),
    ];
    let func = FuncSummary {
        name: "run".into(),
        callees,
        ..Default::default()
    };
    let parsed = parse_ruby(fixture);
    let binding = HeaderRubyAdapter.detect(&func, parsed.root_node(), fixture);
    assert!(binding.is_none());
}
}

View file

@ -39,6 +39,20 @@ fn source_imports_rust_http(file_bytes: &[u8]) -> bool {
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
}
/// Reports whether the file contains a call to one of the recognised Rust
/// URL-encoding functions.
///
/// This is a byte-substring scan over the whole of `file_bytes`; it does not
/// confirm that the encoded value is the one inserted into the headers.
fn value_routed_through_encoder(file_bytes: &[u8]) -> bool {
    // `percent_encode(` also matches inside `utf8_percent_encode(`; both are
    // kept so the recognised set is explicit.
    const ENCODER_PREFIXES: &[&[u8]] = &[
        b"utf8_percent_encode(",
        b"percent_encode(",
        b"urlencoding::encode(",
        b"form_urlencoded::byte_serialize(",
    ];
    for &prefix in ENCODER_PREFIXES {
        if file_bytes.windows(prefix.len()).any(|window| window == prefix) {
            return true;
        }
    }
    false
}
impl FrameworkAdapter for HeaderRustAdapter {
fn name(&self) -> &'static str {
ADAPTER_NAME
@ -54,6 +68,9 @@ impl FrameworkAdapter for HeaderRustAdapter {
_ast: tree_sitter::Node<'_>,
file_bytes: &[u8],
) -> Option<FrameworkBinding> {
if value_routed_through_encoder(file_bytes) {
return None;
}
let matches_call = super::any_callee_matches(summary, callee_is_header_setter);
let matches_source = source_imports_rust_http(file_bytes);
if matches_call && matches_source {
@ -109,4 +126,26 @@ mod tests {
.detect(&summary, tree.root_node(), src)
.is_none());
}
// An axum handler that percent-encodes the value before inserting it into
// the header map must not produce a binding.
#[test]
fn skips_when_value_url_encoded() {
    let fixture: &[u8] = b"use axum::http::HeaderMap;\n\
        use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};\n\
        fn run(headers: &mut HeaderMap, value: &str) {\n\
        let safe = utf8_percent_encode(value, NON_ALPHANUMERIC).to_string();\n\
        headers.insert(\"set-cookie\", safe.parse().unwrap());\n\
        }\n";
    let callees = vec![
        crate::summary::CalleeSite::bare("insert"),
        crate::summary::CalleeSite::bare("utf8_percent_encode"),
    ];
    let func = FuncSummary {
        name: "run".into(),
        callees,
        ..Default::default()
    };
    let parsed = parse_rust(fixture);
    let binding = HeaderRustAdapter.detect(&func, parsed.root_node(), fixture);
    assert!(binding.is_none());
}
}

View file

@ -31,6 +31,38 @@ fn source_imports_go_web(file_bytes: &[u8]) -> bool {
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
}
/// Reports whether the file contains any token associated with Go
/// host-allowlist / URL-validation code.
///
/// The check is a byte-substring scan over the whole of `file_bytes`; it
/// does not verify that the validated URL is the one being redirected to.
fn url_routed_through_validator(file_bytes: &[u8]) -> bool {
    // The host comparisons only cover the `==` spelling.
    const VALIDATION_MARKERS: &[&[u8]] = &[
        b"url.Parse(",
        b"allowedHosts",
        b"AllowedHosts",
        b"allowlist",
        b"Allowlist",
        b".Host ==",
        b".Hostname() ==",
    ];
    let contains = |needle: &[u8]| {
        file_bytes
            .windows(needle.len())
            .any(|chunk| chunk == needle)
    };
    VALIDATION_MARKERS.iter().any(|&needle| contains(needle))
}
/// Reports whether the file looks like it uses gomock-style mocks: either a
/// gomock import path or an `.EXPECT().` call chain appears somewhere in
/// `file_bytes`.
///
/// A `Redirect` callee on such receivers is treated by the caller as a
/// recorded-call assertion rather than an HTTP redirect.
fn looks_like_mockgen(file_bytes: &[u8]) -> bool {
    /// Plain sub-slice search.
    fn occurs_in(haystack: &[u8], needle: &[u8]) -> bool {
        haystack.windows(needle.len()).any(|w| w == needle)
    }

    // Two import paths plus the expectation-chain prefix (trailing dot
    // included, so a bare `.EXPECT()` does not match).
    const MOCK_MARKERS: &[&[u8]] = &[
        b"github.com/golang/mock/gomock",
        b"go.uber.org/mock/gomock",
        b".EXPECT().",
    ];
    MOCK_MARKERS
        .iter()
        .any(|&marker| occurs_in(file_bytes, marker))
}
impl FrameworkAdapter for RedirectGoAdapter {
fn name(&self) -> &'static str {
ADAPTER_NAME
@ -46,6 +78,9 @@ impl FrameworkAdapter for RedirectGoAdapter {
_ast: tree_sitter::Node<'_>,
file_bytes: &[u8],
) -> Option<FrameworkBinding> {
if looks_like_mockgen(file_bytes) || url_routed_through_validator(file_bytes) {
return None;
}
let matches_call = super::any_callee_matches(summary, callee_is_redirect);
let matches_source = source_imports_go_web(file_bytes);
if matches_call && matches_source {
@ -101,4 +136,40 @@ mod tests {
.detect(&summary, tree.root_node(), src)
.is_none());
}
// A gin handler that parses the target with `url.Parse` and checks its
// hostname before redirecting must not produce a binding.
#[test]
fn skips_when_url_validated_against_allowlist() {
    let fixture: &[u8] = b"package vuln\n\nimport (\n\t\"net/http\"\n\t\"net/url\"\n\t\"github.com/gin-gonic/gin\"\n)\n\
        func Run(c *gin.Context, v string) {\n\t\
        u, err := url.Parse(v)\n\t\
        if err != nil || u.Hostname() != \"example.com\" { return }\n\t\
        c.Redirect(http.StatusFound, v)\n}\n";
    let callees = vec![
        crate::summary::CalleeSite::bare("Redirect"),
        crate::summary::CalleeSite::bare("Parse"),
    ];
    let func = FuncSummary {
        name: "Run".into(),
        callees,
        ..Default::default()
    };
    let parsed = parse_go(fixture);
    let binding = RedirectGoAdapter.detect(&func, parsed.root_node(), fixture);
    assert!(binding.is_none());
}
// A file that imports gomock and calls `Redirect` on an `EXPECT()` chain
// must not produce a binding.
#[test]
fn skips_when_file_uses_gomock() {
    let fixture: &[u8] = b"package vuln\n\nimport (\n\t\"github.com/golang/mock/gomock\"\n)\n\
        func Run(m *MockRouter, v string) {\n\tm.EXPECT().Redirect(v)\n}\n";
    let callees = vec![crate::summary::CalleeSite::bare("Redirect")];
    let func = FuncSummary {
        name: "Run".into(),
        callees,
        ..Default::default()
    };
    let parsed = parse_go(fixture);
    let binding = RedirectGoAdapter.detect(&func, parsed.root_node(), fixture);
    assert!(binding.is_none());
}
}

View file

@ -33,6 +33,25 @@ fn source_imports_servlet(file_bytes: &[u8]) -> bool {
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
}
/// Reports whether the file contains any token associated with Java
/// host-allowlist / URL-validation helpers.
///
/// This is a byte-substring scan over the whole of `file_bytes`; it does not
/// confirm that the validated URL is the one passed to the redirect call.
fn url_routed_through_validator(file_bytes: &[u8]) -> bool {
    // Matching is case-sensitive: each spelling below is matched exactly.
    const VALIDATION_MARKERS: &[&[u8]] = &[
        b"UrlValidator",
        b".isValid(",
        b"allowedHosts",
        b"allowlist",
        b"allowList",
        b"WHITELIST",
        b"isAllowedHost",
        b"isAllowedRedirect",
    ];
    for &marker in VALIDATION_MARKERS {
        if file_bytes.windows(marker.len()).any(|window| window == marker) {
            return true;
        }
    }
    false
}
impl FrameworkAdapter for RedirectJavaAdapter {
fn name(&self) -> &'static str {
ADAPTER_NAME
@ -48,6 +67,9 @@ impl FrameworkAdapter for RedirectJavaAdapter {
_ast: tree_sitter::Node<'_>,
file_bytes: &[u8],
) -> Option<FrameworkBinding> {
if url_routed_through_validator(file_bytes) {
return None;
}
let matches_call = super::any_callee_matches(summary, callee_is_redirect);
let matches_source = source_imports_servlet(file_bytes);
if matches_call && matches_source {
@ -103,4 +125,27 @@ mod tests {
.detect(&summary, tree.root_node(), src)
.is_none());
}
// A servlet method that gates `sendRedirect` behind `UrlValidator.isValid`
// must not produce a binding.
#[test]
fn skips_when_url_validated_against_allowlist() {
    let fixture: &[u8] = b"import javax.servlet.http.HttpServletResponse;\n\
        import org.apache.commons.validator.routines.UrlValidator;\n\
        class C { void run(HttpServletResponse r, String v) throws Exception {\n\
        UrlValidator vd = new UrlValidator();\n\
        if (!vd.isValid(v)) return;\n\
        r.sendRedirect(v);\n\
        } }\n";
    let callees = vec![
        crate::summary::CalleeSite::bare("sendRedirect"),
        crate::summary::CalleeSite::bare("isValid"),
    ];
    let func = FuncSummary {
        name: "run".into(),
        callees,
        ..Default::default()
    };
    let parsed = parse_java(fixture);
    let binding = RedirectJavaAdapter.detect(&func, parsed.root_node(), fixture);
    assert!(binding.is_none());
}
}

View file

@ -38,6 +38,24 @@ fn source_imports_node_web(file_bytes: &[u8]) -> bool {
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
}
/// Reports whether the file contains any token associated with JavaScript
/// host-allowlist / URL-validation code.
///
/// Implemented as a byte-substring scan over the whole of `file_bytes`; it
/// does not verify that the validated URL is the one being redirected to.
fn url_routed_through_validator(file_bytes: &[u8]) -> bool {
    /// Plain sub-slice search.
    fn occurs_in(haystack: &[u8], needle: &[u8]) -> bool {
        haystack.windows(needle.len()).any(|w| w == needle)
    }

    // The property comparisons only cover the `===` spelling.
    const VALIDATION_MARKERS: &[&[u8]] = &[
        b"new URL(",
        b"allowedHosts",
        b"allowedOrigins",
        b"allowlist",
        b"ALLOWLIST",
        b".hostname ===",
        b".origin ===",
        b".host ===",
    ];
    VALIDATION_MARKERS
        .iter()
        .any(|&marker| occurs_in(file_bytes, marker))
}
impl FrameworkAdapter for RedirectJsAdapter {
fn name(&self) -> &'static str {
ADAPTER_NAME
@ -53,6 +71,9 @@ impl FrameworkAdapter for RedirectJsAdapter {
_ast: tree_sitter::Node<'_>,
file_bytes: &[u8],
) -> Option<FrameworkBinding> {
if url_routed_through_validator(file_bytes) {
return None;
}
let matches_call = super::any_callee_matches(summary, callee_is_redirect);
let matches_source = source_imports_node_web(file_bytes);
if matches_call && matches_source {
@ -108,4 +129,22 @@ mod tests {
.detect(&summary, tree.root_node(), src)
.is_none());
}
// An express handler that compares `new URL(v).origin` against a fixed
// origin before redirecting must not produce a binding.
#[test]
fn skips_when_url_validated_against_allowlist() {
    let fixture: &[u8] = b"const express = require('express');\n\
        function run(req, res, v) {\n \
        const allowed = 'https://example.com';\n \
        if (new URL(v).origin !== allowed) return;\n \
        res.redirect(v);\n}\n";
    let callees = vec![crate::summary::CalleeSite::bare("redirect")];
    let func = FuncSummary {
        name: "run".into(),
        callees,
        ..Default::default()
    };
    let parsed = parse_js(fixture);
    let binding = RedirectJsAdapter.detect(&func, parsed.root_node(), fixture);
    assert!(binding.is_none());
}
}

View file

@ -38,6 +38,22 @@ fn source_imports_php_web(file_bytes: &[u8]) -> bool {
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
}
/// Reports whether the file contains any token associated with PHP
/// host-allowlist / URL-validation code.
///
/// The check is a byte-substring scan over the whole of `file_bytes`; it
/// does not confirm that the validated URL is the one sent in the redirect.
fn url_routed_through_validator(file_bytes: &[u8]) -> bool {
    // `in_array(` and `filter_var(` match on any use of those functions in
    // the file, whatever their arguments.
    const VALIDATION_MARKERS: &[&[u8]] = &[
        b"parse_url(",
        b"allowedHosts",
        b"allowed_hosts",
        b"allowlist",
        b"in_array(",
        b"filter_var(",
    ];
    let contains = |needle: &[u8]| {
        file_bytes
            .windows(needle.len())
            .any(|chunk| chunk == needle)
    };
    VALIDATION_MARKERS.iter().any(|&needle| contains(needle))
}
impl FrameworkAdapter for RedirectPhpAdapter {
fn name(&self) -> &'static str {
ADAPTER_NAME
@ -53,6 +69,9 @@ impl FrameworkAdapter for RedirectPhpAdapter {
_ast: tree_sitter::Node<'_>,
file_bytes: &[u8],
) -> Option<FrameworkBinding> {
if url_routed_through_validator(file_bytes) {
return None;
}
let matches_call = super::any_callee_matches(summary, callee_is_redirect);
let matches_source = source_imports_php_web(file_bytes);
if matches_call && matches_source {
@ -108,4 +127,26 @@ mod tests {
.detect(&summary, tree.root_node(), src)
.is_none());
}
// A PHP function that checks the parsed host against `$allowedHosts` before
// emitting a `Location` header must not produce a binding.
#[test]
fn skips_when_url_validated_against_allowlist() {
    let fixture: &[u8] = b"<?php\nfunction run($v) {\n\
        $allowedHosts = ['example.com'];\n\
        $parts = parse_url($v);\n\
        if (!in_array($parts['host'], $allowedHosts, true)) return;\n\
        header(\"Location: \" . $v);\n}\n";
    let callees = vec![
        crate::summary::CalleeSite::bare("header"),
        crate::summary::CalleeSite::bare("parse_url"),
        crate::summary::CalleeSite::bare("in_array"),
    ];
    let func = FuncSummary {
        name: "run".into(),
        callees,
        ..Default::default()
    };
    let parsed = parse_php(fixture);
    let binding = RedirectPhpAdapter.detect(&func, parsed.root_node(), fixture);
    assert!(binding.is_none());
}
}

View file

@ -38,6 +38,26 @@ fn source_imports_python_web(file_bytes: &[u8]) -> bool {
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
}
/// Reports whether the file contains any token associated with Python
/// host-allowlist / URL-validation code.
///
/// This is a byte-substring scan over the whole of `file_bytes`; it does not
/// verify that the validated URL is the one being redirected to.
fn url_routed_through_validator(file_bytes: &[u8]) -> bool {
    // `urlparse(` / `url_parse(` match on any use of those functions in the
    // file, whatever is done with the result.
    const VALIDATION_MARKERS: &[&[u8]] = &[
        b"is_safe_url(",
        b"url_has_allowed_host_and_scheme(",
        b"allowed_hosts",
        b"ALLOWED_HOSTS",
        b"ALLOWLIST",
        b"allowlist",
        b".netloc in ",
        b".netloc.in_",
        b"urlparse(",
        b"url_parse(",
    ];
    for &marker in VALIDATION_MARKERS {
        if file_bytes.windows(marker.len()).any(|window| window == marker) {
            return true;
        }
    }
    false
}
impl FrameworkAdapter for RedirectPythonAdapter {
fn name(&self) -> &'static str {
ADAPTER_NAME
@ -53,6 +73,9 @@ impl FrameworkAdapter for RedirectPythonAdapter {
_ast: tree_sitter::Node<'_>,
file_bytes: &[u8],
) -> Option<FrameworkBinding> {
if url_routed_through_validator(file_bytes) {
return None;
}
let matches_call = super::any_callee_matches(summary, callee_is_redirect);
let matches_source = source_imports_python_web(file_bytes);
if matches_call && matches_source {
@ -108,4 +131,25 @@ mod tests {
.detect(&summary, tree.root_node(), src)
.is_none());
}
// A handler that gates `redirect` behind `url_has_allowed_host_and_scheme`
// must not produce a binding.
#[test]
fn skips_when_url_validated_against_allowlist() {
    let fixture: &[u8] = b"from flask import redirect\n\
        from django.utils.http import url_has_allowed_host_and_scheme\n\
        def run(value):\n \
        if not url_has_allowed_host_and_scheme(value, allowed_hosts={'example.com'}):\n \
        return None\n return redirect(value)\n";
    let callees = vec![
        crate::summary::CalleeSite::bare("redirect"),
        crate::summary::CalleeSite::bare("url_has_allowed_host_and_scheme"),
    ];
    let func = FuncSummary {
        name: "run".into(),
        callees,
        ..Default::default()
    };
    let parsed = parse_python(fixture);
    let binding = RedirectPythonAdapter.detect(&func, parsed.root_node(), fixture);
    assert!(binding.is_none());
}
}

View file

@ -36,6 +36,24 @@ fn source_imports_ruby_web(file_bytes: &[u8]) -> bool {
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
}
/// Reports whether the file contains any token associated with Ruby
/// host-allowlist / URL-validation code.
///
/// Implemented as a byte-substring scan over the whole of `file_bytes`; it
/// does not confirm that the validated URL is the one being redirected to.
fn url_routed_through_validator(file_bytes: &[u8]) -> bool {
    /// Plain sub-slice search.
    fn occurs_in(haystack: &[u8], needle: &[u8]) -> bool {
        haystack.windows(needle.len()).any(|w| w == needle)
    }

    // `URI(` matches any byte sequence ending in `URI(`, not only the
    // `Kernel#URI` call.
    const VALIDATION_MARKERS: &[&[u8]] = &[
        b"URI.parse(",
        b"URI(",
        b"allowed_hosts",
        b"ALLOWED_HOSTS",
        b"allowlist",
        b"ALLOWLIST",
        b".host ==",
        b".host?(",
    ];
    VALIDATION_MARKERS
        .iter()
        .any(|&marker| occurs_in(file_bytes, marker))
}
impl FrameworkAdapter for RedirectRubyAdapter {
fn name(&self) -> &'static str {
ADAPTER_NAME
@ -51,6 +69,9 @@ impl FrameworkAdapter for RedirectRubyAdapter {
_ast: tree_sitter::Node<'_>,
file_bytes: &[u8],
) -> Option<FrameworkBinding> {
if url_routed_through_validator(file_bytes) {
return None;
}
let matches_call = super::any_callee_matches(summary, callee_is_redirect);
let matches_source = source_imports_ruby_web(file_bytes);
if matches_call && matches_source {
@ -106,4 +127,25 @@ mod tests {
.detect(&summary, tree.root_node(), src)
.is_none());
}
// A Rack handler that checks the parsed host against `allowed_hosts` before
// redirecting must not produce a binding.
#[test]
fn skips_when_url_validated_against_allowlist() {
    let fixture: &[u8] = b"require 'rack'\nrequire 'uri'\n\
        def run(value)\n allowed_hosts = ['example.com']\n \
        host = URI.parse(value).host\n \
        return unless allowed_hosts.include?(host)\n \
        resp = Rack::Response.new\n resp.redirect(value)\n resp\nend\n";
    let callees = vec![
        crate::summary::CalleeSite::bare("redirect"),
        crate::summary::CalleeSite::bare("parse"),
    ];
    let func = FuncSummary {
        name: "run".into(),
        callees,
        ..Default::default()
    };
    let parsed = parse_ruby(fixture);
    let binding = RedirectRubyAdapter.detect(&func, parsed.root_node(), fixture);
    assert!(binding.is_none());
}
}

View file

@ -37,6 +37,23 @@ fn source_imports_rust_web(file_bytes: &[u8]) -> bool {
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
}
/// Reports whether the file contains any token associated with Rust
/// host-allowlist / URL-validation code.
///
/// The check is a byte-substring scan over the whole of `file_bytes`; it
/// does not verify that the validated URL is the one being redirected to.
fn url_routed_through_validator(file_bytes: &[u8]) -> bool {
    // `.host_str()` matches on any call, however its result is compared;
    // `.host() ==` only covers the equality spelling.
    const VALIDATION_MARKERS: &[&[u8]] = &[
        b"Url::parse(",
        b"allowed_hosts",
        b"AllowedHosts",
        b"allowlist",
        b"Allowlist",
        b".host_str()",
        b".host() ==",
    ];
    let contains = |needle: &[u8]| {
        file_bytes
            .windows(needle.len())
            .any(|chunk| chunk == needle)
    };
    VALIDATION_MARKERS.iter().any(|&needle| contains(needle))
}
impl FrameworkAdapter for RedirectRustAdapter {
fn name(&self) -> &'static str {
ADAPTER_NAME
@ -52,6 +69,9 @@ impl FrameworkAdapter for RedirectRustAdapter {
_ast: tree_sitter::Node<'_>,
file_bytes: &[u8],
) -> Option<FrameworkBinding> {
if url_routed_through_validator(file_bytes) {
return None;
}
let matches_call = super::any_callee_matches(summary, callee_is_redirect);
let matches_source = source_imports_rust_web(file_bytes);
if matches_call && matches_source {
@ -107,4 +127,26 @@ mod tests {
.detect(&summary, tree.root_node(), src)
.is_none());
}
// An axum handler that parses the target with `Url::parse` and checks
// `host_str()` before redirecting must not produce a binding.
#[test]
fn skips_when_url_validated_against_allowlist() {
    let fixture: &[u8] = b"use axum::response::Redirect;\n\
        use url::Url;\n\n\
        fn run(v: String) -> Option<Redirect> {\n\
        let u = Url::parse(&v).ok()?;\n\
        if u.host_str() != Some(\"example.com\") { return None; }\n\
        Some(Redirect::to(&v))\n}\n";
    let callees = vec![
        crate::summary::CalleeSite::bare("to"),
        crate::summary::CalleeSite::bare("parse"),
    ];
    let func = FuncSummary {
        name: "run".into(),
        callees,
        ..Default::default()
    };
    let parsed = parse_rust(fixture);
    let binding = RedirectRustAdapter.detect(&func, parsed.root_node(), fixture);
    assert!(binding.is_none());
}
}

View file

@ -36,6 +36,23 @@ fn source_imports_xml(file_bytes: &[u8]) -> bool {
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
}
/// Reports whether the file explicitly sets a `Strict` field to `true`,
/// either in a struct literal (`Strict: true`) or by assignment
/// (`.Strict = true`), with or without the space.
///
/// The adapter treats that explicit pin as the hardening marker for
/// `encoding/xml` decoders. The scan is a byte-substring search over the
/// whole of `file_bytes` and is not tied to a particular decoder value.
fn parser_is_hardened(file_bytes: &[u8]) -> bool {
    const STRICT_PINS: &[&[u8]] = &[
        b"Strict: true",
        b"Strict:true",
        b".Strict = true",
        b".Strict=true",
    ];
    for &pin in STRICT_PINS {
        if file_bytes.windows(pin.len()).any(|window| window == pin) {
            return true;
        }
    }
    false
}
impl FrameworkAdapter for XxeGoAdapter {
fn name(&self) -> &'static str {
ADAPTER_NAME
@ -51,6 +68,9 @@ impl FrameworkAdapter for XxeGoAdapter {
_ast: tree_sitter::Node<'_>,
file_bytes: &[u8],
) -> Option<FrameworkBinding> {
if parser_is_hardened(file_bytes) {
return None;
}
let matches_call = super::any_callee_matches(summary, callee_is_xml_parser);
let matches_source = source_imports_xml(file_bytes);
if matches_call && matches_source {
@ -110,4 +130,23 @@ mod tests {
.detect(&summary, tree.root_node(), src)
.is_none());
}
// A Go function that sets `d.Strict = true` on its XML decoder must not
// produce a binding.
#[test]
fn skips_when_decoder_strict_pinned_true() {
    let fixture: &[u8] = b"package main\nimport (\"bytes\"; \"encoding/xml\")\n\
        func Run(body string) {\n\
        d := xml.NewDecoder(bytes.NewReader([]byte(body)))\n\
        d.Strict = true\n\
        _ = d.Decode(&struct{}{})\n\
        }\n";
    let callees = vec![crate::summary::CalleeSite::bare("NewDecoder")];
    let func = FuncSummary {
        name: "Run".into(),
        callees,
        ..Default::default()
    };
    let parsed = parse_go(fixture);
    let binding = XxeGoAdapter.detect(&func, parsed.root_node(), fixture);
    assert!(binding.is_none());
}
}

View file

@ -45,6 +45,32 @@ fn source_imports_xml_parser(file_bytes: &[u8]) -> bool {
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
}
/// Reports whether the file contains one of the recognised Java XML-parser
/// hardening invocations against external-entity / DTD expansion.
///
/// Deliberately conservative: feature URIs must appear as quoted string
/// literals and the setter calls must appear in call form, so a prose
/// mention in a comment does not count. A missed hardening marker only
/// means the adapter still fires; a spurious match would silently drop a
/// finding.
fn parser_is_hardened(file_bytes: &[u8]) -> bool {
    /// Plain sub-slice search.
    fn occurs_in(haystack: &[u8], needle: &[u8]) -> bool {
        haystack.windows(needle.len()).any(|w| w == needle)
    }

    // A few entries are repeated with one extra space to tolerate a common
    // formatting variant.
    const HARDENING_MARKERS: &[&[u8]] = &[
        b"\"http://apache.org/xml/features/disallow-doctype-decl\"",
        b"setFeature(XMLConstants.FEATURE_SECURE_PROCESSING",
        b"setFeature( XMLConstants.FEATURE_SECURE_PROCESSING",
        b"setExpandEntityReferences(false)",
        b"setExpandEntityReferences (false)",
        b"\"http://xml.org/sax/features/external-general-entities\"",
        b"\"http://xml.org/sax/features/external-parameter-entities\"",
        b"XMLConstants.ACCESS_EXTERNAL_DTD,",
        b"XMLConstants.ACCESS_EXTERNAL_SCHEMA,",
        b"setXIncludeAware(false)",
    ];
    HARDENING_MARKERS
        .iter()
        .any(|&marker| occurs_in(file_bytes, marker))
}
impl FrameworkAdapter for XxeJavaAdapter {
fn name(&self) -> &'static str {
ADAPTER_NAME
@ -60,6 +86,9 @@ impl FrameworkAdapter for XxeJavaAdapter {
_ast: tree_sitter::Node<'_>,
file_bytes: &[u8],
) -> Option<FrameworkBinding> {
if parser_is_hardened(file_bytes) {
return None;
}
let matches_call = super::any_callee_matches(summary, callee_is_xml_parse);
let matches_source = source_imports_xml_parser(file_bytes);
if matches_call && matches_source {
@ -136,4 +165,43 @@ mod tests {
.detect(&summary, tree.root_node(), src)
.is_none());
}
// A factory configured with the quoted `disallow-doctype-decl` feature URI
// must not produce a binding.
#[test]
fn skips_when_disallow_doctype_decl_set() {
    let fixture: &[u8] = b"import javax.xml.parsers.DocumentBuilderFactory;\n\
        public class V {\n public static void run(byte[] b) throws Exception {\n\
        DocumentBuilderFactory f = DocumentBuilderFactory.newInstance();\n\
        f.setFeature(\"http://apache.org/xml/features/disallow-doctype-decl\", true);\n\
        f.newDocumentBuilder().parse(new java.io.ByteArrayInputStream(b));\n\
        }\n}\n";
    let callees = vec![crate::summary::CalleeSite::bare("parse")];
    let func = FuncSummary {
        name: "run".into(),
        callees,
        ..Default::default()
    };
    let parsed = parse_java(fixture);
    let binding = XxeJavaAdapter.detect(&func, parsed.root_node(), fixture);
    assert!(binding.is_none());
}
// A factory configured via `XMLConstants.FEATURE_SECURE_PROCESSING` must
// not produce a binding.
#[test]
fn skips_when_feature_secure_processing_set() {
    let fixture: &[u8] = b"import javax.xml.parsers.DocumentBuilderFactory;\n\
        import javax.xml.XMLConstants;\n\
        public class V {\n public static void run(byte[] b) throws Exception {\n\
        DocumentBuilderFactory f = DocumentBuilderFactory.newInstance();\n\
        f.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);\n\
        f.newDocumentBuilder().parse(new java.io.ByteArrayInputStream(b));\n\
        }\n}\n";
    let callees = vec![crate::summary::CalleeSite::bare("parse")];
    let func = FuncSummary {
        name: "run".into(),
        callees,
        ..Default::default()
    };
    let parsed = parse_java(fixture);
    let binding = XxeJavaAdapter.detect(&func, parsed.root_node(), fixture);
    assert!(binding.is_none());
}
}

View file

@ -48,6 +48,47 @@ fn source_imports_xml(file_bytes: &[u8]) -> bool {
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
}
/// Reports whether the file appears to harden the libxml-backed PHP parser
/// against external-entity expansion.
///
/// Decision order:
/// 1. If `LIBXML_NOENT` appears anywhere in the file the result is `false`:
///    that flag asks libxml to substitute entities, so it overrides any
///    other apparent hardening.
/// 2. Otherwise the result is `true` when the file contains
///    `libxml_disable_entity_loader(` called with `true` / `TRUE` (with or
///    without a space after the parenthesis) or mentions `LIBXML_NONET`.
///
/// `LIBXML_DTDLOAD` is intentionally not a hardening marker: on its own it
/// is neutral, so it never contributes to a `true` verdict.
///
/// The scan is a byte-substring search over the whole of `file_bytes`.
fn parser_is_hardened(file_bytes: &[u8]) -> bool {
    /// Plain sub-slice search.
    fn contains(haystack: &[u8], needle: &[u8]) -> bool {
        haystack.windows(needle.len()).any(|w| w == needle)
    }

    // The dangerous flag wins over everything else.
    if contains(file_bytes, b"LIBXML_NOENT") {
        return false;
    }

    // Tokens that each count as hardening on their own.
    const HARDENING_NEEDLES: &[&[u8]] = &[
        b"libxml_disable_entity_loader(true)",
        b"libxml_disable_entity_loader(TRUE)",
        b"libxml_disable_entity_loader( true",
        b"libxml_disable_entity_loader( TRUE",
        b"LIBXML_NONET",
    ];
    HARDENING_NEEDLES
        .iter()
        .any(|&needle| contains(file_bytes, needle))
}
impl FrameworkAdapter for XxePhpAdapter {
fn name(&self) -> &'static str {
ADAPTER_NAME
@ -63,6 +104,9 @@ impl FrameworkAdapter for XxePhpAdapter {
_ast: tree_sitter::Node<'_>,
file_bytes: &[u8],
) -> Option<FrameworkBinding> {
if parser_is_hardened(file_bytes) {
return None;
}
let matches_call = super::any_callee_matches(summary, callee_is_xml_parser);
let matches_source = source_imports_xml(file_bytes);
if matches_call || matches_source {
@ -117,4 +161,53 @@ mod tests {
.detect(&summary, tree.root_node(), src)
.is_none());
}
/// A file that calls `libxml_disable_entity_loader(true)` before
/// parsing must not produce a binding.
#[test]
fn skips_when_disable_entity_loader_true() {
    let source: &[u8] = b"<?php\nfunction run($body) {\n\
        libxml_disable_entity_loader(true);\n\
        return simplexml_load_string($body);\n}\n";
    let parsed = parse_php(source);
    let callees = vec![crate::summary::CalleeSite::bare("simplexml_load_string")];
    let summary = FuncSummary {
        name: "run".into(),
        callees,
        ..Default::default()
    };
    let binding = XxePhpAdapter.detect(&summary, parsed.root_node(), source);
    assert!(binding.is_none());
}
/// Passing `LIBXML_NONET` to the parser call counts as hardening, so
/// the adapter must stay silent.
#[test]
fn skips_when_libxml_nonet_used() {
    let source: &[u8] = b"<?php\nfunction run($body) {\n\
        return simplexml_load_string($body, 'SimpleXMLElement', LIBXML_NONET);\n}\n";
    let parsed = parse_php(source);
    let callees = vec![crate::summary::CalleeSite::bare("simplexml_load_string")];
    let summary = FuncSummary {
        name: "run".into(),
        callees,
        ..Default::default()
    };
    let binding = XxePhpAdapter.detect(&summary, parsed.root_node(), source);
    assert!(binding.is_none());
}
/// `LIBXML_NOENT` turns entity substitution back on, so its presence
/// must defeat the `libxml_disable_entity_loader(true)` hardener that
/// appears in the same file.
#[test]
fn still_fires_when_libxml_noent_present() {
    let source: &[u8] = b"<?php\nfunction run($body) {\n\
        libxml_disable_entity_loader(true);\n\
        return simplexml_load_string($body, 'SimpleXMLElement', LIBXML_NOENT);\n}\n";
    let parsed = parse_php(source);
    let callees = vec![crate::summary::CalleeSite::bare("simplexml_load_string")];
    let summary = FuncSummary {
        name: "run".into(),
        callees,
        ..Default::default()
    };
    let binding = XxePhpAdapter.detect(&summary, parsed.root_node(), source);
    assert!(binding.is_some());
}
}

View file

@ -47,6 +47,29 @@ fn source_imports_xml(file_bytes: &[u8]) -> bool {
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
}
/// Reports whether the scanned Python source visibly hardens its XML
/// parser against external-entity expansion.
///
/// This is a deliberately conservative byte-substring scan. The only
/// recognised markers are the lxml parser keyword arguments
/// `resolve_entities=False` and `no_network=True` (with optional single
/// spaces around `=`) and an import of the `defusedxml` package, whose
/// parsers are treated as safe-by-default.
fn parser_is_hardened(file_bytes: &[u8]) -> bool {
    const HARDENING_NEEDLES: &[&[u8]] = &[
        // lxml: entity resolution switched off.
        b"resolve_entities=False",
        b"resolve_entities =False",
        b"resolve_entities= False",
        b"resolve_entities = False",
        // lxml: network access for entity lookup switched off.
        b"no_network=True",
        b"no_network =True",
        b"no_network= True",
        b"no_network = True",
        // defusedxml in either import form.
        b"from defusedxml",
        b"import defusedxml",
    ];
    for needle in HARDENING_NEEDLES {
        let found = file_bytes
            .windows(needle.len())
            .any(|window| window == *needle);
        if found {
            return true;
        }
    }
    false
}
impl FrameworkAdapter for XxePythonAdapter {
fn name(&self) -> &'static str {
ADAPTER_NAME
@ -62,6 +85,9 @@ impl FrameworkAdapter for XxePythonAdapter {
_ast: tree_sitter::Node<'_>,
file_bytes: &[u8],
) -> Option<FrameworkBinding> {
if parser_is_hardened(file_bytes) {
return None;
}
let matches_call = super::any_callee_matches(summary, callee_is_xml_parser);
let matches_source = source_imports_xml(file_bytes);
if matches_call && matches_source {
@ -117,4 +143,36 @@ mod tests {
.detect(&summary, tree.root_node(), src)
.is_none());
}
/// An lxml parser built with `resolve_entities=False` is hardened, so
/// no binding may be reported.
#[test]
fn skips_when_resolve_entities_false() {
    let source: &[u8] = b"from lxml import etree\n\
        def run(body):\n\
        parser = etree.XMLParser(resolve_entities=False, no_network=True)\n\
        return etree.fromstring(body, parser)\n";
    let parsed = parse_python(source);
    let callees = vec![crate::summary::CalleeSite::bare("fromstring")];
    let summary = FuncSummary {
        name: "run".into(),
        callees,
        ..Default::default()
    };
    let binding = XxePythonAdapter.detect(&summary, parsed.root_node(), source);
    assert!(binding.is_none());
}
/// Importing from `defusedxml` marks the file as hardened, so the
/// adapter must not fire on its `fromstring` call.
#[test]
fn skips_when_defusedxml_imported() {
    let source: &[u8] = b"from defusedxml import ElementTree\n\
        def run(body):\n return ElementTree.fromstring(body)\n";
    let parsed = parse_python(source);
    let callees = vec![crate::summary::CalleeSite::bare("fromstring")];
    let summary = FuncSummary {
        name: "run".into(),
        callees,
        ..Default::default()
    };
    let binding = XxePythonAdapter.detect(&summary, parsed.root_node(), source);
    assert!(binding.is_none());
}
}

View file

@ -36,6 +36,38 @@ fn source_imports_xml(file_bytes: &[u8]) -> bool {
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
}
/// Reports whether the scanned Ruby source visibly hardens its XML
/// parser against external-entity expansion.
///
/// The verdict is a plain byte-substring scan over `file_bytes`:
///
/// * Any occurrence of `::NOENT` yields `false`. This covers every
///   qualified spelling such as `ParseOptions::NOENT` and
///   `Nokogiri::XML::ParseOptions::NOENT`; the flag asks Nokogiri to
///   expand entities, so it suppresses the hardening verdict.
/// * Otherwise the result is `true` when a canonical hardener is
///   present: `entity_expansion_limit = 0` (with optional single spaces
///   around `=`) or `ParseOptions::NONET`, which also matches the fully
///   qualified `Nokogiri::XML::ParseOptions::NONET`.
fn parser_is_hardened(file_bytes: &[u8]) -> bool {
    /// Byte-substring search; `needle` must be non-empty.
    fn contains(haystack: &[u8], needle: &[u8]) -> bool {
        haystack.windows(needle.len()).any(|window| window == needle)
    }

    // Shortest spelling of the entity-expanding flag; longer qualified
    // forms all contain it, so one scan is enough.
    const UNHARDENING_FLAG: &[u8] = b"::NOENT";
    const HARDENING_NEEDLES: &[&[u8]] = &[
        b"entity_expansion_limit = 0",
        b"entity_expansion_limit=0",
        b"entity_expansion_limit =0",
        b"entity_expansion_limit= 0",
        // Also matches the `Nokogiri::XML::`-prefixed form.
        b"ParseOptions::NONET",
    ];

    if contains(file_bytes, UNHARDENING_FLAG) {
        return false;
    }
    HARDENING_NEEDLES
        .iter()
        .any(|needle| contains(file_bytes, needle))
}
impl FrameworkAdapter for XxeRubyAdapter {
fn name(&self) -> &'static str {
ADAPTER_NAME
@ -51,6 +83,9 @@ impl FrameworkAdapter for XxeRubyAdapter {
_ast: tree_sitter::Node<'_>,
file_bytes: &[u8],
) -> Option<FrameworkBinding> {
if parser_is_hardened(file_bytes) {
return None;
}
let matches_call = super::any_callee_matches(summary, callee_is_xml_parser);
let matches_source = source_imports_xml(file_bytes);
if matches_call && matches_source {
@ -106,4 +141,50 @@ mod tests {
.detect(&summary, tree.root_node(), src)
.is_none());
}
/// Setting `REXML::Document.entity_expansion_limit = 0` hardens the
/// file, so the adapter must stay silent.
#[test]
fn skips_when_entity_expansion_limit_zero() {
    let source: &[u8] = b"require 'rexml/document'\n\
        REXML::Document.entity_expansion_limit = 0\n\
        def run(body)\n REXML::Document.new(body)\nend\n";
    let parsed = parse_ruby(source);
    let callees = vec![crate::summary::CalleeSite::bare("new")];
    let summary = FuncSummary {
        name: "run".into(),
        callees,
        ..Default::default()
    };
    let binding = XxeRubyAdapter.detect(&summary, parsed.root_node(), source);
    assert!(binding.is_none());
}
/// Configuring Nokogiri with `ParseOptions::NONET` counts as
/// hardening, so no binding may be reported.
#[test]
fn skips_when_nokogiri_nonet_used() {
    let source: &[u8] = b"require 'nokogiri'\n\
        def run(body)\n Nokogiri::XML(body) { |c| c.options = Nokogiri::XML::ParseOptions::NONET }\nend\n";
    let parsed = parse_ruby(source);
    let callees = vec![crate::summary::CalleeSite::bare("XML")];
    let summary = FuncSummary {
        name: "run".into(),
        callees,
        ..Default::default()
    };
    let binding = XxeRubyAdapter.detect(&summary, parsed.root_node(), source);
    assert!(binding.is_none());
}
/// `ParseOptions::NOENT` asks Nokogiri to expand entities, so the
/// adapter must still report a binding when it is present.
#[test]
fn still_fires_when_nokogiri_noent_present() {
    let source: &[u8] = b"require 'nokogiri'\n\
        def run(body)\n Nokogiri::XML(body) { |c| c.options = Nokogiri::XML::ParseOptions::NOENT | Nokogiri::XML::ParseOptions::DTDLOAD }\nend\n";
    let parsed = parse_ruby(source);
    let callees = vec![crate::summary::CalleeSite::bare("XML")];
    let summary = FuncSummary {
        name: "run".into(),
        callees,
        ..Default::default()
    };
    let binding = XxeRubyAdapter.detect(&summary, parsed.root_node(), source);
    assert!(binding.is_some());
}
}