docs: update inline references and improve XSS detection in Java servlet writers, refactor matchers for clarity and extend sanitizer support

This commit is contained in:
elipeter 2026-06-03 11:32:30 -05:00
parent c2cd6f009e
commit eb4332edb5
56 changed files with 339 additions and 144 deletions

View file

@ -57,9 +57,30 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
case_sensitive: false,
},
// OWASP ESAPI encoders
// OWASP ESAPI encoders. The idiomatic call site is the fluent
// `ESAPI.encoder().encodeForHTML(x)` chain, which Java's chain collapse
// rewrites to the callee text `ESAPI.encodeForHTML` (the intermediate
// `encoder()` call is dropped), so the class-qualified
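// `Encoder.encodeForHTML` matcher never fires on it. Match the
// `ESAPI.`- and `encoder.`-qualified forms so a value run through the
// canonical XSS encoder has its HTML_ESCAPE cap cleared before it reaches
// a `response.getWriter()` sink. NOTE(review): this rule is
// `case_sensitive: false`, so the `encoder.encodeForHTML` and
// `encoder.encodeForJavaScript` entries presumably duplicate the existing
// `Encoder.*` ones; confirm. Deliberately NOT matched bare: the OWASP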
// Benchmark ships a decoy `Utils.encodeForHTML(...)` that returns the
// string UNCHANGED to test whether a scanner is fooled by the method name,
// so a bare `encodeForHTML` matcher would suppress real reflected-XSS.
LabelRule {
matchers: &["Encoder.encodeForHTML", "Encoder.encodeForJavaScript"],
matchers: &[
"Encoder.encodeForHTML",
"Encoder.encodeForJavaScript",
"ESAPI.encodeForHTML",
"ESAPI.encodeForHTMLAttribute",
"ESAPI.encodeForJavaScript",
"ESAPI.encodeForCSS",
"encoder.encodeForHTML",
"encoder.encodeForHTMLAttribute",
"encoder.encodeForJavaScript",
"encoder.encodeForCSS",
],
label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
case_sensitive: false,
},
@ -232,10 +253,20 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sanitizer(Cap::FILE_IO),
case_sensitive: true,
},
// HTTP response sinks, println/print are broad (also match System.out)
// but necessary to catch response.getWriter().println() via suffix matching.
// HTTP response reflected-XSS sinks. `println` / `print` / `write` are
// the servlet response-writer output verbs; `write` is the dominant form
// in real servlets (`response.getWriter().write(html)`). All three are
// matched bare because Java collapses the writer chain
// `response.getWriter().write(x)` to the callee text `response.write`
// (the intermediate `getWriter()` call is dropped), so a receiver-typed
// `HttpResponse.write` rule never sees it. The breadth is bounded two
// ways: `System.out.println` / `System.err.println` are excluded by
// `suppress_known_safe_callees`, and `receiver_incompatible_sink_caps`
// strips `HTML_ESCAPE` whenever the receiver resolves to a non-response
// type (a `FileWriter`- or `FileOutputStream`-typed `FileHandle`, a DB
// connection, etc.), so genuine file/stream writes do not register as XSS.
LabelRule {
matchers: &["println", "print"],
matchers: &["println", "print", "write"],
label: DataLabel::Sink(Cap::HTML_ESCAPE),
case_sensitive: false,
},