[pitboss] phase 05: Track J.3 + Track L.3 — XXE corpus + DocumentBuilder / lxml / libxml / SimpleXML adapters

This commit is contained in:
pitboss 2026-05-17 20:39:12 -05:00
parent 637b733928
commit 4de925c3ef
35 changed files with 1985 additions and 23 deletions

View file

@ -0,0 +1,25 @@
// Phase 05 (Track J.3) — Go XXE benign fixture.
//
// Same parser surface as `vuln.go` but `Strict` is left at the
// default `true`, so a reference to an entity declared in the doctype
// (e.g. `&xxe;`) is rejected as an invalid character entity and no
// entity body is substituted.
package benign
import (
"bytes"
"encoding/xml"
)
// Data is the decode target for a `<data>` root element. The element's
// character data is stored in Value.
type Data struct {
	XMLName xml.Name `xml:"data"`
	Value   string   `xml:",chardata"`
}

// Run decodes body as XML into a Data value. The decoder is used exactly
// as xml.NewDecoder returns it, with no fields changed. On a decode
// failure Run returns a nil *Data together with the decoder's error.
func Run(body string) (*Data, error) {
	var parsed Data
	dec := xml.NewDecoder(bytes.NewReader([]byte(body)))
	if decodeErr := dec.Decode(&parsed); decodeErr != nil {
		return nil, decodeErr
	}
	return &parsed, nil
}

View file

@ -0,0 +1,27 @@
// Phase 05 (Track J.3) — Go XXE vuln fixture.
//
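// The function builds an `encoding/xml.Decoder` against the attacker
// payload with `Strict = false`, relaxing the parser so that a reference
// to an entity declared via `<!ENTITY xxe SYSTEM "file:///…">` in the
// payload is accepted instead of being rejected.
// NOTE(review): per the `encoding/xml` documentation, non-strict mode
// leaves unknown entities in the character data verbatim rather than
// fetching and substituting them — confirm this fixture models the
// intended sink.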
package vuln
import (
"bytes"
"encoding/xml"
)
// Data is the decode target for a `<data>` root element. The element's
// character data is stored in Value.
type Data struct {
	XMLName xml.Name `xml:"data"`
	Value   string   `xml:",chardata"`
}

// Run decodes body as XML into a Data value using a decoder whose Strict
// field has been switched off. On a decode failure Run returns a nil
// *Data together with the decoder's error.
func Run(body string) (*Data, error) {
	var parsed Data
	dec := xml.NewDecoder(bytes.NewReader([]byte(body)))
	// Deliberately relaxed: this is the behaviour the fixture exists to exhibit.
	dec.Strict = false
	if decodeErr := dec.Decode(&parsed); decodeErr != nil {
		return nil, decodeErr
	}
	return &parsed, nil
}

View file

@ -0,0 +1,18 @@
// Phase 05 (Track J.3) — Java XXE benign fixture.
//
// Same parser surface as `vuln.java` but the factory is hardened with
// `disallow-doctype-decl`, so the same payload's `<!ENTITY>` block is
// rejected at parse time and no entity body is substituted.
import java.io.ByteArrayInputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
public class Benign {
public static Document run(byte[] payload) throws Exception {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
DocumentBuilder builder = factory.newDocumentBuilder();
return builder.parse(new ByteArrayInputStream(payload));
}
}

View file

@ -0,0 +1,19 @@
// Phase 05 (Track J.3) — Java XXE vuln fixture.
//
// The function feeds attacker bytes to a stock `DocumentBuilderFactory`
// without setting `disallow-doctype-decl` or
// `XMLConstants.FEATURE_SECURE_PROCESSING`, so any
// `<!ENTITY xxe SYSTEM "file:///…">` declaration in the payload is
// resolved and its body substituted into the parsed tree.
import java.io.ByteArrayInputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
public class Vuln {
public static Document run(byte[] payload) throws Exception {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
return builder.parse(new ByteArrayInputStream(payload));
}
}

View file

@ -0,0 +1,10 @@
<?php
// Phase 05 (Track J.3) — PHP XXE benign fixture.
//
// Same parser surface as `vuln.php` but the entity loader stays
// disabled and the LIBXML_NOENT flag is omitted, so the same payload's
// `<!ENTITY>` block is rejected and no entity body is substituted.
/**
 * Parse an XML string with SimpleXML while keeping the libxml external
 * entity loader disabled and without passing LIBXML_NOENT.
 *
 * @param string $body Raw XML document.
 * @return SimpleXMLElement|false Parsed document, or false when parsing fails.
 */
function run(string $body) {
    // libxml_disable_entity_loader() is deprecated as of PHP 8.0, which
    // requires libxml >= 2.9.0 where external entity loading is already
    // disabled by default. Only call it on older runtimes so PHP 8+ does
    // not raise E_DEPRECATED on every parse.
    if (\PHP_VERSION_ID < 80000) {
        libxml_disable_entity_loader(true);
    }
    return simplexml_load_string($body);
}

View file

@ -0,0 +1,11 @@
<?php
// Phase 05 (Track J.3) — PHP XXE vuln fixture.
//
// The function pulls XML off the request and feeds it to
// `simplexml_load_string` after re-enabling the libxml entity loader
// — so any `<!ENTITY xxe SYSTEM "file:///…">` in the payload is
// resolved and its body substituted into the parsed document.
/**
 * Parse an XML string with SimpleXML after switching the libxml entity
 * loader back on and requesting entity substitution via LIBXML_NOENT.
 *
 * @param string $body Raw XML document.
 * @return SimpleXMLElement|false Result of simplexml_load_string().
 */
function run(string $body) {
    // Intentionally permissive: this fixture must stay vulnerable.
    libxml_disable_entity_loader(false);
    $parsed = simplexml_load_string($body, "SimpleXMLElement", LIBXML_NOENT);
    return $parsed;
}

View file

@ -0,0 +1,12 @@
"""Phase 05 (Track J.3) — Python XXE benign fixture.
Same parser surface as `vuln.py` but the parser is configured with
`resolve_entities=False` and `no_network=True`, so the same payload's
`<!ENTITY>` block is rejected and no entity body is substituted.
"""
from lxml import etree
def run(body: bytes):
    """Parse ``body`` with an lxml parser created with
    ``resolve_entities=False`` and ``no_network=True``.

    Returns whatever ``etree.fromstring`` produces for the payload.
    """
    hardened = etree.XMLParser(no_network=True, resolve_entities=False)
    root = etree.fromstring(body, parser=hardened)
    return root

View file

@ -0,0 +1,13 @@
"""Phase 05 (Track J.3) — Python XXE vuln fixture.
The function pulls XML bytes off the request and feeds them straight
to `lxml.etree.XMLParser(resolve_entities=True)`, so any
`<!ENTITY xxe SYSTEM "file:///…">` in the payload is resolved and its
body substituted into the parsed tree.
"""
from lxml import etree
def run(body: bytes):
    """Parse ``body`` with an lxml parser created with
    ``resolve_entities=True``.

    Returns whatever ``etree.fromstring`` produces for the payload.
    """
    # Intentionally permissive: this fixture must stay vulnerable.
    permissive = etree.XMLParser(resolve_entities=True)
    root = etree.fromstring(body, parser=permissive)
    return root

View file

@ -0,0 +1,11 @@
# Phase 05 (Track J.3) — Ruby XXE benign fixture.
#
# Same parser surface as `vuln.rb` but the document is built under
# `REXML::Document::entity_expansion_limit = 0`, so the same payload's
# `<!ENTITY>` block triggers no expansion.
require 'rexml/document'
# Builds a REXML document from +body+ after setting
# REXML::Document.entity_expansion_limit to zero.
#
# Returns the REXML::Document created from the payload.
def run(body)
  REXML::Document.entity_expansion_limit = 0
  document = REXML::Document.new(body)
  document
end

View file

@ -0,0 +1,11 @@
# Phase 05 (Track J.3) — Ruby XXE vuln fixture.
#
# The function feeds attacker XML straight to `REXML::Document.new`
# without disabling entity expansion, so any `<!ENTITY xxe SYSTEM
# "file:///…">` in the payload is resolved and its body substituted
# into the parsed document.
require 'rexml/document'
# Builds a REXML document directly from +body+ without changing any
# REXML settings first.
#
# Returns the REXML::Document created from the payload.
def run(body)
  document = REXML::Document.new(body)
  document
end