Skip to content

Implement request #68325: parse huge option for xml_parser_create #12256

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions UPGRADING
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,11 @@ PHP 8.4 UPGRADE NOTES
. The IntlDateFormatter class now exposes the new PATTERN constant
reflecting udat api's UDAT_PATTERN.

- XML:
. Added XML_OPTION_PARSE_HUGE to allow large inputs in xml_parse and
xml_parse_into_struct.
RFC: https://wiki.php.net/rfc/xml_option_parse_huge.

========================================
11. Changes to INI File Handling
========================================
Expand Down
3 changes: 2 additions & 1 deletion ext/xml/php_xml.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ enum php_xml_option {
PHP_XML_OPTION_CASE_FOLDING = 1,
PHP_XML_OPTION_TARGET_ENCODING,
PHP_XML_OPTION_SKIP_TAGSTART,
PHP_XML_OPTION_SKIP_WHITE
PHP_XML_OPTION_SKIP_WHITE,
PHP_XML_OPTION_PARSE_HUGE,
};

#ifdef LIBXML_EXPAT_COMPAT
Expand Down
91 changes: 91 additions & 0 deletions ext/xml/tests/XML_OPTION_PARSE_HUGE.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
--TEST--
Request #68325 (XML_OPTION_PARSE_HUGE cannot be set for xml_parser_create)
--EXTENSIONS--
xml
--SKIPIF--
<?php
if (!defined("LIBXML_VERSION")) die('skip this is a libxml2 test');
?>
--FILE--
<?php

/**
 * Print a name on its own line, abbreviated to its first 20 bytes followed
 * by "..." when it is longer than 20 bytes.
 */
function logName(string $str) {
    $shown = strlen($str) > 20 ? substr($str, 0, 20) . "..." : $str;
    echo $shown . "\n";
}

/**
 * Create an XML parser, dump XML_OPTION_PARSE_HUGE before and after setting
 * it to $huge, and install element handlers that log every opening tag name.
 *
 * @param bool $huge value to assign to XML_OPTION_PARSE_HUGE
 * @return XMLParser the configured parser
 */
function createParser(bool $huge) {
    $parser = xml_parser_create();

    echo "old option value: ";
    var_dump(xml_parser_get_option($parser, XML_OPTION_PARSE_HUGE));

    xml_parser_set_option($parser, XML_OPTION_PARSE_HUGE, $huge);

    echo "new option value: ";
    var_dump(xml_parser_get_option($parser, XML_OPTION_PARSE_HUGE));

    // Opening tags are logged (abbreviated by logName); closing tags are ignored.
    $onOpen = function($parser, $name) {
        echo "open: ";
        logName($name);
    };
    $onClose = function($parser, $name) {
    };
    xml_set_element_handler($parser, $onOpen, $onClose);

    return $parser;
}

// Construct XML that is too large to parse without XML_OPTION_PARSE_HUGE
$long_text = str_repeat("A", 1000 * 1000 * 5 /* 5 MB */);
// The document is split into a head and a tail so that the xml_parse() success
// case can feed it to the parser in two chunks.
$long_xml_head = "<?xml version=\"1.0\"?><container><$long_text/><$long_text/><second>foo</second>";
$long_xml_tail = "</container>";

// Option disabled: per the expected output, only <container> is reported
// before parsing fails with XML_ERR_NAME_REQUIRED.
echo "--- Parse using xml_parse (failure) ---\n";
$parser = createParser(false);
$ret = xml_parse($parser, $long_xml_head, true);
echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";

// Option enabled: the head is passed as a non-final chunk, the tail as the final one.
echo "--- Parse using xml_parse (success) ---\n";
$parser = createParser(true);
$ret = xml_parse($parser, $long_xml_head, false);
echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";
$ret = xml_parse($parser, $long_xml_tail, true);
echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";

// Same two scenarios through xml_parse_into_struct().
echo "--- Parse using xml_parse_into_struct (failure) ---\n";
$parser = createParser(false);
$ret = xml_parse_into_struct($parser, $long_xml_head, $values, $index);
echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";

// Here the whole document (head + tail) is passed in a single call.
echo "--- Parse using xml_parse_into_struct (success) ---\n";
$parser = createParser(true);
$ret = xml_parse_into_struct($parser, $long_xml_head . $long_xml_tail, $values, $index);
var_dump(count($values), count($index)); // Not printing out the raw array because the long string will be contained in them as key
echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";

?>
--EXPECT--
--- Parse using xml_parse (failure) ---
old option value: bool(false)
new option value: bool(false)
open: CONTAINER
ret = 0 (XML_ERR_NAME_REQUIRED)
--- Parse using xml_parse (success) ---
old option value: bool(false)
new option value: bool(true)
open: CONTAINER
open: AAAAAAAAAAAAAAAAAAAA...
open: AAAAAAAAAAAAAAAAAAAA...
open: SECOND
ret = 1 (No error)
ret = 1 (No error)
--- Parse using xml_parse_into_struct (failure) ---
old option value: bool(false)
new option value: bool(false)
open: CONTAINER
ret = 0 (XML_ERR_NAME_REQUIRED)
--- Parse using xml_parse_into_struct (success) ---
old option value: bool(false)
new option value: bool(true)
open: CONTAINER
open: AAAAAAAAAAAAAAAAAAAA...
open: AAAAAAAAAAAAAAAAAAAA...
open: SECOND
int(5)
int(3)
ret = 1 (No error)
27 changes: 27 additions & 0 deletions ext/xml/tests/XML_OPTION_PARSE_HUGE_during_parsing.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
--TEST--
Request #68325 (XML_OPTION_PARSE_HUGE cannot be set for xml_parser_create - setting during parsing)
--EXTENSIONS--
xml
--SKIPIF--
<?php
if (!defined("LIBXML_VERSION")) die('skip this is a libxml2 test');
?>
--FILE--
<?php

// The start-element handler tries to change XML_OPTION_PARSE_HUGE while
// xml_parse() is still running; the expected output shows this raising an
// uncaught Error ("Cannot change option XML_OPTION_PARSE_HUGE while parsing").
$parser = xml_parser_create();
xml_set_element_handler($parser, function($parser, $data) {
xml_parser_set_option($parser, XML_OPTION_PARSE_HUGE, true);
}, function($parser, $data) {
});
xml_parse($parser, "<foo/>", true);

?>
--EXPECTF--
Fatal error: Uncaught Error: Cannot change option XML_OPTION_PARSE_HUGE while parsing in %s:%d
Stack trace:
#0 %s(%d): xml_parser_set_option(Object(XMLParser), 5, true)
#1 [internal function]: {closure}(Object(XMLParser), 'FOO', Array)
#2 %s(%d): xml_parse(Object(XMLParser), '<foo/>', true)
#3 {main}
thrown in %s on line %d
49 changes: 38 additions & 11 deletions ext/xml/xml.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ typedef struct {
int lastwasopen;
int skipwhite;
int isparsing;
bool parsehuge;

XML_Char *baseURI;

Expand Down Expand Up @@ -264,6 +265,28 @@ PHP_MINFO_FUNCTION(xml)

/* {{{ extension-internal functions */

/*
 * Common parse routine behind xml_parse() and xml_parse_into_struct().
 *
 * Applies the parser's "parse huge" setting to the underlying libxml2 context
 * (libxml2 builds only), then runs XML_Parse() with the isparsing flag raised
 * for the duration of the call.
 *
 * parser    the PHP-level parser state; must not already be parsing
 * data      input buffer handed to XML_Parse()
 * data_len  length of the input buffer in bytes
 * is_final  whether this is the last chunk of the document
 *
 * Returns the value returned by XML_Parse().
 */
static int xml_parse_helper(xml_parser *parser, const char *data, size_t data_len, bool is_final)
{
	/* xml_parse() throws on recursive use before calling this helper;
	 * NOTE(review): other callers are expected to guard the same way. */
	ZEND_ASSERT(!parser->isparsing);

	/* libxml2 specific options */
	/* #ifdef (not #if) to match the guard used in php_xml.h and to stay
	 * -Wundef clean when building against expat, where the macro is absent. */
#ifdef LIBXML_EXPAT_COMPAT
	/* See xmlInitSAXParserCtxt() and xmlCtxtUseOptions() */
	if (parser->parsehuge) {
		parser->parser->parser->options |= XML_PARSE_HUGE;
		/* A limit of 0 is passed to lift the dictionary size limit */
		xmlDictSetLimit(parser->parser->parser->dict, 0);
	} else {
		parser->parser->parser->options &= ~XML_PARSE_HUGE;
		xmlDictSetLimit(parser->parser->parser->dict, XML_MAX_DICTIONARY_LIMIT);
	}
#endif

	/* The flag is raised only around XML_Parse() so that option changes made
	 * from within handlers can be detected and rejected. */
	parser->isparsing = 1;
	int ret = XML_Parse(parser->parser, (const XML_Char *) data, data_len, is_final);
	parser->isparsing = 0;
	return ret;
}

static void _xml_xmlchar_zval(const XML_Char *s, int len, const XML_Char *encoding, zval *ret)
{
if (s == NULL) {
Expand Down Expand Up @@ -1024,6 +1047,7 @@ static void php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAMETERS, int ns_supp
parser->target_encoding = encoding;
parser->case_folding = 1;
parser->isparsing = 0;
parser->parsehuge = false; /* It's the default for BC & DoS protection */

XML_SetUserData(parser->parser, parser);
ZVAL_COPY_VALUE(&parser->index, return_value);
Expand Down Expand Up @@ -1283,7 +1307,6 @@ PHP_FUNCTION(xml_parse)
zval *pind;
char *data;
size_t data_len;
int ret;
bool isFinal = 0;

if (zend_parse_parameters(ZEND_NUM_ARGS(), "Os|b", &pind, xml_parser_ce, &data, &data_len, &isFinal) == FAILURE) {
Expand All @@ -1295,10 +1318,7 @@ PHP_FUNCTION(xml_parse)
zend_throw_error(NULL, "Parser must not be called recursively");
RETURN_THROWS();
}
parser->isparsing = 1;
ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, isFinal);
parser->isparsing = 0;
RETVAL_LONG(ret);
RETURN_LONG(xml_parse_helper(parser, data, data_len, isFinal));
}

/* }}} */
Expand All @@ -1310,7 +1330,6 @@ PHP_FUNCTION(xml_parse_into_struct)
zval *pind, *xdata, *info = NULL;
char *data;
size_t data_len;
int ret;

if (zend_parse_parameters(ZEND_NUM_ARGS(), "Osz|z", &pind, xml_parser_ce, &data, &data_len, &xdata, &info) == FAILURE) {
RETURN_THROWS();
Expand Down Expand Up @@ -1348,11 +1367,7 @@ PHP_FUNCTION(xml_parse_into_struct)
XML_SetElementHandler(parser->parser, _xml_startElementHandler, _xml_endElementHandler);
XML_SetCharacterDataHandler(parser->parser, _xml_characterDataHandler);

parser->isparsing = 1;
ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, 1);
parser->isparsing = 0;

RETVAL_LONG(ret);
RETURN_LONG(xml_parse_helper(parser, data, data_len, true));
}
/* }}} */

Expand Down Expand Up @@ -1481,6 +1496,15 @@ PHP_FUNCTION(xml_parser_set_option)
case PHP_XML_OPTION_SKIP_WHITE:
parser->skipwhite = zend_is_true(value);
break;
/* Boolean option */
case PHP_XML_OPTION_PARSE_HUGE:
/* Prevent wreaking havoc on the parser internals during parsing */
if (UNEXPECTED(parser->isparsing)) {
zend_throw_error(NULL, "Cannot change option XML_OPTION_PARSE_HUGE while parsing");
RETURN_THROWS();
}
parser->parsehuge = zend_is_true(value);
break;
/* Integer option */
case PHP_XML_OPTION_SKIP_TAGSTART:
/* The tag start offset is stored in an int */
Expand Down Expand Up @@ -1542,6 +1566,9 @@ PHP_FUNCTION(xml_parser_get_option)
case PHP_XML_OPTION_SKIP_WHITE:
RETURN_BOOL(parser->skipwhite);
break;
case PHP_XML_OPTION_PARSE_HUGE:
RETURN_BOOL(parser->parsehuge);
break;
case PHP_XML_OPTION_TARGET_ENCODING:
RETURN_STRING((char *)parser->target_encoding);
break;
Expand Down
5 changes: 5 additions & 0 deletions ext/xml/xml.stub.php
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@
* @cvalue PHP_XML_OPTION_SKIP_WHITE
*/
const XML_OPTION_SKIP_WHITE = UNKNOWN;
/**
* @var int
* @cvalue PHP_XML_OPTION_PARSE_HUGE
*/
const XML_OPTION_PARSE_HUGE = UNKNOWN;

/**
* @var string
Expand Down
3 changes: 2 additions & 1 deletion ext/xml/xml_arginfo.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.