Skip to content

Commit 98b08c5

Browse files
authored
Implement request #68325: parse huge option for xml_parser_create (#12256)
1 parent 06ee697 commit 98b08c5

7 files changed

+170
-13
lines changed

UPGRADING

+5
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,11 @@ PHP 8.4 UPGRADE NOTES
141141
. The IntlDateFormatter class now exposes the new PATTERN constant
142142
reflecting udat api's UDAT_PATTERN.
143143

144+
- XML:
145+
. Added XML_OPTION_PARSE_HUGE to allow large inputs in xml_parse and
146+
xml_parse_into_struct.
147+
RFC: https://wiki.php.net/rfc/xml_option_parse_huge.
148+
144149
========================================
145150
11. Changes to INI File Handling
146151
========================================

ext/xml/php_xml.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ enum php_xml_option {
4343
PHP_XML_OPTION_CASE_FOLDING = 1,
4444
PHP_XML_OPTION_TARGET_ENCODING,
4545
PHP_XML_OPTION_SKIP_TAGSTART,
46-
PHP_XML_OPTION_SKIP_WHITE
46+
PHP_XML_OPTION_SKIP_WHITE,
47+
PHP_XML_OPTION_PARSE_HUGE,
4748
};
4849

4950
#ifdef LIBXML_EXPAT_COMPAT
+91
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
--TEST--
2+
Request #68325 (XML_OPTION_PARSE_HUGE cannot be set for xml_parser_create)
3+
--EXTENSIONS--
4+
xml
5+
--SKIPIF--
6+
<?php
7+
if (!defined("LIBXML_VERSION")) die('skip this is a libxml2 test');
8+
?>
9+
--FILE--
10+
<?php
11+
12+
function logName(string $str) {
13+
if (strlen($str) > 20) {
14+
echo substr($str, 0, 20) . "...\n";
15+
} else {
16+
echo $str . "\n";
17+
}
18+
}
19+
20+
function createParser(bool $huge) {
21+
$parser = xml_parser_create();
22+
echo "old option value: "; var_dump(xml_parser_get_option($parser, XML_OPTION_PARSE_HUGE));
23+
xml_parser_set_option($parser, XML_OPTION_PARSE_HUGE, $huge);
24+
echo "new option value: "; var_dump(xml_parser_get_option($parser, XML_OPTION_PARSE_HUGE));
25+
xml_set_element_handler($parser, function($parser, $data) {
26+
echo "open: ";
27+
logName($data);
28+
}, function($parser, $data) {
29+
});
30+
return $parser;
31+
}
32+
33+
// Construct XML that is too large to parse without XML_OPTION_PARSE_HUGE
34+
$long_text = str_repeat("A", 1000 * 1000 * 5 /* 5 MB */);
35+
$long_xml_head = "<?xml version=\"1.0\"?><container><$long_text/><$long_text/><second>foo</second>";
36+
$long_xml_tail = "</container>";
37+
38+
echo "--- Parse using xml_parse (failure) ---\n";
39+
$parser = createParser(false);
40+
$ret = xml_parse($parser, $long_xml_head, true);
41+
echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";
42+
43+
echo "--- Parse using xml_parse (success) ---\n";
44+
$parser = createParser(true);
45+
$ret = xml_parse($parser, $long_xml_head, false);
46+
echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";
47+
$ret = xml_parse($parser, $long_xml_tail, true);
48+
echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";
49+
50+
echo "--- Parse using xml_parse_into_struct (failure) ---\n";
51+
$parser = createParser(false);
52+
$ret = xml_parse_into_struct($parser, $long_xml_head, $values, $index);
53+
echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";
54+
55+
echo "--- Parse using xml_parse_into_struct (success) ---\n";
56+
$parser = createParser(true);
57+
$ret = xml_parse_into_struct($parser, $long_xml_head . $long_xml_tail, $values, $index);
58+
var_dump(count($values), count($index)); // Not printing out the raw array because the long string will be contained in them as key
59+
echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";
60+
61+
?>
62+
--EXPECT--
63+
--- Parse using xml_parse (failure) ---
64+
old option value: bool(false)
65+
new option value: bool(false)
66+
open: CONTAINER
67+
ret = 0 (XML_ERR_NAME_REQUIRED)
68+
--- Parse using xml_parse (success) ---
69+
old option value: bool(false)
70+
new option value: bool(true)
71+
open: CONTAINER
72+
open: AAAAAAAAAAAAAAAAAAAA...
73+
open: AAAAAAAAAAAAAAAAAAAA...
74+
open: SECOND
75+
ret = 1 (No error)
76+
ret = 1 (No error)
77+
--- Parse using xml_parse_into_struct (failure) ---
78+
old option value: bool(false)
79+
new option value: bool(false)
80+
open: CONTAINER
81+
ret = 0 (XML_ERR_NAME_REQUIRED)
82+
--- Parse using xml_parse_into_struct (success) ---
83+
old option value: bool(false)
84+
new option value: bool(true)
85+
open: CONTAINER
86+
open: AAAAAAAAAAAAAAAAAAAA...
87+
open: AAAAAAAAAAAAAAAAAAAA...
88+
open: SECOND
89+
int(5)
90+
int(3)
91+
ret = 1 (No error)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
--TEST--
2+
Request #68325 (XML_OPTION_PARSE_HUGE cannot be set for xml_parser_create - setting during parsing)
3+
--EXTENSIONS--
4+
xml
5+
--SKIPIF--
6+
<?php
7+
if (!defined("LIBXML_VERSION")) die('skip this is a libxml2 test');
8+
?>
9+
--FILE--
10+
<?php
11+
12+
$parser = xml_parser_create();
13+
xml_set_element_handler($parser, function($parser, $data) {
14+
xml_parser_set_option($parser, XML_OPTION_PARSE_HUGE, true);
15+
}, function($parser, $data) {
16+
});
17+
xml_parse($parser, "<foo/>", true);
18+
19+
?>
20+
--EXPECTF--
21+
Fatal error: Uncaught Error: Cannot change option XML_OPTION_PARSE_HUGE while parsing in %s:%d
22+
Stack trace:
23+
#0 %s(%d): xml_parser_set_option(Object(XMLParser), 5, true)
24+
#1 [internal function]: {closure}(Object(XMLParser), 'FOO', Array)
25+
#2 %s(%d): xml_parse(Object(XMLParser), '<foo/>', true)
26+
#3 {main}
27+
thrown in %s on line %d

ext/xml/xml.c

+38-11
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ typedef struct {
9191
int lastwasopen;
9292
int skipwhite;
9393
int isparsing;
94+
bool parsehuge;
9495

9596
XML_Char *baseURI;
9697

@@ -264,6 +265,28 @@ PHP_MINFO_FUNCTION(xml)
264265

265266
/* {{{ extension-internal functions */
266267

268+
static int xml_parse_helper(xml_parser *parser, const char *data, size_t data_len, bool is_final)
269+
{
270+
ZEND_ASSERT(!parser->isparsing);
271+
272+
/* libxml2 specific options */
273+
#if LIBXML_EXPAT_COMPAT
274+
/* See xmlInitSAXParserCtxt() and xmlCtxtUseOptions() */
275+
if (parser->parsehuge) {
276+
parser->parser->parser->options |= XML_PARSE_HUGE;
277+
xmlDictSetLimit(parser->parser->parser->dict, 0);
278+
} else {
279+
parser->parser->parser->options &= ~XML_PARSE_HUGE;
280+
xmlDictSetLimit(parser->parser->parser->dict, XML_MAX_DICTIONARY_LIMIT);
281+
}
282+
#endif
283+
284+
parser->isparsing = 1;
285+
int ret = XML_Parse(parser->parser, (const XML_Char *) data, data_len, is_final);
286+
parser->isparsing = 0;
287+
return ret;
288+
}
289+
267290
static void _xml_xmlchar_zval(const XML_Char *s, int len, const XML_Char *encoding, zval *ret)
268291
{
269292
if (s == NULL) {
@@ -1024,6 +1047,7 @@ static void php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAMETERS, int ns_supp
10241047
parser->target_encoding = encoding;
10251048
parser->case_folding = 1;
10261049
parser->isparsing = 0;
1050+
parser->parsehuge = false; /* It's the default for BC & DoS protection */
10271051

10281052
XML_SetUserData(parser->parser, parser);
10291053
ZVAL_COPY_VALUE(&parser->index, return_value);
@@ -1283,7 +1307,6 @@ PHP_FUNCTION(xml_parse)
12831307
zval *pind;
12841308
char *data;
12851309
size_t data_len;
1286-
int ret;
12871310
bool isFinal = 0;
12881311

12891312
if (zend_parse_parameters(ZEND_NUM_ARGS(), "Os|b", &pind, xml_parser_ce, &data, &data_len, &isFinal) == FAILURE) {
@@ -1295,10 +1318,7 @@ PHP_FUNCTION(xml_parse)
12951318
zend_throw_error(NULL, "Parser must not be called recursively");
12961319
RETURN_THROWS();
12971320
}
1298-
parser->isparsing = 1;
1299-
ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, isFinal);
1300-
parser->isparsing = 0;
1301-
RETVAL_LONG(ret);
1321+
RETURN_LONG(xml_parse_helper(parser, data, data_len, isFinal));
13021322
}
13031323

13041324
/* }}} */
@@ -1310,7 +1330,6 @@ PHP_FUNCTION(xml_parse_into_struct)
13101330
zval *pind, *xdata, *info = NULL;
13111331
char *data;
13121332
size_t data_len;
1313-
int ret;
13141333

13151334
if (zend_parse_parameters(ZEND_NUM_ARGS(), "Osz|z", &pind, xml_parser_ce, &data, &data_len, &xdata, &info) == FAILURE) {
13161335
RETURN_THROWS();
@@ -1348,11 +1367,7 @@ PHP_FUNCTION(xml_parse_into_struct)
13481367
XML_SetElementHandler(parser->parser, _xml_startElementHandler, _xml_endElementHandler);
13491368
XML_SetCharacterDataHandler(parser->parser, _xml_characterDataHandler);
13501369

1351-
parser->isparsing = 1;
1352-
ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, 1);
1353-
parser->isparsing = 0;
1354-
1355-
RETVAL_LONG(ret);
1370+
RETURN_LONG(xml_parse_helper(parser, data, data_len, true));
13561371
}
13571372
/* }}} */
13581373

@@ -1481,6 +1496,15 @@ PHP_FUNCTION(xml_parser_set_option)
14811496
case PHP_XML_OPTION_SKIP_WHITE:
14821497
parser->skipwhite = zend_is_true(value);
14831498
break;
1499+
/* Boolean option */
1500+
case PHP_XML_OPTION_PARSE_HUGE:
1501+
/* Prevent wreaking havoc on the parser internals during parsing */
1502+
if (UNEXPECTED(parser->isparsing)) {
1503+
zend_throw_error(NULL, "Cannot change option XML_OPTION_PARSE_HUGE while parsing");
1504+
RETURN_THROWS();
1505+
}
1506+
parser->parsehuge = zend_is_true(value);
1507+
break;
14841508
/* Integer option */
14851509
case PHP_XML_OPTION_SKIP_TAGSTART:
14861510
/* The tag start offset is stored in an int */
@@ -1542,6 +1566,9 @@ PHP_FUNCTION(xml_parser_get_option)
15421566
case PHP_XML_OPTION_SKIP_WHITE:
15431567
RETURN_BOOL(parser->skipwhite);
15441568
break;
1569+
case PHP_XML_OPTION_PARSE_HUGE:
1570+
RETURN_BOOL(parser->parsehuge);
1571+
break;
15451572
case PHP_XML_OPTION_TARGET_ENCODING:
15461573
RETURN_STRING((char *)parser->target_encoding);
15471574
break;

ext/xml/xml.stub.php

+5
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,11 @@
133133
* @cvalue PHP_XML_OPTION_SKIP_WHITE
134134
*/
135135
const XML_OPTION_SKIP_WHITE = UNKNOWN;
136+
/**
137+
* @var int
138+
* @cvalue PHP_XML_OPTION_PARSE_HUGE
139+
*/
140+
const XML_OPTION_PARSE_HUGE = UNKNOWN;
136141

137142
/**
138143
* @var string

ext/xml/xml_arginfo.h

+2-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)