Skip to content

Commit 931f089

Browse files
committed
Implement request #68325: parse huge option for xml_parser_create
1 parent edd72da commit 931f089

6 files changed

+158
-13
lines changed

ext/xml/php_xml.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ enum php_xml_option {
4343
PHP_XML_OPTION_CASE_FOLDING = 1,
4444
PHP_XML_OPTION_TARGET_ENCODING,
4545
PHP_XML_OPTION_SKIP_TAGSTART,
46-
PHP_XML_OPTION_SKIP_WHITE
46+
PHP_XML_OPTION_SKIP_WHITE,
47+
PHP_XML_OPTION_PARSE_HUGE,
4748
};
4849

4950
#ifdef LIBXML_EXPAT_COMPAT
+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
--TEST--
2+
Request #68325 (XML_OPTION_PARSE_HUGE cannot be set for xml_parser_create)
3+
--EXTENSIONS--
4+
xml
5+
--FILE--
6+
<?php
7+
8+
/* Print a tag name on its own line; names longer than 20 bytes are cut to
 * their first 20 bytes followed by "..." so the output stays small. */
function logName(string $str) {
    $shown = strlen($str) > 20 ? substr($str, 0, 20) . "..." : $str;
    echo $shown . "\n";
}
15+
16+
/* Create a parser, print XML_OPTION_PARSE_HUGE before and after setting it to
 * $huge, and install element handlers: the start handler logs the element
 * name via logName(), the end handler does nothing. Returns the parser. */
function createParser(bool $huge) {
    $parser = xml_parser_create();

    echo "old option value: ";
    var_dump(xml_parser_get_option($parser, XML_OPTION_PARSE_HUGE));

    xml_parser_set_option($parser, XML_OPTION_PARSE_HUGE, $huge);

    echo "new option value: ";
    var_dump(xml_parser_get_option($parser, XML_OPTION_PARSE_HUGE));

    $onOpen = function($parser, $data) {
        echo "open: ";
        logName($data);
    };
    $onClose = function($parser, $data) {
    };
    xml_set_element_handler($parser, $onOpen, $onClose);

    return $parser;
}
28+
29+
// Construct XML that is too large to parse without XML_OPTION_PARSE_HUGE
30+
$long_text = str_repeat("A", 1000 * 1000 * 5 /* 5 MB */);
31+
$long_xml_head = "<?xml version=\"1.0\"?><container><$long_text/><$long_text/><second>foo</second>";
32+
$long_xml_tail = "</container>";
33+
34+
echo "--- Parse using xml_parse (failure) ---\n";
35+
$parser = createParser(false);
36+
$ret = xml_parse($parser, $long_xml_head, true);
37+
echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";
38+
39+
echo "--- Parse using xml_parse (success) ---\n";
40+
$parser = createParser(true);
41+
$ret = xml_parse($parser, $long_xml_head, false);
42+
echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";
43+
xml_parser_set_option($parser, XML_OPTION_PARSE_HUGE, false); // Switching the option between parse calls shouldn't matter
44+
$ret = xml_parse($parser, $long_xml_tail, true);
45+
echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";
46+
47+
echo "--- Parse using xml_parse_into_struct (failure) ---\n";
48+
$parser = createParser(false);
49+
$ret = xml_parse_into_struct($parser, $long_xml_head, $values, $index);
50+
echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";
51+
52+
echo "--- Parse using xml_parse_into_struct (success) ---\n";
53+
$parser = createParser(true);
54+
$ret = xml_parse_into_struct($parser, $long_xml_head . $long_xml_tail, $values, $index);
55+
var_dump(count($values), count($index)); // Not printing out the raw arrays because the long string is contained in them as a key
56+
echo "ret = $ret (", xml_error_string(xml_get_error_code($parser)), ")\n";
57+
58+
?>
59+
--EXPECT--
60+
--- Parse using xml_parse (failure) ---
61+
old option value: bool(false)
62+
new option value: bool(false)
63+
open: CONTAINER
64+
ret = 0 (XML_ERR_NAME_REQUIRED)
65+
--- Parse using xml_parse (success) ---
66+
old option value: bool(false)
67+
new option value: bool(true)
68+
open: CONTAINER
69+
open: AAAAAAAAAAAAAAAAAAAA...
70+
open: AAAAAAAAAAAAAAAAAAAA...
71+
open: SECOND
72+
ret = 1 (No error)
73+
ret = 1 (No error)
74+
--- Parse using xml_parse_into_struct (failure) ---
75+
old option value: bool(false)
76+
new option value: bool(false)
77+
open: CONTAINER
78+
ret = 0 (XML_ERR_NAME_REQUIRED)
79+
--- Parse using xml_parse_into_struct (success) ---
80+
old option value: bool(false)
81+
new option value: bool(true)
82+
open: CONTAINER
83+
open: AAAAAAAAAAAAAAAAAAAA...
84+
open: AAAAAAAAAAAAAAAAAAAA...
85+
open: SECOND
86+
int(5)
87+
int(3)
88+
ret = 1 (No error)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
--TEST--
2+
Request #68325 (XML_OPTION_PARSE_HUGE cannot be set for xml_parser_create - setting during parsing)
3+
--EXTENSIONS--
4+
xml
5+
--FILE--
6+
<?php
7+
8+
$parser = xml_parser_create();
9+
xml_set_element_handler($parser, function($parser, $data) {
10+
xml_parser_set_option($parser, XML_OPTION_PARSE_HUGE, true);
11+
}, function($parser, $data) {
12+
});
13+
xml_parse($parser, "<foo/>", true);
14+
15+
?>
16+
--EXPECTF--
17+
Fatal error: Uncaught Error: Cannot change option XML_OPTION_PARSE_HUGE while parsing in %s:%d
18+
Stack trace:
19+
#0 %s(%d): xml_parser_set_option(Object(XMLParser), 5, true)
20+
#1 [internal function]: {closure}(Object(XMLParser), 'FOO', Array)
21+
#2 %s(%d): xml_parse(Object(XMLParser), '<foo/>', true)
22+
#3 {main}
23+
thrown in %s on line %d

ext/xml/xml.c

+38-11
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ typedef struct {
108108
int lastwasopen;
109109
int skipwhite;
110110
int isparsing;
111+
bool parsehuge;
111112

112113
XML_Char *baseURI;
113114

@@ -283,6 +284,28 @@ PHP_MINFO_FUNCTION(xml)
283284

284285
/* {{{ extension-internal functions */
285286

287+
/*
 * Shared parse entry point used by xml_parse() and xml_parse_into_struct().
 *
 * Applies the parser's "parse huge" setting to the underlying libxml2 context
 * (only when built against the libxml2 expat compatibility layer), marks the
 * parser as busy for the duration of the call, and feeds `data` to XML_Parse().
 *
 * parser    - parser to drive; must not already be parsing (asserted).
 * data      - input chunk, `data_len` bytes long.
 * is_final  - true when this is the last chunk of the document.
 *
 * Returns the value returned by XML_Parse().
 */
static int xml_parse_helper(xml_parser *parser, const char *data, size_t data_len, bool is_final)
{
    ZEND_ASSERT(!parser->isparsing);

    /* libxml2 specific options.
     * Use #ifdef, as php_xml.h does for this macro, so the guard also works
     * when the macro is undefined (-Wundef) or defined without a value. */
#ifdef LIBXML_EXPAT_COMPAT
    /* See xmlInitSAXParserCtxt() and xmlCtxtUseOptions() */
    if (parser->parsehuge) {
        parser->parser->parser->options |= XML_PARSE_HUGE;
        /* NOTE(review): a limit of 0 is taken to mean "no dictionary limit" - confirm against libxml2 docs */
        xmlDictSetLimit(parser->parser->parser->dict, 0);
    } else {
        parser->parser->parser->options &= ~XML_PARSE_HUGE;
        xmlDictSetLimit(parser->parser->parser->dict, XML_MAX_DICTIONARY_LIMIT);
    }
#endif

    /* Flag the parser as busy for the whole XML_Parse() call; callers check this flag */
    parser->isparsing = 1;
    int ret = XML_Parse(parser->parser, (const XML_Char *) data, data_len, is_final);
    parser->isparsing = 0;
    return ret;
}
308+
286309
static void _xml_xmlchar_zval(const XML_Char *s, int len, const XML_Char *encoding, zval *ret)
287310
{
288311
if (s == NULL) {
@@ -1016,6 +1039,7 @@ static void php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAMETERS, int ns_supp
10161039
parser->target_encoding = encoding;
10171040
parser->case_folding = 1;
10181041
parser->isparsing = 0;
1042+
parser->parsehuge = false; /* It's the default for BC & DoS protection */
10191043

10201044
XML_SetUserData(parser->parser, parser);
10211045
ZVAL_COPY_VALUE(&parser->index, return_value);
@@ -1225,7 +1249,6 @@ PHP_FUNCTION(xml_parse)
12251249
zval *pind;
12261250
char *data;
12271251
size_t data_len;
1228-
int ret;
12291252
bool isFinal = 0;
12301253

12311254
if (zend_parse_parameters(ZEND_NUM_ARGS(), "Os|b", &pind, xml_parser_ce, &data, &data_len, &isFinal) == FAILURE) {
@@ -1237,10 +1260,7 @@ PHP_FUNCTION(xml_parse)
12371260
zend_throw_error(NULL, "Parser must not be called recursively");
12381261
RETURN_THROWS();
12391262
}
1240-
parser->isparsing = 1;
1241-
ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, isFinal);
1242-
parser->isparsing = 0;
1243-
RETVAL_LONG(ret);
1263+
RETURN_LONG(xml_parse_helper(parser, data, data_len, isFinal));
12441264
}
12451265

12461266
/* }}} */
@@ -1252,7 +1272,6 @@ PHP_FUNCTION(xml_parse_into_struct)
12521272
zval *pind, *xdata, *info = NULL;
12531273
char *data;
12541274
size_t data_len;
1255-
int ret;
12561275

12571276
if (zend_parse_parameters(ZEND_NUM_ARGS(), "Osz|z", &pind, xml_parser_ce, &data, &data_len, &xdata, &info) == FAILURE) {
12581277
RETURN_THROWS();
@@ -1290,11 +1309,7 @@ PHP_FUNCTION(xml_parse_into_struct)
12901309
XML_SetElementHandler(parser->parser, _xml_startElementHandler, _xml_endElementHandler);
12911310
XML_SetCharacterDataHandler(parser->parser, _xml_characterDataHandler);
12921311

1293-
parser->isparsing = 1;
1294-
ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, 1);
1295-
parser->isparsing = 0;
1296-
1297-
RETVAL_LONG(ret);
1312+
RETURN_LONG(xml_parse_helper(parser, data, data_len, true));
12981313
}
12991314
/* }}} */
13001315

@@ -1423,6 +1438,15 @@ PHP_FUNCTION(xml_parser_set_option)
14231438
case PHP_XML_OPTION_SKIP_WHITE:
14241439
parser->skipwhite = zend_is_true(value);
14251440
break;
1441+
/* Boolean option */
1442+
case PHP_XML_OPTION_PARSE_HUGE:
1443+
/* Prevent wreaking havoc on the parser internals during parsing */
1444+
if (UNEXPECTED(parser->isparsing)) {
1445+
zend_throw_error(NULL, "Cannot change option XML_OPTION_PARSE_HUGE while parsing");
1446+
RETURN_THROWS();
1447+
}
1448+
parser->parsehuge = zend_is_true(value);
1449+
break;
14261450
/* Integer option */
14271451
case PHP_XML_OPTION_SKIP_TAGSTART:
14281452
/* The tag start offset is stored in an int */
@@ -1484,6 +1508,9 @@ PHP_FUNCTION(xml_parser_get_option)
14841508
case PHP_XML_OPTION_SKIP_WHITE:
14851509
RETURN_BOOL(parser->skipwhite);
14861510
break;
1511+
case PHP_XML_OPTION_PARSE_HUGE:
1512+
RETURN_BOOL(parser->parsehuge);
1513+
break;
14871514
case PHP_XML_OPTION_TARGET_ENCODING:
14881515
RETURN_STRING((char *)parser->target_encoding);
14891516
break;

ext/xml/xml.stub.php

+5
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,11 @@
133133
* @cvalue PHP_XML_OPTION_SKIP_WHITE
134134
*/
135135
const XML_OPTION_SKIP_WHITE = UNKNOWN;
136+
/**
137+
* @var int
138+
* @cvalue PHP_XML_OPTION_PARSE_HUGE
139+
*/
140+
const XML_OPTION_PARSE_HUGE = UNKNOWN;
136141

137142
/**
138143
* @var string

ext/xml/xml_arginfo.h

+2-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)