Skip to content

Commit 704409a

Browse files
committed
Fix bug #70570: json_encode and var_dump ignore simplexml cdata
XML_CDATA_SECTION_NODE was simply not handled, handle it the same way as we handle text nodes for consistency reasons. We introduce the php_sxe_is_inclusive_text_node() helper for text-like nodes. In DOM parlance, these 2 types are called inclusive text nodes. Unfortunately, the fact that we handle CData the same as text now has a technical BC break. Previously this XML: ``` <?xml version="1.0" encoding="UTF-8"?> <container> <C><![CDATA[hello]]><foo/><![CDATA[world]]></C> </container> ``` resulted in this var_dump output: ``` object(SimpleXMLElement)#1 (1) { ["C"]=> object(SimpleXMLElement)#2 (1) { ["foo"]=> object(SimpleXMLElement)#3 (0) { } } } ``` However, after this patch (as text is not an element and thus handled specially) we get the following output: ``` object(SimpleXMLElement)#1 (1) { ["C"]=> string(10) "helloworld" } ```
1 parent 8c2c694 commit 704409a

File tree

2 files changed

+105
-6
lines changed

2 files changed

+105
-6
lines changed

ext/simplexml/simplexml.c

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,11 @@ static void php_sxe_iterator_move_forward(zend_object_iterator *iter);
5353
static void php_sxe_iterator_rewind(zend_object_iterator *iter);
5454
static zend_result sxe_object_cast_ex(zend_object *readobj, zval *writeobj, int type);
5555

56+
static zend_always_inline bool php_sxe_is_inclusive_text_node(const xmlNode *node)
57+
{
58+
return node->type == XML_TEXT_NODE || node->type == XML_CDATA_SECTION_NODE;
59+
}
60+
5661
/* {{{ _node_as_zval() */
5762
static void _node_as_zval(php_sxe_object *sxe, xmlNodePtr node, zval *value, SXE_ITER itertype, char *name, const xmlChar *nsprefix, int isprefix)
5863
{
@@ -746,7 +751,7 @@ static int sxe_prop_dim_exists(zend_object *object, zval *member, int check_empt
746751
if (node) {
747752
exists = 1;
748753
if (check_empty == 1 &&
749-
(!node->children || (node->children->type == XML_TEXT_NODE && !node->children->next &&
754+
(!node->children || (php_sxe_is_inclusive_text_node(node->children) && !node->children->next &&
750755
(!node->children->content || !node->children->content[0] || xmlStrEqual(node->children->content, (const xmlChar *) "0")))) ) {
751756
exists = 0;
752757
}
@@ -928,7 +933,7 @@ static void _get_base_node_value(php_sxe_object *sxe_ref, xmlNodePtr node, zval
928933
php_sxe_object *subnode;
929934
xmlChar *contents;
930935

931-
if (node->children && node->children->type == XML_TEXT_NODE && !xmlIsBlankNode(node->children)) {
936+
if (node->children && php_sxe_is_inclusive_text_node(node->children) && !xmlIsBlankNode(node->children)) {
932937
contents = xmlNodeListGetString(node->doc, node->children, 1);
933938
if (contents) {
934939
ZVAL_STRING(value, (char *)contents);
@@ -1026,7 +1031,7 @@ static int sxe_prop_is_empty(zend_object *object) /* {{{ */
10261031
if (node->children != NULL || node->prev != NULL || node->next != NULL) {
10271032
SKIP_TEXT(node);
10281033
} else {
1029-
if (node->type == XML_TEXT_NODE) {
1034+
if (php_sxe_is_inclusive_text_node(node)) {
10301035
const xmlChar *cur = node->content;
10311036
if (*cur != 0) {
10321037
is_empty = 0;
@@ -1147,7 +1152,7 @@ static HashTable *sxe_get_prop_hash(zend_object *object, int is_debug) /* {{{ */
11471152
if (node->children != NULL || node->prev != NULL || node->next != NULL || xmlIsBlankNode(node)) {
11481153
SKIP_TEXT(node);
11491154
} else {
1150-
if (node->type == XML_TEXT_NODE) {
1155+
if (php_sxe_is_inclusive_text_node(node)) {
11511156
const xmlChar *cur = node->content;
11521157

11531158
if (*cur != 0) {
@@ -1313,13 +1318,13 @@ PHP_METHOD(SimpleXMLElement, xpath)
13131318

13141319
for (i = 0; i < result->nodeNr; ++i) {
13151320
nodeptr = result->nodeTab[i];
1316-
if (nodeptr->type == XML_TEXT_NODE || nodeptr->type == XML_ELEMENT_NODE || nodeptr->type == XML_ATTRIBUTE_NODE) {
1321+
if (php_sxe_is_inclusive_text_node(nodeptr) || nodeptr->type == XML_ELEMENT_NODE || nodeptr->type == XML_ATTRIBUTE_NODE) {
13171322
/**
13181323
* Detect the case where the last selector is text(), simplexml
13191324
* always accesses the text() child by default, therefore we assign
13201325
* to the parent node.
13211326
*/
1322-
if (nodeptr->type == XML_TEXT_NODE) {
1327+
if (php_sxe_is_inclusive_text_node(nodeptr)) {
13231328
_node_as_zval(sxe, nodeptr->parent, &value, SXE_ITER_NONE, NULL, NULL, 0);
13241329
} else if (nodeptr->type == XML_ATTRIBUTE_NODE) {
13251330
_node_as_zval(sxe, nodeptr->parent, &value, SXE_ITER_ATTRLIST, (char*)nodeptr->name, nodeptr->ns ? (xmlChar *)nodeptr->ns->href : NULL, 0);

ext/simplexml/tests/bug70570.phpt

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
--TEST--
2+
Bug #70570 (json_encode and var_dump ignore simplexml cdata)
3+
--EXTENSIONS--
4+
simplexml
5+
--FILE--
6+
<?php
7+
8+
$input = <<<XML
9+
<?xml version="1.0" encoding="UTF-8"?>
10+
<container>
11+
<A><![CDATA[0]]></A>
12+
<B><![CDATA[hello world]]></B>
13+
<C><![CDATA[hello]]><foo/><![CDATA[world]]></C>
14+
<D><foo/><![CDATA[hello]]><bar/></D>
15+
</container>
16+
XML;
17+
18+
$xml = simplexml_load_string($input);
19+
20+
var_dump($xml);
21+
22+
foreach (["A", "B"] as $item) {
23+
echo "--- Testing $item ---\n";
24+
var_dump($xml->xpath("//$item/text()"));
25+
var_dump($xml->$item);
26+
var_dump(isset($xml->$item));
27+
var_dump(empty($xml->$item));
28+
var_dump((bool) ($xml->$item));
29+
}
30+
31+
echo "--- Testing C ---\n";
32+
33+
var_dump($xml->C); // Equivalent to how (normal) text nodes behave
34+
var_dump((string) $xml->C);
35+
var_dump($xml->C->foo);
36+
37+
?>
38+
--EXPECT--
39+
object(SimpleXMLElement)#1 (4) {
40+
["A"]=>
41+
string(1) "0"
42+
["B"]=>
43+
string(11) "hello world"
44+
["C"]=>
45+
string(10) "helloworld"
46+
["D"]=>
47+
object(SimpleXMLElement)#2 (2) {
48+
["foo"]=>
49+
object(SimpleXMLElement)#3 (0) {
50+
}
51+
["bar"]=>
52+
object(SimpleXMLElement)#4 (0) {
53+
}
54+
}
55+
}
56+
--- Testing A ---
57+
array(1) {
58+
[0]=>
59+
object(SimpleXMLElement)#2 (1) {
60+
[0]=>
61+
string(1) "0"
62+
}
63+
}
64+
object(SimpleXMLElement)#2 (1) {
65+
[0]=>
66+
string(1) "0"
67+
}
68+
bool(true)
69+
bool(true)
70+
bool(true)
71+
--- Testing B ---
72+
array(1) {
73+
[0]=>
74+
object(SimpleXMLElement)#2 (1) {
75+
[0]=>
76+
string(11) "hello world"
77+
}
78+
}
79+
object(SimpleXMLElement)#2 (1) {
80+
[0]=>
81+
string(11) "hello world"
82+
}
83+
bool(true)
84+
bool(false)
85+
bool(true)
86+
--- Testing C ---
87+
object(SimpleXMLElement)#2 (1) {
88+
["foo"]=>
89+
object(SimpleXMLElement)#3 (0) {
90+
}
91+
}
92+
string(10) "helloworld"
93+
object(SimpleXMLElement)#3 (0) {
94+
}

0 commit comments

Comments
 (0)