Skip to content

Commit 2f9320c

Browse files
authored
DOMXPath::quote(string $str): string (#13456)
Adds a method for quoting strings for use in XPath expressions, similar to PDO::quote() / mysqli::real_escape_string(). Sample usage: $xp->query("//span[contains(text()," . $xp->quote($string) . ")]"). The algorithm is derived from Robert Rossney's research into XPath quoting, published at https://stackoverflow.com/a/1352556/1067003, but uses an improved implementation I wrote myself, originally for chrome-php/chrome#575.
1 parent 9603199 commit 2f9320c

File tree

4 files changed

+148
-1
lines changed

4 files changed

+148
-1
lines changed

ext/dom/php_dom.stub.php

+2
Original file line numberDiff line numberDiff line change
@@ -934,6 +934,8 @@ public function registerNamespace(string $prefix, string $namespace): bool {}
934934
public function registerPhpFunctions(string|array|null $restrict = null): void {}
935935

936936
public function registerPhpFunctionNS(string $namespaceURI, string $name, callable $callable): void {}
937+
938+
/**
 * Quote a string for use as a literal inside an XPath expression.
 *
 * Sample usage: $xp->query("//span[contains(text()," . $xp->quote($string) . ")]")
 */
public static function quote(string $str): string {}
937939
}
938940
#endif
939941

ext/dom/php_dom_arginfo.h

+11-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ext/dom/tests/DOMXPath_quote.phpt

+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
--TEST--
2+
Test DOMXPath::quote with various inputs
3+
--EXTENSIONS--
4+
dom
5+
--SKIPIF--
6+
<?php if (!class_exists('DOMXPath')) die('skip DOMXPath not available.'); ?>
7+
--FILE--
8+
<?php
9+
// An empty document is sufficient here; the test only calls quote() on the
// resulting DOMXPath object and never queries the document.
$document = new DOMDocument();
$xpath = new DOMXPath($document);
11+
12+
13+
/**
 * Userland reference implementation of DOMXPath::quote().
 *
 * Turns an arbitrary string into an XPath expression that evaluates to that
 * string:
 *  - if it contains no single quote, it is wrapped in single quotes;
 *  - otherwise, if it contains no double quote, it is wrapped in double quotes;
 *  - otherwise it is split into chunks and emitted as a concat(...) call, each
 *    chunk being delimited by the quote character it does not contain.
 *
 * Example: $xp->query("//span[contains(text()," . $xp->quote($string) . ")]")
 *
 * @param string $string string to quote.
 * @return string quoted string.
 */
function UserlandDOMXPathQuote(string $string): string
{
    if (!\str_contains($string, '\'')) {
        return '\'' . $string . '\'';
    }
    if (!\str_contains($string, '"')) {
        return '"' . $string . '"';
    }

    // The string contains both quote characters. Repeatedly take the longest
    // prefix that is free of one of the two quote characters and delimit it
    // with that character, e.g.:
    //   'foo'"bar  =>  concat("'foo'",'"bar')
    $chunks = [];
    while ($string !== '') {
        $bytesUntilSingleQuote = \strcspn($string, '\'');
        $bytesUntilDoubleQuote = \strcspn($string, '"');

        // Whichever quote character appears later can serve as the delimiter
        // for everything in front of it.
        if ($bytesUntilSingleQuote > $bytesUntilDoubleQuote) {
            $delimiter = "'";
            $chunkLength = $bytesUntilSingleQuote;
        } else {
            $delimiter = '"';
            $chunkLength = $bytesUntilDoubleQuote;
        }

        $chunks[] = $delimiter . \substr($string, 0, $chunkLength) . $delimiter;
        $string = \substr($string, $chunkLength);
    }

    return 'concat(' . \implode(',', $chunks) . ')';
}
45+
46+
47+
48+
// Map of input string => expected return value of DOMXPath::quote().
$tests = [
    '' => "''", // empty string
    'foo' => "'foo'", // no quotes
    '"foo' => '\'"foo\'', // double quotes only
    '\'foo' => '"\'foo"', // single quotes only
    '\'foo"bar' => 'concat("\'foo",\'"bar\')', // both; double quotes in mid-string
    '\'foo"bar"baz' => 'concat("\'foo",\'"bar"baz\')', // multiple double quotes in mid-string
    '\'foo"' => 'concat("\'foo",\'"\')', // string ends with double quotes
    '\'foo""' => 'concat("\'foo",\'""\')', // string ends with run of double quotes
    '"\'foo' => 'concat(\'"\',"\'foo")', // string begins with double quotes
    '""\'foo' => 'concat(\'""\',"\'foo")', // string begins with run of double quotes
    '\'foo""bar' => 'concat("\'foo",\'""bar\')', // run of double quotes in mid-string
];

foreach ($tests as $input => $expected) {
    // quote() is declared static, so invoke it on the class instead of
    // going through an instance.
    $result = DOMXPath::quote($input);
    if ($result === $expected) {
        echo "Pass: {$input} => {$result}\n";
    } else {
        // On mismatch, also dump what the userland reference implementation
        // produces to make the discrepancy easier to diagnose.
        echo 'Fail: ';
        var_dump([
            'input' => $input,
            'expected' => $expected,
            'result' => $result,
            'userland_implementation_result' => UserlandDOMXPathQuote($input),
        ]);
    }
}
76+
?>
77+
--EXPECT--
78+
Pass: => ''
79+
Pass: foo => 'foo'
80+
Pass: "foo => '"foo'
81+
Pass: 'foo => "'foo"
82+
Pass: 'foo"bar => concat("'foo",'"bar')
83+
Pass: 'foo"bar"baz => concat("'foo",'"bar"baz')
84+
Pass: 'foo" => concat("'foo",'"')
85+
Pass: 'foo"" => concat("'foo",'""')
86+
Pass: "'foo => concat('"',"'foo")
87+
Pass: ""'foo => concat('""',"'foo")
88+
Pass: 'foo""bar => concat("'foo",'""bar')

ext/dom/xpath.c

+47
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,53 @@ PHP_METHOD(DOMXPath, registerPhpFunctionNS)
446446
);
447447
}
448448

449+
/* {{{ */
450+
PHP_METHOD(DOMXPath, quote) {
451+
const char *input;
452+
size_t input_len;
453+
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &input, &input_len) == FAILURE) {
454+
RETURN_THROWS();
455+
}
456+
if (memchr(input, '\'', input_len) == NULL) {
457+
zend_string *const output = zend_string_safe_alloc(1, input_len, 2, false);
458+
output->val[0] = '\'';
459+
memcpy(output->val + 1, input, input_len);
460+
output->val[input_len + 1] = '\'';
461+
output->val[input_len + 2] = '\0';
462+
RETURN_STR(output);
463+
} else if (memchr(input, '"', input_len) == NULL) {
464+
zend_string *const output = zend_string_safe_alloc(1, input_len, 2, false);
465+
output->val[0] = '"';
466+
memcpy(output->val + 1, input, input_len);
467+
output->val[input_len + 1] = '"';
468+
output->val[input_len + 2] = '\0';
469+
RETURN_STR(output);
470+
} else {
471+
smart_str output = {0};
472+
// need to use the concat() trick published by Robert Rossney at https://stackoverflow.com/a/1352556/1067003
473+
smart_str_appendl(&output, "concat(", 7);
474+
const char *ptr = input;
475+
const char *const end = input + input_len;
476+
while (ptr < end) {
477+
const char *const single_quote_ptr = memchr(ptr, '\'', end - ptr);
478+
const char *const double_quote_ptr = memchr(ptr, '"', end - ptr);
479+
const size_t distance_to_single_quote = single_quote_ptr ? single_quote_ptr - ptr : end - ptr;
480+
const size_t distance_to_double_quote = double_quote_ptr ? double_quote_ptr - ptr : end - ptr;
481+
const size_t bytes_until_quote = MAX(distance_to_single_quote, distance_to_double_quote);
482+
const char quote_method = (distance_to_single_quote > distance_to_double_quote) ? '\'' : '"';
483+
smart_str_appendc(&output, quote_method);
484+
smart_str_appendl(&output, ptr, bytes_until_quote);
485+
smart_str_appendc(&output, quote_method);
486+
ptr += bytes_until_quote;
487+
smart_str_appendc(&output, ',');
488+
}
489+
ZEND_ASSERT(ptr == end);
490+
output.s->val[output.s->len - 1] = ')';
491+
RETURN_STR(smart_str_extract(&output));
492+
}
493+
}
494+
/* }}} */
495+
449496
#endif /* LIBXML_XPATH_ENABLED */
450497

451498
#endif

0 commit comments

Comments
 (0)