Skip to content

Commit 64ad924

Browse files
committed
Fix GitHub issue #23: an escape-character case was not being parsed properly.
It turns out that unrecognized characters in escape sequences should be tolerated and passed through. This change implements that and also adds support for \0 in string literals:

    $ python3
    >>> import json5
    >>> json5.loads(r'"\/"')
    '/'
    >>> json5.loads(r'"\0"')
    '\x00'
    >>> quit()
    $
1 parent 12dfb79 commit 64ad924

File tree

3 files changed

+39
-4
lines changed

3 files changed

+39
-4
lines changed

json5/json5.g

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ esc_char = 'b' -> '\u0008'
5252
| squote -> '\u0027'
5353
| dquote -> '\u0022'
5454
| bslash -> '\u005C'
55+
| ~('x'|'u'|digit|eol) anything:c -> c
56+
| '0' ~digit -> '\u0000'
5557
| hex_esc:c -> c
5658
| unicode_esc:c -> c
5759

json5/parser.py

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,8 @@ def _esc_char_(self):
393393
self._esc_char__c4_, self._esc_char__c5_,
394394
self._esc_char__c6_, self._esc_char__c7_,
395395
self._esc_char__c8_, self._esc_char__c9_,
396-
self._esc_char__c10_])
396+
self._esc_char__c10_, self._esc_char__c11_,
397+
self._esc_char__c12_])
397398

398399
def _esc_char__c0_(self):
399400
self._seq([lambda: self._ch('b'), lambda: self._succeed('\b')])
@@ -402,10 +403,20 @@ def _esc_char__c1_(self):
402403
self._seq([lambda: self._ch('f'), lambda: self._succeed('\f')])
403404

404405
def _esc_char__c10_(self):
405-
self._push('esc_char__c10')
406+
self._seq([lambda: self._ch('0'), lambda: self._not(self._digit_),
407+
lambda: self._succeed('\x00')])
408+
409+
def _esc_char__c11_(self):
410+
self._push('esc_char__c11')
411+
self._seq([lambda: self._bind(self._hex_esc_, 'c'),
412+
lambda: self._succeed(self._get('c'))])
413+
self._pop('esc_char__c11')
414+
415+
def _esc_char__c12_(self):
416+
self._push('esc_char__c12')
406417
self._seq([lambda: self._bind(self._unicode_esc_, 'c'),
407418
lambda: self._succeed(self._get('c'))])
408-
self._pop('esc_char__c10')
419+
self._pop('esc_char__c12')
409420

410421
def _esc_char__c2_(self):
411422
self._seq([lambda: self._ch('n'), lambda: self._succeed('\n')])
@@ -430,10 +441,26 @@ def _esc_char__c8_(self):
430441

431442
def _esc_char__c9_(self):
432443
self._push('esc_char__c9')
433-
self._seq([lambda: self._bind(self._hex_esc_, 'c'),
444+
self._seq([self._esc_char__c9__s0_,
445+
lambda: self._bind(self._anything_, 'c'),
434446
lambda: self._succeed(self._get('c'))])
435447
self._pop('esc_char__c9')
436448

449+
def _esc_char__c9__s0_(self):
450+
self._not(lambda: (self._esc_char__c9__s0_n_g_)())
451+
452+
def _esc_char__c9__s0_n_g_(self):
453+
self._choose([self._esc_char__c9__s0_n_g__c0_,
454+
self._esc_char__c9__s0_n_g__c1_,
455+
lambda: self._seq([self._digit_]),
456+
lambda: self._seq([self._eol_])])
457+
458+
def _esc_char__c9__s0_n_g__c0_(self):
459+
self._seq([lambda: self._ch('x')])
460+
461+
def _esc_char__c9__s0_n_g__c1_(self):
462+
self._seq([lambda: self._ch('u')])
463+
437464
def _hex_esc_(self):
438465
self._push('hex_esc')
439466
self._seq([lambda: self._ch('x'), lambda: self._bind(self._hex_, 'h1'),

json5/tests/json5_test.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,12 @@ def test_strings(self):
196196
self.check_fail("'\\u0j00'")
197197
self.check_fail("'\\uj000'")
198198

199+
def test_unrecognized_escape_char(self):
200+
self.check(r'"\/"', '/')
201+
202+
def test_nul(self):
203+
self.check(r'"\0"', '\x00')
204+
199205
def test_whitespace(self):
200206
self.check('\n1', 1)
201207
self.check('\r1', 1)

0 commit comments

Comments
 (0)