Skip to content

Commit 7e56a40

Browse files
authored
Merge pull request #830 from lucacasonato/support_lone_surrogates_in_raw_value
Allow lone surrogates in raw values
2 parents 977975e + 51e9616 commit 7e56a40

File tree

2 files changed

+22
-27
lines changed

2 files changed

+22
-27
lines changed

src/read.rs

Lines changed: 8 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -954,34 +954,15 @@ where
954954

955955
match ch {
956956
b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {}
957-
b'u' => match tri!(read.decode_hex_escape()) {
958-
0xDC00..=0xDFFF => {
959-
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
960-
}
961-
962-
// Non-BMP characters are encoded as a sequence of
963-
// two hex escapes, representing UTF-16 surrogates.
964-
n1 @ 0xD800..=0xDBFF => {
965-
if tri!(next_or_eof(read)) != b'\\' {
966-
return error(read, ErrorCode::UnexpectedEndOfHexEscape);
967-
}
968-
if tri!(next_or_eof(read)) != b'u' {
969-
return error(read, ErrorCode::UnexpectedEndOfHexEscape);
970-
}
971-
972-
let n2 = tri!(read.decode_hex_escape());
973-
if n2 < 0xDC00 || n2 > 0xDFFF {
974-
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
975-
}
976-
977-
let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;
978-
if char::from_u32(n).is_none() {
979-
return error(read, ErrorCode::InvalidUnicodeCodePoint);
980-
}
981-
}
957+
b'u' => {
958+
// At this point we don't care if the codepoint is valid. We just
959+
// want to consume it. We don't actually know what is valid or not
960+
// at this point, because that depends on whether this string will
961+
// ultimately be parsed into a string or a byte buffer in the "real"
962+
// parse.
982963

983-
_ => {}
984-
},
964+
tri!(read.decode_hex_escape());
965+
}
985966
_ => {
986967
return error(read, ErrorCode::InvalidEscape);
987968
}

tests/test.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1740,6 +1740,20 @@ fn test_byte_buf_de_lone_surrogate() {
17401740
assert!(res.is_err());
17411741
}
17421742

1743+
#[cfg(feature = "raw_value")]
1744+
#[test]
1745+
fn test_raw_de_lone_surrogate() {
1746+
use serde_json::value::RawValue;
1747+
1748+
assert!(from_str::<Box<RawValue>>(r#""\ud83c""#).is_ok());
1749+
assert!(from_str::<Box<RawValue>>(r#""\ud83c\n""#).is_ok());
1750+
assert!(from_str::<Box<RawValue>>(r#""\ud83c ""#).is_ok());
1751+
assert!(from_str::<Box<RawValue>>(r#""\udc01 ""#).is_ok());
1752+
assert!(from_str::<Box<RawValue>>(r#""\udc01\!""#).is_err());
1753+
assert!(from_str::<Box<RawValue>>(r#""\udc01\u""#).is_err());
1754+
assert!(from_str::<Box<RawValue>>(r#""\ud83c\ud83c""#).is_ok());
1755+
}
1756+
17431757
#[test]
17441758
fn test_byte_buf_de_multiple() {
17451759
let s: Vec<ByteBuf> = from_str(r#"["ab\nc", "cd\ne"]"#).unwrap();

0 commit comments

Comments
 (0)