Skip to content

Commit 7795ae8

Browse files
bpo-40958: Avoid buffer overflow in the parser when indexing the current line (GH-20875) (GH-20919)
(cherry picked from commit 51c5896) Co-authored-by: Pablo Galindo <[email protected]>
1 parent 3cf8094 commit 7795ae8

File tree

4 files changed

+16
-16
lines changed

4 files changed

+16
-16
lines changed

Lib/test/test_peg_parser.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -669,23 +669,23 @@ def f():
669669
{a$b}
670670
'''
671671
""",
672-
'(a$b)',
672+
'(a$b)\n',
673673
),
674674
'multiline_fstring_brace_on_next_line': (
675675
"""
676676
f'''
677677
{a$b
678678
}'''
679679
""",
680-
'(a$b',
680+
'(a$b\n',
681681
),
682682
'multiline_fstring_brace_on_previous_line': (
683683
"""
684684
f'''
685685
{
686686
a$b}'''
687687
""",
688-
'a$b)',
688+
'a$b)\n',
689689
),
690690
}
691691

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
Fix a possible buffer overflow in the PEG parser when gathering information
2+
for emitting syntax errors. Patch by Pablo Galindo.

Parser/pegen/pegen.c

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -140,21 +140,18 @@ _create_dummy_identifier(Parser *p)
140140
}
141141

142142
static inline Py_ssize_t
143-
byte_offset_to_character_offset(PyObject *line, int col_offset)
143+
byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
144144
{
145145
const char *str = PyUnicode_AsUTF8(line);
146146
if (!str) {
147147
return 0;
148148
}
149+
assert(col_offset >= 0 && (unsigned long)col_offset <= strlen(str));
149150
PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
150151
if (!text) {
151152
return 0;
152153
}
153154
Py_ssize_t size = PyUnicode_GET_LENGTH(text);
154-
str = PyUnicode_AsUTF8(text);
155-
if (str != NULL && (int)strlen(str) == col_offset) {
156-
size = strlen(str);
157-
}
158155
Py_DECREF(text);
159156
return size;
160157
}
@@ -366,7 +363,7 @@ void *
366363
_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
367364
{
368365
Token *t = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
369-
int col_offset;
366+
Py_ssize_t col_offset;
370367
if (t->col_offset == -1) {
371368
col_offset = Py_SAFE_DOWNCAST(p->tok->cur - p->tok->buf,
372369
intptr_t, int);
@@ -386,7 +383,7 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
386383

387384
void *
388385
_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
389-
int lineno, int col_offset,
386+
Py_ssize_t lineno, Py_ssize_t col_offset,
390387
const char *errmsg, va_list va)
391388
{
392389
PyObject *value = NULL;
@@ -406,16 +403,17 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
406403

407404
if (!error_line) {
408405
Py_ssize_t size = p->tok->inp - p->tok->buf;
409-
if (size && p->tok->buf[size-1] == '\n') {
410-
size--;
411-
}
412406
error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
413407
if (!error_line) {
414408
goto error;
415409
}
416410
}
417411

418-
Py_ssize_t col_number = byte_offset_to_character_offset(error_line, col_offset);
412+
Py_ssize_t col_number = col_offset;
413+
414+
if (p->tok->encoding != NULL) {
415+
col_number = byte_offset_to_character_offset(error_line, col_offset);
416+
}
419417

420418
tmp = Py_BuildValue("(OiiN)", p->tok->filename, lineno, col_number, error_line);
421419
if (!tmp) {

Parser/pegen/pegen.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ typedef struct _memo {
3434
typedef struct {
3535
int type;
3636
PyObject *bytes;
37-
int lineno, col_offset, end_lineno, end_col_offset;
37+
Py_ssize_t lineno, col_offset, end_lineno, end_col_offset;
3838
Memo *memo;
3939
} Token;
4040

@@ -132,7 +132,7 @@ void *_PyPegen_string_token(Parser *p);
132132
const char *_PyPegen_get_expr_name(expr_ty);
133133
void *_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...);
134134
void *_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
135-
int lineno, int col_offset,
135+
Py_ssize_t lineno, Py_ssize_t col_offset,
136136
const char *errmsg, va_list va);
137137
void *_PyPegen_dummy_name(Parser *p, ...);
138138

0 commit comments

Comments
 (0)