@@ -140,21 +140,18 @@ _create_dummy_identifier(Parser *p)
140
140
}
141
141
142
142
static inline Py_ssize_t
143
- byte_offset_to_character_offset (PyObject * line , int col_offset )
143
+ byte_offset_to_character_offset (PyObject * line , Py_ssize_t col_offset )
144
144
{
145
145
const char * str = PyUnicode_AsUTF8 (line );
146
146
if (!str ) {
147
147
return 0 ;
148
148
}
149
+ assert (col_offset >= 0 && (unsigned long )col_offset <= strlen (str ));
149
150
PyObject * text = PyUnicode_DecodeUTF8 (str , col_offset , "replace" );
150
151
if (!text ) {
151
152
return 0 ;
152
153
}
153
154
Py_ssize_t size = PyUnicode_GET_LENGTH (text );
154
- str = PyUnicode_AsUTF8 (text );
155
- if (str != NULL && (int )strlen (str ) == col_offset ) {
156
- size = strlen (str );
157
- }
158
155
Py_DECREF (text );
159
156
return size ;
160
157
}
@@ -366,7 +363,7 @@ void *
366
363
_PyPegen_raise_error (Parser * p , PyObject * errtype , const char * errmsg , ...)
367
364
{
368
365
Token * t = p -> known_err_token != NULL ? p -> known_err_token : p -> tokens [p -> fill - 1 ];
369
- int col_offset ;
366
+ Py_ssize_t col_offset ;
370
367
if (t -> col_offset == -1 ) {
371
368
col_offset = Py_SAFE_DOWNCAST (p -> tok -> cur - p -> tok -> buf ,
372
369
intptr_t , int );
@@ -386,7 +383,7 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
386
383
387
384
void *
388
385
_PyPegen_raise_error_known_location (Parser * p , PyObject * errtype ,
389
- int lineno , int col_offset ,
386
+ Py_ssize_t lineno , Py_ssize_t col_offset ,
390
387
const char * errmsg , va_list va )
391
388
{
392
389
PyObject * value = NULL ;
@@ -406,16 +403,17 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
406
403
407
404
if (!error_line ) {
408
405
Py_ssize_t size = p -> tok -> inp - p -> tok -> buf ;
409
- if (size && p -> tok -> buf [size - 1 ] == '\n' ) {
410
- size -- ;
411
- }
412
406
error_line = PyUnicode_DecodeUTF8 (p -> tok -> buf , size , "replace" );
413
407
if (!error_line ) {
414
408
goto error ;
415
409
}
416
410
}
417
411
418
- Py_ssize_t col_number = byte_offset_to_character_offset (error_line , col_offset );
412
+ Py_ssize_t col_number = col_offset ;
413
+
414
+ if (p -> tok -> encoding != NULL ) {
415
+ col_number = byte_offset_to_character_offset (error_line , col_offset );
416
+ }
419
417
420
418
tmp = Py_BuildValue ("(OiiN)" , p -> tok -> filename , lineno , col_number , error_line );
421
419
if (!tmp ) {
0 commit comments