Skip to content

Commit 944fffe

Browse files
authored
GH-88116: Use a compact format to represent end line and column offsets. (GH-91666)
* Stores all location info in linetable to conform to PEP 626. * Remove column table from code objects. * Remove end-line table from code objects. * Document new location table format
1 parent 2a5f171 commit 944fffe

20 files changed

+853
-533
lines changed

Include/cpython/code.h

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -86,15 +86,7 @@ typedef uint16_t _Py_CODEUNIT;
8686
PyObject *co_filename; /* unicode (where it was loaded from) */ \
8787
PyObject *co_name; /* unicode (name, for reference) */ \
8888
PyObject *co_qualname; /* unicode (qualname, for reference) */ \
89-
PyObject *co_linetable; /* bytes (encoding addr<->lineno mapping) \
90-
See Objects/lnotab_notes.txt for details. \
91-
*/ \
92-
PyObject *co_endlinetable; /* bytes object that holds end lineno for \
93-
instructions separated across different \
94-
lines */ \
95-
PyObject *co_columntable; /* bytes object that holds start/end column \
96-
offset each instruction */ \
97-
\
89+
PyObject *co_linetable; /* bytes object that holds location info */ \
9890
PyObject *co_weakreflist; /* to support weakrefs to code objects */ \
9991
/* Scratch space for extra data relating to the code object. \
10092
Type is a void* to keep the format private in codeobject.c to force \
@@ -153,13 +145,13 @@ PyAPI_FUNC(PyCodeObject *) PyCode_New(
153145
int, int, int, int, int, PyObject *, PyObject *,
154146
PyObject *, PyObject *, PyObject *, PyObject *,
155147
PyObject *, PyObject *, PyObject *, int, PyObject *,
156-
PyObject *, PyObject *, PyObject *);
148+
PyObject *);
157149

158150
PyAPI_FUNC(PyCodeObject *) PyCode_NewWithPosOnlyArgs(
159151
int, int, int, int, int, int, PyObject *, PyObject *,
160152
PyObject *, PyObject *, PyObject *, PyObject *,
161153
PyObject *, PyObject *, PyObject *, int, PyObject *,
162-
PyObject *, PyObject *, PyObject *);
154+
PyObject *);
163155
/* same as struct above */
164156

165157
/* Creates a new empty code object with the specified source location. */
@@ -176,8 +168,8 @@ PyAPI_FUNC(int) PyCode_Addr2Location(PyCodeObject *, int, int *, int *, int *, i
176168
/* for internal use only */
177169
struct _opaque {
178170
int computed_line;
179-
const char *lo_next;
180-
const char *limit;
171+
const uint8_t *lo_next;
172+
const uint8_t *limit;
181173
};
182174

183175
typedef struct _line_offsets {
@@ -210,6 +202,20 @@ PyAPI_FUNC(int) _PyCode_GetExtra(PyObject *code, Py_ssize_t index,
210202
PyAPI_FUNC(int) _PyCode_SetExtra(PyObject *code, Py_ssize_t index,
211203
void *extra);
212204

205+
206+
typedef enum _PyCodeLocationInfoKind {
207+
/* short forms are 0 to 9 */
208+
PY_CODE_LOCATION_INFO_SHORT0 = 0,
209+
/* one line forms are 10 to 12 */
210+
PY_CODE_LOCATION_INFO_ONE_LINE0 = 10,
211+
PY_CODE_LOCATION_INFO_ONE_LINE1 = 11,
212+
PY_CODE_LOCATION_INFO_ONE_LINE2 = 12,
213+
214+
PY_CODE_LOCATION_INFO_NO_COLUMNS = 13,
215+
PY_CODE_LOCATION_INFO_LONG = 14,
216+
PY_CODE_LOCATION_INFO_NONE = 15
217+
} _PyCodeLocationInfoKind;
218+
213219
#ifdef __cplusplus
214220
}
215221
#endif

Include/internal/pycore_code.h

Lines changed: 35 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,6 @@ struct _PyCodeConstructor {
176176
PyObject *code;
177177
int firstlineno;
178178
PyObject *linetable;
179-
PyObject *endlinetable;
180-
PyObject *columntable;
181179

182180
/* used by the code */
183181
PyObject *consts;
@@ -221,21 +219,10 @@ extern PyObject* _PyCode_GetCellvars(PyCodeObject *);
221219
extern PyObject* _PyCode_GetFreevars(PyCodeObject *);
222220
extern PyObject* _PyCode_GetCode(PyCodeObject *);
223221

224-
/* Return the ending source code line number from a bytecode index. */
225-
extern int _PyCode_Addr2EndLine(PyCodeObject *, int);
226-
227-
/* Return the ending source code line number from a bytecode index. */
228-
extern int _PyCode_Addr2EndLine(PyCodeObject *, int);
229-
/* Return the starting source code column offset from a bytecode index. */
230-
extern int _PyCode_Addr2Offset(PyCodeObject *, int);
231-
/* Return the ending source code column offset from a bytecode index. */
232-
extern int _PyCode_Addr2EndOffset(PyCodeObject *, int);
233-
234222
/** API for initializing the line number tables. */
235223
extern int _PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds);
236-
extern int _PyCode_InitEndAddressRange(PyCodeObject* co, PyCodeAddressRange* bounds);
237224

238-
/** Out of process API for initializing the line number table. */
225+
/** Out of process API for initializing the location table. */
239226
extern void _PyLineTable_InitAddressRange(
240227
const char *linetable,
241228
Py_ssize_t length,
@@ -445,6 +432,40 @@ read_obj(uint16_t *p)
445432
return (PyObject *)val;
446433
}
447434

435+
/* Encode `val` at `ptr` as an unsigned varint and return the number of
 * bytes written.
 *
 * The value is emitted in 6-bit chunks, least-significant chunk first.
 * Bit 6 (value 64) is set on every byte except the last to signal that
 * another chunk follows; bit 7 is never set by this function.
 *
 * No bounds checking is done: the caller must guarantee that `ptr` has
 * room for the whole encoding (one byte per started 6 bits of `val`).
 */
static inline int
write_varint(uint8_t *ptr, unsigned int val)
{
    int written = 1;
    while (val >= 64) {
        /* Low six bits plus the "more bytes follow" flag. */
        *ptr++ = (uint8_t)(64 | (val & 63));
        val >>= 6;
        written++;
    }
    /* val < 64 here, so the continuation flag stays clear. */
    *ptr = (uint8_t)val;
    return written;
}
447+
448+
static inline int
449+
write_signed_varint(uint8_t *ptr, int val)
450+
{
451+
if (val < 0) {
452+
val = ((-val)<<1) | 1;
453+
}
454+
else {
455+
val = val << 1;
456+
}
457+
return write_varint(ptr, val);
458+
}
459+
460+
static inline int
461+
write_location_entry_start(uint8_t *ptr, int code, int length)
462+
{
463+
assert((code & 15) == code);
464+
*ptr = 128 | (code << 3) | (length - 1);
465+
return 1;
466+
}
467+
468+
448469
#ifdef __cplusplus
449470
}
450471
#endif

Lib/importlib/_bootstrap_external.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ def _write_atomic(path, data, mode=0o666):
402402
# add JUMP_BACKWARD_NO_INTERRUPT, make JUMP_NO_INTERRUPT virtual)
403403
# Python 3.11a7 3492 (make POP_JUMP_IF_NONE/NOT_NONE/TRUE/FALSE relative)
404404
# Python 3.11a7 3493 (Make JUMP_IF_TRUE_OR_POP/JUMP_IF_FALSE_OR_POP relative)
405-
405+
# Python 3.11a7 3494 (New location info table)
406406
# Python 3.12 will start with magic number 3500
407407

408408

Lib/test/test_code.py

Lines changed: 130 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -230,9 +230,7 @@ def func(): pass
230230
co.co_name,
231231
co.co_qualname,
232232
co.co_firstlineno,
233-
co.co_lnotab,
234-
co.co_endlinetable,
235-
co.co_columntable,
233+
co.co_linetable,
236234
co.co_exceptiontable,
237235
co.co_freevars,
238236
co.co_cellvars)
@@ -273,8 +271,6 @@ def func2():
273271
("co_filename", "newfilename"),
274272
("co_name", "newname"),
275273
("co_linetable", code2.co_linetable),
276-
("co_endlinetable", code2.co_endlinetable),
277-
("co_columntable", code2.co_columntable),
278274
):
279275
with self.subTest(attr=attr, value=value):
280276
new_code = code.replace(**{attr: value})
@@ -311,9 +307,7 @@ def func():
311307
co.co_name,
312308
co.co_qualname,
313309
co.co_firstlineno,
314-
co.co_lnotab,
315-
co.co_endlinetable,
316-
co.co_columntable,
310+
co.co_linetable,
317311
co.co_exceptiontable,
318312
co.co_freevars,
319313
co.co_cellvars,
@@ -391,14 +385,17 @@ def test_co_positions_artificial_instructions(self):
391385
)
392386

393387
def test_endline_and_columntable_none_when_no_debug_ranges(self):
394-
# Make sure that if `-X no_debug_ranges` is used, the endlinetable and
395-
# columntable are None.
388+
# Make sure that if `-X no_debug_ranges` is used, there is
389+
# minimal debug info
396390
code = textwrap.dedent("""
397391
def f():
398392
pass
399393
400-
assert f.__code__.co_endlinetable is None
401-
assert f.__code__.co_columntable is None
394+
positions = f.__code__.co_positions()
395+
for line, end_line, column, end_column in positions:
396+
assert line == end_line
397+
assert column is None
398+
assert end_column is None
402399
""")
403400
assert_python_ok('-X', 'no_debug_ranges', '-c', code)
404401

@@ -408,8 +405,11 @@ def test_endline_and_columntable_none_when_no_debug_ranges_env(self):
408405
def f():
409406
pass
410407
411-
assert f.__code__.co_endlinetable is None
412-
assert f.__code__.co_columntable is None
408+
positions = f.__code__.co_positions()
409+
for line, end_line, column, end_column in positions:
410+
assert line == end_line
411+
assert column is None
412+
assert end_column is None
413413
""")
414414
assert_python_ok('-c', code, PYTHONNODEBUGRANGES='1')
415415

@@ -421,35 +421,10 @@ def func():
421421
x = 1
422422
new_code = func.__code__.replace(co_linetable=b'')
423423
positions = new_code.co_positions()
424-
next(positions) # Skip RESUME at start
425424
for line, end_line, column, end_column in positions:
426425
self.assertIsNone(line)
427426
self.assertEqual(end_line, new_code.co_firstlineno + 1)
428427

429-
@requires_debug_ranges()
430-
def test_co_positions_empty_endlinetable(self):
431-
def func():
432-
x = 1
433-
new_code = func.__code__.replace(co_endlinetable=b'')
434-
positions = new_code.co_positions()
435-
next(positions) # Skip RESUME at start
436-
for line, end_line, column, end_column in positions:
437-
self.assertEqual(line, new_code.co_firstlineno + 1)
438-
self.assertIsNone(end_line)
439-
440-
@requires_debug_ranges()
441-
def test_co_positions_empty_columntable(self):
442-
def func():
443-
x = 1
444-
new_code = func.__code__.replace(co_columntable=b'')
445-
positions = new_code.co_positions()
446-
next(positions) # Skip RESUME at start
447-
for line, end_line, column, end_column in positions:
448-
self.assertEqual(line, new_code.co_firstlineno + 1)
449-
self.assertEqual(end_line, new_code.co_firstlineno + 1)
450-
self.assertIsNone(column)
451-
self.assertIsNone(end_column)
452-
453428

454429
def isinterned(s):
455430
return s is sys.intern(('_' + s + '_')[1:-1])
@@ -527,6 +502,122 @@ def callback(code):
527502
self.assertFalse(bool(coderef()))
528503
self.assertTrue(self.called)
529504

505+
# Python implementation of location table parsing algorithm
506+
def read(it):
    """Return the next item from the iterator *it*.

    Raises StopIteration when *it* is exhausted.
    """
    item = next(it)
    return item
508+
509+
def read_varint(it):
    """Decode one unsigned varint from the byte iterator *it*.

    Each byte contributes its low six bits, least-significant chunk
    first.  Bit 6 (value 64) of a byte means that another byte follows.

    Returns the decoded non-negative integer.  Raises StopIteration if
    *it* runs out before a byte without the continuation bit is seen.
    """
    b = next(it)
    val = b & 63
    shift = 0
    while b & 64:
        b = next(it)
        shift += 6
        val |= (b & 63) << shift
    return val
518+
519+
def read_signed_varint(it):
    """Decode one signed varint from the byte iterator *it*.

    The value is read with read_varint(); its lowest bit is the sign
    (1 means negative) and the remaining bits are the magnitude.
    """
    encoded = read_varint(it)
    magnitude = encoded >> 1
    return -magnitude if encoded & 1 else magnitude
525+
526+
def parse_location_table(code):
    """Yield one tuple per entry of ``code.co_linetable``.

    Each tuple is ``(kind, length, line, end_line, col, end_col)``.
    ``kind`` is taken from bits 3-6 of the entry's first byte and
    ``length`` is its low three bits plus one.  ``line`` starts at
    ``code.co_firstlineno`` and is updated by the line delta of each entry.

    Entry kinds, as decoded below:

    * 15    -- no location: all four position fields are None.
    * 14    -- long form: signed line delta, end-line delta, then column
               and end column each stored plus one (0 means None).
    * 13    -- signed line delta only; both columns are None.
    * 10-12 -- line delta of ``kind - 10`` followed by one byte for the
               column and one byte for the end column.
    * 0-9   -- short form: same line; the column and the width are
               packed into ``kind`` and the second byte.

    The generator returns when the table is exhausted at an entry
    boundary.
    """
    line = code.co_firstlineno
    it = iter(code.co_linetable)
    while True:
        try:
            first_byte = read(it)
        except StopIteration:
            return
        # Named `kind` so the `code` argument is not shadowed.
        kind = (first_byte >> 3) & 15
        length = (first_byte & 7) + 1
        if kind == 15:
            yield (kind, length, None, None, None, None)
        elif kind == 14:
            line_delta = read_signed_varint(it)
            line += line_delta
            end_line = line + read_varint(it)
            col = read_varint(it)
            if col == 0:
                col = None
            else:
                col -= 1
            end_col = read_varint(it)
            if end_col == 0:
                end_col = None
            else:
                end_col -= 1
            yield (kind, length, line, end_line, col, end_col)
        elif kind == 13:  # No column
            line_delta = read_signed_varint(it)
            line += line_delta
            yield (kind, length, line, line, None, None)
        elif kind in (10, 11, 12):  # new line
            line_delta = kind - 10
            line += line_delta
            column = read(it)
            end_column = read(it)
            yield (kind, length, line, line, column, end_column)
        else:
            assert (0 <= kind < 10)
            second_byte = read(it)
            column = kind << 3 | (second_byte >> 4)
            yield (kind, length, line, line, column, column + (second_byte & 15))
568+
569+
def positions_from_location_table(code):
    """Yield ``(line, end_line, col, end_col)`` tuples for *code*.

    Every entry produced by parse_location_table() is repeated
    ``length`` times.
    """
    for entry in parse_location_table(code):
        length = entry[1]
        position = (entry[2], entry[3], entry[4], entry[5])
        for _ in range(length):
            yield position
573+
574+
def misshappen():
575+
"""
576+
577+
578+
579+
580+
581+
"""
582+
x = (
583+
584+
585+
4
586+
587+
+
588+
589+
y
590+
591+
)
592+
y = (
593+
a
594+
+
595+
b
596+
+
597+
598+
d
599+
)
600+
return q if (
601+
602+
x
603+
604+
) else p
605+
606+
607+
class CodeLocationTest(unittest.TestCase):
608+
609+
def check_positions(self, func):
610+
pos1 = list(func.__code__.co_positions())
611+
pos2 = list(positions_from_location_table(func.__code__))
612+
for l1, l2 in zip(pos1, pos2):
613+
self.assertEqual(l1, l2)
614+
self.assertEqual(len(pos1), len(pos2))
615+
616+
617+
def test_positions(self):
618+
self.check_positions(parse_location_table)
619+
self.check_positions(misshappen)
620+
530621

531622
if check_impl_detail(cpython=True) and ctypes is not None:
532623
py = ctypes.pythonapi

0 commit comments

Comments
 (0)