Skip to content

Commit 428089d

Browse files
committed
[BOLT] Codereview changes #120064
- unittests - test/match_dwarf.py
1 parent 70cccee commit 428089d

File tree

3 files changed

+40
-14
lines changed

3 files changed

+40
-14
lines changed

bolt/test/AArch64/incorrect-negate-ra-state.s renamed to bolt/test/AArch64/negate-ra-state-incorrect.s

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,14 @@
55
# CHECK: BOLT-INFO: inconsistent RAStates in function foo
66

77
# Check that foo was ignored by BOLT, so it is not in the new .text section
8-
# llvm-objdump %t.exe -d -j .text > %t.exe.dump
8+
# RUN: llvm-objdump %t.exe.bolt -d -j .text > %t.exe.dump
99
# RUN: not grep "<foo>:" %t.exe.dump
1010

1111

12+
# Why is this test incorrect?
13+
# There is an extra .cfi_negate_ra_state in line ...
14+
# Because of this, we will get to the autiasp (hint #29)
15+
# in a (seemingly) unsigned state. That is incorrect.
1216
.text
1317
.globl foo
1418
.p2align 2
@@ -37,5 +41,3 @@ foo:
3741
.type _start, %function
3842
_start:
3943
b foo
40-
41-
.reloc 0, R_AARCH64_NONE

bolt/test/AArch64/simple-negate-ra-state.s renamed to bolt/test/AArch64/negate-ra-state.s

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,15 @@
33

44
# RUN: llvm-objdump %t.exe -d > %t.exe.dump
55
# RUN: llvm-objdump --dwarf=frames %t.exe -D > %t.exe.dump-dwarf
6-
# RUN: match-dwarf %t.exe.dump %t.exe.dump-dwarf foo > orig.txt
6+
# RUN: match-dwarf %t.exe.dump %t.exe.dump-dwarf foo > %t.match-dwarf.txt
77

88
# RUN: llvm-bolt %t.exe -o %t.exe.bolt
99

1010
# RUN: llvm-objdump %t.exe.bolt -d > %t.exe.bolt.dump
1111
# RUN: llvm-objdump --dwarf=frames %t.exe.bolt > %t.exe.bolt.dump-dwarf
12-
# RUN: match-dwarf %t.exe.bolt.dump %t.exe.bolt.dump-dwarf foo > bolted.txt
12+
# RUN: match-dwarf %t.exe.bolt.dump %t.exe.bolt.dump-dwarf foo > %t.bolt.match-dwarf.txt
1313

14-
# RUN: diff orig.txt bolted.txt
14+
# RUN: diff %t.match-dwarf.txt %t.bolt.match-dwarf.txt
1515

1616
.text
1717
.globl foo

bolt/test/match_dwarf.py

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,14 @@
55
# by address to function names (which are parsed from a normal objdump).
66
# The script is used for checking if .cfi_negate_ra_state CFIs
77
# are generated by BOLT the same way they are generated by LLVM.
8+
# The script is called twice in unittests: once with the objdumps of
9+
# the BOLT input binary, and once with those of the BOLT output binary.
10+
# We output the offsets of .cfi_negate_ra_state instructions from the
11+
# function's start address to check that BOLT generates them at the same
12+
# locations.
13+
# Because we check the location, this is only useful for testing without
14+
# optimization flags, i.e. `llvm-bolt input.exe -o output.exe`.
15+
816

917
import argparse
1018
import subprocess
@@ -29,11 +37,17 @@ def print(self):
2937
print(self.name)
3038
print(self.body)
3139

32-
def parse_negates(self):
40+
def parse_negate_offsets(self):
41+
"""
42+
Create a list of locations/offsets of the negate_ra_state
43+
CFIs in the dwarf entry.
44+
To find offsets for each, we match the DW_CFA_advance_loc entries,
45+
and sum up their values.
46+
"""
3347
negate_offsets = []
3448
loc = 0
3549
# TODO: make sure this is not printed in hex
36-
re_advloc = f"DW_CFA_advance_loc: (\d+)"
50+
re_advloc = r"DW_CFA_advance_loc: (\d+)"
3751

3852
for line in self.body.splitlines():
3953
# if line matches advance_loc int
@@ -49,9 +63,12 @@ def __eq__(self, other):
4963
return self.name == other.name and self.negate_offsets == other.negate_offsets
5064

5165

52-
def parse_objdump(objdump):
66+
def extract_function_addresses(objdump):
5367
"""
5468
Parse and return address-to-name dictionary from objdump file
69+
Function names in the objdump look like this:
70+
000123abc <foo>:
71+
We want to create a dict mapping the address (000123abc) to the name (foo).
5572
"""
5673
addr_name_dict = dict()
5774
re_function = re.compile(r"^([0-9a-fA-F]+)\s<(.*)>:$")
@@ -67,12 +84,19 @@ def parse_objdump(objdump):
6784
return addr_name_dict
6885

6986

70-
def parse_dwarf(dwarfdump, addr_name_dict):
87+
def match_dwarf_to_name(dwarfdump, addr_name_dict):
7188
"""
7289
Parse dwarf dump, and match names to blocks using the dict from the objdump.
7390
Return a list of NameDwarfPairs.
91+
The matched lines look like this:
92+
000123 000456 000789 FDE cie=000000 pc=0123abc...0456def
93+
We do not have the function name for this, only the PC range it applies to.
94+
We need to find the pc=0123abc (the start address), and find the matching name from
95+
the addr_name_dict.
96+
The resulting NameDwarfPair will hold the lines this header applies to, and instead of
97+
the header with the addresses, it will just have the function name.
7498
"""
75-
re_address_line = re.compile(r".*pc=([0-9a-fA-F]{8})\.\.\.([0-9a-fA-F]{8})")
99+
re_address_line = re.compile(r".*pc=([0-9a-fA-F]+)\.\.\.([0-9a-fA-F]+)")
76100
with open(dwarfdump, "r") as dw:
77101
functions = []
78102
for line in dw.readlines():
@@ -98,12 +122,12 @@ def main():
98122

99123
args = parser.parse_args()
100124

101-
addr_name_dict = parse_objdump(args.objdump)
102-
functions = parse_dwarf(args.dwarfdump, addr_name_dict)
125+
addr_name_dict = extract_function_addresses(args.objdump)
126+
functions = match_dwarf_to_name(args.dwarfdump, addr_name_dict)
103127

104128
for f in functions:
105129
if f.name == args.function:
106-
f.parse_negates()
130+
f.parse_negate_offsets()
107131
print(f.negate_offsets)
108132
break
109133
else:

0 commit comments

Comments
 (0)