5
5
# by address to function names (which are parsed from a normal objdump).
6
6
# The script is used for checking if .cfi_negate_ra_state CFIs
7
7
# are generated by BOLT the same way they are generated by LLVM.
8
+ # The script is called twice in unit tests: once with the objdumps of
9
+ # the BOLT input binary, and once with the output binary from BOLT.
10
+ # We output the offsets of .cfi_negate_ra_state instructions from the
11
+ # function's start address to see that BOLT can generate them at the same
12
+ # locations.
13
+ # Because we check the location, this is only useful for testing without
14
+ # optimization flags, i.e. `llvm-bolt input.exe -o output.exe`.
15
+
8
16
9
17
import argparse
10
18
import subprocess
@@ -29,11 +37,17 @@ def print(self):
29
37
print (self .name )
30
38
print (self .body )
31
39
32
- def parse_negates (self ):
40
+ def parse_negate_offsets (self ):
41
+ """
42
+ Create a list of locations/offsets of the negate_ra_state
43
+ CFIs in the dwarf entry.
44
+ To find offsets for each, we match the DW_CFA_advance_loc entries,
45
+ and sum up their values.
46
+ """
33
47
negate_offsets = []
34
48
loc = 0
35
49
# TODO: make sure this is not printed in hex
36
- re_advloc = f "DW_CFA_advance_loc: (\d+)"
50
+ re_advloc = r "DW_CFA_advance_loc: (\d+)"
37
51
38
52
for line in self .body .splitlines ():
39
53
# if line matches advance_loc int
@@ -49,9 +63,12 @@ def __eq__(self, other):
49
63
return self .name == other .name and self .negate_offsets == other .negate_offsets
50
64
51
65
52
- def parse_objdump (objdump ):
66
+ def extract_function_addresses (objdump ):
53
67
"""
54
68
Parse and return address-to-name dictionary from objdump file.
69
+ Function names in the objdump look like this:
70
+ 000123abc <foo>:
71
+ We want to create a dict from the addr (000123abc), to the name (foo).
55
72
"""
56
73
addr_name_dict = dict ()
57
74
re_function = re .compile (r"^([0-9a-fA-F]+)\s<(.*)>:$" )
@@ -67,12 +84,19 @@ def parse_objdump(objdump):
67
84
return addr_name_dict
68
85
69
86
70
- def parse_dwarf (dwarfdump , addr_name_dict ):
87
+ def match_dwarf_to_name (dwarfdump , addr_name_dict ):
71
88
"""
72
89
Parse dwarf dump, and match names to blocks using the dict from the objdump.
73
90
Return a list of NameDwarfPairs.
91
+ The matched lines look like this:
92
+ 000123 000456 000789 FDE cie=000000 pc=0123abc...0456def
93
+ We do not have the function name for this, only the PC range it applies to.
94
+ We need to find the pc=0123abc (the start address), and find the matching name from
95
+ the addr_name_dict.
96
+ The resulting NameDwarfPair will hold the lines this header applies to, and instead of
97
+ the header with the addresses, it will just have the function name.
74
98
"""
75
- re_address_line = re .compile (r".*pc=([0-9a-fA-F]{8} )\.\.\.([0-9a-fA-F]{8} )" )
99
+ re_address_line = re .compile (r".*pc=([0-9a-fA-F]+ )\.\.\.([0-9a-fA-F]+ )" )
76
100
with open (dwarfdump , "r" ) as dw :
77
101
functions = []
78
102
for line in dw .readlines ():
@@ -98,12 +122,12 @@ def main():
98
122
99
123
args = parser .parse_args ()
100
124
101
- addr_name_dict = parse_objdump (args .objdump )
102
- functions = parse_dwarf (args .dwarfdump , addr_name_dict )
125
+ addr_name_dict = extract_function_addresses (args .objdump )
126
+ functions = match_dwarf_to_name (args .dwarfdump , addr_name_dict )
103
127
104
128
for f in functions :
105
129
if f .name == args .function :
106
- f .parse_negates ()
130
+ f .parse_negate_offsets ()
107
131
print (f .negate_offsets )
108
132
break
109
133
else :
0 commit comments