[BOLT] Codereview changes #120064

bgergely0 · bgergely0 · commit 428089dc39c6 · 2025-04-30T10:22:07.000+02:00
- unittests
- test/match_dwarf.py
diff --git a/bolt/test/AArch64/negate-ra-state-incorrect.s b/bolt/test/AArch64/negate-ra-state-incorrect.s
@@ -5,10 +5,14 @@
 # CHECK: BOLT-INFO: inconsistent RAStates in function foo
 
 # check that foo got Ignored, so it's not in the new .text section
-# llvm-objdump %t.exe -d -j .text > %t.exe.dump
+# RUN: llvm-objdump %t.exe.bolt -d -j .text > %t.exe.dump
 # RUN: not grep "<foo>:" %t.exe.dump
 
 
+# Why is the CFI in this test incorrect?
+#   There is an extra .cfi_negate_ra_state in foo.
+#   Because of this, we reach the autiasp (hint #29)
+#   in a (seemingly) unsigned state. That is incorrect.
   .text
   .globl  foo
   .p2align        2
@@ -37,5 +41,3 @@ foo:
   .type _start, %function
 _start:
   b foo
-
-.reloc 0, R_AARCH64_NONE
diff --git a/bolt/test/AArch64/negate-ra-state.s b/bolt/test/AArch64/negate-ra-state.s
@@ -3,15 +3,15 @@
 
 # RUN: llvm-objdump %t.exe -d > %t.exe.dump
 # RUN: llvm-objdump --dwarf=frames %t.exe -D > %t.exe.dump-dwarf
-# RUN: match-dwarf %t.exe.dump %t.exe.dump-dwarf foo > orig.txt
+# RUN: match-dwarf %t.exe.dump %t.exe.dump-dwarf foo > %t.match-dwarf.txt
 
 # RUN: llvm-bolt %t.exe -o %t.exe.bolt
 
 # RUN: llvm-objdump %t.exe.bolt -d > %t.exe.bolt.dump
 # RUN: llvm-objdump --dwarf=frames %t.exe.bolt  > %t.exe.bolt.dump-dwarf
-# RUN: match-dwarf %t.exe.bolt.dump %t.exe.bolt.dump-dwarf foo > bolted.txt
+# RUN: match-dwarf %t.exe.bolt.dump %t.exe.bolt.dump-dwarf foo > %t.bolt.match-dwarf.txt
 
-# RUN: diff orig.txt bolted.txt
+# RUN: diff %t.match-dwarf.txt %t.bolt.match-dwarf.txt
 
 	.text
 	.globl	foo
diff --git a/bolt/test/match_dwarf.py b/bolt/test/match_dwarf.py
@@ -5,6 +5,14 @@
 # by address to function names (which are parsed from a normal objdump).
 # The script is used for checking if .cfi_negate_ra_state CFIs
 # are generated by BOLT the same way they are generated by LLVM.
+# The script is called twice in unittests: once with the objdumps of
+# the BOLT input binary, and once with those of the BOLT output binary.
+# We output the offsets of .cfi_negate_ra_state instructions from the
+# function's start address to check that BOLT generates them at the same
+# locations.
+# Because we check the locations, this is only useful for testing without
+# optimization flags, i.e. `llvm-bolt input.exe -o output.exe`.
+
 
 import argparse
 import subprocess
@@ -29,11 +37,17 @@ def print(self):
         print(self.name)
         print(self.body)
 
-    def parse_negates(self):
+    def parse_negate_offsets(self):
+        """
+        Create a list of locations/offsets of the negate_ra_state
+        CFIs in the DWARF entry.
+        To find the offset of each one, we match the DW_CFA_advance_loc
+        entries and sum up their values.
+        """
         negate_offsets = []
         loc = 0
         # TODO: make sure this is not printed in hex
-        re_advloc = f"DW_CFA_advance_loc: (\d+)"
+        re_advloc = r"DW_CFA_advance_loc: (\d+)"
 
         for line in self.body.splitlines():
             # if line matches advance_loc int
@@ -49,9 +63,12 @@ def __eq__(self, other):
         return self.name == other.name and self.negate_offsets == other.negate_offsets
 
 
-def parse_objdump(objdump):
+def extract_function_addresses(objdump):
     """
     Parse and return address-to-name dictionary from objdump file
+    Function labels in the objdump look like this:
+        000123abc <foo>:
+    We want to create a dict mapping the address (000123abc) to the name (foo).
     """
     addr_name_dict = dict()
     re_function = re.compile(r"^([0-9a-fA-F]+)\s<(.*)>:$")
@@ -67,12 +84,19 @@ def parse_objdump(objdump):
     return addr_name_dict
 
 
-def parse_dwarf(dwarfdump, addr_name_dict):
+def match_dwarf_to_name(dwarfdump, addr_name_dict):
     """
     Parse dwarf dump, and match names to blocks using the dict from the objdump.
     Return a list of NameDwarfPairs.
+    The matched header lines look like this:
+    000123 000456 000789 FDE cie=000000  pc=0123abc...0456def
+    The header does not contain the function name, only the PC range it applies to.
+    We need to extract the start address (pc=0123abc), and look up the matching name in
+    the addr_name_dict.
+    The resulting NameDwarfPair will hold the lines this header applies to, and instead of
+    the header with the addresses, it will just have the function name.
     """
-    re_address_line = re.compile(r".*pc=([0-9a-fA-F]{8})\.\.\.([0-9a-fA-F]{8})")
+    re_address_line = re.compile(r".*pc=([0-9a-fA-F]+)\.\.\.([0-9a-fA-F]+)")
     with open(dwarfdump, "r") as dw:
         functions = []
         for line in dw.readlines():
@@ -98,12 +122,12 @@ def main():
 
     args = parser.parse_args()
 
-    addr_name_dict = parse_objdump(args.objdump)
-    functions = parse_dwarf(args.dwarfdump, addr_name_dict)
+    addr_name_dict = extract_function_addresses(args.objdump)
+    functions = match_dwarf_to_name(args.dwarfdump, addr_name_dict)
 
     for f in functions:
         if f.name == args.function:
-            f.parse_negates()
+            f.parse_negate_offsets()
             print(f.negate_offsets)
             break
     else: