@@ -15,11 +15,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
15
15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
16
16
; GFX9-NEXT: s_lshl_b32 s1, s0, 2
17
17
; GFX9-NEXT: s_and_b32 s0, s0, 15
18
- ; GFX9-NEXT: s_add_i32 s1, s1, 0
19
18
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
20
19
; GFX9-NEXT: scratch_store_dword off, v0, s1
21
20
; GFX9-NEXT: s_waitcnt vmcnt(0)
22
- ; GFX9-NEXT: s_add_i32 s0, s0, 0
23
21
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
24
22
; GFX9-NEXT: s_waitcnt vmcnt(0)
25
23
; GFX9-NEXT: s_endpgm
@@ -36,8 +34,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
36
34
; GFX10-NEXT: s_and_b32 s1, s0, 15
37
35
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
38
36
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
39
- ; GFX10-NEXT: s_add_i32 s0, s0, 0
40
- ; GFX10-NEXT: s_add_i32 s1, s1, 0
41
37
; GFX10-NEXT: scratch_store_dword off, v0, s0
42
38
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
43
39
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -51,11 +47,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
51
47
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
52
48
; GFX940-NEXT: s_lshl_b32 s1, s0, 2
53
49
; GFX940-NEXT: s_and_b32 s0, s0, 15
54
- ; GFX940-NEXT: s_add_i32 s1, s1, 0
55
50
; GFX940-NEXT: s_lshl_b32 s0, s0, 2
56
51
; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
57
52
; GFX940-NEXT: s_waitcnt vmcnt(0)
58
- ; GFX940-NEXT: s_add_i32 s0, s0, 0
59
53
; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
60
54
; GFX940-NEXT: s_waitcnt vmcnt(0)
61
55
; GFX940-NEXT: s_endpgm
@@ -68,8 +62,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
68
62
; GFX11-NEXT: s_and_b32 s1, s0, 15
69
63
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
70
64
; GFX11-NEXT: s_lshl_b32 s1, s1, 2
71
- ; GFX11-NEXT: s_add_i32 s0, s0, 0
72
- ; GFX11-NEXT: s_add_i32 s1, s1, 0
73
65
; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
74
66
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
75
67
; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc
@@ -84,8 +76,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
84
76
; GFX12-NEXT: s_and_b32 s1, s0, 15
85
77
; GFX12-NEXT: s_lshl_b32 s0, s0, 2
86
78
; GFX12-NEXT: s_lshl_b32 s1, s1, 2
87
- ; GFX12-NEXT: s_add_co_i32 s0, s0, 0
88
- ; GFX12-NEXT: s_add_co_i32 s1, s1, 0
89
79
; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
90
80
; GFX12-NEXT: s_wait_storecnt 0x0
91
81
; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
@@ -1042,13 +1032,13 @@ define void @store_load_large_imm_offset_foo() {
1042
1032
; GFX9-LABEL: store_load_large_imm_offset_foo:
1043
1033
; GFX9: ; %bb.0: ; %bb
1044
1034
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1045
- ; GFX9-NEXT: v_mov_b32_e32 v0, 13
1046
1035
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
1047
- ; GFX9-NEXT: s_add_i32 s1, s32, 4
1036
+ ; GFX9-NEXT: v_mov_b32_e32 v0, 13
1037
+ ; GFX9-NEXT: s_add_i32 s1, s32, s0
1048
1038
; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
1049
1039
; GFX9-NEXT: s_waitcnt vmcnt(0)
1050
1040
; GFX9-NEXT: v_mov_b32_e32 v0, 15
1051
- ; GFX9-NEXT: s_add_i32 s0, s0, s1
1041
+ ; GFX9-NEXT: s_add_i32 s0, s1, 4
1052
1042
; GFX9-NEXT: scratch_store_dword off, v0, s0
1053
1043
; GFX9-NEXT: s_waitcnt vmcnt(0)
1054
1044
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1059,10 +1049,10 @@ define void @store_load_large_imm_offset_foo() {
1059
1049
; GFX10: ; %bb.0: ; %bb
1060
1050
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1061
1051
; GFX10-NEXT: v_mov_b32_e32 v0, 13
1062
- ; GFX10-NEXT: v_mov_b32_e32 v1, 15
1063
1052
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1064
- ; GFX10-NEXT: s_add_i32 s1, s32, 4
1065
- ; GFX10-NEXT: s_add_i32 s0, s0, s1
1053
+ ; GFX10-NEXT: v_mov_b32_e32 v1, 15
1054
+ ; GFX10-NEXT: s_add_i32 s1, s32, s0
1055
+ ; GFX10-NEXT: s_add_i32 s0, s1, 4
1066
1056
; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
1067
1057
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1068
1058
; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1074,13 +1064,13 @@ define void @store_load_large_imm_offset_foo() {
1074
1064
; GFX940-LABEL: store_load_large_imm_offset_foo:
1075
1065
; GFX940: ; %bb.0: ; %bb
1076
1066
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1077
- ; GFX940-NEXT: v_mov_b32_e32 v0, 13
1078
1067
; GFX940-NEXT: s_movk_i32 s0, 0x3e80
1079
- ; GFX940-NEXT: s_add_i32 s1, s32, 4
1068
+ ; GFX940-NEXT: v_mov_b32_e32 v0, 13
1069
+ ; GFX940-NEXT: s_add_i32 s1, s32, s0
1080
1070
; GFX940-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
1081
1071
; GFX940-NEXT: s_waitcnt vmcnt(0)
1082
1072
; GFX940-NEXT: v_mov_b32_e32 v0, 15
1083
- ; GFX940-NEXT: s_add_i32 s0, s0, s1
1073
+ ; GFX940-NEXT: s_add_i32 s0, s1, 4
1084
1074
; GFX940-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
1085
1075
; GFX940-NEXT: s_waitcnt vmcnt(0)
1086
1076
; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1092,9 +1082,9 @@ define void @store_load_large_imm_offset_foo() {
1092
1082
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1093
1083
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1094
1084
; GFX11-NEXT: s_movk_i32 s0, 0x3e80
1095
- ; GFX11-NEXT: s_add_i32 s1, s32, 4
1096
- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1097
- ; GFX11-NEXT: s_add_i32 s0, s0, s1
1085
+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1086
+ ; GFX11-NEXT: s_add_i32 s1, s32, s0
1087
+ ; GFX11-NEXT: s_add_i32 s0, s1, 4
1098
1088
; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
1099
1089
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1100
1090
; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
0 commit comments