Skip to content

Commit aa0a33b

Browse files
authored
[llvm][CodeGen] Address the issue of multiple resource reservations in window scheduling (llvm#100301)
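Address the issue of multiple resource reservations in window scheduling: remove the extra `RM.reserveResources(*SU, CurCycle)` call in `WindowScheduler::calculateMaxCycle` that followed the reservation made just above it, and add a Hexagon MIR test covering the case.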
1 parent 84b1e59 commit aa0a33b

File tree

2 files changed

+99
-1
lines changed

2 files changed

+99
-1
lines changed

llvm/lib/CodeGen/WindowScheduler.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -451,7 +451,6 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG,
451451
}
452452
RM.reserveResources(*SU, CurCycle);
453453
}
454-
RM.reserveResources(*SU, CurCycle);
455454
OriToCycle[getOriMI(&MI)] = CurCycle;
456455
LLVM_DEBUG(dbgs() << "\tCycle " << CurCycle << " [S."
457456
<< getOriStage(getOriMI(&MI), Offset) << "]: " << MI);
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
# REQUIRES: asserts
2+
# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \
3+
# RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \
4+
# RUN: -window-search-ratio=100 -window-search-num=100 -window-diff-limit=1 \
5+
# RUN: | FileCheck %s
6+
7+
# We want to verify that the three V6_vaddw instructions matched below are emitted in the same cycle and the same stage.
8+
# CHECK: Best window offset is {{[0-9]+}} and Best II is {{[0-9]+}}.
9+
# CHECK-NEXT: Cycle [[CycleNum:[0-9]+]] [[[StageNum:S.[0-9]+]]]: {{%[0-9]+}}:hvxvr = V6_vaddw {{%[0-9]+}}:hvxvr, {{%[0-9]+}}:hvxvr
10+
# CHECK-NEXT: Cycle [[CycleNum]] [[[StageNum]]]: {{%[0-9]+}}:hvxvr = V6_vaddw {{%[0-9]+}}:hvxvr, {{%[0-9]+}}:hvxvr
11+
# CHECK-NEXT: Cycle [[CycleNum]] [[[StageNum]]]: {{%[0-9]+}}:hvxvr = V6_vaddw {{%[0-9]+}}:hvxvr, {{%[0-9]+}}:hvxvr
12+
13+
--- |
14+
define void @add_parallel(i32 %N, ptr noalias %x, ptr noalias %y) {
15+
entry:
16+
%isZeroLength = icmp eq i32 %N, 0
17+
br i1 %isZeroLength, label %loop.exit, label %loop.preheader
18+
19+
loop.preheader: ; preds = %entry
20+
%half_splat = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1056964608)
21+
%one_splat = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1065353216)
22+
%two_splat = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1073741824)
23+
br label %loop.body
24+
25+
loop.exit: ; preds = %loop.body, %entry
26+
ret void
27+
28+
loop.body: ; preds = %loop.body, %loop.preheader
29+
%lsr.iv1 = phi ptr [ %cgep2, %loop.body ], [ %x, %loop.preheader ]
30+
%lsr.iv = phi ptr [ %cgep1, %loop.body ], [ %y, %loop.preheader ]
31+
%index = phi i32 [ 0, %loop.preheader ], [ %index.next, %loop.body ]
32+
%vec_x1 = load <32 x i32>, ptr %lsr.iv1, align 128
33+
%vec_add_1 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %one_splat, <32 x i32> %vec_x1)
34+
%vec_add_2 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %half_splat, <32 x i32> %vec_x1)
35+
%vec_add_3 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %two_splat, <32 x i32> %vec_x1)
36+
%vec_add_4 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %vec_add_1, <32 x i32> %vec_add_2)
37+
%vec_add_5 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %vec_add_1, <32 x i32> %vec_add_3)
38+
%vec_add_6 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %vec_add_5, <32 x i32> %vec_add_4)
39+
store <32 x i32> %vec_add_6, ptr %lsr.iv, align 128
40+
%index.next = add nuw i32 %index, 32
41+
%continue = icmp ult i32 %index.next, %N
42+
%cgep1 = getelementptr i8, ptr %lsr.iv, i32 128
43+
%cgep2 = getelementptr i8, ptr %lsr.iv1, i32 128
44+
br i1 %continue, label %loop.body, label %loop.exit
45+
}
46+
47+
declare <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32)
48+
declare <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32>, <32 x i32>)
49+
...
50+
---
51+
name: add_parallel
52+
tracksRegLiveness: true
53+
body: |
54+
bb.0.entry:
55+
successors: %bb.2(0x30000000), %bb.1(0x50000000)
56+
liveins: $r0, $r1, $r2
57+
58+
%0:intregs = COPY $r2
59+
%1:intregs = COPY $r1
60+
%2:intregs = COPY $r0
61+
%3:predregs = C2_cmpeqi %2, 0
62+
J2_jumpt killed %3, %bb.2, implicit-def dead $pc
63+
J2_jump %bb.1, implicit-def dead $pc
64+
65+
bb.1.loop.preheader:
66+
successors: %bb.3(0x80000000)
67+
68+
%4:intregs = A2_tfrsi 1056964608
69+
%5:hvxvr = V6_lvsplatw killed %4
70+
%6:intregs = A2_tfrsi 1065353216
71+
%7:hvxvr = V6_lvsplatw killed %6
72+
%8:intregs = A2_tfrsi 1073741824
73+
%9:hvxvr = V6_lvsplatw killed %8
74+
%10:intregs = A2_addi %2, 31
75+
%11:intregs = S2_lsr_i_r %10, 5
76+
%12:intregs = COPY %11
77+
J2_loop0r %bb.3, %12, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
78+
J2_jump %bb.3, implicit-def dead $pc
79+
80+
bb.2.loop.exit:
81+
PS_jmpret $r31, implicit-def dead $pc
82+
83+
bb.3.loop.body (machine-block-address-taken):
84+
successors: %bb.3(0x7c000000), %bb.2(0x04000000)
85+
86+
%13:intregs = PHI %1, %bb.1, %14, %bb.3
87+
%15:intregs = PHI %0, %bb.1, %16, %bb.3
88+
%17:hvxvr, %14:intregs = V6_vL32b_pi %13, 128 :: (load (s1024) from %ir.lsr.iv1)
89+
%18:hvxvr = V6_vaddw %7, %17
90+
%19:hvxvr = V6_vaddw %5, %17
91+
%20:hvxvr = V6_vaddw %9, %17
92+
%21:hvxvr = V6_vaddw %18, killed %19
93+
%22:hvxvr = V6_vaddw %18, killed %20
94+
%23:hvxvr = V6_vaddw killed %22, killed %21
95+
%16:intregs = V6_vS32b_pi %15, 128, killed %23 :: (store (s1024) into %ir.lsr.iv)
96+
ENDLOOP0 %bb.3, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
97+
J2_jump %bb.2, implicit-def dead $pc
98+
99+
...

0 commit comments

Comments
 (0)