Skip to content

Commit 1645976

Browse files
committed
[LV] Add test for RT check hoisting where loop guards simplify check.
Add a test case showing a missed simplification when hoisting runtime checks: the stride check is not simplified away because loop guards are not applied.
1 parent eea05c6 commit 1645976

File tree

1 file changed

+112
-21
lines changed

1 file changed

+112
-21
lines changed

llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll

Lines changed: 112 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1328,19 +1328,19 @@ define void @unknown_inner_stride(ptr nocapture noundef %dst, ptr nocapture noun
13281328
; CHECK: vector.body:
13291329
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
13301330
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 0
1331-
; CHECK-NEXT: [[TMP16:%.*]] = add nsw i64 [[TMP14]], [[TMP11]]
1332-
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP16]]
1333-
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
1334-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP18]], align 4, !alias.scope [[META60:![0-9]+]]
1335-
; CHECK-NEXT: [[TMP20:%.*]] = add nsw i64 [[TMP14]], [[TMP12]]
1336-
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP20]]
1337-
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 0
1338-
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4, !alias.scope [[META63:![0-9]+]], !noalias [[META60]]
1339-
; CHECK-NEXT: [[TMP23:%.*]] = add nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD]]
1340-
; CHECK-NEXT: store <4 x i32> [[TMP23]], ptr [[TMP22]], align 4, !alias.scope [[META63]], !noalias [[META60]]
1331+
; CHECK-NEXT: [[TMP15:%.*]] = add nsw i64 [[TMP14]], [[TMP11]]
1332+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP15]]
1333+
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
1334+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4, !alias.scope [[META60:![0-9]+]]
1335+
; CHECK-NEXT: [[TMP18:%.*]] = add nsw i64 [[TMP14]], [[TMP12]]
1336+
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP18]]
1337+
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0
1338+
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP20]], align 4, !alias.scope [[META63:![0-9]+]], !noalias [[META60]]
1339+
; CHECK-NEXT: [[TMP21:%.*]] = add nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD]]
1340+
; CHECK-NEXT: store <4 x i32> [[TMP21]], ptr [[TMP20]], align 4, !alias.scope [[META63]], !noalias [[META60]]
13411341
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1342-
; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1343-
; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP65:![0-9]+]]
1342+
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1343+
; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP65:![0-9]+]]
13441344
; CHECK: middle.block:
13451345
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
13461346
; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_LOOP_EXIT]], label [[SCALAR_PH]]
@@ -1349,15 +1349,15 @@ define void @unknown_inner_stride(ptr nocapture noundef %dst, ptr nocapture noun
13491349
; CHECK-NEXT: br label [[INNER_LOOP:%.*]]
13501350
; CHECK: inner.loop:
13511351
; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER_LOOP]] ]
1352-
; CHECK-NEXT: [[TMP25:%.*]] = mul nsw i64 [[INNER_IV]], [[TMP0]]
1353-
; CHECK-NEXT: [[TMP26:%.*]] = add nsw i64 [[TMP25]], [[TMP11]]
1354-
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP26]]
1355-
; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4
1356-
; CHECK-NEXT: [[TMP28:%.*]] = mul nsw i64 [[INNER_IV]], [[TMP1]]
1357-
; CHECK-NEXT: [[TMP29:%.*]] = add nsw i64 [[TMP28]], [[TMP12]]
1358-
; CHECK-NEXT: [[ARRAYIDX11_US:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP29]]
1359-
; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ARRAYIDX11_US]], align 4
1360-
; CHECK-NEXT: [[ADD12_US:%.*]] = add nsw i32 [[TMP30]], [[TMP27]]
1352+
; CHECK-NEXT: [[TMP23:%.*]] = mul nsw i64 [[INNER_IV]], [[TMP0]]
1353+
; CHECK-NEXT: [[TMP24:%.*]] = add nsw i64 [[TMP23]], [[TMP11]]
1354+
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP24]]
1355+
; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4
1356+
; CHECK-NEXT: [[TMP26:%.*]] = mul nsw i64 [[INNER_IV]], [[TMP1]]
1357+
; CHECK-NEXT: [[TMP27:%.*]] = add nsw i64 [[TMP26]], [[TMP12]]
1358+
; CHECK-NEXT: [[ARRAYIDX11_US:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP27]]
1359+
; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX11_US]], align 4
1360+
; CHECK-NEXT: [[ADD12_US:%.*]] = add nsw i32 [[TMP28]], [[TMP25]]
13611361
; CHECK-NEXT: store i32 [[ADD12_US]], ptr [[ARRAYIDX11_US]], align 4
13621362
; CHECK-NEXT: [[INNER_IV_NEXT]] = add nuw nsw i64 [[INNER_IV]], 1
13631363
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], [[WIDE_TRIP_COUNT]]
@@ -1508,3 +1508,94 @@ inner.exit:
15081508
outer.exit:
15091509
ret void
15101510
}
1511+
1512+
; TODO: STRIDE_CHECK can be eliminated via loop guards.
; The entry block branches to %exit when %Acols is unsigned-greater-than 0, so
; the loop nest is only reached with %Acols == 0. The stride check emitted in
; vector.memcheck tests whether (sext %Acols) << 3 is negative, which cannot
; hold under that guard, yet the expected output below still contains it.
1513+
define void @stride_check_known_via_loop_guard(ptr %C, ptr %A, i32 %Acols) {
1514+
; CHECK-LABEL: define void @stride_check_known_via_loop_guard
1515+
; CHECK-SAME: (ptr [[C:%.*]], ptr [[A:%.*]], i32 [[ACOLS:%.*]]) {
1516+
; CHECK-NEXT: entry:
1517+
; CHECK-NEXT: [[PRE_C:%.*]] = icmp ugt i32 [[ACOLS]], 0
1518+
; CHECK-NEXT: br i1 [[PRE_C]], label [[EXIT:%.*]], label [[OUTER_HEADER_PREHEADER:%.*]]
1519+
; CHECK: outer.header.preheader:
1520+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 8
1521+
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[ACOLS]] to i64
1522+
; CHECK-NEXT: [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 3
1523+
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[C]], i64 34359738368
1524+
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
1525+
; CHECK: outer.header:
1526+
; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i32 [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ], [ 0, [[OUTER_HEADER_PREHEADER]] ]
1527+
; CHECK-NEXT: [[MUL_US:%.*]] = mul i32 [[OUTER_IV]], [[ACOLS]]
1528+
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr double, ptr [[A]], i32 [[MUL_US]]
1529+
; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
1530+
; CHECK: vector.scevcheck:
1531+
; CHECK-NEXT: br i1 true, label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
1532+
; CHECK: vector.memcheck:
1533+
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]]
1534+
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[C]], [[SCEVGEP]]
1535+
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1536+
; CHECK-NEXT: [[STRIDE_CHECK:%.*]] = icmp slt i64 [[TMP1]], 0
1537+
; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[FOUND_CONFLICT]], [[STRIDE_CHECK]]
1538+
; CHECK-NEXT: br i1 [[TMP2]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1539+
; CHECK: vector.ph:
1540+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1541+
; CHECK: vector.body:
1542+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1543+
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0
1544+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[C]], i32 [[TMP3]]
1545+
; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX_US]], align 8, !alias.scope [[META69:![0-9]+]], !noalias [[META72:![0-9]+]]
1546+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[TMP5]], i64 0
1547+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
1548+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[TMP4]], i32 0
1549+
; CHECK-NEXT: store <4 x double> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8, !alias.scope [[META72]]
1550+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
1551+
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
1552+
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP74:![0-9]+]]
1553+
; CHECK: middle.block:
1554+
; CHECK-NEXT: br i1 true, label [[OUTER_LATCH]], label [[SCALAR_PH]]
1555+
; CHECK: scalar.ph:
1556+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_HEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
1557+
; CHECK-NEXT: br label [[INNER:%.*]]
1558+
; CHECK: inner:
1559+
; CHECK-NEXT: [[INNER_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER]] ]
1560+
; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds double, ptr [[C]], i32 [[INNER_IV]]
1561+
; CHECK-NEXT: [[L:%.*]] = load double, ptr [[ARRAYIDX_US]], align 8
1562+
; CHECK-NEXT: store double [[L]], ptr [[GEP_C]], align 8
1563+
; CHECK-NEXT: [[INNER_IV_NEXT]] = add i32 [[INNER_IV]], 1
1564+
; CHECK-NEXT: [[INNER_C:%.*]] = icmp eq i32 [[INNER_IV_NEXT]], 0
1565+
; CHECK-NEXT: br i1 [[INNER_C]], label [[OUTER_LATCH]], label [[INNER]], !llvm.loop [[LOOP75:![0-9]+]]
1566+
; CHECK: outer.latch:
1567+
; CHECK-NEXT: [[OUTER_IV_NEXT]] = add i32 [[OUTER_IV]], 1
1568+
; CHECK-NEXT: [[OUTER_C:%.*]] = icmp ult i32 [[OUTER_IV]], 128
1569+
; CHECK-NEXT: br i1 [[OUTER_C]], label [[EXIT_LOOPEXIT:%.*]], label [[OUTER_HEADER]]
1570+
; CHECK: exit.loopexit:
1571+
; CHECK-NEXT: br label [[EXIT]]
1572+
; CHECK: exit:
1573+
; CHECK-NEXT: ret void
1574+
;
1575+
entry:
1576+
%pre.c = icmp ugt i32 %Acols, 0 ; true for any non-zero %Acols
1577+
br i1 %pre.c, label %exit, label %outer.header ; loop guard: the loops are entered only when %Acols == 0
1578+
1579+
outer.header:
1580+
%outer.iv = phi i32 [ 0, %entry ], [ %outer.iv.next, %outer.latch ]
1581+
%mul.us = mul i32 %outer.iv, %Acols
1582+
%arrayidx.us = getelementptr double, ptr %A, i32 %mul.us ; computed once per outer iteration; loaded from inside %inner
1583+
br label %inner
1584+
1585+
inner:
1586+
%inner.iv = phi i32 [ 0, %outer.header ], [ %inner.iv.next, %inner ]
1587+
%gep.C = getelementptr inbounds double, ptr %C, i32 %inner.iv
1588+
%l = load double, ptr %arrayidx.us, align 8 ; address does not change within the inner loop
1589+
store double %l, ptr %gep.C, align 8
1590+
%inner.iv.next = add i32 %inner.iv, 1
1591+
%inner.c = icmp eq i32 %inner.iv.next, 0 ; the add carries no nuw/nsw, so this holds only once the i32 increment wraps to 0
1592+
br i1 %inner.c, label %outer.latch, label %inner
1593+
1594+
outer.latch:
1595+
%outer.iv.next = add i32 %outer.iv, 1
1596+
%outer.c = icmp ult i32 %outer.iv, 128 ; compares the pre-increment %outer.iv
1597+
br i1 %outer.c, label %exit, label %outer.header ; NOTE(review): exits when %outer.iv < 128, which already holds for the initial value 0 - confirm this is intended
1598+
1599+
exit:
1600+
ret void
1601+
}

0 commit comments

Comments
 (0)