@@ -1328,19 +1328,19 @@ define void @unknown_inner_stride(ptr nocapture noundef %dst, ptr nocapture noun
1328
1328
; CHECK: vector.body:
1329
1329
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1330
1330
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 0
1331
- ; CHECK-NEXT: [[TMP16 :%.*]] = add nsw i64 [[TMP14]], [[TMP11]]
1332
- ; CHECK-NEXT: [[TMP17 :%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP16 ]]
1333
- ; CHECK-NEXT: [[TMP18 :%.*]] = getelementptr inbounds i32, ptr [[TMP17 ]], i32 0
1334
- ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP18 ]], align 4, !alias.scope [[META60:![0-9]+]]
1335
- ; CHECK-NEXT: [[TMP20 :%.*]] = add nsw i64 [[TMP14]], [[TMP12]]
1336
- ; CHECK-NEXT: [[TMP21 :%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP20 ]]
1337
- ; CHECK-NEXT: [[TMP22 :%.*]] = getelementptr inbounds i32, ptr [[TMP21 ]], i32 0
1338
- ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP22 ]], align 4, !alias.scope [[META63:![0-9]+]], !noalias [[META60]]
1339
- ; CHECK-NEXT: [[TMP23 :%.*]] = add nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD]]
1340
- ; CHECK-NEXT: store <4 x i32> [[TMP23 ]], ptr [[TMP22 ]], align 4, !alias.scope [[META63]], !noalias [[META60]]
1331
+ ; CHECK-NEXT: [[TMP15 :%.*]] = add nsw i64 [[TMP14]], [[TMP11]]
1332
+ ; CHECK-NEXT: [[TMP16 :%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP15 ]]
1333
+ ; CHECK-NEXT: [[TMP17 :%.*]] = getelementptr inbounds i32, ptr [[TMP16 ]], i32 0
1334
+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP17 ]], align 4, !alias.scope [[META60:![0-9]+]]
1335
+ ; CHECK-NEXT: [[TMP18 :%.*]] = add nsw i64 [[TMP14]], [[TMP12]]
1336
+ ; CHECK-NEXT: [[TMP19 :%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP18 ]]
1337
+ ; CHECK-NEXT: [[TMP20 :%.*]] = getelementptr inbounds i32, ptr [[TMP19 ]], i32 0
1338
+ ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP20 ]], align 4, !alias.scope [[META63:![0-9]+]], !noalias [[META60]]
1339
+ ; CHECK-NEXT: [[TMP21 :%.*]] = add nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD]]
1340
+ ; CHECK-NEXT: store <4 x i32> [[TMP21 ]], ptr [[TMP20 ]], align 4, !alias.scope [[META63]], !noalias [[META60]]
1341
1341
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1342
- ; CHECK-NEXT: [[TMP24 :%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1343
- ; CHECK-NEXT: br i1 [[TMP24 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP65:![0-9]+]]
1342
+ ; CHECK-NEXT: [[TMP22 :%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1343
+ ; CHECK-NEXT: br i1 [[TMP22 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP65:![0-9]+]]
1344
1344
; CHECK: middle.block:
1345
1345
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
1346
1346
; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_LOOP_EXIT]], label [[SCALAR_PH]]
@@ -1349,15 +1349,15 @@ define void @unknown_inner_stride(ptr nocapture noundef %dst, ptr nocapture noun
1349
1349
; CHECK-NEXT: br label [[INNER_LOOP:%.*]]
1350
1350
; CHECK: inner.loop:
1351
1351
; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER_LOOP]] ]
1352
- ; CHECK-NEXT: [[TMP25 :%.*]] = mul nsw i64 [[INNER_IV]], [[TMP0]]
1353
- ; CHECK-NEXT: [[TMP26 :%.*]] = add nsw i64 [[TMP25 ]], [[TMP11]]
1354
- ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP26 ]]
1355
- ; CHECK-NEXT: [[TMP27 :%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4
1356
- ; CHECK-NEXT: [[TMP28 :%.*]] = mul nsw i64 [[INNER_IV]], [[TMP1]]
1357
- ; CHECK-NEXT: [[TMP29 :%.*]] = add nsw i64 [[TMP28 ]], [[TMP12]]
1358
- ; CHECK-NEXT: [[ARRAYIDX11_US:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP29 ]]
1359
- ; CHECK-NEXT: [[TMP30 :%.*]] = load i32, ptr [[ARRAYIDX11_US]], align 4
1360
- ; CHECK-NEXT: [[ADD12_US:%.*]] = add nsw i32 [[TMP30 ]], [[TMP27 ]]
1352
+ ; CHECK-NEXT: [[TMP23 :%.*]] = mul nsw i64 [[INNER_IV]], [[TMP0]]
1353
+ ; CHECK-NEXT: [[TMP24 :%.*]] = add nsw i64 [[TMP23 ]], [[TMP11]]
1354
+ ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP24 ]]
1355
+ ; CHECK-NEXT: [[TMP25 :%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4
1356
+ ; CHECK-NEXT: [[TMP26 :%.*]] = mul nsw i64 [[INNER_IV]], [[TMP1]]
1357
+ ; CHECK-NEXT: [[TMP27 :%.*]] = add nsw i64 [[TMP26 ]], [[TMP12]]
1358
+ ; CHECK-NEXT: [[ARRAYIDX11_US:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP27 ]]
1359
+ ; CHECK-NEXT: [[TMP28 :%.*]] = load i32, ptr [[ARRAYIDX11_US]], align 4
1360
+ ; CHECK-NEXT: [[ADD12_US:%.*]] = add nsw i32 [[TMP28 ]], [[TMP25 ]]
1361
1361
; CHECK-NEXT: store i32 [[ADD12_US]], ptr [[ARRAYIDX11_US]], align 4
1362
1362
; CHECK-NEXT: [[INNER_IV_NEXT]] = add nuw nsw i64 [[INNER_IV]], 1
1363
1363
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], [[WIDE_TRIP_COUNT]]
@@ -1508,3 +1508,94 @@ inner.exit:
1508
1508
outer.exit:
1509
1509
ret void
1510
1510
}
1511
+
1512
+ ; TODO: STRIDE_CHECK can be eliminated via loop guards: the entry guard only lets the loop nest run when %Acols == 0, so the value it tests (derived from %Acols) is 0 and the slt-0 compare is known false.
1513
+ define void @stride_check_known_via_loop_guard (ptr %C , ptr %A , i32 %Acols ) {
1514
+ ; CHECK-LABEL: define void @stride_check_known_via_loop_guard
1515
+ ; CHECK-SAME: (ptr [[C:%.*]], ptr [[A:%.*]], i32 [[ACOLS:%.*]]) {
1516
+ ; CHECK-NEXT: entry:
1517
+ ; CHECK-NEXT: [[PRE_C:%.*]] = icmp ugt i32 [[ACOLS]], 0
1518
+ ; CHECK-NEXT: br i1 [[PRE_C]], label [[EXIT:%.*]], label [[OUTER_HEADER_PREHEADER:%.*]]
1519
+ ; CHECK: outer.header.preheader:
1520
+ ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 8
1521
+ ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[ACOLS]] to i64
1522
+ ; CHECK-NEXT: [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 3
1523
+ ; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[C]], i64 34359738368
1524
+ ; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
1525
+ ; CHECK: outer.header:
1526
+ ; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i32 [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ], [ 0, [[OUTER_HEADER_PREHEADER]] ]
1527
+ ; CHECK-NEXT: [[MUL_US:%.*]] = mul i32 [[OUTER_IV]], [[ACOLS]]
1528
+ ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr double, ptr [[A]], i32 [[MUL_US]]
1529
+ ; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
1530
+ ; CHECK: vector.scevcheck:
1531
+ ; CHECK-NEXT: br i1 true, label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
1532
+ ; CHECK: vector.memcheck:
1533
+ ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]]
1534
+ ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[C]], [[SCEVGEP]]
1535
+ ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1536
+ ; CHECK-NEXT: [[STRIDE_CHECK:%.*]] = icmp slt i64 [[TMP1]], 0
1537
+ ; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[FOUND_CONFLICT]], [[STRIDE_CHECK]]
1538
+ ; CHECK-NEXT: br i1 [[TMP2]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1539
+ ; CHECK: vector.ph:
1540
+ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1541
+ ; CHECK: vector.body:
1542
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1543
+ ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0
1544
+ ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[C]], i32 [[TMP3]]
1545
+ ; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX_US]], align 8, !alias.scope [[META69:![0-9]+]], !noalias [[META72:![0-9]+]]
1546
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[TMP5]], i64 0
1547
+ ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
1548
+ ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[TMP4]], i32 0
1549
+ ; CHECK-NEXT: store <4 x double> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8, !alias.scope [[META72]]
1550
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
1551
+ ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
1552
+ ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP74:![0-9]+]]
1553
+ ; CHECK: middle.block:
1554
+ ; CHECK-NEXT: br i1 true, label [[OUTER_LATCH]], label [[SCALAR_PH]]
1555
+ ; CHECK: scalar.ph:
1556
+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_HEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
1557
+ ; CHECK-NEXT: br label [[INNER:%.*]]
1558
+ ; CHECK: inner:
1559
+ ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER]] ]
1560
+ ; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds double, ptr [[C]], i32 [[INNER_IV]]
1561
+ ; CHECK-NEXT: [[L:%.*]] = load double, ptr [[ARRAYIDX_US]], align 8
1562
+ ; CHECK-NEXT: store double [[L]], ptr [[GEP_C]], align 8
1563
+ ; CHECK-NEXT: [[INNER_IV_NEXT]] = add i32 [[INNER_IV]], 1
1564
+ ; CHECK-NEXT: [[INNER_C:%.*]] = icmp eq i32 [[INNER_IV_NEXT]], 0
1565
+ ; CHECK-NEXT: br i1 [[INNER_C]], label [[OUTER_LATCH]], label [[INNER]], !llvm.loop [[LOOP75:![0-9]+]]
1566
+ ; CHECK: outer.latch:
1567
+ ; CHECK-NEXT: [[OUTER_IV_NEXT]] = add i32 [[OUTER_IV]], 1
1568
+ ; CHECK-NEXT: [[OUTER_C:%.*]] = icmp ult i32 [[OUTER_IV]], 128
1569
+ ; CHECK-NEXT: br i1 [[OUTER_C]], label [[EXIT_LOOPEXIT:%.*]], label [[OUTER_HEADER]]
1570
+ ; CHECK: exit.loopexit:
1571
+ ; CHECK-NEXT: br label [[EXIT]]
1572
+ ; CHECK: exit:
1573
+ ; CHECK-NEXT: ret void
1574
+ ;
1575
+ entry:
1576
+ %pre.c = icmp ugt i32 %Acols , 0 ; unsigned > 0, i.e. true for every non-zero %Acols
1577
+ br i1 %pre.c , label %exit , label %outer.header ; loop guard: the loop nest is reached only when %Acols == 0
1578
+
1579
+ outer.header:
1580
+ %outer.iv = phi i32 [ 0 , %entry ], [ %outer.iv.next , %outer.latch ]
1581
+ %mul.us = mul i32 %outer.iv , %Acols ; element offset into %A for this outer iteration
1582
+ %arrayidx.us = getelementptr double , ptr %A , i32 %mul.us
1583
+ br label %inner
1584
+
1585
+ inner:
1586
+ %inner.iv = phi i32 [ 0 , %outer.header ], [ %inner.iv.next , %inner ]
1587
+ %gep.C = getelementptr inbounds double , ptr %C , i32 %inner.iv
1588
+ %l = load double , ptr %arrayidx.us , align 8 ; address comes from outer.header, so it does not change inside the inner loop
1589
+ store double %l , ptr %gep.C , align 8
1590
+ %inner.iv.next = add i32 %inner.iv , 1
1591
+ %inner.c = icmp eq i32 %inner.iv.next , 0 ; true only once the i32 induction variable has wrapped back to 0
1592
+ br i1 %inner.c , label %outer.latch , label %inner
1593
+
1594
+ outer.latch:
1595
+ %outer.iv.next = add i32 %outer.iv , 1
1596
+ %outer.c = icmp ult i32 %outer.iv , 128 ; compares the pre-increment %outer.iv, not %outer.iv.next
1597
+ br i1 %outer.c , label %exit , label %outer.header ; note: leaves the loop when the compare is true
1598
+
1599
+ exit:
1600
+ ret void
1601
+ }
0 commit comments