diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index a3186e61b94ad..baa16306ebf5d 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -863,6 +863,11 @@ ConstantRange computeConstantRange(const Value *V, bool ForSigned,
                                    const DominatorTree *DT = nullptr,
                                    unsigned Depth = 0);
 
+/// Combine constant ranges from computeConstantRange() and computeKnownBits().
+ConstantRange
+computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
+                                       bool ForSigned, const SimplifyQuery &SQ);
+
 /// Return true if this function can prove that the instruction I will
 /// always transfer execution to one of its successors (including the next
 /// instruction that follows within a basic block). E.g. this is not
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 5beac5547d65e..9121a69629f86 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -3005,7 +3005,7 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
 }
 
 static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
-                                       Value *RHS, const InstrInfoQuery &IIQ) {
+                                       Value *RHS, const SimplifyQuery &SQ) {
   Type *ITy = getCompareTy(RHS); // The return type.
 
   Value *X;
@@ -3031,8 +3031,8 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
   if (RHS_CR.isFullSet())
     return ConstantInt::getTrue(ITy);
 
-  ConstantRange LHS_CR =
-      computeConstantRange(LHS, CmpInst::isSigned(Pred), IIQ.UseInstrInfo);
+  ConstantRange LHS_CR = llvm::computeConstantRangeIncludingKnownBits(
+      LHS, CmpInst::isSigned(Pred), SQ);
   if (!LHS_CR.isFullSet()) {
     if (RHS_CR.contains(LHS_CR))
       return ConstantInt::getTrue(ITy);
@@ -3043,7 +3043,7 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
   // (mul nuw/nsw X, MulC) != C --> true (if C is not a multiple of MulC)
   // (mul nuw/nsw X, MulC) == C --> false (if C is not a multiple of MulC)
   const APInt *MulC;
-  if (IIQ.UseInstrInfo && ICmpInst::isEquality(Pred) &&
+  if (SQ.IIQ.UseInstrInfo && ICmpInst::isEquality(Pred) &&
      ((match(LHS, m_NUWMul(m_Value(), m_APIntAllowUndef(MulC))) &&
        *MulC != 0 && C->urem(*MulC) != 0) ||
       (match(LHS, m_NSWMul(m_Value(), m_APIntAllowUndef(MulC))) &&
@@ -3749,7 +3749,7 @@ static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
   if (Value *V = simplifyICmpWithZero(Pred, LHS, RHS, Q))
     return V;
 
-  if (Value *V = simplifyICmpWithConstant(Pred, LHS, RHS, Q.IIQ))
+  if (Value *V = simplifyICmpWithConstant(Pred, LHS, RHS, Q))
     return V;
 
   // If both operands have range metadata, use the metadata
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 769d921eb1e8d..cac2602d455f9 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -6289,10 +6289,10 @@ static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
 }
 
 /// Combine constant ranges from computeConstantRange() and computeKnownBits().
-static ConstantRange
-computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
-                                       bool ForSigned,
-                                       const SimplifyQuery &SQ) {
+ConstantRange
+llvm::computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
+                                             bool ForSigned,
+                                             const SimplifyQuery &SQ) {
   ConstantRange CR1 =
       ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned);
   ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo);
diff --git a/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll b/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll
index 793d6ffa3e34e..2f7fae583dcdb 100644
--- a/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll
+++ b/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll
@@ -335,11 +335,7 @@ define i1 @blsi_ne_is_true(i32 %x) {
 
 define <2 x i1> @blsi_ge_is_false_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @blsi_ge_is_false_vec(
-; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]],
-; CHECK-NEXT:    [[X2:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X1]]
-; CHECK-NEXT:    [[X3:%.*]] = and <2 x i32> [[X2]], [[X]]
-; CHECK-NEXT:    [[Z:%.*]] = icmp ugt <2 x i32> [[X3]],
-; CHECK-NEXT:    ret <2 x i1> [[Z]]
+; CHECK-NEXT:    ret <2 x i1> zeroinitializer
 ;
   %x1 = or <2 x i32> %x,
   %x2 = sub <2 x i32> , %x1
@@ -350,11 +346,7 @@ define <2 x i1> @blsi_ge_is_false_vec(<2 x i32> %x) {
 
 define <2 x i1> @blsi_ge_is_false_diff_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @blsi_ge_is_false_diff_vec(
-; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]],
-; CHECK-NEXT:    [[X2:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X1]]
-; CHECK-NEXT:    [[X3:%.*]] = and <2 x i32> [[X2]], [[X]]
-; CHECK-NEXT:    [[Z:%.*]] = icmp ugt <2 x i32> [[X3]],
-; CHECK-NEXT:    ret <2 x i1> [[Z]]
+; CHECK-NEXT:    ret <2 x i1> zeroinitializer
 ;
   %x1 = or <2 x i32> %x,
   %x2 = sub <2 x i32> , %x1
diff --git a/llvm/test/Analysis/ValueTracking/monotonic-phi.ll b/llvm/test/Analysis/ValueTracking/monotonic-phi.ll
index d55d2ed3d114f..d9519bc5784c0 100644
--- a/llvm/test/Analysis/ValueTracking/monotonic-phi.ll
+++ b/llvm/test/Analysis/ValueTracking/monotonic-phi.ll
@@ -484,8 +484,7 @@ define i1 @test_shl_zero_start(i8 %n) {
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[A]], [[N:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP1]], label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[A]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    ret i1 true
 ;
 entry:
   br label %loop
@@ -559,8 +558,7 @@ define i1 @test_lshr_zero_start(i8 %n) {
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[A]], [[N:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP1]], label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[A]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    ret i1 true
 ;
 entry:
   br label %loop
@@ -633,8 +631,7 @@ define i1 @test_ashr_zero_start(i8 %n) {
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[A]], [[N:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP1]], label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[A]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    ret i1 true
 ;
 entry:
   br label %loop
diff --git a/llvm/test/Transforms/Attributor/lvi-after-jumpthreading.ll b/llvm/test/Transforms/Attributor/lvi-after-jumpthreading.ll
index 2aa95216a6656..84c6213c75840 100644
--- a/llvm/test/Transforms/Attributor/lvi-after-jumpthreading.ll
+++ b/llvm/test/Transforms/Attributor/lvi-after-jumpthreading.ll
@@ -148,8 +148,7 @@ define i32 @test4(i32 %i, i1 %f, i32 %n) {
 ; CHECK-NEXT:    call void @dummy(i1 [[F]]) #[[ATTR2]]
 ; CHECK-NEXT:    [[CONSUME:%.*]] = call i32 @exit()
 ; CHECK-NEXT:    call void @llvm.assume(i1 noundef [[F]])
-; CHECK-NEXT:    [[COND:%.*]] = icmp eq i1 [[F]], false
-; CHECK-NEXT:    br i1 [[COND]], label [[EXIT]], label [[CONT:%.*]]
+; CHECK-NEXT:    br label [[CONT:%.*]]
 ; CHECK:       exit2:
 ; CHECK-NEXT:    ret i32 30
 ;
diff --git a/llvm/test/Transforms/InstSimplify/call.ll b/llvm/test/Transforms/InstSimplify/call.ll
index 52c207a276046..ebc5c8bb0e52b 100644
--- a/llvm/test/Transforms/InstSimplify/call.ll
+++ b/llvm/test/Transforms/InstSimplify/call.ll
@@ -1582,9 +1582,7 @@ define i1 @ctlz_i1_non_poison_eq_false(i1 %x) {
 
 define i1 @ctlz_i1_poison_eq_false(i1 %x) {
 ; CHECK-LABEL: @ctlz_i1_poison_eq_false(
-; CHECK-NEXT:    [[CT:%.*]] = call i1 @llvm.ctlz.i1(i1 [[X:%.*]], i1 true)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i1 [[CT]], false
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    ret i1 true
 ;
   %ct = call i1 @llvm.ctlz.i1(i1 %x, i1 true)
   %cmp = icmp eq i1 %ct, false
@@ -1604,9 +1602,7 @@ define i1 @cttz_i1_non_poison_eq_false(i1 %x) {
 
 define i1 @cttz_i1_poison_eq_false(i1 %x) {
 ; CHECK-LABEL: @cttz_i1_poison_eq_false(
-; CHECK-NEXT:    [[CT:%.*]] = call i1 @llvm.cttz.i1(i1 [[X:%.*]], i1 true)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i1 [[CT]], false
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    ret i1 true
 ;
   %ct = call i1 @llvm.cttz.i1(i1 %x, i1 true)
   %cmp = icmp eq i1 %ct, false
diff --git a/llvm/test/Transforms/InstSimplify/icmp-constant.ll b/llvm/test/Transforms/InstSimplify/icmp-constant.ll
index 04261f6f40b7c..1123a7092d3aa 100644
--- a/llvm/test/Transforms/InstSimplify/icmp-constant.ll
+++ b/llvm/test/Transforms/InstSimplify/icmp-constant.ll
@@ -433,9 +433,7 @@ define <2 x i1> @or1_vec_partial_undef(<2 x i32> %X) {
 ; Single bit OR.
 define i1 @or2_true(i8 %x) {
 ; CHECK-LABEL: @or2_true(
-; CHECK-NEXT:    [[Y:%.*]] = or i8 [[X:%.*]], 64
-; CHECK-NEXT:    [[Z:%.*]] = icmp sge i8 [[Y]], -64
-; CHECK-NEXT:    ret i1 [[Z]]
+; CHECK-NEXT:    ret i1 true
 ;
   %y = or i8 %x, 64
   %z = icmp sge i8 %y, -64
@@ -457,9 +455,7 @@ define i1 @or2_unknown(i8 %x) {
 ; 78 = 0b01001110; -50 = 0b11001110
 define i1 @or3_true(i8 %x) {
 ; CHECK-LABEL: @or3_true(
-; CHECK-NEXT:    [[Y:%.*]] = or i8 [[X:%.*]], 78
-; CHECK-NEXT:    [[Z:%.*]] = icmp sge i8 [[Y]], -50
-; CHECK-NEXT:    ret i1 [[Z]]
+; CHECK-NEXT:    ret i1 true
 ;
   %y = or i8 %x, 78
   %z = icmp sge i8 %y, -50
@@ -573,9 +569,7 @@ define i1 @and3_unknown1(i8 %x) {
 
 define i1 @and3_true2(i8 %x) {
 ; CHECK-LABEL: @and3_true2(
-; CHECK-NEXT:    [[Y:%.*]] = and i8 [[X:%.*]], -75
-; CHECK-NEXT:    [[Z:%.*]] = icmp sle i8 [[Y]], 53
-; CHECK-NEXT:    ret i1 [[Z]]
+; CHECK-NEXT:    ret i1 true
 ;
   %y = and i8 %x, -75
   %z = icmp sle i8 %y, 53
@@ -1140,3 +1134,19 @@ define <2 x i1> @heterogeneous_constvector(<2 x i8> %x) {
   %c = icmp ult <2 x i8> %x,
   ret <2 x i1> %c
 }
+
+define i8 @infer_sub_with_knownbits_info(i8 %a, i8 %b) {
+; CHECK-LABEL: @infer_sub_with_knownbits_info(
+; CHECK-NEXT:    [[A1:%.*]] = or i8 [[A:%.*]], 1
+; CHECK-NEXT:    [[A2:%.*]] = shl i8 [[B:%.*]], 1
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[A1]], [[A2]]
+; CHECK-NEXT:    ret i8 [[SUB]]
+;
+  %a1 = or i8 %a, 1
+  %a2 = shl i8 %b, 1
+  %sub = sub i8 %a1, %a2
+  %umax = tail call i8 @llvm.umax.i8(i8 %sub, i8 1)
+  ret i8 %umax
+}
+
+declare i8 @llvm.umax.i8(i8, i8)
diff --git a/llvm/test/Transforms/JumpThreading/pr62908.ll b/llvm/test/Transforms/JumpThreading/pr62908.ll
index 4c389ee040b90..0c93803cdfef6 100644
--- a/llvm/test/Transforms/JumpThreading/pr62908.ll
+++ b/llvm/test/Transforms/JumpThreading/pr62908.ll
@@ -5,7 +5,14 @@
 define i32 @test() {
 ; CHECK-LABEL: define i32 @test() {
-; CHECK-NEXT:  end:
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[END:%.*]]
+; CHECK:       unreachable:
+;
CHECK-NEXT: [[SH_PROM:%.*]] = zext i32 -1 to i64 +; CHECK-NEXT: [[SHL:%.*]] = shl nsw i64 -1, [[SH_PROM]] +; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[SHL]] to i32 +; CHECK-NEXT: br label [[END]] +; CHECK: end: ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll index 6cad2e1c4a324..58a5d3c0b21ea 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll @@ -4578,10 +4578,8 @@ define void @test8() { ; EPILOG-BLOCK: outerloop: ; EPILOG-BLOCK-NEXT: %i = phi i64 [ 3, %bb ], [ 0, %outerloop.loopexit.1 ] ; EPILOG-BLOCK-NEXT: %0 = sub i64 100, %i -; EPILOG-BLOCK-NEXT: %1 = sub i64 99, %i ; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 -; EPILOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 -; EPILOG-BLOCK-NEXT: br i1 %2, label %exit.unr-lcssa, label %outerloop.new +; EPILOG-BLOCK-NEXT: br i1 false, label %exit.unr-lcssa, label %outerloop.new ; EPILOG-BLOCK: outerloop.new: ; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-BLOCK-NEXT: br label %innerH @@ -4711,7 +4709,6 @@ define void @test8() { ; PROLOG-BLOCK: outerloop: ; PROLOG-BLOCK-NEXT: %i = phi i64 [ 3, %bb ], [ 0, %outerloop.loopexit.1 ] ; PROLOG-BLOCK-NEXT: %0 = sub i64 100, %i -; PROLOG-BLOCK-NEXT: %1 = sub i64 99, %i ; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 ; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %innerH.prol.preheader, label %innerH.prol.loopexit @@ -4724,8 +4721,7 @@ define void @test8() { ; PROLOG-BLOCK-NEXT: br label %innerH.prol.loopexit ; PROLOG-BLOCK: innerH.prol.loopexit: ; PROLOG-BLOCK-NEXT: %i3.unr = phi i64 [ %i, %outerloop ], [ %i4.prol, %latch.prol ] -; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 -; PROLOG-BLOCK-NEXT: br i1 %2, label %exit.loopexit, label %outerloop.new +; PROLOG-BLOCK-NEXT: br i1 false, label %exit.loopexit, label %outerloop.new ; PROLOG-BLOCK: outerloop.new: ; PROLOG-BLOCK-NEXT: br label %innerH ; PROLOG-BLOCK: innerH: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index 144b29d84198a..f4a66cb708775 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -10,17 +10,14 @@ target triple = "aarch64-unknown-linux-gnu" define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-LABEL: @test_widen( ; TFNONE-NEXT: entry: +; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; TFNONE: vector.ph: ; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] -; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; TFNONE: vector.ph: +; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP1]] +; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 -; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] -; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; TFNONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] ; TFNONE: vector.body: ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, 
[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -29,9 +26,9 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP5:%.*]] = call @foo_vector( [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; TFNONE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: store [[TMP5]], ptr [[TMP6]], align 8 -; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; TFNONE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TFNONE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TFNONE: middle.block: ; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; TFNONE: scalar.ph: @@ -60,19 +57,19 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TFCOMMON-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TFCOMMON-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; TFCOMMON-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; TFCOMMON-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFCOMMON-NEXT: br label [[VECTOR_BODY:%.*]] ; TFCOMMON: vector.body: ; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; TFCOMMON-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] -; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFCOMMON-NEXT: [[TMP6:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) -; TFCOMMON-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP6]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; TFCOMMON-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] +; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFCOMMON-NEXT: [[TMP8:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) +; TFCOMMON-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] +; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFCOMMON-NEXT: [[TMP10:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; TFCOMMON-NEXT: [[TMP11:%.*]] = extractelement [[TMP10]], i32 0 @@ -102,17 +99,14 @@ for.cond.cleanup: define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; 
TFNONE-LABEL: @test_if_then( ; TFNONE-NEXT: entry: +; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; TFNONE: vector.ph: ; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] -; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; TFNONE: vector.ph: +; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP1]] +; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 -; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] -; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; TFNONE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 ; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] ; TFNONE: vector.body: ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -124,9 +118,9 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[PREDPHI:%.*]] = select [[TMP7]], zeroinitializer, [[TMP6]] ; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 8 -; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; TFNONE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TFNONE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TFNONE: middle.block: ; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; TFNONE: scalar.ph: @@ -135,16 +129,16 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE: for.body: ; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] -; TFNONE-NEXT: [[TMP12:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; TFNONE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP12]], 50 +; TFNONE-NEXT: [[TMP10:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; TFNONE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP10]], 50 ; TFNONE-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END]] ; TFNONE: if.then: -; TFNONE-NEXT: [[TMP13:%.*]] = call i64 @foo(i64 [[TMP12]]) #[[ATTR3]] +; TFNONE-NEXT: [[TMP11:%.*]] = call i64 @foo(i64 [[TMP10]]) #[[ATTR3]] ; TFNONE-NEXT: br label [[IF_END]] ; TFNONE: if.end: -; TFNONE-NEXT: [[TMP14:%.*]] = phi i64 [ [[TMP13]], [[IF_THEN]] ], [ 0, [[FOR_BODY]] ] +; TFNONE-NEXT: [[TMP12:%.*]] = phi i64 [ [[TMP11]], [[IF_THEN]] ], [ 0, [[FOR_BODY]] ] ; TFNONE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDVARS_IV]] -; TFNONE-NEXT: store i64 [[TMP14]], ptr [[ARRAYIDX1]], align 8 +; TFNONE-NEXT: store i64 [[TMP12]], ptr [[ARRAYIDX1]], align 8 ; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 ; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] @@ -161,25 +155,25 @@ define void @test_if_then(ptr 
noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TFCOMMON-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; TFCOMMON-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2 +; TFCOMMON-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; TFCOMMON-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFCOMMON-NEXT: br label [[VECTOR_BODY:%.*]] ; TFCOMMON: vector.body: ; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; TFCOMMON-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFCOMMON-NEXT: [[TMP6:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i64 50, i64 0), poison, zeroinitializer) -; TFCOMMON-NEXT: [[TMP7:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP6]], zeroinitializer -; TFCOMMON-NEXT: [[TMP8:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP7]]) -; TFCOMMON-NEXT: [[TMP9:%.*]] = xor [[TMP6]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; TFCOMMON-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP9]], zeroinitializer -; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select [[TMP10]], zeroinitializer, [[TMP8]] -; TFCOMMON-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] -; TFCOMMON-NEXT: [[TMP12:%.*]] = or [[TMP7]], [[TMP10]] -; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP11]], i32 8, [[TMP12]]) -; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP14]] +; TFCOMMON-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] +; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFCOMMON-NEXT: [[TMP8:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i64 50, i64 0), poison, zeroinitializer) +; TFCOMMON-NEXT: [[TMP9:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP8]], zeroinitializer +; TFCOMMON-NEXT: [[TMP10:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP9]]) +; TFCOMMON-NEXT: [[TMP11:%.*]] = xor [[TMP8]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; TFCOMMON-NEXT: [[TMP12:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP11]], zeroinitializer +; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select [[TMP12]], zeroinitializer, [[TMP10]] +; TFCOMMON-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] +; TFCOMMON-NEXT: [[TMP14:%.*]] = or [[TMP9]], [[TMP12]] +; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP13]], i32 8, [[TMP14]]) +; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFCOMMON-NEXT: [[TMP15:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; TFCOMMON-NEXT: [[TMP16:%.*]] = extractelement [[TMP15]], i32 0 @@ -220,17 +214,14 @@ 
for.cond.cleanup: define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-LABEL: @test_widen_if_then_else( ; TFNONE-NEXT: entry: +; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; TFNONE: vector.ph: ; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] -; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; TFNONE: vector.ph: +; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP1]] +; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 -; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] -; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; TFNONE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 ; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] ; TFNONE: vector.body: ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -243,9 +234,9 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[PREDPHI:%.*]] = select [[TMP6]], [[TMP7]], [[TMP8]] ; TFNONE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: store [[PREDPHI]], ptr [[TMP9]], align 8 -; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; TFNONE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TFNONE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; TFNONE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TFNONE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TFNONE: middle.block: ; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; TFNONE: scalar.ph: @@ -254,19 +245,19 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE: for.body: ; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] -; TFNONE-NEXT: [[TMP13:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; TFNONE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 50 +; TFNONE-NEXT: [[TMP11:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; TFNONE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 50 ; TFNONE-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; TFNONE: if.then: -; TFNONE-NEXT: [[TMP14:%.*]] = call i64 @foo(i64 [[TMP13]]) #[[ATTR4:[0-9]+]] +; TFNONE-NEXT: [[TMP12:%.*]] = call i64 @foo(i64 [[TMP11]]) #[[ATTR4:[0-9]+]] ; TFNONE-NEXT: br label [[IF_END]] ; TFNONE: if.else: -; TFNONE-NEXT: [[TMP15:%.*]] = call i64 @foo(i64 0) #[[ATTR4]] +; TFNONE-NEXT: [[TMP13:%.*]] = call i64 @foo(i64 0) #[[ATTR4]] ; TFNONE-NEXT: br label [[IF_END]] ; TFNONE: if.end: -; TFNONE-NEXT: [[TMP16:%.*]] = phi i64 [ [[TMP14]], [[IF_THEN]] ], [ [[TMP15]], [[IF_ELSE]] ] +; TFNONE-NEXT: [[TMP14:%.*]] = phi i64 [ [[TMP12]], [[IF_THEN]] ], [ [[TMP13]], [[IF_ELSE]] ] ; TFNONE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDVARS_IV]] -; TFNONE-NEXT: store i64 [[TMP16]], ptr [[ARRAYIDX1]], align 8 +; TFNONE-NEXT: store i64 
[[TMP14]], ptr [[ARRAYIDX1]], align 8 ; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 ; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] @@ -283,26 +274,26 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TFCOMMON-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; TFCOMMON-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 +; TFCOMMON-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; TFCOMMON-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFCOMMON-NEXT: br label [[VECTOR_BODY:%.*]] ; TFCOMMON: vector.body: ; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; TFCOMMON-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFCOMMON-NEXT: [[TMP6:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i64 50, i64 0), poison, zeroinitializer) -; TFCOMMON-NEXT: [[TMP7:%.*]] = xor [[TMP6]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; TFCOMMON-NEXT: [[TMP8:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP7]], zeroinitializer -; TFCOMMON-NEXT: [[TMP9:%.*]] = call @foo_vector( zeroinitializer, [[TMP8]]) -; TFCOMMON-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP6]], zeroinitializer -; TFCOMMON-NEXT: [[TMP11:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP10]]) -; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select [[TMP8]], [[TMP9]], [[TMP11]] -; TFCOMMON-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] -; TFCOMMON-NEXT: [[TMP13:%.*]] = or [[TMP8]], [[TMP10]] -; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP12]], i32 8, [[TMP13]]) -; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP15]] +; TFCOMMON-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] +; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFCOMMON-NEXT: [[TMP8:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i64 50, i64 0), poison, zeroinitializer) +; TFCOMMON-NEXT: [[TMP9:%.*]] = xor [[TMP8]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; TFCOMMON-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP9]], zeroinitializer +; TFCOMMON-NEXT: [[TMP11:%.*]] = call @foo_vector( zeroinitializer, [[TMP10]]) +; TFCOMMON-NEXT: [[TMP12:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP8]], zeroinitializer +; TFCOMMON-NEXT: [[TMP13:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP12]]) +; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select [[TMP10]], [[TMP11]], [[TMP13]] +; TFCOMMON-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] +; TFCOMMON-NEXT: [[TMP15:%.*]] = or [[TMP10]], [[TMP12]] +; TFCOMMON-NEXT: call void 
@llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP14]], i32 8, [[TMP15]]) +; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFCOMMON-NEXT: [[TMP16:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; TFCOMMON-NEXT: [[TMP17:%.*]] = extractelement [[TMP16]], i32 0 @@ -346,17 +337,14 @@ for.cond.cleanup: define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-LABEL: @test_widen_nomask( ; TFNONE-NEXT: entry: +; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; TFNONE: vector.ph: ; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] -; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; TFNONE: vector.ph: +; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP1]] +; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 -; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] -; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; TFNONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] ; TFNONE: vector.body: ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -365,9 +353,9 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP5:%.*]] = call @foo_vector_nomask( [[WIDE_LOAD]]) ; TFNONE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: store [[TMP5]], ptr [[TMP6]], align 8 -; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; TFNONE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TFNONE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TFNONE: middle.block: ; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; TFNONE: scalar.ph: @@ -406,31 +394,23 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: entry: ; TFFALLBACK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; TFFALLBACK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TFFALLBACK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] -; TFFALLBACK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; TFFALLBACK: vector.ph: +; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP1]] +; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; TFFALLBACK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; TFFALLBACK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 -; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] -; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; TFFALLBACK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; TFFALLBACK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]] ; TFFALLBACK: 
vector.body: -; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TFFALLBACK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] ; TFFALLBACK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 ; TFFALLBACK-NEXT: [[TMP5:%.*]] = call @foo_vector_nomask( [[WIDE_LOAD]]) ; TFFALLBACK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFFALLBACK-NEXT: store [[TMP5]], ptr [[TMP6]], align 8 -; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; TFFALLBACK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TFFALLBACK-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] -; TFFALLBACK: scalar.ph: -; TFFALLBACK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[N_VEC]], [[VECTOR_BODY]] ] -; TFFALLBACK-NEXT: br label [[FOR_BODY:%.*]] +; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; TFFALLBACK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TFFALLBACK-NEXT: br i1 [[TMP7]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; TFFALLBACK: for.body: -; TFFALLBACK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; TFFALLBACK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[VECTOR_BODY]] ] ; TFFALLBACK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] ; TFFALLBACK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8 ; TFFALLBACK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]] @@ -466,17 +446,14 @@ for.cond.cleanup: define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-LABEL: @test_widen_optmask( ; TFNONE-NEXT: entry: +; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; TFNONE: vector.ph: ; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] -; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; TFNONE: vector.ph: +; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP1]] +; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 -; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] -; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; TFNONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] ; TFNONE: vector.body: ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -485,9 +462,9 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP5:%.*]] = call @foo_vector_nomask( [[WIDE_LOAD]]) ; TFNONE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: store [[TMP5]], ptr [[TMP6]], align 8 -; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw 
i64 [[INDEX]], [[TMP3]] +; TFNONE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TFNONE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; TFNONE: middle.block: ; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; TFNONE: scalar.ph: @@ -516,19 +493,19 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFALWAYS-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TFALWAYS-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TFALWAYS-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TFALWAYS-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TFALWAYS-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; TFALWAYS-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; TFALWAYS-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFALWAYS-NEXT: br label [[VECTOR_BODY:%.*]] ; TFALWAYS: vector.body: ; TFALWAYS-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; TFALWAYS-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] -; TFALWAYS-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFALWAYS-NEXT: [[TMP6:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) -; TFALWAYS-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; TFALWAYS-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP6]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; TFALWAYS-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] +; TFALWAYS-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFALWAYS-NEXT: [[TMP8:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) +; TFALWAYS-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] +; TFALWAYS-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFALWAYS-NEXT: [[TMP10:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; TFALWAYS-NEXT: [[TMP11:%.*]] = extractelement [[TMP10]], i32 0 @@ -546,19 +523,19 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TFFALLBACK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TFFALLBACK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; TFFALLBACK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; TFFALLBACK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]] ; TFFALLBACK: vector.body: ; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], 
[[VECTOR_BODY]] ] ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; TFFALLBACK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] -; TFFALLBACK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFFALLBACK-NEXT: [[TMP6:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) -; TFFALLBACK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; TFFALLBACK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP6]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; TFFALLBACK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] +; TFFALLBACK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFFALLBACK-NEXT: [[TMP8:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) +; TFFALLBACK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] +; TFFALLBACK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFFALLBACK-NEXT: [[TMP10:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; TFFALLBACK-NEXT: [[TMP11:%.*]] = extractelement [[TMP10]], i32 0 @@ -590,17 +567,14 @@ for.cond.cleanup: define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, double %m) #4 { ; TFNONE-LABEL: @test_widen_fmuladd_and_call( ; TFNONE-NEXT: entry: +; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; TFNONE: vector.ph: ; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] -; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; TFNONE: vector.ph: +; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP1]] +; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 -; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] -; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; TFNONE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 ; TFNONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double [[M:%.*]], i64 0 ; TFNONE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -615,9 +589,9 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: store [[TMP7]], ptr [[TMP8]], align 8 ; TFNONE-NEXT: [[TMP9]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], [[TMP5]]) -; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; TFNONE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TFNONE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; 
TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; TFNONE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TFNONE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; TFNONE: middle.block: ; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; TFNONE: scalar.ph: @@ -651,8 +625,8 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFALWAYS-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TFALWAYS-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TFALWAYS-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TFALWAYS-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; TFALWAYS-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 +; TFALWAYS-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; TFALWAYS-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFALWAYS-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double [[M:%.*]], i64 0 ; TFALWAYS-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -660,23 +634,23 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFALWAYS: vector.body: ; TFALWAYS-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; TFALWAYS-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] -; TFALWAYS-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[B:%.*]], i64 [[INDEX]] -; TFALWAYS-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFALWAYS-NEXT: [[TMP6:%.*]] = fmul [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] -; TFALWAYS-NEXT: [[TMP7:%.*]] = fptoui [[WIDE_MASKED_LOAD]] to -; TFALWAYS-NEXT: [[TMP8:%.*]] = call @foo_vector( [[TMP7]], [[ACTIVE_LANE_MASK]]) -; TFALWAYS-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; TFALWAYS-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFALWAYS-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP6]], shufflevector ( insertelement ( poison, double -0.000000e+00, i64 0), poison, zeroinitializer) -; TFALWAYS-NEXT: [[TMP11]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], [[TMP10]]) -; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP13]] +; TFALWAYS-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; TFALWAYS-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[B:%.*]], i64 [[INDEX]] +; TFALWAYS-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFALWAYS-NEXT: [[TMP8:%.*]] = fmul [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] +; TFALWAYS-NEXT: [[TMP9:%.*]] = fptoui [[WIDE_MASKED_LOAD]] to +; TFALWAYS-NEXT: [[TMP10:%.*]] = call @foo_vector( [[TMP9]], [[ACTIVE_LANE_MASK]]) +; TFALWAYS-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] +; TFALWAYS-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP10]], ptr [[TMP11]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFALWAYS-NEXT: [[TMP12:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP8]], shufflevector ( insertelement ( poison, double 
-0.000000e+00, i64 0), poison, zeroinitializer) +; TFALWAYS-NEXT: [[TMP13]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], [[TMP12]]) +; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFALWAYS-NEXT: [[TMP14:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; TFALWAYS-NEXT: [[TMP15:%.*]] = extractelement [[TMP14]], i32 0 ; TFALWAYS-NEXT: br i1 [[TMP15]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TFALWAYS: for.cond.cleanup: -; TFALWAYS-NEXT: ret double [[TMP11]] +; TFALWAYS-NEXT: ret double [[TMP13]] ; ; TFFALLBACK-LABEL: @test_widen_fmuladd_and_call( ; TFFALLBACK-NEXT: entry: @@ -688,8 +662,8 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFFALLBACK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TFFALLBACK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; TFFALLBACK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 +; TFFALLBACK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; TFFALLBACK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFFALLBACK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double [[M:%.*]], i64 0 ; TFFALLBACK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -697,23 +671,23 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFFALLBACK: vector.body: ; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; TFFALLBACK-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] -; TFFALLBACK-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[B:%.*]], i64 [[INDEX]] -; TFFALLBACK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFFALLBACK-NEXT: [[TMP6:%.*]] = fmul [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] -; TFFALLBACK-NEXT: [[TMP7:%.*]] = fptoui [[WIDE_MASKED_LOAD]] to -; TFFALLBACK-NEXT: [[TMP8:%.*]] = call @foo_vector( [[TMP7]], [[ACTIVE_LANE_MASK]]) -; TFFALLBACK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; TFFALLBACK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFFALLBACK-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP6]], shufflevector ( insertelement ( poison, double -0.000000e+00, i64 0), poison, zeroinitializer) -; TFFALLBACK-NEXT: [[TMP11]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], [[TMP10]]) -; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP13]] +; TFFALLBACK-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; TFFALLBACK-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[B:%.*]], i64 [[INDEX]] +; TFFALLBACK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFFALLBACK-NEXT: 
[[TMP8:%.*]] = fmul [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] +; TFFALLBACK-NEXT: [[TMP9:%.*]] = fptoui [[WIDE_MASKED_LOAD]] to +; TFFALLBACK-NEXT: [[TMP10:%.*]] = call @foo_vector( [[TMP9]], [[ACTIVE_LANE_MASK]]) +; TFFALLBACK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] +; TFFALLBACK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP10]], ptr [[TMP11]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFFALLBACK-NEXT: [[TMP12:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP8]], shufflevector ( insertelement ( poison, double -0.000000e+00, i64 0), poison, zeroinitializer) +; TFFALLBACK-NEXT: [[TMP13]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], [[TMP12]]) +; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFFALLBACK-NEXT: [[TMP14:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; TFFALLBACK-NEXT: [[TMP15:%.*]] = extractelement [[TMP14]], i32 0 ; TFFALLBACK-NEXT: br i1 [[TMP15]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TFFALLBACK: for.cond.cleanup: -; TFFALLBACK-NEXT: ret double [[TMP11]] +; TFFALLBACK-NEXT: ret double [[TMP13]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll b/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll index 4a2f9d07ed91c..4df1b263f2f1e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll @@ -7,17 +7,14 @@ target triple = "aarch64-unknown-linux-gnu" define void @test_widen(ptr noalias %a, ptr readnone %b) #1 { ; WIDE-LABEL: @test_widen( ; WIDE-NEXT: entry: +; WIDE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; WIDE: vector.ph: ; WIDE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; WIDE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; WIDE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] -; WIDE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; WIDE: vector.ph: +; WIDE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP1]] +; WIDE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; WIDE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; WIDE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 -; WIDE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] -; WIDE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; WIDE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; WIDE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; WIDE-NEXT: br label [[VECTOR_BODY:%.*]] ; WIDE: vector.body: ; WIDE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -27,9 +24,9 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #1 { ; WIDE-NEXT: [[TMP6:%.*]] = call @foo_vector( [[TMP5]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; WIDE-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; WIDE-NEXT: store [[TMP6]], ptr [[TMP7]], align 4 -; WIDE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] -; WIDE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; WIDE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; WIDE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; WIDE-NEXT: [[TMP8:%.*]] = icmp eq 
i64 [[INDEX_NEXT]], [[N_VEC]] +; WIDE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; WIDE: middle.block: ; WIDE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; WIDE: scalar.ph:
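Note for reviewers (not part of the diff): aside from exporting the helper, the functional change is in simplifyICmpWithConstant(). It previously consulted only computeConstantRange(LHS, ...), so it missed facts that are only visible through known bits; it now calls llvm::computeConstantRangeIncludingKnownBits(), which intersects the known-bits range with the computeConstantRange() result. That is what fires in the new infer_sub_with_knownbits_info test: `or i8 %a, 1` pins the low bit of %a1 to 1, `shl i8 %b, 1` pins the low bit of %a2 to 0, so %sub has a known-set low bit, is provably nonzero, and umax(%sub, 1) folds to %sub. The LoopUnroll/LoopVectorize churn has the same root cause: guards such as `icmp ult i64 1025, [[TMP1]]` (where TMP1 is vscale * 2, bounded presumably via the functions' vscale_range attribute) now fold to false, and the rest of the diff is SSA renumbering once the dead compare and the duplicated vscale computation drop out.

A condensed, self-contained sketch of the query simplifyICmpWithConstant now performs. This is illustration, not patch code: isICmpAlwaysTrue is an invented name, and exact header paths may differ slightly between LLVM versions.

  #include "llvm/ADT/APInt.h"
  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/IR/ConstantRange.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/PatternMatch.h"

  using namespace llvm;
  using namespace llvm::PatternMatch;

  // Returns true if an icmp against a constant RHS is provably always true.
  static bool isICmpAlwaysTrue(ICmpInst &Cmp, const SimplifyQuery &SQ) {
    const APInt *C;
    if (!match(Cmp.getOperand(1), m_APInt(C)))
      return false;
    // The LHS values for which the predicate would hold...
    ConstantRange RHS_CR =
        ConstantRange::makeExactICmpRegion(Cmp.getPredicate(), *C);
    // ...versus the LHS values that can actually occur. This range now
    // folds in known-bits facts rather than computeConstantRange() alone.
    ConstantRange LHS_CR = computeConstantRangeIncludingKnownBits(
        Cmp.getOperand(0), CmpInst::isSigned(Cmp.getPredicate()), SQ);
    return !LHS_CR.isFullSet() && RHS_CR.contains(LHS_CR);
  }

The in-tree version additionally folds to false when the inverse region contains LHS_CR; the sketch keeps only the "always true" half.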