-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[InstSimplify] Improve simplifyICmpWithConstant
by using KnownBits info
#76221
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-analysis Author: Yingwei Zheng (dtcxzyw) Changes: This patch improves `simplifyICmpWithConstant` by using `computeConstantRangeIncludingKnownBits`. Patch is 62.05 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/76221.diff 10 Files Affected:
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index a3186e61b94adf..baa16306ebf5df 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -863,6 +863,11 @@ ConstantRange computeConstantRange(const Value *V, bool ForSigned,
const DominatorTree *DT = nullptr,
unsigned Depth = 0);
+/// Combine constant ranges from computeConstantRange() and computeKnownBits().
+ConstantRange
+computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
+ bool ForSigned, const SimplifyQuery &SQ);
+
/// Return true if this function can prove that the instruction I will
/// always transfer execution to one of its successors (including the next
/// instruction that follows within a basic block). E.g. this is not
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 5beac5547d65e0..9121a69629f862 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -3005,7 +3005,7 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
}
static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
- Value *RHS, const InstrInfoQuery &IIQ) {
+ Value *RHS, const SimplifyQuery &SQ) {
Type *ITy = getCompareTy(RHS); // The return type.
Value *X;
@@ -3031,8 +3031,8 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
if (RHS_CR.isFullSet())
return ConstantInt::getTrue(ITy);
- ConstantRange LHS_CR =
- computeConstantRange(LHS, CmpInst::isSigned(Pred), IIQ.UseInstrInfo);
+ ConstantRange LHS_CR = llvm::computeConstantRangeIncludingKnownBits(
+ LHS, CmpInst::isSigned(Pred), SQ);
if (!LHS_CR.isFullSet()) {
if (RHS_CR.contains(LHS_CR))
return ConstantInt::getTrue(ITy);
@@ -3043,7 +3043,7 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
// (mul nuw/nsw X, MulC) != C --> true (if C is not a multiple of MulC)
// (mul nuw/nsw X, MulC) == C --> false (if C is not a multiple of MulC)
const APInt *MulC;
- if (IIQ.UseInstrInfo && ICmpInst::isEquality(Pred) &&
+ if (SQ.IIQ.UseInstrInfo && ICmpInst::isEquality(Pred) &&
((match(LHS, m_NUWMul(m_Value(), m_APIntAllowUndef(MulC))) &&
*MulC != 0 && C->urem(*MulC) != 0) ||
(match(LHS, m_NSWMul(m_Value(), m_APIntAllowUndef(MulC))) &&
@@ -3749,7 +3749,7 @@ static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Value *V = simplifyICmpWithZero(Pred, LHS, RHS, Q))
return V;
- if (Value *V = simplifyICmpWithConstant(Pred, LHS, RHS, Q.IIQ))
+ if (Value *V = simplifyICmpWithConstant(Pred, LHS, RHS, Q))
return V;
// If both operands have range metadata, use the metadata
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 769d921eb1e8d1..cac2602d455f9d 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -6289,10 +6289,10 @@ static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
}
/// Combine constant ranges from computeConstantRange() and computeKnownBits().
-static ConstantRange
-computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
- bool ForSigned,
- const SimplifyQuery &SQ) {
+ConstantRange
+llvm::computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
+ bool ForSigned,
+ const SimplifyQuery &SQ) {
ConstantRange CR1 =
ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned);
ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo);
diff --git a/llvm/test/Transforms/Attributor/lvi-after-jumpthreading.ll b/llvm/test/Transforms/Attributor/lvi-after-jumpthreading.ll
index 2aa95216a66569..84c6213c75840a 100644
--- a/llvm/test/Transforms/Attributor/lvi-after-jumpthreading.ll
+++ b/llvm/test/Transforms/Attributor/lvi-after-jumpthreading.ll
@@ -148,8 +148,7 @@ define i32 @test4(i32 %i, i1 %f, i32 %n) {
; CHECK-NEXT: call void @dummy(i1 [[F]]) #[[ATTR2]]
; CHECK-NEXT: [[CONSUME:%.*]] = call i32 @exit()
; CHECK-NEXT: call void @llvm.assume(i1 noundef [[F]])
-; CHECK-NEXT: [[COND:%.*]] = icmp eq i1 [[F]], false
-; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[CONT:%.*]]
+; CHECK-NEXT: br label [[CONT:%.*]]
; CHECK: exit2:
; CHECK-NEXT: ret i32 30
;
diff --git a/llvm/test/Transforms/InstSimplify/call.ll b/llvm/test/Transforms/InstSimplify/call.ll
index 52c207a2760468..ebc5c8bb0e52bd 100644
--- a/llvm/test/Transforms/InstSimplify/call.ll
+++ b/llvm/test/Transforms/InstSimplify/call.ll
@@ -1582,9 +1582,7 @@ define i1 @ctlz_i1_non_poison_eq_false(i1 %x) {
define i1 @ctlz_i1_poison_eq_false(i1 %x) {
; CHECK-LABEL: @ctlz_i1_poison_eq_false(
-; CHECK-NEXT: [[CT:%.*]] = call i1 @llvm.ctlz.i1(i1 [[X:%.*]], i1 true)
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i1 [[CT]], false
-; CHECK-NEXT: ret i1 [[CMP]]
+; CHECK-NEXT: ret i1 true
;
%ct = call i1 @llvm.ctlz.i1(i1 %x, i1 true)
%cmp = icmp eq i1 %ct, false
@@ -1604,9 +1602,7 @@ define i1 @cttz_i1_non_poison_eq_false(i1 %x) {
define i1 @cttz_i1_poison_eq_false(i1 %x) {
; CHECK-LABEL: @cttz_i1_poison_eq_false(
-; CHECK-NEXT: [[CT:%.*]] = call i1 @llvm.cttz.i1(i1 [[X:%.*]], i1 true)
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i1 [[CT]], false
-; CHECK-NEXT: ret i1 [[CMP]]
+; CHECK-NEXT: ret i1 true
;
%ct = call i1 @llvm.cttz.i1(i1 %x, i1 true)
%cmp = icmp eq i1 %ct, false
diff --git a/llvm/test/Transforms/InstSimplify/icmp-constant.ll b/llvm/test/Transforms/InstSimplify/icmp-constant.ll
index 04261f6f40b7c3..1123a7092d3aa9 100644
--- a/llvm/test/Transforms/InstSimplify/icmp-constant.ll
+++ b/llvm/test/Transforms/InstSimplify/icmp-constant.ll
@@ -433,9 +433,7 @@ define <2 x i1> @or1_vec_partial_undef(<2 x i32> %X) {
; Single bit OR.
define i1 @or2_true(i8 %x) {
; CHECK-LABEL: @or2_true(
-; CHECK-NEXT: [[Y:%.*]] = or i8 [[X:%.*]], 64
-; CHECK-NEXT: [[Z:%.*]] = icmp sge i8 [[Y]], -64
-; CHECK-NEXT: ret i1 [[Z]]
+; CHECK-NEXT: ret i1 true
;
%y = or i8 %x, 64
%z = icmp sge i8 %y, -64
@@ -457,9 +455,7 @@ define i1 @or2_unknown(i8 %x) {
; 78 = 0b01001110; -50 = 0b11001110
define i1 @or3_true(i8 %x) {
; CHECK-LABEL: @or3_true(
-; CHECK-NEXT: [[Y:%.*]] = or i8 [[X:%.*]], 78
-; CHECK-NEXT: [[Z:%.*]] = icmp sge i8 [[Y]], -50
-; CHECK-NEXT: ret i1 [[Z]]
+; CHECK-NEXT: ret i1 true
;
%y = or i8 %x, 78
%z = icmp sge i8 %y, -50
@@ -573,9 +569,7 @@ define i1 @and3_unknown1(i8 %x) {
define i1 @and3_true2(i8 %x) {
; CHECK-LABEL: @and3_true2(
-; CHECK-NEXT: [[Y:%.*]] = and i8 [[X:%.*]], -75
-; CHECK-NEXT: [[Z:%.*]] = icmp sle i8 [[Y]], 53
-; CHECK-NEXT: ret i1 [[Z]]
+; CHECK-NEXT: ret i1 true
;
%y = and i8 %x, -75
%z = icmp sle i8 %y, 53
@@ -1140,3 +1134,19 @@ define <2 x i1> @heterogeneous_constvector(<2 x i8> %x) {
%c = icmp ult <2 x i8> %x, <i8 undef, i8 poison>
ret <2 x i1> %c
}
+
+define i8 @infer_sub_with_knownbits_info(i8 %a, i8 %b) {
+; CHECK-LABEL: @infer_sub_with_knownbits_info(
+; CHECK-NEXT: [[A1:%.*]] = or i8 [[A:%.*]], 1
+; CHECK-NEXT: [[A2:%.*]] = shl i8 [[B:%.*]], 1
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[A1]], [[A2]]
+; CHECK-NEXT: ret i8 [[SUB]]
+;
+ %a1 = or i8 %a, 1
+ %a2 = shl i8 %b, 1
+ %sub = sub i8 %a1, %a2
+ %umax = tail call i8 @llvm.umax.i8(i8 %sub, i8 1)
+ ret i8 %umax
+}
+
+declare i8 @llvm.umax.i8(i8, i8)
diff --git a/llvm/test/Transforms/JumpThreading/pr62908.ll b/llvm/test/Transforms/JumpThreading/pr62908.ll
index 4c389ee040b902..0c93803cdfef60 100644
--- a/llvm/test/Transforms/JumpThreading/pr62908.ll
+++ b/llvm/test/Transforms/JumpThreading/pr62908.ll
@@ -5,7 +5,14 @@
define i32 @test() {
; CHECK-LABEL: define i32 @test() {
-; CHECK-NEXT: end:
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[END:%.*]]
+; CHECK: unreachable:
+; CHECK-NEXT: [[SH_PROM:%.*]] = zext i32 -1 to i64
+; CHECK-NEXT: [[SHL:%.*]] = shl nsw i64 -1, [[SH_PROM]]
+; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[SHL]] to i32
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
; CHECK-NEXT: ret i32 0
;
entry:
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
index 6cad2e1c4a3246..58a5d3c0b21eaf 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
@@ -4578,10 +4578,8 @@ define void @test8() {
; EPILOG-BLOCK: outerloop:
; EPILOG-BLOCK-NEXT: %i = phi i64 [ 3, %bb ], [ 0, %outerloop.loopexit.1 ]
; EPILOG-BLOCK-NEXT: %0 = sub i64 100, %i
-; EPILOG-BLOCK-NEXT: %1 = sub i64 99, %i
; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1
-; EPILOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1
-; EPILOG-BLOCK-NEXT: br i1 %2, label %exit.unr-lcssa, label %outerloop.new
+; EPILOG-BLOCK-NEXT: br i1 false, label %exit.unr-lcssa, label %outerloop.new
; EPILOG-BLOCK: outerloop.new:
; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %0, %xtraiter
; EPILOG-BLOCK-NEXT: br label %innerH
@@ -4711,7 +4709,6 @@ define void @test8() {
; PROLOG-BLOCK: outerloop:
; PROLOG-BLOCK-NEXT: %i = phi i64 [ 3, %bb ], [ 0, %outerloop.loopexit.1 ]
; PROLOG-BLOCK-NEXT: %0 = sub i64 100, %i
-; PROLOG-BLOCK-NEXT: %1 = sub i64 99, %i
; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1
; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0
; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %innerH.prol.preheader, label %innerH.prol.loopexit
@@ -4724,8 +4721,7 @@ define void @test8() {
; PROLOG-BLOCK-NEXT: br label %innerH.prol.loopexit
; PROLOG-BLOCK: innerH.prol.loopexit:
; PROLOG-BLOCK-NEXT: %i3.unr = phi i64 [ %i, %outerloop ], [ %i4.prol, %latch.prol ]
-; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1
-; PROLOG-BLOCK-NEXT: br i1 %2, label %exit.loopexit, label %outerloop.new
+; PROLOG-BLOCK-NEXT: br i1 false, label %exit.loopexit, label %outerloop.new
; PROLOG-BLOCK: outerloop.new:
; PROLOG-BLOCK-NEXT: br label %innerH
; PROLOG-BLOCK: innerH:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
index 144b29d84198ac..f4a66cb7087753 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
@@ -10,17 +10,14 @@ target triple = "aarch64-unknown-linux-gnu"
define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-LABEL: @test_widen(
; TFNONE-NEXT: entry:
+; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; TFNONE: vector.ph:
; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
-; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
-; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; TFNONE: vector.ph:
+; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP1]]
+; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
-; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
-; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
-; TFNONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]]
; TFNONE: vector.body:
; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -29,9 +26,9 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; TFNONE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP5]], ptr [[TMP6]], align 8
-; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
-; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; TFNONE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; TFNONE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; TFNONE: middle.block:
; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; TFNONE: scalar.ph:
@@ -60,19 +57,19 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; TFCOMMON-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; TFCOMMON-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
+; TFCOMMON-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; TFCOMMON-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
; TFCOMMON-NEXT: br label [[VECTOR_BODY:%.*]]
; TFCOMMON: vector.body:
; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
-; TFCOMMON-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]]
-; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
-; TFCOMMON-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
-; TFCOMMON-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
-; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP6]], ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
-; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
+; TFCOMMON-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]]
+; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
+; TFCOMMON-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+; TFCOMMON-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
+; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP8]], ptr [[TMP9]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025)
; TFCOMMON-NEXT: [[TMP10:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
; TFCOMMON-NEXT: [[TMP11:%.*]] = extractelement <vscale x 2 x i1> [[TMP10]], i32 0
@@ -102,17 +99,14 @@ for.cond.cleanup:
define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-LABEL: @test_if_then(
; TFNONE-NEXT: entry:
+; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; TFNONE: vector.ph:
; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
-; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
-; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; TFNONE: vector.ph:
+; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP1]]
+; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
-; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
-; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
-; TFNONE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2
; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]]
; TFNONE: vector.body:
; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -124,9 +118,9 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP7]], <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> [[TMP6]]
; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
; TFNONE-NEXT: store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP8]], align 8
-; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
-; TFNONE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; TFNONE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; TFNONE: middle.block:
; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; TFNONE: scalar.ph:
@@ -135,16 +129,16 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE: for.body:
; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
-; TFNONE-NEXT: [[TMP12:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
-; TFNONE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP12]], 50
+; TFNONE-NEXT: [[TMP10:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
+; TFNONE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP10]], 50
; TFNONE-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END]]
; TFNONE: if.then:
-; TFNONE-NEXT: [[TMP13:%.*]] = call i64 @foo(i64 [[TMP12]]) #[[ATTR3]]
+; TFNONE-NEXT: [[TMP11:%.*]] = call i64 @foo(i64 [[TMP10]]) #[[ATTR3]]
; TFNONE-NEXT: br label [[IF_END]]
; TFNONE: if.end:
-; TFNONE-NEXT: [[TMP14:%.*]] = phi i64 [ [[TMP13]], [[IF_THEN]] ], [ 0, [[FOR_BODY]] ]
+; TFNONE-NEXT: [[TMP12:%.*]] = phi i64 [ [[TMP11]], [[IF_THEN]] ], [ 0, [[FOR_BODY]] ]
; TFNONE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDVARS_IV]]
-; TFNONE-NEXT: store i64 [[TMP14]], ptr [[ARRAYIDX1]], align 8
+; TFNONE-NEXT: store i64 [[TMP12]], ptr [[ARRAYIDX1]], align 8
; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025
; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -161,25 +155,25 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; TFCOMMON-NEX...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's the compile-time impact?
IIRC it was always too expensive to use KnownBits to simplify all icmps in simplifyICmpInst, which is why this is left to InstCombine, which computes KnownBits anyway.
859aceb
to
044cc4e
Compare
Compile-time impact: http://llvm-compile-time-tracker.com/compare.php?from=1d4691a2338c816e18e0d7c7db9a6062dd89f68c&to=859aceb4b01b0f1d60847be303910f5702913b93&stat=instructions:u I will move it into InstCombine. |
This patch improves `simplifyICmpWithConstant` by using `computeConstantRangeIncludingKnownBits`.
Fixes a regression in `_karatsuba_rec` (cpython/Modules/_decimal/libmpdec/mpdecimal.c), which was introduced by #71396.
See also dtcxzyw/llvm-opt-benchmark#16 (comment).