[InstCombineCompares] Try to "strengthen" compares based on known bits. #79405
Conversation
@llvm/pr-subscribers-llvm-transforms

Author: Mikhail Gudim (mgudim)

Changes: For example, replace `icmp ugt %x, 14` with `icmp ugt %x, 15` when it is known that the two least significant bits of `%x` are zero. Since `%x` is then a multiple of 4, both compares hold for exactly the same values, and the tighter constant may enable further folds.

Full diff: https://github.com/llvm/llvm-project/pull/79405.diff

2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 3875e59c3ede3b..30b421381de11c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -6345,6 +6345,78 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
(Op0Known.One.isNegative() && Op1Known.One.isNegative())))
return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
+ // Try to "strengthen" the RHS of the compare based on known bits.
+ // For example, replace `icmp ugt %x, 14` with `icmp ugt %x, 15` when
+ // it is known that the two least significant bits of `%x` are zero.
+ if (Op1Known.isConstant() && Op0Known.Zero.isMask()) {
+ APInt RHSConst = Op1Known.getConstant();
+ ConstantRange Op0PredRange =
+ ConstantRange::makeExactICmpRegion(Pred, RHSConst);
+ int KnownZeroMaskLength = BitWidth - Op0Known.Zero.countLeadingZeros();
+ if (KnownZeroMaskLength > 0) {
+ APInt PowOf2(BitWidth, 1 << KnownZeroMaskLength);
+ APInt Op0PredMin(BitWidth, 0);
+ APInt Op0PredMax(BitWidth, 0);
+ APInt Op0MinRefinedByKnownBits(BitWidth, 0);
+ APInt Op0MaxRefinedByKnownBits(BitWidth, 0);
+ APInt NewLower(BitWidth, 0);
+ APInt NewUpper(BitWidth, 0);
+ bool ImprovedLower = false;
+ bool ImprovedUpper = false;
+ if (I.isSigned()) {
+ Op0PredMin = Op0PredRange.getSignedMin();
+ Op0PredMax = Op0PredRange.getSignedMax();
+ // Compute the smallest number satisfying the known-bits constraints
+ // which is greater than or equal to Op0PredMin.
+ Op0MinRefinedByKnownBits =
+ PowOf2 *
+ APIntOps::RoundingSDiv(Op0PredMin, PowOf2, APInt::Rounding::UP);
+ // Compute the largest number satisfying the known-bits constraints
+ // which is less than or equal to Op0PredMax.
+ Op0MaxRefinedByKnownBits =
+ PowOf2 *
+ APIntOps::RoundingSDiv(Op0PredMax, PowOf2, APInt::Rounding::DOWN);
+ NewLower = APIntOps::smax(Op0MinRefinedByKnownBits, Op0PredMin);
+ NewUpper = APIntOps::smin(Op0MaxRefinedByKnownBits, Op0PredMax);
+ ImprovedLower = NewLower.sgt(Op0PredMin);
+ ImprovedUpper = NewUpper.slt(Op0PredMax);
+ } else {
+ Op0PredMin = Op0PredRange.getUnsignedMin();
+ Op0PredMax = Op0PredRange.getUnsignedMax();
+ Op0MinRefinedByKnownBits =
+ PowOf2 *
+ APIntOps::RoundingUDiv(Op0PredMin, PowOf2, APInt::Rounding::UP);
+ Op0MaxRefinedByKnownBits =
+ PowOf2 *
+ APIntOps::RoundingUDiv(Op0PredMax, PowOf2, APInt::Rounding::DOWN);
+ NewLower = APIntOps::umax(Op0MinRefinedByKnownBits, Op0PredMin);
+ NewUpper = APIntOps::umin(Op0MaxRefinedByKnownBits, Op0PredMax);
+ ImprovedLower = NewLower.ugt(Op0PredMin);
+ ImprovedUpper = NewUpper.ult(Op0PredMax);
+ }
+
+ // Non-strict inequalities should have been canonicalized to strict ones
+ // by now.
+ switch (Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT: {
+ if (ImprovedUpper)
+ return new ICmpInst(Pred, Op0,
+ ConstantInt::get(Op1->getType(), NewUpper + 1));
+ break;
+ }
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT: {
+ if (ImprovedLower)
+ return new ICmpInst(Pred, Op0,
+ ConstantInt::get(Op1->getType(), NewLower - 1));
+ break;
+ }
+ }
+ }
+ }
return nullptr;
}
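The rounding arithmetic above is easier to see in isolation. Below is a minimal standalone sketch of the unsigned refinement, assuming only LLVM's `APInt` API; the helper `refineUnsignedBounds` and its packaging are illustrative, not part of the patch:

```cpp
#include "llvm/ADT/APInt.h"
#include <utility>
using namespace llvm;

// Every feasible value of the operand is a multiple of PowOf2 (its low bits
// are known zero), so round Min up and Max down to the nearest multiples.
static std::pair<APInt, APInt> refineUnsignedBounds(const APInt &Min,
                                                    const APInt &Max,
                                                    const APInt &PowOf2) {
  APInt Lo = PowOf2 * APIntOps::RoundingUDiv(Min, PowOf2, APInt::Rounding::UP);
  APInt Hi =
      PowOf2 * APIntOps::RoundingUDiv(Max, PowOf2, APInt::Rounding::DOWN);
  // Clamp so the refined bounds never widen the original range.
  return {APIntOps::umax(Lo, Min), APIntOps::umin(Hi, Max)};
}
// Example: Min=15, Max=255, PowOf2=4 refines the range to [16, 252].
```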
diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
index 1f554c7b60256c..339d66e8c2e437 100644
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -1445,8 +1445,8 @@ define <2 x i1> @test70vec(<2 x i32> %X) {
define i1 @icmp_sext16trunc(i32 %x) {
; CHECK-LABEL: @icmp_sext16trunc(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[TMP1]], 36
+; CHECK-NEXT: [[SEXT1:%.*]] = shl i32 [[X:%.*]], 16
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SEXT1]], 2293761
; CHECK-NEXT: ret i1 [[CMP]]
;
%trunc = trunc i32 %x to i16
@@ -1457,8 +1457,8 @@ define i1 @icmp_sext16trunc(i32 %x) {
define i1 @icmp_sext8trunc(i32 %x) {
; CHECK-LABEL: @icmp_sext8trunc(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 36
+; CHECK-NEXT: [[SEXT1:%.*]] = shl i32 [[X:%.*]], 24
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SEXT1]], 587202561
; CHECK-NEXT: ret i1 [[CMP]]
;
%trunc = trunc i32 %x to i8
@@ -1470,8 +1470,8 @@ define i1 @icmp_sext8trunc(i32 %x) {
; Vectors should fold the same way.
define <2 x i1> @icmp_sext8trunc_vec(<2 x i32> %x) {
; CHECK-LABEL: @icmp_sext8trunc_vec(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i8>
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], <i8 36, i8 36>
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 24, i32 24>
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[TMP1]], <i32 587202561, i32 587202561>
; CHECK-NEXT: ret <2 x i1> [[CMP]]
;
%trunc = trunc <2 x i32> %x to <2 x i8>
@@ -1482,8 +1482,8 @@ define <2 x i1> @icmp_sext8trunc_vec(<2 x i32> %x) {
define i1 @icmp_shl16(i32 %x) {
; CHECK-LABEL: @icmp_shl16(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[TMP1]], 36
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 16
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SHL]], 2293761
; CHECK-NEXT: ret i1 [[CMP]]
;
%shl = shl i32 %x, 16
@@ -1496,7 +1496,7 @@ define i1 @icmp_shl16(i32 %x) {
define i1 @icmp_shl17(i32 %x) {
; CHECK-LABEL: @icmp_shl17(
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 17
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SHL]], 2359296
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SHL]], 2228225
; CHECK-NEXT: ret i1 [[CMP]]
;
%shl = shl i32 %x, 17
@@ -1506,8 +1506,8 @@ define i1 @icmp_shl17(i32 %x) {
define <2 x i1> @icmp_shl16_vec(<2 x i32> %x) {
; CHECK-LABEL: @icmp_shl16_vec(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i16>
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i16> [[TMP1]], <i16 36, i16 36>
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[X:%.*]], <i32 16, i32 16>
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[SHL]], <i32 2293761, i32 2293761>
; CHECK-NEXT: ret <2 x i1> [[CMP]]
;
%shl = shl <2 x i32> %x, <i32 16, i32 16>
@@ -1517,8 +1517,8 @@ define <2 x i1> @icmp_shl16_vec(<2 x i32> %x) {
define i1 @icmp_shl24(i32 %x) {
; CHECK-LABEL: @icmp_shl24(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 36
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 24
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SHL]], 587202561
; CHECK-NEXT: ret i1 [[CMP]]
;
%shl = shl i32 %x, 24
@@ -2154,7 +2154,7 @@ define i1 @icmp_ashr_and_overshift(i8 %X) {
define i1 @icmp_and_ashr_neg_and_legal(i8 %x) {
; CHECK-LABEL: @icmp_and_ashr_neg_and_legal(
; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -32
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 16
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 1
; CHECK-NEXT: ret i1 [[CMP]]
;
%ashr = ashr i8 %x, 4
@@ -2180,7 +2180,7 @@ define i1 @icmp_and_ashr_mixed_and_shiftout(i8 %x) {
define i1 @icmp_and_ashr_neg_cmp_slt_legal(i8 %x) {
; CHECK-LABEL: @icmp_and_ashr_neg_cmp_slt_legal(
; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -32
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], -64
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], -95
; CHECK-NEXT: ret i1 [[CMP]]
;
%ashr = ashr i8 %x, 4
@@ -2194,7 +2194,7 @@ define i1 @icmp_and_ashr_neg_cmp_slt_shiftout(i8 %x) {
; CHECK-LABEL: @icmp_and_ashr_neg_cmp_slt_shiftout(
; CHECK-NEXT: [[ASHR:%.*]] = ashr i8 [[X:%.*]], 4
; CHECK-NEXT: [[AND:%.*]] = and i8 [[ASHR]], -2
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[AND]], -68
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[AND]], -69
; CHECK-NEXT: ret i1 [[CMP]]
;
%ashr = ashr i8 %x, 4
@@ -5138,3 +5138,53 @@ entry:
%cmp = icmp eq i8 %add2, %add1
ret i1 %cmp
}
+
+define i1 @tighten_icmp_using_known_bits_ugt(i16 %a) {
+; CHECK-LABEL: @tighten_icmp_using_known_bits_ugt(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i16 [[A:%.*]], 15
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %and_ = and i16 %a, 65532
+ %cmp = icmp ugt i16 %and_, 14
+ ret i1 %cmp
+}
+
+define i1 @tighten_icmp_using_known_bits_ult(i16 %a) {
+; CHECK-LABEL: @tighten_icmp_using_known_bits_ult(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[AND_:%.*]] = and i16 [[A:%.*]], -4
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[AND_]], 17
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %and_ = and i16 %a, 65532
+ %cmp = icmp ult i16 %and_, 18
+ ret i1 %cmp
+}
+
+define i1 @tighten_icmp_using_known_bits_sgt(i16 %a) {
+; CHECK-LABEL: @tighten_icmp_using_known_bits_sgt(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i16 [[A:%.*]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %and_ = and i16 %a, 65520
+ %cmp = icmp sgt i16 %and_, -15
+ ret i1 %cmp
+}
+
+define i1 @tighten_icmp_using_known_bits_slt(i16 %a) {
+; CHECK-LABEL: @tighten_icmp_using_known_bits_slt(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[AND_:%.*]] = and i16 [[A:%.*]], -4
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[AND_]], -15
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %and_ = and i16 %a, 65532
+ %cmp = icmp slt i16 %and_, -14
+ ret i1 %cmp
+}
This (hopefully) fixes the degradation discussed in #75899.
Commits:
- For example, replace `icmp ugt %x, 14` with `icmp ugt %x, 15` when it is known that the two least significant bits of `%x` are zero.
- Avoid breaking the "sign bit check" pattern.
Force-pushed from c4512dc to be9c0c7
You can test this locally with the following command:

git-clang-format --diff 41fe98a6e7e5cdcab4a4e9e0d09339231f480c01 f5fef0574fffc6f1492403c9dbdd2dcfc66168e7 -- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

View the diff from clang-format here:

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index ab9460e09e..48148e69bf 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -6445,7 +6445,8 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
// if the result of compare is used only in conditional branches, try to
// "strengthen" the compare. This may allow us to deduce stronger results
- // about the value involved in comparison in the blocks dominated by these branches.
+ // about the value involved in comparison in the blocks dominated by these
+ // branches.
bool AllUsesAreInBranches = true;
for (const Use &U : I.uses()) {
const Instruction *UI = cast<Instruction>(U.getUser());
I am not sure that this is the correct way to fix the regression.
// Don't break the SignBitCheck pattern;
if (InstCombiner::isSignBitCheck(Pred, RHSConst, TrueIfSigned))
  return nullptr;
You can add a check here to avoid breaking the SPF (select pattern flavor, i.e. a recognized min/max select).
@dtcxzyw Added a check not to break any select patterns.
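For reference, such a guard could look roughly like the following sketch; the helper name `feedsMinMaxSelect` is hypothetical, and this is not necessarily the exact check that was added:

```cpp
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Skip the transform when the compare feeds a select that ValueTracking
// recognizes as a min/max (a "select pattern flavor"): rewriting the icmp
// constant would decouple it from the select arm and break the pattern.
static bool feedsMinMaxSelect(ICmpInst &I) {
  for (User *U : I.users())
    if (auto *Sel = dyn_cast<SelectInst>(U)) {
      Value *LHS, *RHS;
      if (SelectPatternResult::isMinOrMax(
              matchSelectPattern(Sel, LHS, RHS).Flavor))
        return true;
    }
  return false;
}
```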
> @dtcxzyw Added a check not to break any select patterns.
Tests?
> @dtcxzyw Added a check not to break any select patterns.
Emm, it seems like some regressions are still there :(
dtcxzyw/llvm-opt-benchmark#148 (comment)
@dtcxzyw I just added tests for the select patterns and the sign-check pattern. Everything works as intended. I tried your specific example and the pattern wasn't broken. There must be something else happening in that code. Can you double check that the degradation is still there? If yes, can you provide a reduced test case?
> @dtcxzyw I just added tests for the select patterns and the sign-check pattern. Everything works as intended. I tried your specific example and the pattern wasn't broken. There must be something else happening in that code. Can you double check that the degradation is still there? If yes, can you provide a reduced test case?
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE = constant i64 67108864
define ptr @_ZN6Assimp14StackAllocator8AllocateEm(ptr %this, i64 %0) {
entry:
%mul = shl i64 %0, 1
store i64 %mul, ptr %this, align 8
%call = call ptr @_ZSt3minImERKT_S2_S2_(ptr %this, ptr @_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE)
%1 = load i64, ptr %call, align 8
store i64 %1, ptr %this, align 8
ret ptr null
}
define ptr @_ZSt3minImERKT_S2_S2_(ptr %__a, ptr %__b) {
entry:
%0 = load i64, ptr %__b, align 8
%1 = load i64, ptr %__a, align 8
%cmp = icmp ult i64 %0, %1
%__b.__a = select i1 %cmp, ptr %__b, ptr %__a
ret ptr %__b.__a
}
Baseline (-O3):
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE = local_unnamed_addr constant i64 67108864
; Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: write)
define noalias ptr @_ZN6Assimp14StackAllocator8AllocateEm(ptr nocapture writeonly %this, i64 %0) local_unnamed_addr #0 {
entry:
%mul = shl i64 %0, 1
%1 = tail call i64 @llvm.umin.i64(i64 %mul, i64 67108864)
store i64 %1, ptr %this, align 8
ret ptr null
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read)
define ptr @_ZSt3minImERKT_S2_S2_(ptr readonly %__a, ptr readonly %__b) local_unnamed_addr #1 {
entry:
%0 = load i64, ptr %__b, align 8
%1 = load i64, ptr %__a, align 8
%cmp = icmp ult i64 %0, %1
%__b.__a = select i1 %cmp, ptr %__b, ptr %__a
ret ptr %__b.__a
}
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i64 @llvm.umin.i64(i64, i64) #2
attributes #0 = { mustprogress nofree nosync nounwind willreturn memory(argmem: write) }
attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
After this patch:
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE = local_unnamed_addr constant i64 67108864
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
define noalias noundef ptr @_ZN6Assimp14StackAllocator8AllocateEm(ptr nocapture writeonly %this, i64 %0) local_unnamed_addr #0 {
entry:
%mul = shl i64 %0, 1
%cmp.i = icmp ugt i64 %mul, 67108865
%1 = select i1 %cmp.i, i64 67108864, i64 %mul
store i64 %1, ptr %this, align 8
ret ptr null
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read)
define ptr @_ZSt3minImERKT_S2_S2_(ptr readonly %__a, ptr readonly %__b) local_unnamed_addr #1 {
entry:
%0 = load i64, ptr %__b, align 8
%1 = load i64, ptr %__a, align 8
%cmp = icmp ult i64 %0, %1
%__b.__a = select i1 %cmp, ptr %__b, ptr %__a
ret ptr %__b.__a
}
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) }
attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) }
It looks like a phase ordering problem :(
After inlining the call `%call = call ptr @_ZSt3minImERKT_S2_S2_(ptr %this, ptr @_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE)`, the code looks like this:
; *** IR Dump After InlinerPass on (_ZN6Assimp14StackAllocator8AllocateEm) ***
define ptr @_ZN6Assimp14StackAllocator8AllocateEm(ptr %this, i64 %0) local_unnamed_addr {
entry:
%mul = shl i64 %0, 1
store i64 %mul, ptr %this, align 8
%1 = load i64, ptr %this, align 8
%cmp.i = icmp ult i64 67108864, %1
%__b.__a.i = select i1 %cmp.i, ptr @_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE, ptr %this
%2 = load i64, ptr %__b.__a.i, align 8
store i64 %2, ptr %this, align 8
ret ptr null
}
so the constant `_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE` was replaced with its value in the `icmp` instruction, but not in the `select`. Why?
I see, the `select` can only be simplified after inlining and combining it with the `load`.
Force-pushed from 1bf0572 to 012d5d1
Force-pushed from 012d5d1 to 82ad843
Suggested change:
- KnownBits Op0Known,
- KnownBits Op1Known,
+ const KnownBits &Op0Known,
+ const KnownBits &Op1Known,
KnownBits Op0Known,
KnownBits Op1Known,
unsigned BitWidth) {
  if (!BitWidth)
`BitWidth` is always non-zero.
ConstantRange Op0PredRange =
    ConstantRange::makeExactICmpRegion(Pred, RHSConst);
int KnownZeroMaskLength = BitWidth - Op0Known.Zero.countLeadingZeros();
Suggested change:
- int KnownZeroMaskLength = BitWidth - Op0Known.Zero.countLeadingZeros();
+ unsigned KnownZeroMaskLength = BitWidth - Op0Known.Zero.countLeadingZeros();
if (KnownZeroMaskLength == 0)
  return nullptr;
APInt PowOf2(BitWidth, 1 << KnownZeroMaskLength);
Use `APInt::getOneBitSet` instead; the `int` shift `1 << KnownZeroMaskLength` cannot form the constant once the mask length reaches the width of a host `int`, so it is wrong for wide types. Please also add an i128 test.
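A sketch of the suggested construction (the `makePowOf2` wrapper is illustrative only, not code from the patch):

```cpp
#include "llvm/ADT/APInt.h"
using namespace llvm;

// APInt::getOneBitSet builds 2^BitNo at any bit width, so it stays correct
// for types like i128, where `1 << N` on a host int cannot.
// Precondition: KnownZeroMaskLength < BitWidth.
static APInt makePowOf2(unsigned BitWidth, unsigned KnownZeroMaskLength) {
  return APInt::getOneBitSet(BitWidth, KnownZeroMaskLength);
}
```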
// Compute the smallest number satisfying the known-bits constrained
// which is at greater or equal Op0MinAccordingToPred.
Op0MinRefinedByKnownBits =
    PowOf2 * APIntOps::RoundingSDiv(Op0MinAccordingToPred, PowOf2,
What is the compile-time impact of this patch?
// Compute the largest number satisfying the known-bits constrained
// which is at less or equal Op0MaxAccordingToPred.
Suggested change:
- // Compute the largest number satisfying the known-bits constrained
- // which is at less or equal Op0MaxAccordingToPred.
+ // Compute the largest number satisfying the known-bits constraints
+ // which is at less or equal to Op0MaxAccordingToPred.
PowOf2 * APIntOps::RoundingUDiv(Op0MinAccordingToPred, PowOf2,
                                APInt::Rounding::UP);
Suggested change (whitespace only):
PowOf2 * APIntOps::RoundingUDiv(Op0MinAccordingToPred, PowOf2,
                                APInt::Rounding::UP);
Can the multiplication overflow?
// by now.
switch (Pred) {
default:
  break;
We can early exit when the predicate is not signed.
  break;
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_SLT: {
  if (ImprovedUpper)
Can we avoid refining the lower bound when the predicate is `ult`/`slt`?
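One way to structure that, sketched with illustrative helper names rather than the patch's code:

```cpp
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// ult/slt only ever consume the refined upper bound, and ugt/sgt only the
// lower one, so the other rounding computation can be skipped entirely.
static bool onlyNeedsUpperBound(CmpInst::Predicate Pred) {
  return Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_SLT;
}

static bool onlyNeedsLowerBound(CmpInst::Predicate Pred) {
  return Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_SGT;
}
```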
@dtcxzyw I think it's better to limit this only to the case when the result of the compare is used only in conditional branches.
@dtcxzyw @nikic Actually, now I don't think it's such a good idea. If the comparison is used in a branch and we "strengthen" it, then if the branch is taken we can deduce more. BUT on the other hand, we have actually "weakened" the opposite inequality: if the branch is not taken we can deduce less. Maybe a better way to express this would be to insert …
NewUpper = APIntOps::umin(Op0MaxRefinedByKnownBits, Op0MaxAccordingToPred);
ImprovedLower = NewLower.ugt(Op0MinAccordingToPred);
ImprovedUpper = NewUpper.ult(Op0MaxAccordingToPred);
}
Instead of creating these custom bounds, why not use `ConstantRange`?
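A sketch of that idea, with assumed packaging; note that `ConstantRange::fromKnownBits` keeps only the bit-level envelope and not the "multiple of 2^N" stride that the patch's rounding exploits, so this alone is coarser:

```cpp
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// Intersect the predicate's exact region with the range implied by the
// known bits of the LHS, instead of hand-rolling min/max arithmetic.
static ConstantRange refineWithKnownBits(CmpInst::Predicate Pred,
                                         const APInt &RHSConst,
                                         const KnownBits &Op0Known,
                                         bool IsSigned) {
  ConstantRange PredRange = ConstantRange::makeExactICmpRegion(Pred, RHSConst);
  ConstantRange KnownRange = ConstantRange::fromKnownBits(Op0Known, IsSigned);
  return PredRange.intersectWith(KnownRange, IsSigned
                                                 ? ConstantRange::Signed
                                                 : ConstantRange::Unsigned);
}
```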