-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[InstCombine] Fold minmax intrinsic using KnownBits information #76242
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: Yingwei Zheng (dtcxzyw) ChangesThis patch tries to fold minmax intrinsic by using It is an alternative to #76221. Full diff: https://github.com/llvm/llvm-project/pull/76242.diff 4 Files Affected:
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index a3186e61b94adf..baa16306ebf5df 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -863,6 +863,11 @@ ConstantRange computeConstantRange(const Value *V, bool ForSigned,
const DominatorTree *DT = nullptr,
unsigned Depth = 0);
+/// Combine constant ranges from computeConstantRange() and computeKnownBits().
+ConstantRange
+computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
+ bool ForSigned, const SimplifyQuery &SQ);
+
/// Return true if this function can prove that the instruction I will
/// always transfer execution to one of its successors (including the next
/// instruction that follows within a basic block). E.g. this is not
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 769d921eb1e8d1..cac2602d455f9d 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -6289,10 +6289,10 @@ static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
}
/// Combine constant ranges from computeConstantRange() and computeKnownBits().
-static ConstantRange
-computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
- bool ForSigned,
- const SimplifyQuery &SQ) {
+ConstantRange
+llvm::computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
+ bool ForSigned,
+ const SimplifyQuery &SQ) {
ConstantRange CR1 =
ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned);
ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index a272357fa04a41..76000270e868eb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1796,6 +1796,19 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Instruction *NewMinMax = factorizeMinMaxTree(II))
return NewMinMax;
+ // Try to fold minmax based on range information
+ ICmpInst::Predicate Pred =
+ ICmpInst::getNonStrictPredicate(MinMaxIntrinsic::getPredicate(IID));
+ bool IsSigned = MinMaxIntrinsic::isSigned(IID);
+ const auto LHS_CR = llvm::computeConstantRangeIncludingKnownBits(
+ I0, IsSigned, SQ.getWithInstruction(II));
+ const auto RHS_CR = llvm::computeConstantRangeIncludingKnownBits(
+ I1, IsSigned, SQ.getWithInstruction(II));
+ if (LHS_CR.icmp(Pred, RHS_CR))
+ return replaceInstUsesWith(*II, I0);
+ if (RHS_CR.icmp(Pred, LHS_CR))
+ return replaceInstUsesWith(*II, I1);
+
break;
}
case Intrinsic::bitreverse: {
diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
index f3833a420ee835..500ea0aa73f741 100644
--- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
@@ -2489,3 +2489,66 @@ define i1 @PR57986() {
%umin = call i1 @llvm.umin.i1(i1 ptrtoint (ptr @g to i1), i1 true)
ret i1 %umin
}
+
+define i8 @fold_umax_with_knownbits_info(i8 %a, i8 %b) {
+; CHECK-LABEL: @fold_umax_with_knownbits_info(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A1:%.*]] = or i8 [[A:%.*]], 1
+; CHECK-NEXT: [[A2:%.*]] = shl i8 [[B:%.*]], 1
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[A1]], [[A2]]
+; CHECK-NEXT: ret i8 [[SUB]]
+;
+entry:
+ %a1 = or i8 %a, 1
+ %a2 = shl i8 %b, 1
+ %sub = sub i8 %a1, %a2
+ %val = call i8 @llvm.umax.i8(i8 %sub, i8 1)
+ ret i8 %val
+}
+
+define i8 @fold_umin_with_knownbits_info(i8 %a, i8 %b) {
+; CHECK-LABEL: @fold_umin_with_knownbits_info(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret i8 3
+;
+entry:
+ %a1 = or i8 %a, 3
+ %a2 = shl i8 %b, 2
+ %sub = sub i8 %a1, %a2
+ %val = call i8 @llvm.umin.i8(i8 %sub, i8 3)
+ ret i8 %val
+}
+
+define i8 @fold_umax_with_knownbits_info_fail(i8 %a, i8 %b) {
+; CHECK-LABEL: @fold_umax_with_knownbits_info_fail(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A1:%.*]] = or i8 [[A:%.*]], 2
+; CHECK-NEXT: [[A2:%.*]] = shl i8 [[B:%.*]], 1
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[A1]], [[A2]]
+; CHECK-NEXT: [[VAL:%.*]] = call i8 @llvm.umax.i8(i8 [[SUB]], i8 1)
+; CHECK-NEXT: ret i8 [[VAL]]
+;
+entry:
+ %a1 = or i8 %a, 2
+ %a2 = shl i8 %b, 1
+ %sub = sub i8 %a1, %a2
+ %val = call i8 @llvm.umax.i8(i8 %sub, i8 1)
+ ret i8 %val
+}
+
+define i8 @fold_umin_with_knownbits_info_fail(i8 %a, i8 %b) {
+; CHECK-LABEL: @fold_umin_with_knownbits_info_fail(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A1:%.*]] = or i8 [[A:%.*]], 1
+; CHECK-NEXT: [[A2:%.*]] = shl i8 [[B:%.*]], 2
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[A1]], [[A2]]
+; CHECK-NEXT: [[VAL:%.*]] = call i8 @llvm.umin.i8(i8 [[SUB]], i8 3)
+; CHECK-NEXT: ret i8 [[VAL]]
+;
+entry:
+ %a1 = or i8 %a, 1
+ %a2 = shl i8 %b, 2
+ %sub = sub i8 %a1, %a2
+ %val = call i8 @llvm.umin.i8(i8 %sub, i8 3)
+ ret i8 %val
+}
|
5297a54
to
94c65a1
Compare
94c65a1
to
1700573
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
1700573
to
de719bd
Compare
This patch tries to fold minmax intrinsic by using
computeConstantRangeIncludingKnownBits
.Fixes regression in _karatsuba_rec:cpython/Modules/_decimal/libmpdec/mpdecimal.c, which was introduced by #71396.
See also dtcxzyw/llvm-opt-benchmark#16 (comment).
Alive2 for splat vectors with undef: https://alive2.llvm.org/ce/z/J8hKWd
It is an alternative to #76221.