[InstCombineCompares] Try to "strengthen" compares based on known bits. #79405
Conversation
@llvm/pr-subscribers-llvm-transforms

Author: Mikhail Gudim (mgudim)

Changes: For example, replace `icmp ugt %x, 14` with `icmp ugt %x, 15` when it is known that the two least significant bits of `%x` are zero. Since `%x` is then a multiple of 4, both compares hold for exactly the same values, and the tighter constant may enable further folds.

Full diff: https://github.com/llvm/llvm-project/pull/79405.diff

2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 3875e59c3ede3b..30b421381de11c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -6345,6 +6345,78 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
(Op0Known.One.isNegative() && Op1Known.One.isNegative())))
return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
+ // Try to "strengthen" the RHS of the compare based on known bits.
+ // For example, replace `icmp ugt %x, 14` with `icmp ugt %x, 15` when
+ // it is known that the two least significant bits of `%x` are zero.
+ if (Op1Known.isConstant() && Op0Known.Zero.isMask()) {
+ APInt RHSConst = Op1Known.getConstant();
+ ConstantRange Op0PredRange =
+ ConstantRange::makeExactICmpRegion(Pred, RHSConst);
+ int KnownZeroMaskLength = BitWidth - Op0Known.Zero.countLeadingZeros();
+ if (KnownZeroMaskLength > 0) {
+ APInt PowOf2(BitWidth, 1 << KnownZeroMaskLength);
+ APInt Op0PredMin(BitWidth, 0);
+ APInt Op0PredMax(BitWidth, 0);
+ APInt Op0MinRefinedByKnownBits(BitWidth, 0);
+ APInt Op0MaxRefinedByKnownBits(BitWidth, 0);
+ APInt NewLower(BitWidth, 0);
+ APInt NewUpper(BitWidth, 0);
+ bool ImprovedLower = false;
+ bool ImprovedUpper = false;
+ if (I.isSigned()) {
+ Op0PredMin = Op0PredRange.getSignedMin();
+ Op0PredMax = Op0PredRange.getSignedMax();
+ // Compute the smallest number satisfying the known-bits constraints
+ // which is greater than or equal to Op0PredMin.
+ Op0MinRefinedByKnownBits =
+ PowOf2 *
+ APIntOps::RoundingSDiv(Op0PredMin, PowOf2, APInt::Rounding::UP);
+ // Compute the largest number satisfying the known-bits constraints
+ // which is less than or equal to Op0PredMax.
+ Op0MaxRefinedByKnownBits =
+ PowOf2 *
+ APIntOps::RoundingSDiv(Op0PredMax, PowOf2, APInt::Rounding::DOWN);
+ NewLower = APIntOps::smax(Op0MinRefinedByKnownBits, Op0PredMin);
+ NewUpper = APIntOps::smin(Op0MaxRefinedByKnownBits, Op0PredMax);
+ ImprovedLower = NewLower.sgt(Op0PredMin);
+ ImprovedUpper = NewUpper.slt(Op0PredMax);
+ } else {
+ Op0PredMin = Op0PredRange.getUnsignedMin();
+ Op0PredMax = Op0PredRange.getUnsignedMax();
+ Op0MinRefinedByKnownBits =
+ PowOf2 *
+ APIntOps::RoundingUDiv(Op0PredMin, PowOf2, APInt::Rounding::UP);
+ Op0MaxRefinedByKnownBits =
+ PowOf2 *
+ APIntOps::RoundingUDiv(Op0PredMax, PowOf2, APInt::Rounding::DOWN);
+ NewLower = APIntOps::umax(Op0MinRefinedByKnownBits, Op0PredMin);
+ NewUpper = APIntOps::umin(Op0MaxRefinedByKnownBits, Op0PredMax);
+ ImprovedLower = NewLower.ugt(Op0PredMin);
+ ImprovedUpper = NewUpper.ult(Op0PredMax);
+ }
+
+ // Non-strict inequalities should have been canonicalized to strict ones
+ // by now.
+ switch (Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT: {
+ if (ImprovedUpper)
+ return new ICmpInst(Pred, Op0,
+ ConstantInt::get(Op1->getType(), NewUpper + 1));
+ break;
+ }
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT: {
+ if (ImprovedLower)
+ return new ICmpInst(Pred, Op0,
+ ConstantInt::get(Op1->getType(), NewLower - 1));
+ break;
+ }
+ }
+ }
+ }
return nullptr;
}
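The rounding arithmetic above is easier to see in isolation. Below is a minimal standalone sketch of the unsigned refinement, assuming only LLVM's `APInt` API; the helper `refineUnsignedBounds` and its packaging are illustrative, not part of the patch:

```cpp
#include "llvm/ADT/APInt.h"
#include <utility>
using namespace llvm;

// Every feasible value of the operand is a multiple of PowOf2 (its low bits
// are known zero), so round Min up and Max down to the nearest multiples.
static std::pair<APInt, APInt> refineUnsignedBounds(const APInt &Min,
                                                    const APInt &Max,
                                                    const APInt &PowOf2) {
  APInt Lo = PowOf2 * APIntOps::RoundingUDiv(Min, PowOf2, APInt::Rounding::UP);
  APInt Hi =
      PowOf2 * APIntOps::RoundingUDiv(Max, PowOf2, APInt::Rounding::DOWN);
  // Clamp so the refined bounds never widen the original range.
  return {APIntOps::umax(Lo, Min), APIntOps::umin(Hi, Max)};
}
// Example: Min=15, Max=255, PowOf2=4 refines the range to [16, 252].
```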
diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
index 1f554c7b60256c..339d66e8c2e437 100644
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -1445,8 +1445,8 @@ define <2 x i1> @test70vec(<2 x i32> %X) {
define i1 @icmp_sext16trunc(i32 %x) {
; CHECK-LABEL: @icmp_sext16trunc(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[TMP1]], 36
+; CHECK-NEXT: [[SEXT1:%.*]] = shl i32 [[X:%.*]], 16
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SEXT1]], 2293761
; CHECK-NEXT: ret i1 [[CMP]]
;
%trunc = trunc i32 %x to i16
@@ -1457,8 +1457,8 @@ define i1 @icmp_sext16trunc(i32 %x) {
define i1 @icmp_sext8trunc(i32 %x) {
; CHECK-LABEL: @icmp_sext8trunc(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 36
+; CHECK-NEXT: [[SEXT1:%.*]] = shl i32 [[X:%.*]], 24
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SEXT1]], 587202561
; CHECK-NEXT: ret i1 [[CMP]]
;
%trunc = trunc i32 %x to i8
@@ -1470,8 +1470,8 @@ define i1 @icmp_sext8trunc(i32 %x) {
; Vectors should fold the same way.
define <2 x i1> @icmp_sext8trunc_vec(<2 x i32> %x) {
; CHECK-LABEL: @icmp_sext8trunc_vec(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i8>
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], <i8 36, i8 36>
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 24, i32 24>
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[TMP1]], <i32 587202561, i32 587202561>
; CHECK-NEXT: ret <2 x i1> [[CMP]]
;
%trunc = trunc <2 x i32> %x to <2 x i8>
@@ -1482,8 +1482,8 @@ define <2 x i1> @icmp_sext8trunc_vec(<2 x i32> %x) {
define i1 @icmp_shl16(i32 %x) {
; CHECK-LABEL: @icmp_shl16(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[TMP1]], 36
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 16
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SHL]], 2293761
; CHECK-NEXT: ret i1 [[CMP]]
;
%shl = shl i32 %x, 16
@@ -1496,7 +1496,7 @@ define i1 @icmp_shl16(i32 %x) {
define i1 @icmp_shl17(i32 %x) {
; CHECK-LABEL: @icmp_shl17(
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 17
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SHL]], 2359296
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SHL]], 2228225
; CHECK-NEXT: ret i1 [[CMP]]
;
%shl = shl i32 %x, 17
@@ -1506,8 +1506,8 @@ define i1 @icmp_shl17(i32 %x) {
define <2 x i1> @icmp_shl16_vec(<2 x i32> %x) {
; CHECK-LABEL: @icmp_shl16_vec(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i16>
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i16> [[TMP1]], <i16 36, i16 36>
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[X:%.*]], <i32 16, i32 16>
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[SHL]], <i32 2293761, i32 2293761>
; CHECK-NEXT: ret <2 x i1> [[CMP]]
;
%shl = shl <2 x i32> %x, <i32 16, i32 16>
@@ -1517,8 +1517,8 @@ define <2 x i1> @icmp_shl16_vec(<2 x i32> %x) {
define i1 @icmp_shl24(i32 %x) {
; CHECK-LABEL: @icmp_shl24(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 36
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 24
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SHL]], 587202561
; CHECK-NEXT: ret i1 [[CMP]]
;
%shl = shl i32 %x, 24
@@ -2154,7 +2154,7 @@ define i1 @icmp_ashr_and_overshift(i8 %X) {
define i1 @icmp_and_ashr_neg_and_legal(i8 %x) {
; CHECK-LABEL: @icmp_and_ashr_neg_and_legal(
; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -32
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 16
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 1
; CHECK-NEXT: ret i1 [[CMP]]
;
%ashr = ashr i8 %x, 4
@@ -2180,7 +2180,7 @@ define i1 @icmp_and_ashr_mixed_and_shiftout(i8 %x) {
define i1 @icmp_and_ashr_neg_cmp_slt_legal(i8 %x) {
; CHECK-LABEL: @icmp_and_ashr_neg_cmp_slt_legal(
; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -32
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], -64
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], -95
; CHECK-NEXT: ret i1 [[CMP]]
;
%ashr = ashr i8 %x, 4
@@ -2194,7 +2194,7 @@ define i1 @icmp_and_ashr_neg_cmp_slt_shiftout(i8 %x) {
; CHECK-LABEL: @icmp_and_ashr_neg_cmp_slt_shiftout(
; CHECK-NEXT: [[ASHR:%.*]] = ashr i8 [[X:%.*]], 4
; CHECK-NEXT: [[AND:%.*]] = and i8 [[ASHR]], -2
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[AND]], -68
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[AND]], -69
; CHECK-NEXT: ret i1 [[CMP]]
;
%ashr = ashr i8 %x, 4
@@ -5138,3 +5138,53 @@ entry:
%cmp = icmp eq i8 %add2, %add1
ret i1 %cmp
}
+
+define i1 @tighten_icmp_using_known_bits_ugt(i16 %a) {
+; CHECK-LABEL: @tighten_icmp_using_known_bits_ugt(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i16 [[A:%.*]], 15
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %and_ = and i16 %a, 65532
+ %cmp = icmp ugt i16 %and_, 14
+ ret i1 %cmp
+}
+
+define i1 @tighten_icmp_using_known_bits_ult(i16 %a) {
+; CHECK-LABEL: @tighten_icmp_using_known_bits_ult(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[AND_:%.*]] = and i16 [[A:%.*]], -4
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[AND_]], 17
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %and_ = and i16 %a, 65532
+ %cmp = icmp ult i16 %and_, 18
+ ret i1 %cmp
+}
+
+define i1 @tighten_icmp_using_known_bits_sgt(i16 %a) {
+; CHECK-LABEL: @tighten_icmp_using_known_bits_sgt(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i16 [[A:%.*]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %and_ = and i16 %a, 65520
+ %cmp = icmp sgt i16 %and_, -15
+ ret i1 %cmp
+}
+
+define i1 @tighten_icmp_using_known_bits_slt(i16 %a) {
+; CHECK-LABEL: @tighten_icmp_using_known_bits_slt(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[AND_:%.*]] = and i16 [[A:%.*]], -4
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[AND_]], -15
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %and_ = and i16 %a, 65532
+ %cmp = icmp slt i16 %and_, -14
+ ret i1 %cmp
+}
This (hopefully) fixes the degradation discussed in #75899.
Commits:
- For example, replace `icmp ugt %x, 14` with `icmp ugt %x, 15` when it is known that the two least significant bits of `%x` are zero.
- Avoid breaking the "sign bit check" pattern.
Force-pushed from c4512dc to be9c0c7
You can test this locally with the following command:

git-clang-format --diff 41fe98a6e7e5cdcab4a4e9e0d09339231f480c01 f5fef0574fffc6f1492403c9dbdd2dcfc66168e7 -- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

View the diff from clang-format here:

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index ab9460e09e..48148e69bf 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -6445,7 +6445,8 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
// if the result of compare is used only in conditional branches, try to
// "strengthen" the compare. This may allow us to deduce stronger results
- // about the value involved in comparison in the blocks dominated by these branches.
+ // about the value involved in comparison in the blocks dominated by these
+ // branches.
bool AllUsesAreInBranches = true;
for (const Use &U : I.uses()) {
const Instruction *UI = cast<Instruction>(U.getUser());
I am not sure that this is the correct way to fix the regression.
// Don't break the SignBitCheck pattern;
if (InstCombiner::isSignBitCheck(Pred, RHSConst, TrueIfSigned))
  return nullptr;
You can add a check here to avoid breaking the SPF (select pattern flavor, i.e. a recognized min/max select).
@dtcxzyw Added a check not to break any select patterns.
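For reference, such a guard could look roughly like the following sketch; the helper name `feedsMinMaxSelect` is hypothetical, and this is not necessarily the exact check that was added:

```cpp
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Skip the transform when the compare feeds a select that ValueTracking
// recognizes as a min/max (a "select pattern flavor"): rewriting the icmp
// constant would decouple it from the select arm and break the pattern.
static bool feedsMinMaxSelect(ICmpInst &I) {
  for (User *U : I.users())
    if (auto *Sel = dyn_cast<SelectInst>(U)) {
      Value *LHS, *RHS;
      if (SelectPatternResult::isMinOrMax(
              matchSelectPattern(Sel, LHS, RHS).Flavor))
        return true;
    }
  return false;
}
```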
> @dtcxzyw Added a check not to break any select patterns.
Tests?
> @dtcxzyw Added a check not to break any select patterns.
Emm, it seems like some regressions are still there :(
dtcxzyw/llvm-opt-benchmark#148 (comment)
@dtcxzyw I just added tests for the select patterns and the sign-check pattern. Everything works as intended. I tried your specific example and the pattern wasn't broken. There must be something else happening in that code. Can you double check that the degradation is still there? If yes, can you provide a reduced test case?
> @dtcxzyw I just added tests for the select patterns and the sign-check pattern. Everything works as intended. I tried your specific example and the pattern wasn't broken. There must be something else happening in that code. Can you double check that the degradation is still there? If yes, can you provide a reduced test case?
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE = constant i64 67108864
define ptr @_ZN6Assimp14StackAllocator8AllocateEm(ptr %this, i64 %0) {
entry:
%mul = shl i64 %0, 1
store i64 %mul, ptr %this, align 8
%call = call ptr @_ZSt3minImERKT_S2_S2_(ptr %this, ptr @_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE)
%1 = load i64, ptr %call, align 8
store i64 %1, ptr %this, align 8
ret ptr null
}
define ptr @_ZSt3minImERKT_S2_S2_(ptr %__a, ptr %__b) {
entry:
%0 = load i64, ptr %__b, align 8
%1 = load i64, ptr %__a, align 8
%cmp = icmp ult i64 %0, %1
%__b.__a = select i1 %cmp, ptr %__b, ptr %__a
ret ptr %__b.__a
}
Baseline (-O3):
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE = local_unnamed_addr constant i64 67108864
; Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: write)
define noalias ptr @_ZN6Assimp14StackAllocator8AllocateEm(ptr nocapture writeonly %this, i64 %0) local_unnamed_addr #0 {
entry:
%mul = shl i64 %0, 1
%1 = tail call i64 @llvm.umin.i64(i64 %mul, i64 67108864)
store i64 %1, ptr %this, align 8
ret ptr null
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read)
define ptr @_ZSt3minImERKT_S2_S2_(ptr readonly %__a, ptr readonly %__b) local_unnamed_addr #1 {
entry:
%0 = load i64, ptr %__b, align 8
%1 = load i64, ptr %__a, align 8
%cmp = icmp ult i64 %0, %1
%__b.__a = select i1 %cmp, ptr %__b, ptr %__a
ret ptr %__b.__a
}
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i64 @llvm.umin.i64(i64, i64) #2
attributes #0 = { mustprogress nofree nosync nounwind willreturn memory(argmem: write) }
attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
After this patch:
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE = local_unnamed_addr constant i64 67108864
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
define noalias noundef ptr @_ZN6Assimp14StackAllocator8AllocateEm(ptr nocapture writeonly %this, i64 %0) local_unnamed_addr #0 {
entry:
%mul = shl i64 %0, 1
%cmp.i = icmp ugt i64 %mul, 67108865
%1 = select i1 %cmp.i, i64 67108864, i64 %mul
store i64 %1, ptr %this, align 8
ret ptr null
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read)
define ptr @_ZSt3minImERKT_S2_S2_(ptr readonly %__a, ptr readonly %__b) local_unnamed_addr #1 {
entry:
%0 = load i64, ptr %__b, align 8
%1 = load i64, ptr %__a, align 8
%cmp = icmp ult i64 %0, %1
%__b.__a = select i1 %cmp, ptr %__b, ptr %__a
ret ptr %__b.__a
}
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) }
attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) }
It looks like a phase ordering problem :(
After inlining the call `%call = call ptr @_ZSt3minImERKT_S2_S2_(ptr %this, ptr @_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE)`, the code looks like this:
; *** IR Dump After InlinerPass on (_ZN6Assimp14StackAllocator8AllocateEm) ***
define ptr @_ZN6Assimp14StackAllocator8AllocateEm(ptr %this, i64 %0) local_unnamed_addr {
entry:
%mul = shl i64 %0, 1
store i64 %mul, ptr %this, align 8
%1 = load i64, ptr %this, align 8
%cmp.i = icmp ult i64 67108864, %1
%__b.__a.i = select i1 %cmp.i, ptr @_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE, ptr %this
%2 = load i64, ptr %__b.__a.i, align 8
store i64 %2, ptr %this, align 8
ret ptr null
}
so the constant `_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE` was replaced with its value in the `icmp` instruction, but not in the `select`. Why?
I see, the `select` can only be simplified after inlining and combining it with the `load`.
Force-pushed from 1bf0572 to 012d5d1
Force-pushed from 012d5d1 to 82ad843
Suggested change:
- KnownBits Op0Known,
- KnownBits Op1Known,
+ const KnownBits &Op0Known,
+ const KnownBits &Op1Known,
KnownBits Op0Known,
KnownBits Op1Known,
unsigned BitWidth) {
  if (!BitWidth)
`BitWidth` is always non-zero.
ConstantRange Op0PredRange =
    ConstantRange::makeExactICmpRegion(Pred, RHSConst);
int KnownZeroMaskLength = BitWidth - Op0Known.Zero.countLeadingZeros();
Suggested change:
- int KnownZeroMaskLength = BitWidth - Op0Known.Zero.countLeadingZeros();
+ unsigned KnownZeroMaskLength = BitWidth - Op0Known.Zero.countLeadingZeros();
if (KnownZeroMaskLength == 0)
  return nullptr;
APInt PowOf2(BitWidth, 1 << KnownZeroMaskLength);
Use `APInt::getOneBitSet` instead; the `int` shift `1 << KnownZeroMaskLength` cannot form the constant once the mask length reaches the width of a host `int`, so it is wrong for wide types. Please also add an i128 test.
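A sketch of the suggested construction (the `makePowOf2` wrapper is illustrative only, not code from the patch):

```cpp
#include "llvm/ADT/APInt.h"
using namespace llvm;

// APInt::getOneBitSet builds 2^BitNo at any bit width, so it stays correct
// for types like i128, where `1 << N` on a host int cannot.
// Precondition: KnownZeroMaskLength < BitWidth.
static APInt makePowOf2(unsigned BitWidth, unsigned KnownZeroMaskLength) {
  return APInt::getOneBitSet(BitWidth, KnownZeroMaskLength);
}
```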
// Compute the smallest number satisfying the known-bits constrained
// which is at greater or equal Op0MinAccordingToPred.
Op0MinRefinedByKnownBits =
    PowOf2 * APIntOps::RoundingSDiv(Op0MinAccordingToPred, PowOf2,
What is the compile-time impact of this patch?
// Compute the largest number satisfying the known-bits constrained
// which is at less or equal Op0MaxAccordingToPred.
Suggested change:
- // Compute the largest number satisfying the known-bits constrained
- // which is at less or equal Op0MaxAccordingToPred.
+ // Compute the largest number satisfying the known-bits constraints
+ // which is at less or equal to Op0MaxAccordingToPred.
PowOf2 * APIntOps::RoundingUDiv(Op0MinAccordingToPred, PowOf2,
                                APInt::Rounding::UP);
Suggested change (whitespace only):
PowOf2 * APIntOps::RoundingUDiv(Op0MinAccordingToPred, PowOf2,
                                APInt::Rounding::UP);
Can the multiplication overflow?
// by now.
switch (Pred) {
default:
  break;
We can early exit when the predicate is not signed.
  break;
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_SLT: {
  if (ImprovedUpper)
Can we avoid refining the lower bound when the predicate is `ult`/`slt`?
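One way to structure that, sketched with illustrative helper names rather than the patch's code:

```cpp
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// ult/slt only ever consume the refined upper bound, and ugt/sgt only the
// lower one, so the other rounding computation can be skipped entirely.
static bool onlyNeedsUpperBound(CmpInst::Predicate Pred) {
  return Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_SLT;
}

static bool onlyNeedsLowerBound(CmpInst::Predicate Pred) {
  return Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_SGT;
}
```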
@dtcxzyw I think it's better to limit this only to the case when the result of the compare is used only in conditional branches.
@dtcxzyw @nikic Actually, now I don't think it's such a good idea. If the comparison is used in a branch and we "strengthen" it, then if the branch is taken we can deduce more. BUT on the other hand, we have actually "weakened" the opposite inequality: if the branch is not taken we can deduce less. Maybe a better way to express this would be to insert …
NewUpper = APIntOps::umin(Op0MaxRefinedByKnownBits, Op0MaxAccordingToPred);
ImprovedLower = NewLower.ugt(Op0MinAccordingToPred);
ImprovedUpper = NewUpper.ult(Op0MaxAccordingToPred);
}
Instead of creating these custom bounds, why not use `ConstantRange`?
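A sketch of that idea, with assumed packaging; note that `ConstantRange::fromKnownBits` keeps only the bit-level envelope and not the "multiple of 2^N" stride that the patch's rounding exploits, so this alone is coarser:

```cpp
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// Intersect the predicate's exact region with the range implied by the
// known bits of the LHS, instead of hand-rolling min/max arithmetic.
static ConstantRange refineWithKnownBits(CmpInst::Predicate Pred,
                                         const APInt &RHSConst,
                                         const KnownBits &Op0Known,
                                         bool IsSigned) {
  ConstantRange PredRange = ConstantRange::makeExactICmpRegion(Pred, RHSConst);
  ConstantRange KnownRange = ConstantRange::fromKnownBits(Op0Known, IsSigned);
  return PredRange.intersectWith(KnownRange, IsSigned
                                                 ? ConstantRange::Signed
                                                 : ConstantRange::Unsigned);
}
```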