Skip to content

Commit 1e04454

Browse files
committed
Missing AArch64ISD::BICi handling
1 parent d312788 commit 1e04454

File tree

4 files changed

+90
-9
lines changed

4 files changed

+90
-9
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3419,13 +3419,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
34193419
Known = KnownBits::mulhs(Known, Known2);
34203420
break;
34213421
}
3422-
case ISD::AVGCEILU: {
3422+
case ISD::AVGFLOORU:
3423+
case ISD::AVGCEILU:
3424+
case ISD::AVGFLOORS:
3425+
case ISD::AVGCEILS: {
3426+
bool IsCeil = Opcode == ISD::AVGCEILU || Opcode == ISD::AVGCEILS;
3427+
bool IsSigned = Opcode == ISD::AVGFLOORS || Opcode == ISD::AVGCEILS;
34233428
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
34243429
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
3425-
Known = Known.zext(BitWidth + 1);
3426-
Known2 = Known2.zext(BitWidth + 1);
3427-
KnownBits One = KnownBits::makeConstant(APInt(1, 1));
3428-
Known = KnownBits::computeForAddCarry(Known, Known2, One);
3430+
Known = IsSigned ? Known.sext(BitWidth + 1) : Known.zext(BitWidth + 1);
3431+
Known2 = IsSigned ? Known2.sext(BitWidth + 1) : Known2.zext(BitWidth + 1);
3432+
KnownBits Carry = KnownBits::makeConstant(APInt(1, IsCeil ? 1 : 0));
3433+
Known = KnownBits::computeForAddCarry(Known, Known2, Carry);
34293434
Known = Known.extractBits(BitWidth, 1);
34303435
break;
34313436
}

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24580,6 +24580,19 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
2458024580
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false))
2458124581
return R;
2458224582
return performFlagSettingCombine(N, DCI, AArch64ISD::SBC);
24583+
case AArch64ISD::BICi: {
24584+
KnownBits Known;
24585+
APInt DemandedBits =
24586+
APInt::getAllOnes(N->getValueType(0).getScalarSizeInBits());
24587+
APInt DemandedElts =
24588+
APInt::getAllOnes(N->getValueType(0).getVectorNumElements());
24589+
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
24590+
!DCI.isBeforeLegalizeOps());
24591+
if (DAG.getTargetLoweringInfo().SimplifyDemandedBits(
24592+
SDValue(N, 0), DemandedBits, DemandedElts, Known, TLO))
24593+
return TLO.New;
24594+
break;
24595+
}
2458324596
case ISD::XOR:
2458424597
return performXorCombine(N, DAG, DCI, Subtarget);
2458524598
case ISD::MUL:
@@ -27620,6 +27633,24 @@ bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
2762027633
// used - simplify to just Val.
2762127634
return TLO.CombineTo(Op, ShiftR->getOperand(0));
2762227635
}
27636+
case AArch64ISD::BICi: {
27637+
// Fold BICi away if every bit it would clear is already known to be zero in Op0
27638+
SDValue Op0 = Op.getOperand(0);
27639+
KnownBits KnownOp0 =
27640+
TLO.DAG.computeKnownBits(Op0, OriginalDemandedElts, Depth + 1);
27641+
// Op0 &= ~(ConstantOperandVal(1) << ConstantOperandVal(2))
27642+
uint64_t BitsToClear = Op->getConstantOperandVal(1)
27643+
<< Op->getConstantOperandVal(2);
27644+
APInt AlreadyZeroedBitsToClear = BitsToClear & KnownOp0.Zero;
27645+
if (APInt(Known.getBitWidth(), BitsToClear)
27646+
.isSubsetOf(AlreadyZeroedBitsToClear))
27647+
return TLO.CombineTo(Op, Op0);
27648+
27649+
Known = KnownOp0 &
27650+
KnownBits::makeConstant(APInt(Known.getBitWidth(), ~BitsToClear));
27651+
27652+
return false;
27653+
}
2762327654
case ISD::INTRINSIC_WO_CHAIN: {
2762427655
if (auto ElementSize = IsSVECntIntrinsic(Op)) {
2762527656
unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits();

llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
1212
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
1313
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
1414
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
15-
; CHECK-NEXT: bic v0.8h, #254, lsl #8
1615
; CHECK-NEXT: ret
1716
%x0 = zext <8 x i8> %a0 to <8 x i16>
1817
%x1 = zext <8 x i8> %a1 to <8 x i16>
@@ -27,7 +26,6 @@ define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
2726
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
2827
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
2928
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
30-
; CHECK-NEXT: bic v0.8h, #254, lsl #8
3129
; CHECK-NEXT: ret
3230
%x0 = zext <8 x i8> %a0 to <8 x i16>
3331
%x1 = zext <8 x i8> %a1 to <8 x i16>
@@ -42,7 +40,6 @@ define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
4240
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
4341
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
4442
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
45-
; CHECK-NEXT: bic v0.8h, #254, lsl #8
4643
; CHECK-NEXT: ret
4744
%x0 = zext <8 x i8> %a0 to <8 x i16>
4845
%x1 = zext <8 x i8> %a1 to <8 x i16>
@@ -57,7 +54,6 @@ define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
5754
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
5855
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
5956
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
60-
; CHECK-NEXT: bic v0.8h, #254, lsl #8
6157
; CHECK-NEXT: ret
6258
%x0 = zext <8 x i8> %a0 to <8 x i16>
6359
%x1 = zext <8 x i8> %a1 to <8 x i16>

llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "llvm/Analysis/MemoryLocation.h"
1010
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
1111
#include "llvm/AsmParser/Parser.h"
12+
#include "llvm/CodeGen/ISDOpcodes.h"
1213
#include "llvm/CodeGen/MachineModuleInfo.h"
1314
#include "llvm/CodeGen/SelectionDAG.h"
1415
#include "llvm/CodeGen/TargetLowering.h"
@@ -796,4 +797,52 @@ TEST_F(AArch64SelectionDAGTest, computeKnownBits_extload_knownnegative) {
796797
EXPECT_EQ(Known.One, APInt(32, 0xfffffff0));
797798
}
798799

800+
// Checks SelectionDAG::computeKnownBits for the four averaging opcodes
// (AVGFLOORU, AVGFLOORS, AVGCEILU, AVGCEILS) when both operands are v8i8
// registers zero-extended to v8i16. Each node is expected to report the same
// known bits: upper byte known zero, nothing known one.
TEST_F(AArch64SelectionDAGTest,
       computeKnownBits_AVGFLOORU_AVGFLOORS_AVGCEILU_AVGCEILS) {
  SDLoc Loc;
  auto Int8VT = EVT::getIntegerVT(Context, 8);
  auto Int16VT = EVT::getIntegerVT(Context, 16);
  auto Int8Vec8VT = EVT::getVectorVT(Context, Int8VT, 8);
  auto Int16Vec8VT = EVT::getVectorVT(Context, Int16VT, 8);

  // Register nodes serve as operands about which nothing is known.
  SDValue UnknownOp0 = DAG->getRegister(0, Int8Vec8VT);
  SDValue UnknownOp1 = DAG->getRegister(1, Int8Vec8VT);

  SDValue ZextOp0 =
      DAG->getNode(ISD::ZERO_EXTEND, Loc, Int16Vec8VT, UnknownOp0);
  SDValue ZextOp1 =
      DAG->getNode(ISD::ZERO_EXTEND, Loc, Int16Vec8VT, UnknownOp1);
  // ZextOp0 = 00000000????????
  // ZextOp1 = 00000000????????
  // => (for all AVG* instructions)
  // Known.Zero = 1111111100000000 (0xFF00)
  // Known.One  = 0000000000000000 (0x0000)
  auto Zeroes = APInt(16, 0xFF00);
  auto Ones = APInt(16, 0x0000);

  SDValue AVGFLOORU =
      DAG->getNode(ISD::AVGFLOORU, Loc, Int16Vec8VT, ZextOp0, ZextOp1);
  KnownBits KnownAVGFLOORU = DAG->computeKnownBits(AVGFLOORU);
  EXPECT_EQ(KnownAVGFLOORU.Zero, Zeroes);
  EXPECT_EQ(KnownAVGFLOORU.One, Ones);

  // Build the node with the signed-floor opcode so this stanza does not
  // merely repeat the AVGFLOORU check above.
  SDValue AVGFLOORS =
      DAG->getNode(ISD::AVGFLOORS, Loc, Int16Vec8VT, ZextOp0, ZextOp1);
  KnownBits KnownAVGFLOORS = DAG->computeKnownBits(AVGFLOORS);
  EXPECT_EQ(KnownAVGFLOORS.Zero, Zeroes);
  EXPECT_EQ(KnownAVGFLOORS.One, Ones);

  SDValue AVGCEILU =
      DAG->getNode(ISD::AVGCEILU, Loc, Int16Vec8VT, ZextOp0, ZextOp1);
  KnownBits KnownAVGCEILU = DAG->computeKnownBits(AVGCEILU);
  EXPECT_EQ(KnownAVGCEILU.Zero, Zeroes);
  EXPECT_EQ(KnownAVGCEILU.One, Ones);

  SDValue AVGCEILS =
      DAG->getNode(ISD::AVGCEILS, Loc, Int16Vec8VT, ZextOp0, ZextOp1);
  KnownBits KnownAVGCEILS = DAG->computeKnownBits(AVGCEILS);
  EXPECT_EQ(KnownAVGCEILS.Zero, Zeroes);
  EXPECT_EQ(KnownAVGCEILS.One, Ones);
}
847+
799848
} // end namespace llvm

0 commit comments

Comments
 (0)