Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit a3a709f

Browse files
committed
[X86][AVX2] Tag VPMOVSX/VPMOVZX ymm instructions as WriteShuffle256
These instructions behave more like cross-lane shuffles than regular shuffles, and we already tag the AVX512 equivalents this way.
Differential Revision: https://reviews.llvm.org/D46229
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@331659 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 8fd84d4 commit a3a709f

9 files changed

+80
-141
lines changed

lib/Target/X86/X86InstrSSE.td

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4881,26 +4881,29 @@ multiclass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
48814881
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
48824882
Sched<[sched.Folded]>;
48834883
}
4884-
// FIXME: YMM cases should use SchedWriteShuffle.YMM.
4884+
48854885
multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
48864886
X86MemOperand MemOp, X86MemOperand MemYOp,
4887-
X86SchedWriteWidths sched, Predicate prd> {
4888-
defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, sched.XMM>;
4887+
Predicate prd> {
4888+
defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128,
4889+
SchedWriteShuffle.XMM>;
48894890
let Predicates = [HasAVX, prd] in
48904891
defm V#NAME : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
4891-
VR128, VR128, sched.XMM>, VEX, VEX_WIG;
4892+
VR128, VR128, SchedWriteShuffle.XMM>,
4893+
VEX, VEX_WIG;
48924894
let Predicates = [HasAVX2, prd] in
48934895
defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
4894-
VR256, VR128, sched.XMM>, VEX, VEX_L, VEX_WIG;
4896+
VR256, VR128, WriteShuffle256>,
4897+
VEX, VEX_L, VEX_WIG;
48954898
}
48964899

48974900
multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
48984901
X86MemOperand MemYOp, Predicate prd> {
48994902
defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, !strconcat("pmovsx", OpcodeStr),
4900-
MemOp, MemYOp, SchedWriteShuffle, prd>;
4903+
MemOp, MemYOp, prd>;
49014904
defm PMOVZX#NAME : SS41I_pmovx_rm_all<!add(opc, 0x10),
49024905
!strconcat("pmovzx", OpcodeStr),
4903-
MemOp, MemYOp, SchedWriteShuffle, prd>;
4906+
MemOp, MemYOp, prd>;
49044907
}
49054908

49064909
defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem, NoVLX_Or_NoBWI>;

lib/Target/X86/X86SchedBroadwell.td

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -687,19 +687,7 @@ def BWWriteResGroup28 : SchedWriteRes<[BWPort5]> {
687687
let ResourceCycles = [1];
688688
}
689689
def: InstRW<[BWWriteResGroup28], (instregex "VPBROADCASTBrr",
690-
"VPBROADCASTWrr",
691-
"VPMOVSXBDYrr",
692-
"VPMOVSXBQYrr",
693-
"VPMOVSXBWYrr",
694-
"VPMOVSXDQYrr",
695-
"VPMOVSXWDYrr",
696-
"VPMOVSXWQYrr",
697-
"VPMOVZXBDYrr",
698-
"VPMOVZXBQYrr",
699-
"VPMOVZXBWYrr",
700-
"VPMOVZXDQYrr",
701-
"VPMOVZXWDYrr",
702-
"VPMOVZXWQYrr")>;
690+
"VPBROADCASTWrr")>;
703691

704692
def BWWriteResGroup30 : SchedWriteRes<[BWPort0156]> {
705693
let Latency = 2;

lib/Target/X86/X86SchedHaswell.td

Lines changed: 1 addition & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1287,19 +1287,7 @@ def HWWriteResGroup51 : SchedWriteRes<[HWPort5]> {
12871287
let ResourceCycles = [1];
12881288
}
12891289
def: InstRW<[HWWriteResGroup51], (instregex "VPBROADCASTBrr",
1290-
"VPBROADCASTWrr",
1291-
"VPMOVSXBDYrr",
1292-
"VPMOVSXBQYrr",
1293-
"VPMOVSXBWYrr",
1294-
"VPMOVSXDQYrr",
1295-
"VPMOVSXWDYrr",
1296-
"VPMOVSXWQYrr",
1297-
"VPMOVZXBDYrr",
1298-
"VPMOVZXBQYrr",
1299-
"VPMOVZXBWYrr",
1300-
"VPMOVZXDQYrr",
1301-
"VPMOVZXWDYrr",
1302-
"VPMOVZXWQYrr")>;
1290+
"VPBROADCASTWrr")>;
13031291

13041292
def HWWriteResGroup52 : SchedWriteRes<[HWPort1,HWPort23]> {
13051293
let Latency = 9;
@@ -1320,17 +1308,6 @@ def: InstRW<[HWWriteResGroup52_1], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
13201308
"VCVTPS2DQYrm",
13211309
"VCVTTPS2DQYrm")>;
13221310

1323-
def HWWriteResGroup53 : SchedWriteRes<[HWPort5,HWPort23]> {
1324-
let Latency = 10;
1325-
let NumMicroOps = 2;
1326-
let ResourceCycles = [1,1];
1327-
}
1328-
def: InstRW<[HWWriteResGroup53], (instregex "VPMOVZXBDYrm",
1329-
"VPMOVZXBQYrm",
1330-
"VPMOVZXBWYrm",
1331-
"VPMOVZXDQYrm",
1332-
"VPMOVZXWQYrm")>;
1333-
13341311
def HWWriteResGroup53_1 : SchedWriteRes<[HWPort5,HWPort23]> {
13351312
let Latency = 9;
13361313
let NumMicroOps = 2;

lib/Target/X86/X86SchedSkylakeClient.td

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -731,19 +731,7 @@ def: InstRW<[SKLWriteResGroup30], (instregex "(ADD|SUB|SUBR)_FPrST0",
731731
"(ADD|SUB|SUBR)_FrST0",
732732
"VPBROADCASTBrr",
733733
"VPBROADCASTWrr",
734-
"(V?)PCMPGTQ(Y?)rr",
735-
"VPMOVSXBDYrr",
736-
"VPMOVSXBQYrr",
737-
"VPMOVSXBWYrr",
738-
"VPMOVSXDQYrr",
739-
"VPMOVSXWDYrr",
740-
"VPMOVSXWQYrr",
741-
"VPMOVZXBDYrr",
742-
"VPMOVZXBQYrr",
743-
"VPMOVZXBWYrr",
744-
"VPMOVZXDQYrr",
745-
"VPMOVZXWDYrr",
746-
"VPMOVZXWQYrr")>;
734+
"(V?)PCMPGTQ(Y?)rr")>;
747735

748736
def SKLWriteResGroup31 : SchedWriteRes<[SKLPort0,SKLPort5]> {
749737
let Latency = 3;
@@ -1558,12 +1546,7 @@ def SKLWriteResGroup133 : SchedWriteRes<[SKLPort5,SKLPort23]> {
15581546
}
15591547
def: InstRW<[SKLWriteResGroup133], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
15601548
"ILD_F(16|32|64)m",
1561-
"VPCMPGTQYrm",
1562-
"VPMOVZXBDYrm",
1563-
"VPMOVZXBQYrm",
1564-
"VPMOVZXBWYrm",
1565-
"VPMOVZXDQYrm",
1566-
"VPMOVZXWQYrm")>;
1549+
"VPCMPGTQYrm")>;
15671550

15681551
def SKLWriteResGroup134 : SchedWriteRes<[SKLPort01,SKLPort23]> {
15691552
let Latency = 10;

lib/Target/X86/X86SchedSkylakeServer.td

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,18 +1062,6 @@ def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_FPrST0",
10621062
"VPMINUQZ128rr",
10631063
"VPMINUQZ256rr",
10641064
"VPMINUQZrr",
1065-
"VPMOVSXBDYrr",
1066-
"VPMOVSXBQYrr",
1067-
"VPMOVSXBWYrr",
1068-
"VPMOVSXDQYrr",
1069-
"VPMOVSXWDYrr",
1070-
"VPMOVSXWQYrr",
1071-
"VPMOVZXBDYrr",
1072-
"VPMOVZXBQYrr",
1073-
"VPMOVZXBWYrr",
1074-
"VPMOVZXDQYrr",
1075-
"VPMOVZXWDYrr",
1076-
"VPMOVZXWQYrr",
10771065
"VPSADBWZrr", // TODO: 512-bit ops require ports 0/1 to be joined.
10781066
"VPTESTMBZ128rr",
10791067
"VPTESTMBZ256rr",
@@ -2603,11 +2591,6 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
26032591
"VPMINSQZrm(b?)",
26042592
"VPMINUQZ256rm(b?)",
26052593
"VPMINUQZrm(b?)",
2606-
"VPMOVZXBDYrm",
2607-
"VPMOVZXBQYrm",
2608-
"VPMOVZXBWYrm",
2609-
"VPMOVZXDQYrm",
2610-
"VPMOVZXWQYrm",
26112594
"VPTESTMBZ256rm(b?)",
26122595
"VPTESTMBZrm(b?)",
26132596
"VPTESTMDZ256rm(b?)",

lib/Target/X86/X86ScheduleZnver1.td

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -940,15 +940,20 @@ def ZnWriteFPU12Y : SchedWriteRes<[ZnFPU12]> {
940940
let NumMicroOps = 2;
941941
}
942942
def ZnWriteFPU12m : SchedWriteRes<[ZnAGU, ZnFPU12]> ;
943+
def ZnWriteFPU12Ym : SchedWriteRes<[ZnAGU, ZnFPU12]> {
944+
let Latency = 8;
945+
let NumMicroOps = 2;
946+
}
943947

944948
def : InstRW<[ZnWriteFPU12], (instregex "MMX_PACKSSDWirr",
945949
"MMX_PACKSSWBirr", "MMX_PACKUSWBirr")>;
946950
def : InstRW<[ZnWriteFPU12m], (instregex "MMX_PACKSSDWirm",
947951
"MMX_PACKSSWBirm", "MMX_PACKUSWBirm")>;
948952

949-
// VPMOVSX/ZX BW BD BQ DW DQ.
953+
// VPMOVSX/ZX BW BD BQ WD WQ DQ.
950954
// y <- x.
951-
def : InstRW<[ZnWriteFPU12Y], (instregex "VPMOV(SX|ZX)(BW|BQ|DW|DQ)Yrr")>;
955+
def : InstRW<[ZnWriteFPU12Y], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrr")>;
956+
def : InstRW<[ZnWriteFPU12Ym], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrm")>;
952957

953958
def ZnWriteFPU013 : SchedWriteRes<[ZnFPU013]> ;
954959
def ZnWriteFPU013Y : SchedWriteRes<[ZnFPU013]> {

0 commit comments

Comments
 (0)