Skip to content

Commit 007b50f

Browse files
committed
[X86][BtVer2] Improve simulation of (V)PINSR values
Include the 6-cycle delay incurred when transferring a value from the GPR to the FPU. llvm-svn: 332737
1 parent b550090 commit 007b50f

File tree

7 files changed

+38
-37
lines changed

7 files changed

+38
-37
lines changed

llvm/lib/Target/X86/X86ScheduleBtVer2.td

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -461,9 +461,10 @@ defm : JWriteResFpuPair<WriteVarVecShiftY,[JFPU01, JVALU], 1>; // NOTE: Doesn't
461461
// Vector insert/extract operations.
462462
////////////////////////////////////////////////////////////////////////////////
463463

464-
defm : JWriteResFpuPair<WriteVecInsert, [JFPU01, JVALU], 1>;
465-
def : WriteRes<WriteVecExtract, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
466-
def : WriteRes<WriteVecExtractSt, [JFPU1, JSTC, JSAGU]> { let Latency = 3; }
464+
defm : X86WriteRes<WriteVecInsert, [JFPU01, JVALU], 7, [1,1], 2>;
465+
defm : X86WriteRes<WriteVecInsertLd, [JFPU01, JVALU, JLAGU], 4, [1,1,1], 1>;
466+
defm : X86WriteRes<WriteVecExtract, [JFPU0, JFPA, JALU0], 3, [1,1,1], 1>;
467+
defm : X86WriteRes<WriteVecExtractSt, [JFPU1, JSTC, JSAGU], 3, [1,1,1], 1>;
467468

468469
////////////////////////////////////////////////////////////////////////////////
469470
// SSE42 String instructions.

llvm/test/CodeGen/X86/mmx-schedule.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3565,9 +3565,9 @@ define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize {
35653565
;
35663566
; BTVER2-LABEL: test_pinsrw:
35673567
; BTVER2: # %bb.0:
3568+
; BTVER2-NEXT: pinsrw $0, %edi, %mm0 # sched: [7:0.50]
35683569
; BTVER2-NEXT: movswl (%rsi), %eax # sched: [4:1.00]
3569-
; BTVER2-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:0.50]
3570-
; BTVER2-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:0.50]
3570+
; BTVER2-NEXT: pinsrw $1, %eax, %mm0 # sched: [7:0.50]
35713571
; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50]
35723572
; BTVER2-NEXT: retq # sched: [4:1.00]
35733573
;

llvm/test/CodeGen/X86/sse2-schedule.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9080,14 +9080,14 @@ define <8 x i16> @test_pinsrw(<8 x i16> %a0, i16 %a1, i16 *%a2) {
90809080
;
90819081
; BTVER2-SSE-LABEL: test_pinsrw:
90829082
; BTVER2-SSE: # %bb.0:
9083-
; BTVER2-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [1:0.50]
9084-
; BTVER2-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
9083+
; BTVER2-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [7:0.50]
9084+
; BTVER2-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [4:1.00]
90859085
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
90869086
;
90879087
; BTVER2-LABEL: test_pinsrw:
90889088
; BTVER2: # %bb.0:
9089-
; BTVER2-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
9090-
; BTVER2-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
9089+
; BTVER2-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [7:0.50]
9090+
; BTVER2-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [4:1.00]
90919091
; BTVER2-NEXT: retq # sched: [4:1.00]
90929092
;
90939093
; ZNVER1-SSE-LABEL: test_pinsrw:

llvm/test/CodeGen/X86/sse41-schedule.ll

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -2180,14 +2180,14 @@ define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) {
21802180
;
21812181
; BTVER2-SSE-LABEL: test_pinsrb:
21822182
; BTVER2-SSE: # %bb.0:
2183-
; BTVER2-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [1:0.50]
2184-
; BTVER2-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
2183+
; BTVER2-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [7:0.50]
2184+
; BTVER2-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [4:1.00]
21852185
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
21862186
;
21872187
; BTVER2-LABEL: test_pinsrb:
21882188
; BTVER2: # %bb.0:
2189-
; BTVER2-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
2190-
; BTVER2-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
2189+
; BTVER2-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [7:0.50]
2190+
; BTVER2-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [4:1.00]
21912191
; BTVER2-NEXT: retq # sched: [4:1.00]
21922192
;
21932193
; ZNVER1-SSE-LABEL: test_pinsrb:
@@ -2282,14 +2282,14 @@ define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
22822282
;
22832283
; BTVER2-SSE-LABEL: test_pinsrd:
22842284
; BTVER2-SSE: # %bb.0:
2285-
; BTVER2-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [1:0.50]
2286-
; BTVER2-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
2285+
; BTVER2-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [7:0.50]
2286+
; BTVER2-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [4:1.00]
22872287
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
22882288
;
22892289
; BTVER2-LABEL: test_pinsrd:
22902290
; BTVER2: # %bb.0:
2291-
; BTVER2-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
2292-
; BTVER2-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
2291+
; BTVER2-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [7:0.50]
2292+
; BTVER2-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [4:1.00]
22932293
; BTVER2-NEXT: retq # sched: [4:1.00]
22942294
;
22952295
; ZNVER1-SSE-LABEL: test_pinsrd:
@@ -2396,15 +2396,15 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) {
23962396
;
23972397
; BTVER2-SSE-LABEL: test_pinsrq:
23982398
; BTVER2-SSE: # %bb.0:
2399-
; BTVER2-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
2400-
; BTVER2-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [1:0.50]
2399+
; BTVER2-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [7:0.50]
2400+
; BTVER2-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [4:1.00]
24012401
; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
24022402
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
24032403
;
24042404
; BTVER2-LABEL: test_pinsrq:
24052405
; BTVER2: # %bb.0:
2406-
; BTVER2-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00]
2407-
; BTVER2-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [1:0.50]
2406+
; BTVER2-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [7:0.50]
2407+
; BTVER2-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [4:1.00]
24082408
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
24092409
; BTVER2-NEXT: retq # sched: [4:1.00]
24102410
;

llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1449,14 +1449,14 @@ vzeroupper
14491449
# CHECK-NEXT: 1 6 1.00 * vphsubsw (%rax), %xmm1, %xmm2
14501450
# CHECK-NEXT: 1 1 0.50 vphsubw %xmm0, %xmm1, %xmm2
14511451
# CHECK-NEXT: 1 6 1.00 * vphsubw (%rax), %xmm1, %xmm2
1452-
# CHECK-NEXT: 1 1 0.50 vpinsrb $1, %eax, %xmm1, %xmm2
1453-
# CHECK-NEXT: 1 6 1.00 * vpinsrb $1, (%rax), %xmm1, %xmm2
1454-
# CHECK-NEXT: 1 1 0.50 vpinsrd $1, %eax, %xmm1, %xmm2
1455-
# CHECK-NEXT: 1 6 1.00 * vpinsrd $1, (%rax), %xmm1, %xmm2
1456-
# CHECK-NEXT: 1 1 0.50 vpinsrq $1, %rax, %xmm1, %xmm2
1457-
# CHECK-NEXT: 1 6 1.00 * vpinsrq $1, (%rax), %xmm1, %xmm2
1458-
# CHECK-NEXT: 1 1 0.50 vpinsrw $1, %eax, %xmm1, %xmm2
1459-
# CHECK-NEXT: 1 6 1.00 * vpinsrw $1, (%rax), %xmm1, %xmm2
1452+
# CHECK-NEXT: 2 7 0.50 vpinsrb $1, %eax, %xmm1, %xmm2
1453+
# CHECK-NEXT: 1 4 1.00 * vpinsrb $1, (%rax), %xmm1, %xmm2
1454+
# CHECK-NEXT: 2 7 0.50 vpinsrd $1, %eax, %xmm1, %xmm2
1455+
# CHECK-NEXT: 1 4 1.00 * vpinsrd $1, (%rax), %xmm1, %xmm2
1456+
# CHECK-NEXT: 2 7 0.50 vpinsrq $1, %rax, %xmm1, %xmm2
1457+
# CHECK-NEXT: 1 4 1.00 * vpinsrq $1, (%rax), %xmm1, %xmm2
1458+
# CHECK-NEXT: 2 7 0.50 vpinsrw $1, %eax, %xmm1, %xmm2
1459+
# CHECK-NEXT: 1 4 1.00 * vpinsrw $1, (%rax), %xmm1, %xmm2
14601460
# CHECK-NEXT: 1 2 1.00 vpmaddubsw %xmm0, %xmm1, %xmm2
14611461
# CHECK-NEXT: 1 7 1.00 * vpmaddubsw (%rax), %xmm1, %xmm2
14621462
# CHECK-NEXT: 1 2 1.00 vpmaddwd %xmm0, %xmm1, %xmm2

llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse1.s

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -269,8 +269,8 @@ xorps (%rax), %xmm2
269269
# CHECK-NEXT: 1 1 0.50 pavgw %mm0, %mm2
270270
# CHECK-NEXT: 1 6 1.00 * pavgw (%rax), %mm2
271271
# CHECK-NEXT: 1 3 1.00 pextrw $1, %mm0, %ecx
272-
# CHECK-NEXT: 1 1 0.50 pinsrw $1, %eax, %mm2
273-
# CHECK-NEXT: 1 6 1.00 * pinsrw $1, (%rax), %mm2
272+
# CHECK-NEXT: 2 7 0.50 pinsrw $1, %eax, %mm2
273+
# CHECK-NEXT: 1 4 1.00 * pinsrw $1, (%rax), %mm2
274274
# CHECK-NEXT: 1 1 0.50 pmaxsw %mm0, %mm2
275275
# CHECK-NEXT: 1 6 1.00 * pmaxsw (%rax), %mm2
276276
# CHECK-NEXT: 1 1 0.50 pmaxub %mm0, %mm2

llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse41.s

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -191,12 +191,12 @@ roundss $1, (%rax), %xmm2
191191
# CHECK-NEXT: 1 3 1.00 * pextrw $1, %xmm0, (%rax)
192192
# CHECK-NEXT: 1 2 1.00 phminposuw %xmm0, %xmm2
193193
# CHECK-NEXT: 1 7 1.00 * phminposuw (%rax), %xmm2
194-
# CHECK-NEXT: 1 1 0.50 pinsrb $1, %eax, %xmm1
195-
# CHECK-NEXT: 1 6 1.00 * pinsrb $1, (%rax), %xmm1
196-
# CHECK-NEXT: 1 1 0.50 pinsrd $1, %eax, %xmm1
197-
# CHECK-NEXT: 1 6 1.00 * pinsrd $1, (%rax), %xmm1
198-
# CHECK-NEXT: 1 1 0.50 pinsrq $1, %rax, %xmm1
199-
# CHECK-NEXT: 1 6 1.00 * pinsrq $1, (%rax), %xmm1
194+
# CHECK-NEXT: 2 7 0.50 pinsrb $1, %eax, %xmm1
195+
# CHECK-NEXT: 1 4 1.00 * pinsrb $1, (%rax), %xmm1
196+
# CHECK-NEXT: 2 7 0.50 pinsrd $1, %eax, %xmm1
197+
# CHECK-NEXT: 1 4 1.00 * pinsrd $1, (%rax), %xmm1
198+
# CHECK-NEXT: 2 7 0.50 pinsrq $1, %rax, %xmm1
199+
# CHECK-NEXT: 1 4 1.00 * pinsrq $1, (%rax), %xmm1
200200
# CHECK-NEXT: 1 1 0.50 pmaxsb %xmm0, %xmm2
201201
# CHECK-NEXT: 1 6 1.00 * pmaxsb (%rax), %xmm2
202202
# CHECK-NEXT: 1 1 0.50 pmaxsd %xmm0, %xmm2

0 commit comments

Comments
 (0)