@@ -1474,6 +1474,189 @@ entry:
1474
1474
ret <2 x double > %out
1475
1475
}
1476
1476
1477
+ define arm_aapcs_vfpcc <4 x double > @shuffle4_f64 (<2 x double > %src1 , <2 x double > %src2 ) { ; Widens two 2 x double inputs into a 4 x double result whose lanes are drawn alternately from %src2 and %src1.
1478
+ ; CHECK-LABEL: shuffle4_f64:
1479
+ ; CHECK: @ %bb.0: @ %entry
1480
+ ; CHECK-NEXT: vmov.f32 s8, s6
1481
+ ; CHECK-NEXT: vmov.f32 s6, s0
1482
+ ; CHECK-NEXT: vmov.f32 s9, s7
1483
+ ; CHECK-NEXT: vmov.f32 s7, s1
1484
+ ; CHECK-NEXT: vmov.f32 s10, s2
1485
+ ; CHECK-NEXT: vmov.f32 s11, s3
1486
+ ; CHECK-NEXT: vmov q0, q2
1487
+ ; CHECK-NEXT: bx lr
1488
+ entry:
1489
+ %out = shufflevector <2 x double > %src1 , <2 x double > %src2 , <4 x i32 > <i32 3 , i32 1 , i32 2 , i32 0 > ; mask indices 0-1 select %src1 lanes, 2-3 select %src2 lanes: out = <src2[1], src1[1], src2[0], src1[0]>
1490
+ ret <4 x double > %out
1491
+ }
1492
+ define arm_aapcs_vfpcc <4 x double > @shuffle5_f64 (<2 x double > %src1 , <2 x double > %src2 ) { ; Widens two 2 x double inputs into a 4 x double result holding the concatenation of %src1 and %src2 in reversed lane order.
1493
+ ; CHECK-LABEL: shuffle5_f64:
1494
+ ; CHECK: @ %bb.0: @ %entry
1495
+ ; CHECK-NEXT: vmov.f32 s8, s6
1496
+ ; CHECK-NEXT: vmov.f32 s10, s4
1497
+ ; CHECK-NEXT: vmov.f32 s4, s2
1498
+ ; CHECK-NEXT: vmov.f32 s6, s0
1499
+ ; CHECK-NEXT: vmov.f32 s9, s7
1500
+ ; CHECK-NEXT: vmov.f32 s11, s5
1501
+ ; CHECK-NEXT: vmov.f32 s5, s3
1502
+ ; CHECK-NEXT: vmov.f32 s7, s1
1503
+ ; CHECK-NEXT: vmov q0, q2
1504
+ ; CHECK-NEXT: bx lr
1505
+ entry:
1506
+ %out = shufflevector <2 x double > %src1 , <2 x double > %src2 , <4 x i32 > <i32 3 , i32 2 , i32 1 , i32 0 > ; mask indices 0-1 select %src1 lanes, 2-3 select %src2 lanes: out = <src2[1], src2[0], src1[1], src1[0]>
1507
+ ret <4 x double > %out
1508
+ }
1509
+ define arm_aapcs_vfpcc <2 x double > @shuffle6_f64 (<2 x double > %src1 , <2 x double > %src2 ) { ; Two-input blend of 2 x double vectors: keeps lane 0 of %src1 and takes lane 1 from %src2.
1510
+ ; CHECK-LABEL: shuffle6_f64:
1511
+ ; CHECK: @ %bb.0: @ %entry
1512
+ ; CHECK-NEXT: vmov.f32 s2, s6
1513
+ ; CHECK-NEXT: vmov.f32 s3, s7
1514
+ ; CHECK-NEXT: bx lr
1515
+ entry:
1516
+ %out = shufflevector <2 x double > %src1 , <2 x double > %src2 , <2 x i32 > <i32 0 , i32 3 > ; mask indices 0-1 select %src1 lanes, 2-3 select %src2 lanes: out = <src1[0], src2[1]>
1517
+ ret <2 x double > %out
1518
+ }
1519
+ define arm_aapcs_vfpcc <2 x double > @shuffle7_f64 (<2 x double > %src1 , <2 x double > %src2 ) { ; Two-input blend of 2 x double vectors: lane 0 is taken from lane 1 of %src2, lane 1 is kept from %src1.
1520
+ ; CHECK-LABEL: shuffle7_f64:
1521
+ ; CHECK: @ %bb.0: @ %entry
1522
+ ; CHECK-NEXT: vmov.f32 s0, s6
1523
+ ; CHECK-NEXT: vmov.f32 s1, s7
1524
+ ; CHECK-NEXT: bx lr
1525
+ entry:
1526
+ %out = shufflevector <2 x double > %src1 , <2 x double > %src2 , <2 x i32 > <i32 3 , i32 1 > ; mask indices 0-1 select %src1 lanes, 2-3 select %src2 lanes: out = <src2[1], src1[1]>
1527
+ ret <2 x double > %out
1528
+ }
1529
+ define arm_aapcs_vfpcc <2 x double > @shuffle8_f64 (<2 x double > %src1 , <2 x double > %src2 ) { ; Two-input blend of 2 x double vectors: lane 0 is taken from lane 0 of %src2, lane 1 is kept from %src1.
1530
+ ; CHECK-LABEL: shuffle8_f64:
1531
+ ; CHECK: @ %bb.0: @ %entry
1532
+ ; CHECK-NEXT: vmov.f32 s6, s2
1533
+ ; CHECK-NEXT: vmov.f32 s7, s3
1534
+ ; CHECK-NEXT: vmov q0, q1
1535
+ ; CHECK-NEXT: bx lr
1536
+ entry:
1537
+ %out = shufflevector <2 x double > %src1 , <2 x double > %src2 , <2 x i32 > <i32 2 , i32 1 > ; mask indices 0-1 select %src1 lanes, 2-3 select %src2 lanes: out = <src2[0], src1[1]>
1538
+ ret <2 x double > %out
1539
+ }
1540
+ define arm_aapcs_vfpcc <8 x double > @shuffle9_f64 (<4 x double > %src1 , <4 x double > %src2 ) { ; Interleaves the lanes of two 4 x double inputs into an 8 x double result; the expected code below saves and restores d8-d11 around the moves.
1541
+ ; CHECK-LABEL: shuffle9_f64:
1542
+ ; CHECK: @ %bb.0: @ %entry
1543
+ ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
1544
+ ; CHECK-NEXT: vpush {d8, d9, d10, d11}
1545
+ ; CHECK-NEXT: vmov q5, q2
1546
+ ; CHECK-NEXT: vmov.f32 s16, s0
1547
+ ; CHECK-NEXT: vmov.f32 s18, s20
1548
+ ; CHECK-NEXT: vmov.f32 s20, s2
1549
+ ; CHECK-NEXT: vmov.f32 s10, s12
1550
+ ; CHECK-NEXT: vmov.f32 s19, s21
1551
+ ; CHECK-NEXT: vmov.f32 s8, s4
1552
+ ; CHECK-NEXT: vmov.f32 s17, s1
1553
+ ; CHECK-NEXT: vmov.f32 s21, s3
1554
+ ; CHECK-NEXT: vmov q0, q4
1555
+ ; CHECK-NEXT: vmov.f32 s12, s6
1556
+ ; CHECK-NEXT: vmov.f32 s11, s13
1557
+ ; CHECK-NEXT: vmov.f32 s9, s5
1558
+ ; CHECK-NEXT: vmov.f32 s13, s7
1559
+ ; CHECK-NEXT: vmov q1, q5
1560
+ ; CHECK-NEXT: vpop {d8, d9, d10, d11}
1561
+ ; CHECK-NEXT: bx lr
1562
+ entry:
1563
+ %out = shufflevector <4 x double > %src1 , <4 x double > %src2 , <8 x i32 > <i32 0 , i32 4 , i32 1 , i32 5 , i32 2 , i32 6 , i32 3 , i32 7 > ; mask indices 0-3 select %src1 lanes, 4-7 select %src2 lanes: out = <src1[0], src2[0], src1[1], src2[1], src1[2], src2[2], src1[3], src2[3]>
1564
+ ret <8 x double > %out
1565
+ }
1566
+
1567
+
1568
+
1569
+
1570
+ define arm_aapcs_vfpcc <4 x i64 > @shuffle4_i64 (<2 x i64 > %src1 , <2 x i64 > %src2 ) { ; Widens two 2 x i64 inputs into a 4 x i64 result whose lanes are drawn alternately from %src2 and %src1.
1571
+ ; CHECK-LABEL: shuffle4_i64:
1572
+ ; CHECK: @ %bb.0: @ %entry
1573
+ ; CHECK-NEXT: vmov.f32 s8, s6
1574
+ ; CHECK-NEXT: vmov.f32 s6, s0
1575
+ ; CHECK-NEXT: vmov.f32 s9, s7
1576
+ ; CHECK-NEXT: vmov.f32 s7, s1
1577
+ ; CHECK-NEXT: vmov.f32 s10, s2
1578
+ ; CHECK-NEXT: vmov.f32 s11, s3
1579
+ ; CHECK-NEXT: vmov q0, q2
1580
+ ; CHECK-NEXT: bx lr
1581
+ entry:
1582
+ %out = shufflevector <2 x i64 > %src1 , <2 x i64 > %src2 , <4 x i32 > <i32 3 , i32 1 , i32 2 , i32 0 > ; mask indices 0-1 select %src1 lanes, 2-3 select %src2 lanes: out = <src2[1], src1[1], src2[0], src1[0]>
1583
+ ret <4 x i64 > %out
1584
+ }
1585
+ define arm_aapcs_vfpcc <4 x i64 > @shuffle5_i64 (<2 x i64 > %src1 , <2 x i64 > %src2 ) { ; Widens two 2 x i64 inputs into a 4 x i64 result holding the concatenation of %src1 and %src2 in reversed lane order.
1586
+ ; CHECK-LABEL: shuffle5_i64:
1587
+ ; CHECK: @ %bb.0: @ %entry
1588
+ ; CHECK-NEXT: vmov.f32 s8, s6
1589
+ ; CHECK-NEXT: vmov.f32 s10, s4
1590
+ ; CHECK-NEXT: vmov.f32 s4, s2
1591
+ ; CHECK-NEXT: vmov.f32 s6, s0
1592
+ ; CHECK-NEXT: vmov.f32 s9, s7
1593
+ ; CHECK-NEXT: vmov.f32 s11, s5
1594
+ ; CHECK-NEXT: vmov.f32 s5, s3
1595
+ ; CHECK-NEXT: vmov.f32 s7, s1
1596
+ ; CHECK-NEXT: vmov q0, q2
1597
+ ; CHECK-NEXT: bx lr
1598
+ entry:
1599
+ %out = shufflevector <2 x i64 > %src1 , <2 x i64 > %src2 , <4 x i32 > <i32 3 , i32 2 , i32 1 , i32 0 > ; mask indices 0-1 select %src1 lanes, 2-3 select %src2 lanes: out = <src2[1], src2[0], src1[1], src1[0]>
1600
+ ret <4 x i64 > %out
1601
+ }
1602
+ define arm_aapcs_vfpcc <2 x i64 > @shuffle6_i64 (<2 x i64 > %src1 , <2 x i64 > %src2 ) { ; Two-input blend of 2 x i64 vectors: keeps lane 0 of %src1 and takes lane 1 from %src2.
1603
+ ; CHECK-LABEL: shuffle6_i64:
1604
+ ; CHECK: @ %bb.0: @ %entry
1605
+ ; CHECK-NEXT: vmov.f32 s2, s6
1606
+ ; CHECK-NEXT: vmov.f32 s3, s7
1607
+ ; CHECK-NEXT: bx lr
1608
+ entry:
1609
+ %out = shufflevector <2 x i64 > %src1 , <2 x i64 > %src2 , <2 x i32 > <i32 0 , i32 3 > ; mask indices 0-1 select %src1 lanes, 2-3 select %src2 lanes: out = <src1[0], src2[1]>
1610
+ ret <2 x i64 > %out
1611
+ }
1612
+ define arm_aapcs_vfpcc <2 x i64 > @shuffle7_i64 (<2 x i64 > %src1 , <2 x i64 > %src2 ) { ; Two-input blend of 2 x i64 vectors: lane 0 is taken from lane 1 of %src2, lane 1 is kept from %src1.
1613
+ ; CHECK-LABEL: shuffle7_i64:
1614
+ ; CHECK: @ %bb.0: @ %entry
1615
+ ; CHECK-NEXT: vmov.f32 s0, s6
1616
+ ; CHECK-NEXT: vmov.f32 s1, s7
1617
+ ; CHECK-NEXT: bx lr
1618
+ entry:
1619
+ %out = shufflevector <2 x i64 > %src1 , <2 x i64 > %src2 , <2 x i32 > <i32 3 , i32 1 > ; mask indices 0-1 select %src1 lanes, 2-3 select %src2 lanes: out = <src2[1], src1[1]>
1620
+ ret <2 x i64 > %out
1621
+ }
1622
+ define arm_aapcs_vfpcc <2 x i64 > @shuffle8_i64 (<2 x i64 > %src1 , <2 x i64 > %src2 ) { ; Two-input blend of 2 x i64 vectors: lane 0 is taken from lane 0 of %src2, lane 1 is kept from %src1.
1623
+ ; CHECK-LABEL: shuffle8_i64:
1624
+ ; CHECK: @ %bb.0: @ %entry
1625
+ ; CHECK-NEXT: vmov.f32 s6, s2
1626
+ ; CHECK-NEXT: vmov.f32 s7, s3
1627
+ ; CHECK-NEXT: vmov q0, q1
1628
+ ; CHECK-NEXT: bx lr
1629
+ entry:
1630
+ %out = shufflevector <2 x i64 > %src1 , <2 x i64 > %src2 , <2 x i32 > <i32 2 , i32 1 > ; mask indices 0-1 select %src1 lanes, 2-3 select %src2 lanes: out = <src2[0], src1[1]>
1631
+ ret <2 x i64 > %out
1632
+ }
1633
+ define arm_aapcs_vfpcc <8 x i64 > @shuffle9_i64 (<4 x i64 > %src1 , <4 x i64 > %src2 ) { ; Interleaves the lanes of two 4 x i64 inputs into an 8 x i64 result; the expected code below saves and restores d8-d11 around the moves.
1634
+ ; CHECK-LABEL: shuffle9_i64:
1635
+ ; CHECK: @ %bb.0: @ %entry
1636
+ ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
1637
+ ; CHECK-NEXT: vpush {d8, d9, d10, d11}
1638
+ ; CHECK-NEXT: vmov q5, q2
1639
+ ; CHECK-NEXT: vmov.f32 s16, s0
1640
+ ; CHECK-NEXT: vmov.f32 s18, s20
1641
+ ; CHECK-NEXT: vmov.f32 s20, s2
1642
+ ; CHECK-NEXT: vmov.f32 s10, s12
1643
+ ; CHECK-NEXT: vmov.f32 s19, s21
1644
+ ; CHECK-NEXT: vmov.f32 s8, s4
1645
+ ; CHECK-NEXT: vmov.f32 s17, s1
1646
+ ; CHECK-NEXT: vmov.f32 s21, s3
1647
+ ; CHECK-NEXT: vmov q0, q4
1648
+ ; CHECK-NEXT: vmov.f32 s12, s6
1649
+ ; CHECK-NEXT: vmov.f32 s11, s13
1650
+ ; CHECK-NEXT: vmov.f32 s9, s5
1651
+ ; CHECK-NEXT: vmov.f32 s13, s7
1652
+ ; CHECK-NEXT: vmov q1, q5
1653
+ ; CHECK-NEXT: vpop {d8, d9, d10, d11}
1654
+ ; CHECK-NEXT: bx lr
1655
+ entry:
1656
+ %out = shufflevector <4 x i64 > %src1 , <4 x i64 > %src2 , <8 x i32 > <i32 0 , i32 4 , i32 1 , i32 5 , i32 2 , i32 6 , i32 3 , i32 7 > ; mask indices 0-3 select %src1 lanes, 4-7 select %src2 lanes: out = <src1[0], src2[0], src1[1], src2[1], src1[2], src2[2], src1[3], src2[3]>
1657
+ ret <8 x i64 > %out
1658
+ }
1659
+
1477
1660
1478
1661
define arm_aapcs_vfpcc <4 x i32 > @insert_i32 (i32 %a ) {
1479
1662
; CHECK-LABEL: insert_i32:
@@ -1548,7 +1731,7 @@ define arm_aapcs_vfpcc i64 @scalar_to_vector_i32(<8 x i16> %v) {
1548
1731
; CHECK: @ %bb.0: @ %entry
1549
1732
; CHECK-NEXT: .pad #8
1550
1733
; CHECK-NEXT: sub sp, #8
1551
- ; CHECK-NEXT: adr r2, .LCPI76_0
1734
+ ; CHECK-NEXT: adr r2, .LCPI88_0
1552
1735
; CHECK-NEXT: vmov.u16 r0, q0[0]
1553
1736
; CHECK-NEXT: vldrw.u32 q0, [r2]
1554
1737
; CHECK-NEXT: mov r1, sp
@@ -1558,7 +1741,7 @@ define arm_aapcs_vfpcc i64 @scalar_to_vector_i32(<8 x i16> %v) {
1558
1741
; CHECK-NEXT: bx lr
1559
1742
; CHECK-NEXT: .p2align 4
1560
1743
; CHECK-NEXT: @ %bb.1:
1561
- ; CHECK-NEXT: .LCPI76_0 :
1744
+ ; CHECK-NEXT: .LCPI88_0 :
1562
1745
; CHECK-NEXT: .zero 4
1563
1746
; CHECK-NEXT: .long 7 @ 0x7
1564
1747
; CHECK-NEXT: .long 1 @ 0x1
0 commit comments