@@ -1649,28 +1649,16 @@ declare <15 x i64> @llvm.vp.bitreverse.v15i64(<15 x i64>, <15 x i1>, i32)
1649
1649
define <15 x i64 > @vp_bitreverse_v15i64 (<15 x i64 > %va , <15 x i1 > %m , i32 zeroext %evl ) {
1650
1650
; RV32-LABEL: vp_bitreverse_v15i64:
1651
1651
; RV32: # %bb.0:
1652
- ; RV32-NEXT: addi sp, sp, -48
1653
- ; RV32-NEXT: .cfi_def_cfa_offset 48
1652
+ ; RV32-NEXT: addi sp, sp, -16
1653
+ ; RV32-NEXT: .cfi_def_cfa_offset 16
1654
1654
; RV32-NEXT: csrr a1, vlenb
1655
1655
; RV32-NEXT: li a2, 24
1656
1656
; RV32-NEXT: mul a1, a1, a2
1657
1657
; RV32-NEXT: sub sp, sp, a1
1658
- ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30 , 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb
1659
- ; RV32-NEXT: sw zero, 20 (sp)
1658
+ ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10 , 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
1659
+ ; RV32-NEXT: sw zero, 12 (sp)
1660
1660
; RV32-NEXT: lui a1, 1044480
1661
- ; RV32-NEXT: sw a1, 16(sp)
1662
- ; RV32-NEXT: lui a1, 61681
1663
- ; RV32-NEXT: addi a1, a1, -241
1664
- ; RV32-NEXT: sw a1, 44(sp)
1665
- ; RV32-NEXT: sw a1, 40(sp)
1666
- ; RV32-NEXT: lui a1, 209715
1667
- ; RV32-NEXT: addi a1, a1, 819
1668
- ; RV32-NEXT: sw a1, 36(sp)
1669
- ; RV32-NEXT: sw a1, 32(sp)
1670
- ; RV32-NEXT: lui a1, 349525
1671
- ; RV32-NEXT: addi a1, a1, 1365
1672
- ; RV32-NEXT: sw a1, 28(sp)
1673
- ; RV32-NEXT: sw a1, 24(sp)
1661
+ ; RV32-NEXT: sw a1, 8(sp)
1674
1662
; RV32-NEXT: li a1, 56
1675
1663
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1676
1664
; RV32-NEXT: vsll.vx v16, v8, a1, v0.t
@@ -1683,21 +1671,21 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
1683
1671
; RV32-NEXT: csrr a4, vlenb
1684
1672
; RV32-NEXT: slli a4, a4, 4
1685
1673
; RV32-NEXT: add a4, sp, a4
1686
- ; RV32-NEXT: addi a4, a4, 48
1674
+ ; RV32-NEXT: addi a4, a4, 16
1687
1675
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
1688
- ; RV32-NEXT: addi a4, sp, 16
1689
- ; RV32-NEXT: vsetivli zero, 16 , e64, m8, ta, ma
1676
+ ; RV32-NEXT: addi a4, sp, 8
1677
+ ; RV32-NEXT: vsetivli zero, 15 , e64, m8, ta, ma
1690
1678
; RV32-NEXT: vlse64.v v16, (a4), zero
1691
1679
; RV32-NEXT: csrr a4, vlenb
1692
1680
; RV32-NEXT: slli a4, a4, 3
1693
1681
; RV32-NEXT: add a4, sp, a4
1694
- ; RV32-NEXT: addi a4, a4, 48
1682
+ ; RV32-NEXT: addi a4, a4, 16
1695
1683
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
1696
1684
; RV32-NEXT: lui a4, 4080
1697
1685
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1698
1686
; RV32-NEXT: vand.vx v24, v8, a4, v0.t
1699
1687
; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
1700
- ; RV32-NEXT: addi a5, sp, 48
1688
+ ; RV32-NEXT: addi a5, sp, 16
1701
1689
; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill
1702
1690
; RV32-NEXT: vand.vv v24, v8, v16, v0.t
1703
1691
; RV32-NEXT: vsll.vi v16, v24, 8, v0.t
@@ -1706,62 +1694,65 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
1706
1694
; RV32-NEXT: csrr a5, vlenb
1707
1695
; RV32-NEXT: slli a5, a5, 4
1708
1696
; RV32-NEXT: add a5, sp, a5
1709
- ; RV32-NEXT: addi a5, a5, 48
1697
+ ; RV32-NEXT: addi a5, a5, 16
1710
1698
; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
1711
1699
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
1712
1700
; RV32-NEXT: csrr a5, vlenb
1713
1701
; RV32-NEXT: slli a5, a5, 4
1714
1702
; RV32-NEXT: add a5, sp, a5
1715
- ; RV32-NEXT: addi a5, a5, 48
1703
+ ; RV32-NEXT: addi a5, a5, 16
1716
1704
; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
1717
1705
; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
1718
1706
; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t
1719
1707
; RV32-NEXT: vand.vx v24, v24, a2, v0.t
1720
1708
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
1721
- ; RV32-NEXT: addi a1, sp, 48
1709
+ ; RV32-NEXT: addi a1, sp, 16
1722
1710
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
1723
1711
; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t
1724
1712
; RV32-NEXT: vand.vx v24, v24, a4, v0.t
1725
1713
; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
1726
1714
; RV32-NEXT: csrr a1, vlenb
1727
1715
; RV32-NEXT: slli a1, a1, 3
1728
1716
; RV32-NEXT: add a1, sp, a1
1729
- ; RV32-NEXT: addi a1, a1, 48
1717
+ ; RV32-NEXT: addi a1, a1, 16
1730
1718
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
1731
1719
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
1732
1720
; RV32-NEXT: vor.vv v8, v8, v24, v0.t
1733
- ; RV32-NEXT: addi a1, sp, 40
1734
- ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1735
- ; RV32-NEXT: vlse64.v v24, (a1), zero
1736
- ; RV32-NEXT: addi a1, sp, 48
1721
+ ; RV32-NEXT: addi a1, sp, 16
1737
1722
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
1738
- ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1739
1723
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
1740
1724
; RV32-NEXT: csrr a1, vlenb
1741
1725
; RV32-NEXT: slli a1, a1, 4
1742
1726
; RV32-NEXT: add a1, sp, a1
1743
- ; RV32-NEXT: addi a1, a1, 48
1727
+ ; RV32-NEXT: addi a1, a1, 16
1744
1728
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
1745
- ; RV32-NEXT: vor.vv v16, v16, v8, v0.t
1746
- ; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t
1747
- ; RV32-NEXT: vand.vv v8, v8, v24, v0.t
1748
- ; RV32-NEXT: vand.vv v16, v16, v24, v0.t
1749
- ; RV32-NEXT: addi a1, sp, 32
1750
- ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1751
- ; RV32-NEXT: vlse64.v v24, (a1), zero
1729
+ ; RV32-NEXT: vor.vv v8, v16, v8, v0.t
1730
+ ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
1731
+ ; RV32-NEXT: lui a1, 61681
1732
+ ; RV32-NEXT: addi a1, a1, -241
1733
+ ; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma
1734
+ ; RV32-NEXT: vmv.v.x v24, a1
1752
1735
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1753
- ; RV32-NEXT: vsll.vi v16, v16, 4, v0.t
1754
- ; RV32-NEXT: vor.vv v16, v8, v16, v0.t
1755
- ; RV32-NEXT: vsrl.vi v8, v16, 2, v0.t
1756
- ; RV32-NEXT: vand.vv v8, v8, v24, v0.t
1757
1736
; RV32-NEXT: vand.vv v16, v16, v24, v0.t
1758
- ; RV32-NEXT: addi a1, sp, 24
1759
- ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1760
- ; RV32-NEXT: vlse64.v v24, (a1), zero
1737
+ ; RV32-NEXT: vand.vv v8, v8, v24, v0.t
1738
+ ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
1739
+ ; RV32-NEXT: vor.vv v8, v16, v8, v0.t
1740
+ ; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
1741
+ ; RV32-NEXT: lui a1, 209715
1742
+ ; RV32-NEXT: addi a1, a1, 819
1743
+ ; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma
1744
+ ; RV32-NEXT: vmv.v.x v24, a1
1761
1745
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1762
- ; RV32-NEXT: vsll.vi v16, v16, 2, v0.t
1763
- ; RV32-NEXT: vor.vv v8, v8, v16, v0.t
1746
+ ; RV32-NEXT: vand.vv v16, v16, v24, v0.t
1747
+ ; RV32-NEXT: vand.vv v8, v8, v24, v0.t
1748
+ ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
1749
+ ; RV32-NEXT: vor.vv v8, v16, v8, v0.t
1764
1750
; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
1751
+ ; RV32-NEXT: lui a1, 349525
1752
+ ; RV32-NEXT: addi a1, a1, 1365
1753
+ ; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma
1754
+ ; RV32-NEXT: vmv.v.x v24, a1
1755
+ ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1765
1756
; RV32-NEXT: vand.vv v16, v16, v24, v0.t
1766
1757
; RV32-NEXT: vand.vv v8, v8, v24, v0.t
1767
1758
; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
@@ -1770,7 +1761,7 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
1770
1761
; RV32-NEXT: li a1, 24
1771
1762
; RV32-NEXT: mul a0, a0, a1
1772
1763
; RV32-NEXT: add sp, sp, a0
1773
- ; RV32-NEXT: addi sp, sp, 48
1764
+ ; RV32-NEXT: addi sp, sp, 16
1774
1765
; RV32-NEXT: ret
1775
1766
;
1776
1767
; RV64-LABEL: vp_bitreverse_v15i64:
@@ -1856,27 +1847,15 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
1856
1847
define <15 x i64 > @vp_bitreverse_v15i64_unmasked (<15 x i64 > %va , i32 zeroext %evl ) {
1857
1848
; RV32-LABEL: vp_bitreverse_v15i64_unmasked:
1858
1849
; RV32: # %bb.0:
1859
- ; RV32-NEXT: addi sp, sp, -48
1860
- ; RV32-NEXT: .cfi_def_cfa_offset 48
1850
+ ; RV32-NEXT: addi sp, sp, -16
1851
+ ; RV32-NEXT: .cfi_def_cfa_offset 16
1861
1852
; RV32-NEXT: csrr a1, vlenb
1862
1853
; RV32-NEXT: slli a1, a1, 3
1863
1854
; RV32-NEXT: sub sp, sp, a1
1864
- ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30 , 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb
1865
- ; RV32-NEXT: sw zero, 20 (sp)
1855
+ ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10 , 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
1856
+ ; RV32-NEXT: sw zero, 12 (sp)
1866
1857
; RV32-NEXT: lui a1, 1044480
1867
- ; RV32-NEXT: sw a1, 16(sp)
1868
- ; RV32-NEXT: lui a1, 61681
1869
- ; RV32-NEXT: addi a1, a1, -241
1870
- ; RV32-NEXT: sw a1, 44(sp)
1871
- ; RV32-NEXT: sw a1, 40(sp)
1872
- ; RV32-NEXT: lui a1, 209715
1873
- ; RV32-NEXT: addi a1, a1, 819
1874
- ; RV32-NEXT: sw a1, 36(sp)
1875
- ; RV32-NEXT: sw a1, 32(sp)
1876
- ; RV32-NEXT: lui a1, 349525
1877
- ; RV32-NEXT: addi a1, a1, 1365
1878
- ; RV32-NEXT: sw a1, 28(sp)
1879
- ; RV32-NEXT: sw a1, 24(sp)
1858
+ ; RV32-NEXT: sw a1, 8(sp)
1880
1859
; RV32-NEXT: li a1, 56
1881
1860
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1882
1861
; RV32-NEXT: vsll.vx v16, v8, a1
@@ -1886,66 +1865,69 @@ define <15 x i64> @vp_bitreverse_v15i64_unmasked(<15 x i64> %va, i32 zeroext %ev
1886
1865
; RV32-NEXT: li a3, 40
1887
1866
; RV32-NEXT: vsll.vx v24, v24, a3
1888
1867
; RV32-NEXT: vor.vv v16, v16, v24
1889
- ; RV32-NEXT: addi a4, sp, 48
1890
- ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
1891
1868
; RV32-NEXT: addi a4, sp, 16
1892
- ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1893
- ; RV32-NEXT: vlse64.v v24, (a4), zero
1869
+ ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
1870
+ ; RV32-NEXT: addi a4, sp, 8
1871
+ ; RV32-NEXT: vsetivli zero, 15, e64, m8, ta, ma
1872
+ ; RV32-NEXT: vlse64.v v16, (a4), zero
1894
1873
; RV32-NEXT: lui a4, 4080
1895
1874
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1896
1875
; RV32-NEXT: vand.vx v0, v8, a4
1897
1876
; RV32-NEXT: vsll.vi v0, v0, 24
1898
- ; RV32-NEXT: vand.vv v16 , v8, v24
1899
- ; RV32-NEXT: vsll.vi v16, v16 , 8
1900
- ; RV32-NEXT: vor.vv v16 , v0, v16
1901
- ; RV32-NEXT: addi a5, sp, 48
1877
+ ; RV32-NEXT: vand.vv v24 , v8, v16
1878
+ ; RV32-NEXT: vsll.vi v24, v24 , 8
1879
+ ; RV32-NEXT: vor.vv v24 , v0, v24
1880
+ ; RV32-NEXT: addi a5, sp, 16
1902
1881
; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload
1903
- ; RV32-NEXT: vor.vv v16 , v0, v16
1904
- ; RV32-NEXT: vs8r.v v16 , (a5) # Unknown-size Folded Spill
1882
+ ; RV32-NEXT: vor.vv v24 , v0, v24
1883
+ ; RV32-NEXT: vs8r.v v24 , (a5) # Unknown-size Folded Spill
1905
1884
; RV32-NEXT: vsrl.vx v0, v8, a3
1906
1885
; RV32-NEXT: vand.vx v0, v0, a2
1907
- ; RV32-NEXT: vsrl.vx v16 , v8, a1
1908
- ; RV32-NEXT: vor.vv v0 , v0, v16
1909
- ; RV32-NEXT: vsrl.vi v16 , v8, 8
1910
- ; RV32-NEXT: vand.vv v16, v16, v24
1886
+ ; RV32-NEXT: vsrl.vx v24 , v8, a1
1887
+ ; RV32-NEXT: vor.vv v24 , v0, v24
1888
+ ; RV32-NEXT: vsrl.vi v0 , v8, 8
1889
+ ; RV32-NEXT: vand.vv v16, v0, v16
1911
1890
; RV32-NEXT: vsrl.vi v8, v8, 24
1912
1891
; RV32-NEXT: vand.vx v8, v8, a4
1913
1892
; RV32-NEXT: vor.vv v8, v16, v8
1914
- ; RV32-NEXT: addi a1, sp, 40
1915
- ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1916
- ; RV32-NEXT: vlse64.v v16, (a1), zero
1917
- ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1918
- ; RV32-NEXT: vor.vv v8, v8, v0
1919
- ; RV32-NEXT: addi a1, sp, 48
1920
- ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
1921
- ; RV32-NEXT: vor.vv v8, v24, v8
1922
- ; RV32-NEXT: vsrl.vi v24, v8, 4
1923
- ; RV32-NEXT: vand.vv v24, v24, v16
1924
- ; RV32-NEXT: vand.vv v8, v8, v16
1925
- ; RV32-NEXT: addi a1, sp, 32
1926
- ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1927
- ; RV32-NEXT: vlse64.v v16, (a1), zero
1893
+ ; RV32-NEXT: vor.vv v8, v8, v24
1894
+ ; RV32-NEXT: addi a1, sp, 16
1895
+ ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
1896
+ ; RV32-NEXT: vor.vv v8, v16, v8
1897
+ ; RV32-NEXT: vsrl.vi v16, v8, 4
1898
+ ; RV32-NEXT: lui a1, 61681
1899
+ ; RV32-NEXT: addi a1, a1, -241
1900
+ ; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma
1901
+ ; RV32-NEXT: vmv.v.x v24, a1
1928
1902
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1903
+ ; RV32-NEXT: vand.vv v16, v16, v24
1904
+ ; RV32-NEXT: vand.vv v8, v8, v24
1929
1905
; RV32-NEXT: vsll.vi v8, v8, 4
1930
- ; RV32-NEXT: vor.vv v8, v24, v8
1931
- ; RV32-NEXT: vsrl.vi v24, v8, 2
1932
- ; RV32-NEXT: vand.vv v24, v24, v16
1933
- ; RV32-NEXT: vand.vv v8, v8, v16
1934
- ; RV32-NEXT: addi a1, sp, 24
1935
- ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1936
- ; RV32-NEXT: vlse64.v v16, (a1), zero
1906
+ ; RV32-NEXT: vor.vv v8, v16, v8
1907
+ ; RV32-NEXT: vsrl.vi v16, v8, 2
1908
+ ; RV32-NEXT: lui a1, 209715
1909
+ ; RV32-NEXT: addi a1, a1, 819
1910
+ ; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma
1911
+ ; RV32-NEXT: vmv.v.x v24, a1
1937
1912
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1913
+ ; RV32-NEXT: vand.vv v16, v16, v24
1914
+ ; RV32-NEXT: vand.vv v8, v8, v24
1938
1915
; RV32-NEXT: vsll.vi v8, v8, 2
1939
- ; RV32-NEXT: vor.vv v8, v24, v8
1940
- ; RV32-NEXT: vsrl.vi v24, v8, 1
1941
- ; RV32-NEXT: vand.vv v24, v24, v16
1942
- ; RV32-NEXT: vand.vv v8, v8, v16
1916
+ ; RV32-NEXT: vor.vv v8, v16, v8
1917
+ ; RV32-NEXT: vsrl.vi v16, v8, 1
1918
+ ; RV32-NEXT: lui a1, 349525
1919
+ ; RV32-NEXT: addi a1, a1, 1365
1920
+ ; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma
1921
+ ; RV32-NEXT: vmv.v.x v24, a1
1922
+ ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1923
+ ; RV32-NEXT: vand.vv v16, v16, v24
1924
+ ; RV32-NEXT: vand.vv v8, v8, v24
1943
1925
; RV32-NEXT: vadd.vv v8, v8, v8
1944
- ; RV32-NEXT: vor.vv v8, v24 , v8
1926
+ ; RV32-NEXT: vor.vv v8, v16 , v8
1945
1927
; RV32-NEXT: csrr a0, vlenb
1946
1928
; RV32-NEXT: slli a0, a0, 3
1947
1929
; RV32-NEXT: add sp, sp, a0
1948
- ; RV32-NEXT: addi sp, sp, 48
1930
+ ; RV32-NEXT: addi sp, sp, 16
1949
1931
; RV32-NEXT: ret
1950
1932
;
1951
1933
; RV64-LABEL: vp_bitreverse_v15i64_unmasked:
0 commit comments