Skip to content

Commit c957acd

Browse files
authored
Add vqmovn neon instructions (rust-lang#1163)
1 parent c14e984 commit c957acd

File tree

4 files changed

+593
-23
lines changed

4 files changed

+593
-23
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 297 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5895,6 +5895,150 @@ pub unsafe fn vqdmulhs_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i32 {
58955895
vqdmulhs_s32(a, b)
58965896
}
58975897

5898+
/// Signed saturating extract narrow
5899+
#[inline]
5900+
#[target_feature(enable = "neon")]
5901+
#[cfg_attr(test, assert_instr(sqxtn))]
5902+
pub unsafe fn vqmovnh_s16(a: i16) -> i8 {
5903+
simd_extract(vqmovn_s16(vdupq_n_s16(a)), 0)
5904+
}
5905+
5906+
/// Signed saturating extract narrow
5907+
#[inline]
5908+
#[target_feature(enable = "neon")]
5909+
#[cfg_attr(test, assert_instr(sqxtn))]
5910+
pub unsafe fn vqmovns_s32(a: i32) -> i16 {
5911+
simd_extract(vqmovn_s32(vdupq_n_s32(a)), 0)
5912+
}
5913+
5914+
/// Signed saturating extract narrow
5915+
#[inline]
5916+
#[target_feature(enable = "neon")]
5917+
#[cfg_attr(test, assert_instr(sqxtn))]
5918+
pub unsafe fn vqmovnd_s64(a: i64) -> i32 {
5919+
simd_extract(vqmovn_s64(vdupq_n_s64(a)), 0)
5920+
}
5921+
5922+
/// Unsigned saturating extract narrow
5923+
#[inline]
5924+
#[target_feature(enable = "neon")]
5925+
#[cfg_attr(test, assert_instr(uqxtn))]
5926+
pub unsafe fn vqmovnh_u16(a: u16) -> u8 {
5927+
simd_extract(vqmovn_u16(vdupq_n_u16(a)), 0)
5928+
}
5929+
5930+
/// Unsigned saturating extract narrow
5931+
#[inline]
5932+
#[target_feature(enable = "neon")]
5933+
#[cfg_attr(test, assert_instr(uqxtn))]
5934+
pub unsafe fn vqmovns_u32(a: u32) -> u16 {
5935+
simd_extract(vqmovn_u32(vdupq_n_u32(a)), 0)
5936+
}
5937+
5938+
/// Unsigned saturating extract narrow
5939+
#[inline]
5940+
#[target_feature(enable = "neon")]
5941+
#[cfg_attr(test, assert_instr(uqxtn))]
5942+
pub unsafe fn vqmovnd_u64(a: u64) -> u32 {
5943+
simd_extract(vqmovn_u64(vdupq_n_u64(a)), 0)
5944+
}
5945+
5946+
/// Signed saturating extract narrow
5947+
#[inline]
5948+
#[target_feature(enable = "neon")]
5949+
#[cfg_attr(test, assert_instr(sqxtn2))]
5950+
pub unsafe fn vqmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
5951+
simd_shuffle16!(a, vqmovn_s16(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
5952+
}
5953+
5954+
/// Signed saturating extract narrow
5955+
#[inline]
5956+
#[target_feature(enable = "neon")]
5957+
#[cfg_attr(test, assert_instr(sqxtn2))]
5958+
pub unsafe fn vqmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
5959+
simd_shuffle8!(a, vqmovn_s32(b), [0, 1, 2, 3, 4, 5, 6, 7])
5960+
}
5961+
5962+
/// Signed saturating extract narrow
5963+
#[inline]
5964+
#[target_feature(enable = "neon")]
5965+
#[cfg_attr(test, assert_instr(sqxtn2))]
5966+
pub unsafe fn vqmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
5967+
simd_shuffle4!(a, vqmovn_s64(b), [0, 1, 2, 3])
5968+
}
5969+
5970+
/// Unsigned saturating extract narrow
5971+
#[inline]
5972+
#[target_feature(enable = "neon")]
5973+
#[cfg_attr(test, assert_instr(uqxtn2))]
5974+
pub unsafe fn vqmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
5975+
simd_shuffle16!(a, vqmovn_u16(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
5976+
}
5977+
5978+
/// Unsigned saturating extract narrow
5979+
#[inline]
5980+
#[target_feature(enable = "neon")]
5981+
#[cfg_attr(test, assert_instr(uqxtn2))]
5982+
pub unsafe fn vqmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
5983+
simd_shuffle8!(a, vqmovn_u32(b), [0, 1, 2, 3, 4, 5, 6, 7])
5984+
}
5985+
5986+
/// Unsigned saturating extract narrow
5987+
#[inline]
5988+
#[target_feature(enable = "neon")]
5989+
#[cfg_attr(test, assert_instr(uqxtn2))]
5990+
pub unsafe fn vqmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
5991+
simd_shuffle4!(a, vqmovn_u64(b), [0, 1, 2, 3])
5992+
}
5993+
5994+
/// Signed saturating extract unsigned narrow
5995+
#[inline]
5996+
#[target_feature(enable = "neon")]
5997+
#[cfg_attr(test, assert_instr(sqxtun))]
5998+
pub unsafe fn vqmovunh_s16(a: i16) -> u8 {
5999+
simd_extract(vqmovun_s16(vdupq_n_s16(a)), 0)
6000+
}
6001+
6002+
/// Signed saturating extract unsigned narrow
6003+
#[inline]
6004+
#[target_feature(enable = "neon")]
6005+
#[cfg_attr(test, assert_instr(sqxtun))]
6006+
pub unsafe fn vqmovuns_s32(a: i32) -> u16 {
6007+
simd_extract(vqmovun_s32(vdupq_n_s32(a)), 0)
6008+
}
6009+
6010+
/// Signed saturating extract unsigned narrow
6011+
#[inline]
6012+
#[target_feature(enable = "neon")]
6013+
#[cfg_attr(test, assert_instr(sqxtun))]
6014+
pub unsafe fn vqmovund_s64(a: i64) -> u32 {
6015+
simd_extract(vqmovun_s64(vdupq_n_s64(a)), 0)
6016+
}
6017+
6018+
/// Signed saturating extract unsigned narrow
6019+
#[inline]
6020+
#[target_feature(enable = "neon")]
6021+
#[cfg_attr(test, assert_instr(sqxtun2))]
6022+
pub unsafe fn vqmovun_high_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
6023+
simd_shuffle16!(a, vqmovun_s16(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
6024+
}
6025+
6026+
/// Signed saturating extract unsigned narrow
6027+
#[inline]
6028+
#[target_feature(enable = "neon")]
6029+
#[cfg_attr(test, assert_instr(sqxtun2))]
6030+
pub unsafe fn vqmovun_high_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
6031+
simd_shuffle8!(a, vqmovun_s32(b), [0, 1, 2, 3, 4, 5, 6, 7])
6032+
}
6033+
6034+
/// Signed saturating extract unsigned narrow
6035+
#[inline]
6036+
#[target_feature(enable = "neon")]
6037+
#[cfg_attr(test, assert_instr(sqxtun2))]
6038+
pub unsafe fn vqmovun_high_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
6039+
simd_shuffle4!(a, vqmovun_s64(b), [0, 1, 2, 3])
6040+
}
6041+
58986042
/// Signed saturating rounding doubling multiply returning high half
58996043
#[inline]
59006044
#[target_feature(enable = "neon")]
@@ -13537,6 +13681,159 @@ mod test {
1353713681
assert_eq!(r, e);
1353813682
}
1353913683

13684+
#[simd_test(enable = "neon")]
13685+
unsafe fn test_vqmovnh_s16() {
13686+
let a: i16 = 1;
13687+
let e: i8 = 1;
13688+
let r: i8 = transmute(vqmovnh_s16(transmute(a)));
13689+
assert_eq!(r, e);
13690+
}
13691+
13692+
#[simd_test(enable = "neon")]
13693+
unsafe fn test_vqmovns_s32() {
13694+
let a: i32 = 1;
13695+
let e: i16 = 1;
13696+
let r: i16 = transmute(vqmovns_s32(transmute(a)));
13697+
assert_eq!(r, e);
13698+
}
13699+
13700+
#[simd_test(enable = "neon")]
13701+
unsafe fn test_vqmovnd_s64() {
13702+
let a: i64 = 1;
13703+
let e: i32 = 1;
13704+
let r: i32 = transmute(vqmovnd_s64(transmute(a)));
13705+
assert_eq!(r, e);
13706+
}
13707+
13708+
#[simd_test(enable = "neon")]
13709+
unsafe fn test_vqmovnh_u16() {
13710+
let a: u16 = 1;
13711+
let e: u8 = 1;
13712+
let r: u8 = transmute(vqmovnh_u16(transmute(a)));
13713+
assert_eq!(r, e);
13714+
}
13715+
13716+
#[simd_test(enable = "neon")]
13717+
unsafe fn test_vqmovns_u32() {
13718+
let a: u32 = 1;
13719+
let e: u16 = 1;
13720+
let r: u16 = transmute(vqmovns_u32(transmute(a)));
13721+
assert_eq!(r, e);
13722+
}
13723+
13724+
#[simd_test(enable = "neon")]
13725+
unsafe fn test_vqmovnd_u64() {
13726+
let a: u64 = 1;
13727+
let e: u32 = 1;
13728+
let r: u32 = transmute(vqmovnd_u64(transmute(a)));
13729+
assert_eq!(r, e);
13730+
}
13731+
13732+
#[simd_test(enable = "neon")]
13733+
unsafe fn test_vqmovn_high_s16() {
13734+
let a: i8x8 = i8x8::new(0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F);
13735+
let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
13736+
let e: i8x16 = i8x16::new(0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F);
13737+
let r: i8x16 = transmute(vqmovn_high_s16(transmute(a), transmute(b)));
13738+
assert_eq!(r, e);
13739+
}
13740+
13741+
#[simd_test(enable = "neon")]
13742+
unsafe fn test_vqmovn_high_s32() {
13743+
let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
13744+
let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
13745+
let e: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
13746+
let r: i16x8 = transmute(vqmovn_high_s32(transmute(a), transmute(b)));
13747+
assert_eq!(r, e);
13748+
}
13749+
13750+
#[simd_test(enable = "neon")]
13751+
unsafe fn test_vqmovn_high_s64() {
13752+
let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
13753+
let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x7F_FF_FF_FF_FF_FF_FF_FF);
13754+
let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
13755+
let r: i32x4 = transmute(vqmovn_high_s64(transmute(a), transmute(b)));
13756+
assert_eq!(r, e);
13757+
}
13758+
13759+
#[simd_test(enable = "neon")]
13760+
unsafe fn test_vqmovn_high_u16() {
13761+
let a: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
13762+
let b: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
13763+
let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
13764+
let r: u8x16 = transmute(vqmovn_high_u16(transmute(a), transmute(b)));
13765+
assert_eq!(r, e);
13766+
}
13767+
13768+
#[simd_test(enable = "neon")]
13769+
unsafe fn test_vqmovn_high_u32() {
13770+
let a: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
13771+
let b: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
13772+
let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
13773+
let r: u16x8 = transmute(vqmovn_high_u32(transmute(a), transmute(b)));
13774+
assert_eq!(r, e);
13775+
}
13776+
13777+
#[simd_test(enable = "neon")]
13778+
unsafe fn test_vqmovn_high_u64() {
13779+
let a: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
13780+
let b: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
13781+
let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
13782+
let r: u32x4 = transmute(vqmovn_high_u64(transmute(a), transmute(b)));
13783+
assert_eq!(r, e);
13784+
}
13785+
13786+
#[simd_test(enable = "neon")]
13787+
unsafe fn test_vqmovunh_s16() {
13788+
let a: i16 = 1;
13789+
let e: u8 = 1;
13790+
let r: u8 = transmute(vqmovunh_s16(transmute(a)));
13791+
assert_eq!(r, e);
13792+
}
13793+
13794+
#[simd_test(enable = "neon")]
13795+
unsafe fn test_vqmovuns_s32() {
13796+
let a: i32 = 1;
13797+
let e: u16 = 1;
13798+
let r: u16 = transmute(vqmovuns_s32(transmute(a)));
13799+
assert_eq!(r, e);
13800+
}
13801+
13802+
#[simd_test(enable = "neon")]
13803+
unsafe fn test_vqmovund_s64() {
13804+
let a: i64 = 1;
13805+
let e: u32 = 1;
13806+
let r: u32 = transmute(vqmovund_s64(transmute(a)));
13807+
assert_eq!(r, e);
13808+
}
13809+
13810+
#[simd_test(enable = "neon")]
13811+
unsafe fn test_vqmovun_high_s16() {
13812+
let a: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
13813+
let b: i16x8 = i16x8::new(-1, -1, -1, -1, -1, -1, -1, -1);
13814+
let e: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13815+
let r: u8x16 = transmute(vqmovun_high_s16(transmute(a), transmute(b)));
13816+
assert_eq!(r, e);
13817+
}
13818+
13819+
#[simd_test(enable = "neon")]
13820+
unsafe fn test_vqmovun_high_s32() {
13821+
let a: u16x4 = u16x4::new(0, 0, 0, 0);
13822+
let b: i32x4 = i32x4::new(-1, -1, -1, -1);
13823+
let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
13824+
let r: u16x8 = transmute(vqmovun_high_s32(transmute(a), transmute(b)));
13825+
assert_eq!(r, e);
13826+
}
13827+
13828+
#[simd_test(enable = "neon")]
13829+
unsafe fn test_vqmovun_high_s64() {
13830+
let a: u32x2 = u32x2::new(0, 0);
13831+
let b: i64x2 = i64x2::new(-1, -1);
13832+
let e: u32x4 = u32x4::new(0, 0, 0, 0);
13833+
let r: u32x4 = transmute(vqmovun_high_s64(transmute(a), transmute(b)));
13834+
assert_eq!(r, e);
13835+
}
13836+
1354013837
#[simd_test(enable = "neon")]
1354113838
unsafe fn test_vqrdmulhh_s16() {
1354213839
let a: i16 = 1;

0 commit comments

Comments
 (0)