Skip to content

Commit ef44f34

Browse files
committed
add vldx neon instructions
1 parent 435f54a commit ef44f34

File tree

7 files changed

+9100
-6913
lines changed

7 files changed

+9100
-6913
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4514,6 +4514,84 @@ pub unsafe fn vqaddd_s64(a: i64, b: i64) -> i64 {
45144514
vqaddd_s64_(a, b)
45154515
}
45164516

4517+
/// Load multiple single-element structures to one, two, three, or four registers
4518+
#[inline]
4519+
#[target_feature(enable = "neon")]
4520+
#[cfg_attr(test, assert_instr(ld1))]
4521+
pub unsafe fn vld1_f64_x2(a: *const f64) -> float64x1x2_t {
4522+
#[allow(improper_ctypes)]
4523+
extern "unadjusted" {
4524+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v1f64.p0f64")]
4525+
fn vld1_f64_x2_(a: *const f64) -> float64x1x2_t;
4526+
}
4527+
vld1_f64_x2_(a)
4528+
}
4529+
4530+
/// Load multiple single-element structures to one, two, three, or four registers
4531+
#[inline]
4532+
#[target_feature(enable = "neon")]
4533+
#[cfg_attr(test, assert_instr(ld1))]
4534+
pub unsafe fn vld1q_f64_x2(a: *const f64) -> float64x2x2_t {
4535+
#[allow(improper_ctypes)]
4536+
extern "unadjusted" {
4537+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v2f64.p0f64")]
4538+
fn vld1q_f64_x2_(a: *const f64) -> float64x2x2_t;
4539+
}
4540+
vld1q_f64_x2_(a)
4541+
}
4542+
4543+
/// Load multiple single-element structures to one, two, three, or four registers
4544+
#[inline]
4545+
#[target_feature(enable = "neon")]
4546+
#[cfg_attr(test, assert_instr(ld1))]
4547+
pub unsafe fn vld1_f64_x3(a: *const f64) -> float64x1x3_t {
4548+
#[allow(improper_ctypes)]
4549+
extern "unadjusted" {
4550+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v1f64.p0f64")]
4551+
fn vld1_f64_x3_(a: *const f64) -> float64x1x3_t;
4552+
}
4553+
vld1_f64_x3_(a)
4554+
}
4555+
4556+
/// Load multiple single-element structures to one, two, three, or four registers
4557+
#[inline]
4558+
#[target_feature(enable = "neon")]
4559+
#[cfg_attr(test, assert_instr(ld1))]
4560+
pub unsafe fn vld1q_f64_x3(a: *const f64) -> float64x2x3_t {
4561+
#[allow(improper_ctypes)]
4562+
extern "unadjusted" {
4563+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v2f64.p0f64")]
4564+
fn vld1q_f64_x3_(a: *const f64) -> float64x2x3_t;
4565+
}
4566+
vld1q_f64_x3_(a)
4567+
}
4568+
4569+
/// Load multiple single-element structures to one, two, three, or four registers
4570+
#[inline]
4571+
#[target_feature(enable = "neon")]
4572+
#[cfg_attr(test, assert_instr(ld1))]
4573+
pub unsafe fn vld1_f64_x4(a: *const f64) -> float64x1x4_t {
4574+
#[allow(improper_ctypes)]
4575+
extern "unadjusted" {
4576+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v1f64.p0f64")]
4577+
fn vld1_f64_x4_(a: *const f64) -> float64x1x4_t;
4578+
}
4579+
vld1_f64_x4_(a)
4580+
}
4581+
4582+
/// Load multiple single-element structures to one, two, three, or four registers
4583+
#[inline]
4584+
#[target_feature(enable = "neon")]
4585+
#[cfg_attr(test, assert_instr(ld1))]
4586+
pub unsafe fn vld1q_f64_x4(a: *const f64) -> float64x2x4_t {
4587+
#[allow(improper_ctypes)]
4588+
extern "unadjusted" {
4589+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v2f64.p0f64")]
4590+
fn vld1q_f64_x4_(a: *const f64) -> float64x2x4_t;
4591+
}
4592+
vld1q_f64_x4_(a)
4593+
}
4594+
45174595
/// Multiply
45184596
#[inline]
45194597
#[target_feature(enable = "neon")]
@@ -12857,6 +12935,54 @@ mod test {
1285712935
assert_eq!(r, e);
1285812936
}
1285912937

12938+
#[simd_test(enable = "neon")]
12939+
unsafe fn test_vld1_f64_x2() {
12940+
let a: [f64; 3] = [0., 1., 2.];
12941+
let e: [f64; 2] = [1., 2.];
12942+
let r: [f64; 2] = transmute(vld1_f64_x2(a[1..].as_ptr()));
12943+
assert_eq!(r, e);
12944+
}
12945+
12946+
#[simd_test(enable = "neon")]
12947+
unsafe fn test_vld1q_f64_x2() {
12948+
let a: [f64; 5] = [0., 1., 2., 3., 4.];
12949+
let e: [f64x2; 2] = [f64x2::new(1., 2.), f64x2::new(3., 4.)];
12950+
let r: [f64x2; 2] = transmute(vld1q_f64_x2(a[1..].as_ptr()));
12951+
assert_eq!(r, e);
12952+
}
12953+
12954+
#[simd_test(enable = "neon")]
12955+
unsafe fn test_vld1_f64_x3() {
12956+
let a: [f64; 4] = [0., 1., 2., 3.];
12957+
let e: [f64; 3] = [1., 2., 3.];
12958+
let r: [f64; 3] = transmute(vld1_f64_x3(a[1..].as_ptr()));
12959+
assert_eq!(r, e);
12960+
}
12961+
12962+
#[simd_test(enable = "neon")]
12963+
unsafe fn test_vld1q_f64_x3() {
12964+
let a: [f64; 7] = [0., 1., 2., 3., 4., 5., 6.];
12965+
let e: [f64x2; 3] = [f64x2::new(1., 2.), f64x2::new(3., 4.), f64x2::new(5., 6.)];
12966+
let r: [f64x2; 3] = transmute(vld1q_f64_x3(a[1..].as_ptr()));
12967+
assert_eq!(r, e);
12968+
}
12969+
12970+
#[simd_test(enable = "neon")]
12971+
unsafe fn test_vld1_f64_x4() {
12972+
let a: [f64; 5] = [0., 1., 2., 3., 4.];
12973+
let e: [f64; 4] = [1., 2., 3., 4.];
12974+
let r: [f64; 4] = transmute(vld1_f64_x4(a[1..].as_ptr()));
12975+
assert_eq!(r, e);
12976+
}
12977+
12978+
#[simd_test(enable = "neon")]
12979+
unsafe fn test_vld1q_f64_x4() {
12980+
let a: [f64; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.];
12981+
let e: [f64x2; 4] = [f64x2::new(1., 2.), f64x2::new(3., 4.), f64x2::new(5., 6.), f64x2::new(7., 8.)];
12982+
let r: [f64x2; 4] = transmute(vld1q_f64_x4(a[1..].as_ptr()));
12983+
assert_eq!(r, e);
12984+
}
12985+
1286012986
#[simd_test(enable = "neon")]
1286112987
unsafe fn test_vmul_f64() {
1286212988
let a: f64 = 1.0;

crates/core_arch/src/aarch64/neon/mod.rs

Lines changed: 20 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -25,44 +25,34 @@ types! {
2525
pub struct float64x2_t(f64, f64);
2626
}
2727

28-
/// ARM-specific type containing two `int8x16_t` vectors.
28+
/// ARM-specific type containing two `float64x1_t` vectors.
2929
#[derive(Copy, Clone)]
30-
pub struct int8x16x2_t(pub int8x16_t, pub int8x16_t);
31-
/// ARM-specific type containing three `int8x16_t` vectors.
30+
pub struct float64x1x2_t(pub float64x1_t, pub float64x1_t);
31+
/// ARM-specific type containing three `float64x1_t` vectors.
3232
#[derive(Copy, Clone)]
33-
pub struct int8x16x3_t(pub int8x16_t, pub int8x16_t, pub int8x16_t);
34-
/// ARM-specific type containing four `int8x16_t` vectors.
33+
pub struct float64x1x3_t(pub float64x1_t, pub float64x1_t, pub float64x1_t);
34+
/// ARM-specific type containing four `float64x1_t` vectors.
3535
#[derive(Copy, Clone)]
36-
pub struct int8x16x4_t(pub int8x16_t, pub int8x16_t, pub int8x16_t, pub int8x16_t);
37-
38-
/// ARM-specific type containing two `uint8x16_t` vectors.
39-
#[derive(Copy, Clone)]
40-
pub struct uint8x16x2_t(pub uint8x16_t, pub uint8x16_t);
41-
/// ARM-specific type containing three `uint8x16_t` vectors.
42-
#[derive(Copy, Clone)]
43-
pub struct uint8x16x3_t(pub uint8x16_t, pub uint8x16_t, pub uint8x16_t);
44-
/// ARM-specific type containing four `uint8x16_t` vectors.
45-
#[derive(Copy, Clone)]
46-
pub struct uint8x16x4_t(
47-
pub uint8x16_t,
48-
pub uint8x16_t,
49-
pub uint8x16_t,
50-
pub uint8x16_t,
36+
pub struct float64x1x4_t(
37+
pub float64x1_t,
38+
pub float64x1_t,
39+
pub float64x1_t,
40+
pub float64x1_t,
5141
);
5242

53-
/// ARM-specific type containing two `poly8x16_t` vectors.
43+
/// ARM-specific type containing two `float64x2_t` vectors.
5444
#[derive(Copy, Clone)]
55-
pub struct poly8x16x2_t(pub poly8x16_t, pub poly8x16_t);
56-
/// ARM-specific type containing three `poly8x16_t` vectors.
45+
pub struct float64x2x2_t(pub float64x2_t, pub float64x2_t);
46+
/// ARM-specific type containing three `float64x2_t` vectors.
5747
#[derive(Copy, Clone)]
58-
pub struct poly8x16x3_t(pub poly8x16_t, pub poly8x16_t, pub poly8x16_t);
59-
/// ARM-specific type containing four `poly8x16_t` vectors.
48+
pub struct float64x2x3_t(pub float64x2_t, pub float64x2_t, pub float64x2_t);
49+
/// ARM-specific type containing four `float64x2_t` vectors.
6050
#[derive(Copy, Clone)]
61-
pub struct poly8x16x4_t(
62-
pub poly8x16_t,
63-
pub poly8x16_t,
64-
pub poly8x16_t,
65-
pub poly8x16_t,
51+
pub struct float64x2x4_t(
52+
pub float64x2_t,
53+
pub float64x2_t,
54+
pub float64x2_t,
55+
pub float64x2_t,
6656
);
6757

6858
#[allow(improper_ctypes)]

0 commit comments

Comments
 (0)