diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index 686eb9446a..a85d979a59 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -12,8 +12,8 @@ pub use self::generated::*; use crate::{ core_arch::{arm_shared::*, simd::*, simd_llvm::*}, hint::unreachable_unchecked, - mem::{size_of, transmute, zeroed}, - ptr::copy_nonoverlapping, + mem::{transmute, zeroed}, + ptr::{read_unaligned, write_unaligned}, }; #[cfg(test)] use stdarch_test::assert_instr; @@ -464,16 +464,7 @@ pub unsafe fn vcopy_laneq_f64( #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { - transmute(i8x8::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - *ptr.offset(4), - *ptr.offset(5), - *ptr.offset(6), - *ptr.offset(7), - )) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -481,24 +472,7 @@ pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { - transmute(i8x16::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - *ptr.offset(4), - *ptr.offset(5), - *ptr.offset(6), - *ptr.offset(7), - *ptr.offset(8), - *ptr.offset(9), - *ptr.offset(10), - *ptr.offset(11), - *ptr.offset(12), - *ptr.offset(13), - *ptr.offset(14), - *ptr.offset(15), - )) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. 
@@ -506,12 +480,7 @@ pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { - transmute(i16x4::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - )) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -519,16 +488,7 @@ pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { - transmute(i16x8::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - *ptr.offset(4), - *ptr.offset(5), - *ptr.offset(6), - *ptr.offset(7), - )) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -536,7 +496,7 @@ pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { - transmute(i32x2::new(*ptr, *ptr.offset(1))) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -544,12 +504,7 @@ pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { - transmute(i32x4::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - )) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. 
@@ -557,7 +512,7 @@ pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { - transmute(i64x1::new(*ptr)) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -565,7 +520,7 @@ pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { - transmute(i64x2::new(*ptr, *ptr.offset(1))) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -573,16 +528,7 @@ pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { - transmute(u8x8::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - *ptr.offset(4), - *ptr.offset(5), - *ptr.offset(6), - *ptr.offset(7), - )) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -590,24 +536,7 @@ pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { - transmute(u8x16::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - *ptr.offset(4), - *ptr.offset(5), - *ptr.offset(6), - *ptr.offset(7), - *ptr.offset(8), - *ptr.offset(9), - *ptr.offset(10), - *ptr.offset(11), - *ptr.offset(12), - *ptr.offset(13), - *ptr.offset(14), - *ptr.offset(15), - )) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. 
@@ -615,12 +544,7 @@ pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { - transmute(u16x4::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - )) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -628,16 +552,7 @@ pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { - transmute(u16x8::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - *ptr.offset(4), - *ptr.offset(5), - *ptr.offset(6), - *ptr.offset(7), - )) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -645,7 +560,7 @@ pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { - transmute(u32x2::new(*ptr, *ptr.offset(1))) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -653,12 +568,7 @@ pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { - transmute(u32x4::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - )) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. 
@@ -666,7 +576,7 @@ pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t { - transmute(u64x1::new(*ptr)) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -674,7 +584,7 @@ pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { - transmute(u64x2::new(*ptr, *ptr.offset(1))) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -682,16 +592,7 @@ pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { - transmute(u8x8::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - *ptr.offset(4), - *ptr.offset(5), - *ptr.offset(6), - *ptr.offset(7), - )) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -699,24 +600,7 @@ pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { - transmute(u8x16::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - *ptr.offset(4), - *ptr.offset(5), - *ptr.offset(6), - *ptr.offset(7), - *ptr.offset(8), - *ptr.offset(9), - *ptr.offset(10), - *ptr.offset(11), - *ptr.offset(12), - *ptr.offset(13), - *ptr.offset(14), - *ptr.offset(15), - )) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. 
@@ -724,12 +608,7 @@ pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { - transmute(u16x4::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - )) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -737,16 +616,7 @@ pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { - transmute(u16x8::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - *ptr.offset(4), - *ptr.offset(5), - *ptr.offset(6), - *ptr.offset(7), - )) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -754,7 +624,7 @@ pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { - transmute(f32x2::new(*ptr, *ptr.offset(1))) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -762,12 +632,7 @@ pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { - transmute(f32x4::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - )) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. 
@@ -775,7 +640,7 @@ pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_f64(ptr: *const f64) -> float64x1_t { - transmute(f64x1::new(*ptr)) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -783,7 +648,7 @@ pub unsafe fn vld1_f64(ptr: *const f64) -> float64x1_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t { - transmute(f64x2::new(*ptr, *ptr.offset(1))) + read_unaligned(ptr.cast()) } /// Store multiple single-element structures from one, two, three, or four registers. @@ -792,11 +657,7 @@ pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) { - copy_nonoverlapping( - &a as *const int8x8_t as *const i8, - ptr as *mut i8, - size_of::<int8x8_t>(), - ) + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -805,11 +666,7 @@ pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) { - copy_nonoverlapping( - &a as *const int8x16_t as *const i8, - ptr as *mut i8, - size_of::<int8x16_t>(), - ) + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -818,11 +675,7 @@ pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) { - copy_nonoverlapping( - &a as *const int16x4_t as *const i16, - ptr as *mut i16, - size_of::<int16x4_t>(), - ) + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. 
@@ -831,11 +684,7 @@ pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) { - copy_nonoverlapping( - &a as *const int16x8_t as *const i16, - ptr as *mut i16, - size_of::<int16x8_t>(), - ) + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -844,11 +693,7 @@ pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) { - copy_nonoverlapping( - &a as *const int32x2_t as *const i32, - ptr as *mut i32, - size_of::<int32x2_t>(), - ) + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -857,11 +702,7 @@ pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) { - copy_nonoverlapping( - &a as *const int32x4_t as *const i32, - ptr as *mut i32, - size_of::<int32x4_t>(), - ) + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -870,11 +711,7 @@ pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) { - copy_nonoverlapping( - &a as *const int64x1_t as *const i64, - ptr as *mut i64, - size_of::<int64x1_t>(), - ) + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. 
@@ -883,11 +720,7 @@ pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) { - copy_nonoverlapping( - &a as *const int64x2_t as *const i64, - ptr as *mut i64, - size_of::<int64x2_t>(), - ) + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -896,11 +729,7 @@ pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) { - copy_nonoverlapping( - &a as *const uint8x8_t as *const u8, - ptr as *mut u8, - size_of::<uint8x8_t>(), - ) + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -909,11 +738,7 @@ pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) { - copy_nonoverlapping( - &a as *const uint8x16_t as *const u8, - ptr as *mut u8, - size_of::<uint8x16_t>(), - ) + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -922,11 +747,7 @@ pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) { - copy_nonoverlapping( - &a as *const uint16x4_t as *const u16, - ptr as *mut u16, - size_of::<uint16x4_t>(), - ) + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. 
@@ -935,11 +756,7 @@ pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) { - copy_nonoverlapping( - &a as *const uint16x8_t as *const u16, - ptr as *mut u16, - size_of::<uint16x8_t>(), - ) + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -948,11 +765,7 @@ pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) { - copy_nonoverlapping( - &a as *const uint32x2_t as *const u32, - ptr as *mut u32, - size_of::<uint32x2_t>(), - ) + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -961,11 +774,7 @@ pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) { - copy_nonoverlapping( - &a as *const uint32x4_t as *const u32, - ptr as *mut u32, - size_of::<uint32x4_t>(), - ) + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -974,11 +783,7 @@ pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) { - copy_nonoverlapping( - &a as *const uint64x1_t as *const u64, - ptr as *mut u64, - size_of::<uint64x1_t>(), - ) + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. 
@@ -987,11 +792,7 @@ pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) { - copy_nonoverlapping( - &a as *const uint64x2_t as *const u64, - ptr as *mut u64, - size_of::<uint64x2_t>(), - ) + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -1000,11 +801,7 @@ pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) { - copy_nonoverlapping( - &a as *const poly8x8_t as *const p8, - ptr as *mut p8, - size_of::<poly8x8_t>(), - ) + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -1013,11 +810,7 @@ pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) { - copy_nonoverlapping( - &a as *const poly8x16_t as *const p8, - ptr as *mut p8, - size_of::<poly8x16_t>(), - ) + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -1026,11 +819,7 @@ pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) { - copy_nonoverlapping( - &a as *const poly16x4_t as *const p16, - ptr as *mut p16, - size_of::<poly16x4_t>(), - ) + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. 
@@ -1039,11 +828,7 @@ pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) { - copy_nonoverlapping( - &a as *const poly16x8_t as *const p16, - ptr as *mut p16, - size_of::<poly16x8_t>(), - ) + write_unaligned(ptr.cast(), a); } // Store multiple single-element structures from one, two, three, or four registers. @@ -1052,11 +837,7 @@ pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) { - copy_nonoverlapping( - &a as *const poly64x1_t as *const p64, - ptr as *mut p64, - size_of::<poly64x1_t>(), - ) + write_unaligned(ptr.cast(), a); } // Store multiple single-element structures from one, two, three, or four registers. @@ -1065,11 +846,7 @@ pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) { - copy_nonoverlapping( - &a as *const poly64x2_t as *const p64, - ptr as *mut p64, - size_of::<poly64x2_t>(), - ) + write_unaligned(ptr.cast(), a); } // Store multiple single-element structures from one, two, three, or four registers. @@ -1078,11 +855,7 @@ pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) { - copy_nonoverlapping( - &a as *const float32x2_t as *const f32, - ptr as *mut f32, - size_of::<float32x2_t>(), - ) + write_unaligned(ptr.cast(), a); } // Store multiple single-element structures from one, two, three, or four registers. 
@@ -1091,11 +864,7 @@ pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) { - copy_nonoverlapping( - &a as *const float32x4_t as *const f32, - ptr as *mut f32, - size_of::<float32x4_t>(), - ) + write_unaligned(ptr.cast(), a); } // Store multiple single-element structures from one, two, three, or four registers. @@ -1104,11 +873,7 @@ pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_f64(ptr: *mut f64, a: float64x1_t) { - copy_nonoverlapping( - &a as *const float64x1_t as *const f64, - ptr as *mut f64, - size_of::<float64x1_t>(), - ) + write_unaligned(ptr.cast(), a); } // Store multiple single-element structures from one, two, three, or four registers. @@ -1117,11 +882,7 @@ pub unsafe fn vst1_f64(ptr: *mut f64, a: float64x1_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_f64(ptr: *mut f64, a: float64x2_t) { - copy_nonoverlapping( - &a as *const float64x2_t as *const f64, - ptr as *mut f64, - size_of::<float64x2_t>(), - ) + write_unaligned(ptr.cast(), a); } /// Absolute Value (wrapping).