diff --git a/crates/core_arch/src/x86/sse4a.rs b/crates/core_arch/src/x86/sse4a.rs index 369cd4cf31..f002006ced 100644 --- a/crates/core_arch/src/x86/sse4a.rs +++ b/crates/core_arch/src/x86/sse4a.rs @@ -9,17 +9,18 @@ use stdarch_test::assert_instr; extern "C" { #[link_name = "llvm.x86.sse4a.extrq"] fn extrq(x: i64x2, y: i8x16) -> i64x2; + #[link_name = "llvm.x86.sse4a.extrqi"] + fn extrqi(x: i64x2, len: u8, idx: u8) -> i64x2; #[link_name = "llvm.x86.sse4a.insertq"] fn insertq(x: i64x2, y: i64x2) -> i64x2; + #[link_name = "llvm.x86.sse4a.insertqi"] + fn insertqi(x: i64x2, y: i64x2, len: u8, idx: u8) -> i64x2; #[link_name = "llvm.x86.sse4a.movnt.sd"] fn movntsd(x: *mut f64, y: __m128d); #[link_name = "llvm.x86.sse4a.movnt.ss"] fn movntss(x: *mut f32, y: __m128); } -// FIXME(blocked on #248): _mm_extracti_si64(x, len, idx) // EXTRQ -// FIXME(blocked on #248): _mm_inserti_si64(x, y, len, idx) // INSERTQ - /// Extracts the bit range specified by `y` from the lower 64 bits of `x`. /// /// The `[13:8]` bits of `y` specify the index of the bit-range to extract. The @@ -39,6 +40,27 @@ pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i { transmute(extrq(x.as_i64x2(), y.as_i8x16())) } +/// Extracts the specified bits from the lower 64 bits of the 128-bit integer vector operand at the +/// index `idx` and of the length `len`. +/// +/// `idx` specifies the index of the LSB. `len` specifies the number of bits to extract. If length +/// and index are both zero, bits `[63:0]` of parameter `x` are extracted. It is a compile-time error +/// for `len + idx` to be greater than 64 or for `len` to be zero and `idx` to be non-zero. +/// +/// Returns a 128-bit integer vector whose lower 64 bits contain the extracted bits. 
+#[inline] +#[target_feature(enable = "sse4a")] +#[cfg_attr(test, assert_instr(extrq, LEN = 5, IDX = 5))] +#[rustc_legacy_const_generics(1, 2)] +#[unstable(feature = "simd_x86_updates", issue = "126936")] +pub unsafe fn _mm_extracti_si64<const LEN: i32, const IDX: i32>(x: __m128i) -> __m128i { + // LLVM mentions that it is UB if these are not satisfied + static_assert_uimm_bits!(LEN, 6); + static_assert_uimm_bits!(IDX, 6); + static_assert!((LEN == 0 && IDX == 0) || (LEN != 0 && LEN + IDX <= 64)); + transmute(extrqi(x.as_i64x2(), LEN as u8, IDX as u8)) +} + /// Inserts the `[length:0]` bits of `y` into `x` at `index`. /// /// The bits of `y`: @@ -56,6 +78,25 @@ pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i { transmute(insertq(x.as_i64x2(), y.as_i64x2())) } +/// Inserts the `len` least-significant bits from the lower 64 bits of the 128-bit integer vector operand `y` into +/// the lower 64 bits of the 128-bit integer vector operand `x` at the index `idx` and of the length `len`. +/// +/// `idx` specifies the index of the LSB. `len` specifies the number of bits to insert. If length and index +/// are both zero, bits `[63:0]` of parameter `x` are replaced with bits `[63:0]` of parameter `y`. It is a +/// compile-time error for `len + idx` to be greater than 64 or for `len` to be zero and `idx` to be non-zero. +#[inline] +#[target_feature(enable = "sse4a")] +#[cfg_attr(test, assert_instr(insertq, LEN = 5, IDX = 5))] +#[rustc_legacy_const_generics(2, 3)] +#[unstable(feature = "simd_x86_updates", issue = "126936")] +pub unsafe fn _mm_inserti_si64<const LEN: i32, const IDX: i32>(x: __m128i, y: __m128i) -> __m128i { + // LLVM mentions that it is UB if these are not satisfied + static_assert_uimm_bits!(LEN, 6); + static_assert_uimm_bits!(IDX, 6); + static_assert!((LEN == 0 && IDX == 0) || (LEN != 0 && LEN + IDX <= 64)); + transmute(insertqi(x.as_i64x2(), y.as_i64x2(), LEN as u8, IDX as u8)) +} + /// Non-temporal store of `a.0` into `p`. /// /// Writes 64-bit data to a memory location without polluting the caches. 
@@ -114,6 +155,14 @@ mod tests { assert_eq_m128i(r, e); } + #[simd_test(enable = "sse4a")] + unsafe fn test_mm_extracti_si64() { + let a = _mm_setr_epi64x(0x0123456789abcdef, 0); + let r = _mm_extracti_si64::<8, 8>(a); + let e = _mm_setr_epi64x(0xcd, 0); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "sse4a")] unsafe fn test_mm_insert_si64() { let i = 0b0110_i64; @@ -131,6 +180,15 @@ mod tests { assert_eq_m128i(r, expected); } + #[simd_test(enable = "sse4a")] + unsafe fn test_mm_inserti_si64() { + let a = _mm_setr_epi64x(0x0123456789abcdef, 0); + let b = _mm_setr_epi64x(0x0011223344556677, 0); + let r = _mm_inserti_si64::<8, 8>(a, b); + let e = _mm_setr_epi64x(0x0123456789ab77ef, 0); + assert_eq_m128i(r, e); + } + #[repr(align(16))] struct MemoryF64 { data: [f64; 2], diff --git a/crates/core_arch/src/x86/tbm.rs b/crates/core_arch/src/x86/tbm.rs index d1102a1169..ac54f824cd 100644 --- a/crates/core_arch/src/x86/tbm.rs +++ b/crates/core_arch/src/x86/tbm.rs @@ -13,57 +13,28 @@ #[cfg(test)] use stdarch_test::assert_instr; -// FIXME(blocked on #248) -// TODO: LLVM-CODEGEN ERROR: LLVM ERROR: Cannot select: -// intrinsic %llvm.x86.tbm.bextri.u32 -/* -#[allow(dead_code)] extern "C" { - #[link_name="llvm.x86.tbm.bextri.u32"] - fn x86_tbm_bextri_u32(a: u32, y: u32) -> u32; - #[link_name="llvm.x86.tbm.bextri.u64"] - fn x86_tbm_bextri_u64(x: u64, y: u64) -> u64; -} - -/// Extracts bits in range [`start`, `start` + `length`) from `a` into -/// the least significant bits of the result. -#[inline] -#[target_feature(enable = "tbm")] -pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 { - _bextr2_u32(a, (start & 0xffu32) | ((len & 0xffu32) << 8u32)) -} - -/// Extracts bits in range [`start`, `start` + `length`) from `a` into -/// the least significant bits of the result. 
-#[inline] -#[target_feature(enable = "tbm")] -pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 { - _bextr2_u64(a, (start & 0xffu64) | ((len & 0xffu64) << 8u64)) + #[link_name = "llvm.x86.tbm.bextri.u32"] + fn bextri_u32(a: u32, control: u32) -> u32; } /// Extracts bits of `a` specified by `control` into /// the least significant bits of the result. /// /// Bits `[7,0]` of `control` specify the index to the first bit in the range to -/// be extracted, and bits `[15,8]` specify the length of the range. +/// be extracted, and bits `[15,8]` specify the length of the range. For any bit +/// position in the specified range that lies beyond the MSB of the source operand, +/// zeroes will be written. If the range is empty, the result is zero. #[inline] #[target_feature(enable = "tbm")] -pub fn _bextr2_u32(a: u32, control: u32) -> u32 { - unsafe { x86_tbm_bextri_u32(a, control) } +#[cfg_attr(test, assert_instr(bextr, CONTROL = 0x0404))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "simd_x86_updates", issue = "126936")] +pub unsafe fn _bextri_u32<const CONTROL: u32>(a: u32) -> u32 { + static_assert_uimm_bits!(CONTROL, 16); + unsafe { bextri_u32(a, CONTROL) } } -/// Extracts bits of `a` specified by `control` into -/// the least significant bits of the result. -/// -/// Bits `[7,0]` of `control` specify the index to the first bit in the range to -/// be extracted, and bits `[15,8]` specify the length of the range. -#[inline] -#[target_feature(enable = "tbm")] -pub fn _bextr2_u64(a: u64, control: u64) -> u64 { - unsafe { x86_tbm_bextri_u64(a, control) } -} -*/ - /// Clears all bits below the least significant zero bit of `x`. /// /// If there is no zero bit in `x`, it returns zero. @@ -75,18 +46,6 @@ pub unsafe fn _blcfill_u32(x: u32) -> u32 { x & (x.wrapping_add(1)) } -/// Clears all bits below the least significant zero bit of `x`. -/// -/// If there is no zero bit in `x`, it returns zero. 
-#[inline] -#[target_feature(enable = "tbm")] -#[cfg_attr(test, assert_instr(blcfill))] -#[cfg(not(target_arch = "x86"))] // generates lots of instructions -#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blcfill_u64(x: u64) -> u64 { - x & (x.wrapping_add(1)) -} - /// Sets all bits of `x` to 1 except for the least significant zero bit. /// /// If there is no zero bit in `x`, it sets all bits. @@ -95,19 +54,7 @@ pub unsafe fn _blcfill_u64(x: u64) -> u64 { #[cfg_attr(test, assert_instr(blci))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blci_u32(x: u32) -> u32 { - x | !(x.wrapping_add(1)) -} - -/// Sets all bits of `x` to 1 except for the least significant zero bit. -/// -/// If there is no zero bit in `x`, it sets all bits. -#[inline] -#[target_feature(enable = "tbm")] -#[cfg_attr(test, assert_instr(blci))] -#[cfg(not(target_arch = "x86"))] // generates lots of instructions -#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blci_u64(x: u64) -> u64 { - x | !(x.wrapping_add(1)) + x | !x.wrapping_add(1) } /// Sets the least significant zero bit of `x` and clears all other bits. @@ -118,19 +65,7 @@ pub unsafe fn _blci_u64(x: u64) -> u64 { #[cfg_attr(test, assert_instr(blcic))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blcic_u32(x: u32) -> u32 { - !x & (x.wrapping_add(1)) -} - -/// Sets the least significant zero bit of `x` and clears all other bits. -/// -/// If there is no zero bit in `x`, it returns zero. 
-#[inline] -#[target_feature(enable = "tbm")] -#[cfg_attr(test, assert_instr(blcic))] -#[cfg(not(target_arch = "x86"))] // generates lots of instructions -#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blcic_u64(x: u64) -> u64 { - !x & (x.wrapping_add(1)) + !x & x.wrapping_add(1) } /// Sets the least significant zero bit of `x` and clears all bits above @@ -142,20 +77,7 @@ pub unsafe fn _blcic_u64(x: u64) -> u64 { #[cfg_attr(test, assert_instr(blcmsk))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blcmsk_u32(x: u32) -> u32 { - x ^ (x.wrapping_add(1)) -} - -/// Sets the least significant zero bit of `x` and clears all bits above -/// that bit. -/// -/// If there is no zero bit in `x`, it sets all the bits. -#[inline] -#[target_feature(enable = "tbm")] -#[cfg_attr(test, assert_instr(blcmsk))] -#[cfg(not(target_arch = "x86"))] // generates lots of instructions -#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blcmsk_u64(x: u64) -> u64 { - x ^ (x.wrapping_add(1)) + x ^ x.wrapping_add(1) } /// Sets the least significant zero bit of `x`. @@ -166,18 +88,6 @@ pub unsafe fn _blcmsk_u64(x: u64) -> u64 { #[cfg_attr(test, assert_instr(blcs))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blcs_u32(x: u32) -> u32 { - x | (x.wrapping_add(1)) -} - -/// Sets the least significant zero bit of `x`. -/// -/// If there is no zero bit in `x`, it returns `x`. -#[inline] -#[target_feature(enable = "tbm")] -#[cfg_attr(test, assert_instr(blcs))] -#[cfg(not(target_arch = "x86"))] // generates lots of instructions -#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blcs_u64(x: u64) -> u64 { x | x.wrapping_add(1) } @@ -189,19 +99,7 @@ pub unsafe fn _blcs_u64(x: u64) -> u64 { #[cfg_attr(test, assert_instr(blsfill))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsfill_u32(x: u32) -> u32 { - x | (x.wrapping_sub(1)) -} - -/// Sets all bits of `x` below the least significant one. 
-/// -/// If there is no set bit in `x`, it sets all the bits. -#[inline] -#[target_feature(enable = "tbm")] -#[cfg_attr(test, assert_instr(blsfill))] -#[cfg(not(target_arch = "x86"))] // generates lots of instructions -#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blsfill_u64(x: u64) -> u64 { - x | (x.wrapping_sub(1)) + x | x.wrapping_sub(1) } /// Clears least significant bit and sets all other bits. @@ -212,19 +110,7 @@ pub unsafe fn _blsfill_u64(x: u64) -> u64 { #[cfg_attr(test, assert_instr(blsic))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsic_u32(x: u32) -> u32 { - !x | (x.wrapping_sub(1)) -} - -/// Clears least significant bit and sets all other bits. -/// -/// If there is no set bit in `x`, it sets all the bits. -#[inline] -#[target_feature(enable = "tbm")] -#[cfg_attr(test, assert_instr(blsic))] -#[cfg(not(target_arch = "x86"))] // generates lots of instructions -#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blsic_u64(x: u64) -> u64 { - !x | (x.wrapping_sub(1)) + !x | x.wrapping_sub(1) } /// Clears all bits below the least significant zero of `x` and sets all other @@ -236,20 +122,7 @@ pub unsafe fn _blsic_u64(x: u64) -> u64 { #[cfg_attr(test, assert_instr(t1mskc))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _t1mskc_u32(x: u32) -> u32 { - !x | (x.wrapping_add(1)) -} - -/// Clears all bits below the least significant zero of `x` and sets all other -/// bits. -/// -/// If the least significant bit of `x` is `0`, it sets all bits. 
-#[inline] -#[target_feature(enable = "tbm")] -#[cfg_attr(test, assert_instr(t1mskc))] -#[cfg(not(target_arch = "x86"))] // generates lots of instructions -#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _t1mskc_u64(x: u64) -> u64 { - !x | (x.wrapping_add(1)) + !x | x.wrapping_add(1) } /// Sets all bits below the least significant one of `x` and clears all other @@ -261,20 +134,7 @@ pub unsafe fn _t1mskc_u64(x: u64) -> u64 { #[cfg_attr(test, assert_instr(tzmsk))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _tzmsk_u32(x: u32) -> u32 { - !x & (x.wrapping_sub(1)) -} - -/// Sets all bits below the least significant one of `x` and clears all other -/// bits. -/// -/// If the least significant bit of `x` is 1, it returns zero. -#[inline] -#[target_feature(enable = "tbm")] -#[cfg_attr(test, assert_instr(tzmsk))] -#[cfg(not(target_arch = "x86"))] // generates lots of instructions -#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _tzmsk_u64(x: u64) -> u64 { - !x & (x.wrapping_sub(1)) + !x & x.wrapping_sub(1) } #[cfg(test)] @@ -283,31 +143,17 @@ mod tests { use crate::core_arch::x86::*; - /* #[simd_test(enable = "tbm")] - unsafe fn test_bextr_u32() { - assert_eq!(_bextr_u32(0b0101_0000u32, 4, 4), 0b0000_0101u32); + unsafe fn test_bextri_u32() { + assert_eq!(_bextri_u32::<0x0404>(0b0101_0000u32), 0b0000_0101u32); } - #[simd_test(enable = "tbm")] - unsafe fn test_bextr_u64() { - assert_eq!(_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64); - } - */ - #[simd_test(enable = "tbm")] unsafe fn test_blcfill_u32() { assert_eq!(_blcfill_u32(0b0101_0111u32), 0b0101_0000u32); assert_eq!(_blcfill_u32(0b1111_1111u32), 0u32); } - #[simd_test(enable = "tbm")] - #[cfg(not(target_arch = "x86"))] - unsafe fn test_blcfill_u64() { - assert_eq!(_blcfill_u64(0b0101_0111u64), 0b0101_0000u64); - assert_eq!(_blcfill_u64(0b1111_1111u64), 0u64); - } - #[simd_test(enable = "tbm")] unsafe fn test_blci_u32() { assert_eq!( @@ -320,59 +166,24 @@ mod tests 
{ ); } - #[simd_test(enable = "tbm")] - #[cfg(not(target_arch = "x86"))] - #[rustfmt::skip] - unsafe fn test_blci_u64() { - assert_eq!( - _blci_u64(0b0101_0000u64), - 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110u64 - ); - assert_eq!( - _blci_u64(0b1111_1111u64), - 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110_1111_1111u64 - ); - } - #[simd_test(enable = "tbm")] unsafe fn test_blcic_u32() { assert_eq!(_blcic_u32(0b0101_0001u32), 0b0000_0010u32); assert_eq!(_blcic_u32(0b1111_1111u32), 0b1_0000_0000u32); } - #[simd_test(enable = "tbm")] - #[cfg(not(target_arch = "x86"))] - unsafe fn test_blcic_u64() { - assert_eq!(_blcic_u64(0b0101_0001u64), 0b0000_0010u64); - assert_eq!(_blcic_u64(0b1111_1111u64), 0b1_0000_0000u64); - } - #[simd_test(enable = "tbm")] unsafe fn test_blcmsk_u32() { assert_eq!(_blcmsk_u32(0b0101_0001u32), 0b0000_0011u32); assert_eq!(_blcmsk_u32(0b1111_1111u32), 0b1_1111_1111u32); } - #[simd_test(enable = "tbm")] - #[cfg(not(target_arch = "x86"))] - unsafe fn test_blcmsk_u64() { - assert_eq!(_blcmsk_u64(0b0101_0001u64), 0b0000_0011u64); - assert_eq!(_blcmsk_u64(0b1111_1111u64), 0b1_1111_1111u64); - } - #[simd_test(enable = "tbm")] unsafe fn test_blcs_u32() { assert_eq!(_blcs_u32(0b0101_0001u32), 0b0101_0011u32); assert_eq!(_blcs_u32(0b1111_1111u32), 0b1_1111_1111u32); } - #[simd_test(enable = "tbm")] - #[cfg(not(target_arch = "x86"))] - unsafe fn test_blcs_u64() { - assert_eq!(_blcs_u64(0b0101_0001u64), 0b0101_0011u64); - assert_eq!(_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64); - } - #[simd_test(enable = "tbm")] unsafe fn test_blsfill_u32() { assert_eq!(_blsfill_u32(0b0101_0100u32), 0b0101_0111u32); @@ -382,17 +193,6 @@ mod tests { ); } - #[simd_test(enable = "tbm")] - #[cfg(not(target_arch = "x86"))] - #[rustfmt::skip] - unsafe fn test_blsfill_u64() { - assert_eq!(_blsfill_u64(0b0101_0100u64), 0b0101_0111u64); - assert_eq!( - _blsfill_u64(0u64), - 
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64 - ); - } - #[simd_test(enable = "tbm")] unsafe fn test_blsic_u32() { assert_eq!( @@ -405,20 +205,6 @@ mod tests { ); } - #[simd_test(enable = "tbm")] - #[cfg(not(target_arch = "x86"))] - #[rustfmt::skip] - unsafe fn test_blsic_u64() { - assert_eq!( - _blsic_u64(0b0101_0100u64), - 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1011u64 - ); - assert_eq!( - _blsic_u64(0u64), - 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64 - ); - } - #[simd_test(enable = "tbm")] unsafe fn test_t1mskc_u32() { assert_eq!( @@ -431,30 +217,9 @@ mod tests { ); } - #[simd_test(enable = "tbm")] - #[cfg(not(target_arch = "x86"))] - #[rustfmt::skip] - unsafe fn test_t1mksc_u64() { - assert_eq!( - _t1mskc_u64(0b0101_0111u64), - 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1000u64 - ); - assert_eq!( - _t1mskc_u64(0u64), - 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64 - ); - } - #[simd_test(enable = "tbm")] unsafe fn test_tzmsk_u32() { assert_eq!(_tzmsk_u32(0b0101_1000u32), 0b0000_0111u32); assert_eq!(_tzmsk_u32(0b0101_1001u32), 0b0000_0000u32); } - - #[simd_test(enable = "tbm")] - #[cfg(not(target_arch = "x86"))] - unsafe fn test_tzmsk_u64() { - assert_eq!(_tzmsk_u64(0b0101_1000u64), 0b0000_0111u64); - assert_eq!(_tzmsk_u64(0b0101_1001u64), 0b0000_0000u64); - } } diff --git a/crates/core_arch/src/x86_64/mod.rs b/crates/core_arch/src/x86_64/mod.rs index fb7bce6871..2e0139c5da 100644 --- a/crates/core_arch/src/x86_64/mod.rs +++ b/crates/core_arch/src/x86_64/mod.rs @@ -42,6 +42,10 @@ mod bmi2; #[stable(feature = "simd_x86", since = "1.27.0")] pub use self::bmi2::*; +mod tbm; +#[stable(feature = "simd_x86", since = "1.27.0")] +pub use self::tbm::*; + mod avx512f; #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub use self::avx512f::*; diff --git 
a/crates/core_arch/src/x86_64/tbm.rs b/crates/core_arch/src/x86_64/tbm.rs new file mode 100644 index 0000000000..d63f7d5598 --- /dev/null +++ b/crates/core_arch/src/x86_64/tbm.rs @@ -0,0 +1,225 @@ +//! Trailing Bit Manipulation (TBM) instruction set. +//! +//! The reference is [AMD64 Architecture Programmer's Manual, Volume 3: +//! General-Purpose and System Instructions][amd64_ref]. +//! +//! [Wikipedia][wikipedia_bmi] provides a quick overview of the available +//! instructions. +//! +//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf +//! [wikipedia_bmi]: +//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#TBM_.28Trailing_Bit_Manipulation.29 + +#[cfg(test)] +use stdarch_test::assert_instr; + +extern "C" { + #[link_name = "llvm.x86.tbm.bextri.u64"] + fn bextri_u64(a: u64, control: u64) -> u64; +} + +/// Extracts bits of `a` specified by `control` into +/// the least significant bits of the result. +/// +/// Bits `[7,0]` of `control` specify the index to the first bit in the range to +/// be extracted, and bits `[15,8]` specify the length of the range. For any bit +/// position in the specified range that lies beyond the MSB of the source operand, +/// zeroes will be written. If the range is empty, the result is zero. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(bextr, CONTROL = 0x0404))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "simd_x86_updates", issue = "126936")] +pub unsafe fn _bextri_u64<const CONTROL: u64>(a: u64) -> u64 { + static_assert_uimm_bits!(CONTROL, 16); + unsafe { bextri_u64(a, CONTROL) } +} + +/// Clears all bits below the least significant zero bit of `x`. +/// +/// If there is no zero bit in `x`, it returns zero. 
+#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(blcfill))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _blcfill_u64(x: u64) -> u64 { + x & x.wrapping_add(1) +} + +/// Sets all bits of `x` to 1 except for the least significant zero bit. +/// +/// If there is no zero bit in `x`, it sets all bits. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(blci))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _blci_u64(x: u64) -> u64 { + x | !x.wrapping_add(1) +} + +/// Sets the least significant zero bit of `x` and clears all other bits. +/// +/// If there is no zero bit in `x`, it returns zero. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(blcic))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _blcic_u64(x: u64) -> u64 { + !x & x.wrapping_add(1) +} + +/// Sets the least significant zero bit of `x` and clears all bits above +/// that bit. +/// +/// If there is no zero bit in `x`, it sets all the bits. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(blcmsk))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _blcmsk_u64(x: u64) -> u64 { + x ^ x.wrapping_add(1) +} + +/// Sets the least significant zero bit of `x`. +/// +/// If there is no zero bit in `x`, it returns `x`. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(blcs))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _blcs_u64(x: u64) -> u64 { + x | x.wrapping_add(1) +} + +/// Sets all bits of `x` below the least significant one. +/// +/// If there is no set bit in `x`, it sets all the bits. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(blsfill))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _blsfill_u64(x: u64) -> u64 { + x | x.wrapping_sub(1) +} + +/// Clears least significant bit and sets all other bits. 
+/// +/// If there is no set bit in `x`, it sets all the bits. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(blsic))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _blsic_u64(x: u64) -> u64 { + !x | x.wrapping_sub(1) +} + +/// Clears all bits below the least significant zero of `x` and sets all other +/// bits. +/// +/// If the least significant bit of `x` is `0`, it sets all bits. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(t1mskc))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _t1mskc_u64(x: u64) -> u64 { + !x | x.wrapping_add(1) +} + +/// Sets all bits below the least significant one of `x` and clears all other +/// bits. +/// +/// If the least significant bit of `x` is 1, it returns zero. +#[inline] +#[target_feature(enable = "tbm")] +#[cfg_attr(test, assert_instr(tzmsk))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _tzmsk_u64(x: u64) -> u64 { + !x & x.wrapping_sub(1) +} + +#[cfg(test)] +mod tests { + use stdarch_test::simd_test; + + use crate::core_arch::x86_64::*; + + #[simd_test(enable = "tbm")] + unsafe fn test_bextri_u64() { + assert_eq!(_bextri_u64::<0x0404>(0b0101_0000u64), 0b0000_0101u64); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_blcfill_u64() { + assert_eq!(_blcfill_u64(0b0101_0111u64), 0b0101_0000u64); + assert_eq!(_blcfill_u64(0b1111_1111u64), 0u64); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_blci_u64() { + assert_eq!( + _blci_u64(0b0101_0000u64), + 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110u64 + ); + assert_eq!( + _blci_u64(0b1111_1111u64), + 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110_1111_1111u64 + ); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_blcic_u64() { + assert_eq!(_blcic_u64(0b0101_0001u64), 0b0000_0010u64); + assert_eq!(_blcic_u64(0b1111_1111u64), 0b1_0000_0000u64); + } + + #[simd_test(enable = "tbm")] + unsafe fn 
test_blcmsk_u64() { + assert_eq!(_blcmsk_u64(0b0101_0001u64), 0b0000_0011u64); + assert_eq!(_blcmsk_u64(0b1111_1111u64), 0b1_1111_1111u64); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_blcs_u64() { + assert_eq!(_blcs_u64(0b0101_0001u64), 0b0101_0011u64); + assert_eq!(_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_blsfill_u64() { + assert_eq!(_blsfill_u64(0b0101_0100u64), 0b0101_0111u64); + assert_eq!( + _blsfill_u64(0u64), + 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64 + ); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_blsic_u64() { + assert_eq!( + _blsic_u64(0b0101_0100u64), + 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1011u64 + ); + assert_eq!( + _blsic_u64(0u64), + 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64 + ); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_t1mksc_u64() { + assert_eq!( + _t1mskc_u64(0b0101_0111u64), + 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1000u64 + ); + assert_eq!( + _t1mskc_u64(0u64), + 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64 + ); + } + + #[simd_test(enable = "tbm")] + unsafe fn test_tzmsk_u64() { + assert_eq!(_tzmsk_u64(0b0101_1000u64), 0b0000_0111u64); + assert_eq!(_tzmsk_u64(0b0101_1001u64), 0b0000_0000u64); + } +}