|
| 1 | +// Ignore everything except x86 and x86_64 |
| 2 | +// Any new targets that are added to CI should be ignored here. |
| 3 | +// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.) |
| 4 | +//@ignore-target-aarch64 |
| 5 | +//@ignore-target-arm |
| 6 | +//@ignore-target-avr |
| 7 | +//@ignore-target-s390x |
| 8 | +//@ignore-target-thumbv7em |
| 9 | +//@ignore-target-wasm32 |
| 10 | +//@compile-flags: -C target-feature=+bmi1,+bmi2 |
| 11 | + |
| 12 | +#[cfg(target_arch = "x86")] |
| 13 | +use std::arch::x86::*; |
| 14 | +#[cfg(target_arch = "x86_64")] |
| 15 | +use std::arch::x86_64::*; |
| 16 | + |
| 17 | +fn main() { |
| 18 | + // BMI1 and BMI2 are independent from each other, so both must be checked. |
| 19 | + assert!(is_x86_feature_detected!("bmi1")); |
| 20 | + assert!(is_x86_feature_detected!("bmi2")); |
| 21 | + |
| 22 | + unsafe { |
| 23 | + test_bmi_32(); |
| 24 | + test_bmi_64(); |
| 25 | + } |
| 26 | +} |
| 27 | + |
/// Exercises the 32-bit BMI intrinsics: `bextr`, `pext`, `pdep` and `bzhi`.
///
/// # Safety
///
/// Callers are expected to have confirmed BMI1 and BMI2 support first, as `main` does.
unsafe fn test_bmi_32() {
    /// Bit-field extraction through `_bextr_u32` and `_bextr2_u32`.
    unsafe fn check_bextr() {
        assert_eq!(_bextr_u32(0b0101_0000u32, 4, 4), 0b0000_0101u32);

        for k in 0u32..16 {
            // A four-bit field read from an all-ones input is always 0b1111.
            assert_eq!(_bextr_u32(u32::MAX, k, 4), 0b1111);
            // A `k`-bit field read from an all-ones input has exactly `k` low bits set.
            assert_eq!(_bextr_u32(u32::MAX, 4, k), (1u32 << k) - 1);
        }

        // Lengths well beyond the operand width are included on purpose, to verify
        // which bits of the length the instruction actually uses.
        for len in 0u32..256 {
            // A length past the operand width extracts every bit, hence the `min(32)`.
            assert_eq!(_bextr_u32(u32::MAX, 0, len).count_ones(), len.min(32));
        }

        // A zero-length field is empty regardless of the start position.
        for start in 0u32..256 {
            assert_eq!(_bextr_u32(u32::MAX, start, 0), 0);
        }

        // Completely random values. These also check that the function works
        // even if upper bits of the control value are set.
        let random_cases: [(u32, u32, u32); 3] = [
            (0x7408a392, 0x54ef705, 0x3a0451c),
            (0xbc5a3494, 0xdd193203, 0x178b4692),
            (0xc0332325, 0xf96e207, 0x1806646),
        ];
        for (source, control, want) in random_cases {
            assert_eq!(_bextr2_u32(source, control), want);
        }
    }

    /// Parallel bit extraction through `_pext_u32`.
    unsafe fn check_pext() {
        const SAMPLE: u32 = 0b1011_1110_1001_0011;

        // Each row is (value, mask, expected result).
        let cases: [(u32, u32, u32); 6] = [
            // Random values.
            (SAMPLE, 0b0110_0011_1000_0101, 0b0000_0000_0011_0101),
            (SAMPLE, 0b1110_1011_1110_1111, 0b0001_0111_0100_0011),
            (0x12345678, 0xff00fff0, 0x00012567),
            // Identities.
            (u32::MAX, u32::MAX, u32::MAX),
            (u32::MAX, 0, 0),
            (0, u32::MAX, 0),
        ];
        for (value, mask, want) in cases {
            assert_eq!(_pext_u32(value, mask), want);
        }
    }

    /// Parallel bit deposit through `_pdep_u32`.
    unsafe fn check_pdep() {
        const SAMPLE: u32 = 0b1011_1110_1001_0011;

        // Each row is (value, mask, expected result).
        let cases: [(u32, u32, u32); 6] = [
            // Random values.
            (SAMPLE, 0b0110_0011_1000_0101, 0b0000_0010_0000_0101),
            (SAMPLE, 0b1110_1011_1110_1111, 0b1110_1001_0010_0011),
            (0x00012567, 0xff00fff0, 0x12005670),
            // Identities.
            (u32::MAX, u32::MAX, u32::MAX),
            (0, u32::MAX, 0),
            (u32::MAX, 0, 0),
        ];
        for (value, mask, want) in cases {
            assert_eq!(_pdep_u32(value, mask), want);
        }
    }

    /// Zeroing of high bits through `_bzhi_u32`.
    unsafe fn check_bzhi() {
        assert_eq!(_bzhi_u32(0b1111_0010u32, 5), 0b0001_0010u32);

        // The range runs past 255 so that the index wrap-around is exercised too.
        for index in 0u32..=512 {
            // Only the lowest eight bits of the index are used, hence `index & 0xff`.
            // An index at or past the operand width leaves the input unchanged,
            // hence the clamp to 32.
            let kept = (index & 0xff).min(32);
            let expected = if kept < 32 { (1u32 << kept) - 1 } else { u32::MAX };
            let actual = _bzhi_u32(u32::MAX, index);
            assert_eq!(expected, actual);
        }
    }

    check_bextr();
    check_pext();
    check_pdep();
    check_bzhi();
}
| 120 | + |
/// Empty stand-in compiled on every target other than x86_64, so that `main`
/// can call `test_bmi_64` unconditionally.
#[cfg(not(target_arch = "x86_64"))]
unsafe fn test_bmi_64() {}

/// Exercises the 64-bit BMI intrinsics: `bextr`, `pext`, `pdep` and `bzhi`.
///
/// # Safety
///
/// Callers are expected to have confirmed BMI1 and BMI2 support first, as `main` does.
#[cfg(target_arch = "x86_64")]
unsafe fn test_bmi_64() {
    /// Bit-field extraction through `_bextr_u64` and `_bextr2_u64`.
    unsafe fn check_bextr() {
        assert_eq!(_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64);

        for k in 0u32..16 {
            // A four-bit field read from an all-ones input is always 0b1111.
            assert_eq!(_bextr_u64(u64::MAX, k, 4), 0b1111);
            // A `k`-bit field read from an all-ones input has exactly `k` low bits set.
            assert_eq!(_bextr_u64(u64::MAX, 32, k), (1u64 << k) - 1);
        }

        // Lengths well beyond the operand width are included on purpose, to verify
        // which bits of the length the instruction actually uses.
        for len in 0u32..256 {
            // A length past the operand width extracts every bit, hence the `min(64)`.
            assert_eq!(_bextr_u64(u64::MAX, 0, len).count_ones(), len.min(64));
        }

        // A zero-length field is empty regardless of the start position.
        for start in 0u32..256 {
            assert_eq!(_bextr_u64(u64::MAX, start, 0), 0);
        }

        // Completely random values. These also check that the function works
        // even if upper bits of the control value are set.
        let random_cases: [(u64, u64, u64); 3] = [
            (0x4ff6cfbcea75f055, 0x216642e228425719, 0x27fb67de75),
            (0xb05e991e6f6e1b6, 0xc76dd5d7f67dfc14, 0xb05e991e6f),
            (0x5a3a629e323d848f, 0x95ac507d20e7719, 0x2d1d314f19),
        ];
        for (source, control, want) in random_cases {
            assert_eq!(_bextr2_u64(source, control), want);
        }
    }

    /// Parallel bit extraction through `_pext_u64`.
    unsafe fn check_pext() {
        const SAMPLE: u64 = 0b1011_1110_1001_0011;

        // Each row is (value, mask, expected result).
        let cases: [(u64, u64, u64); 6] = [
            // Random values.
            (SAMPLE, 0b0110_0011_1000_0101, 0b0000_0000_0011_0101),
            (SAMPLE, 0b1110_1011_1110_1111, 0b0001_0111_0100_0011),
            (0x12345678, 0xff00fff0, 0x00012567),
            // Identities.
            (u64::MAX, u64::MAX, u64::MAX),
            (u64::MAX, 0, 0),
            (0, u64::MAX, 0),
        ];
        for (value, mask, want) in cases {
            assert_eq!(_pext_u64(value, mask), want);
        }
    }

    /// Parallel bit deposit through `_pdep_u64`.
    unsafe fn check_pdep() {
        const SAMPLE: u64 = 0b1011_1110_1001_0011;

        // Each row is (value, mask, expected result).
        let cases: [(u64, u64, u64); 6] = [
            // Random values.
            (SAMPLE, 0b0110_0011_1000_0101, 0b0000_0010_0000_0101),
            (SAMPLE, 0b1110_1011_1110_1111, 0b1110_1001_0010_0011),
            (0x00012567, 0xff00fff0, 0x12005670),
            // Identities.
            (u64::MAX, u64::MAX, u64::MAX),
            (0, u64::MAX, 0),
            (u64::MAX, 0, 0),
        ];
        for (value, mask, want) in cases {
            assert_eq!(_pdep_u64(value, mask), want);
        }
    }

    /// Zeroing of high bits through `_bzhi_u64`.
    unsafe fn check_bzhi() {
        assert_eq!(_bzhi_u64(0b1111_0010u64, 5), 0b0001_0010u64);

        // The range runs past 255 so that the index wrap-around is exercised too.
        for index in 0u32..=512 {
            // Only the lowest eight bits of the index are used, hence `index & 0xff`.
            // An index at or past the operand width leaves the input unchanged,
            // hence the clamp to 64.
            let kept = (index & 0xff).min(64);
            let expected = if kept < 64 { (1u64 << kept) - 1 } else { u64::MAX };
            let actual = _bzhi_u64(u64::MAX, index);
            assert_eq!(expected, actual);
        }
    }

    check_bextr();
    check_pext();
    check_pdep();
    check_bzhi();
}
0 commit comments