|
1 | 1 | //! Advanced Bit Manipulation (ABM) instructions
|
2 | 2 | //!
|
3 |
| -//! That is, POPCNT and LZCNT. These instructions have their own CPUID bits to |
4 |
| -//! indicate support. |
5 |
| -//! |
6 |
| -//! TODO: it is unclear which target feature to use here. SSE4.2 should be good |
7 |
| -//! enough but we might need to use BMI for LZCNT if there are any problems. |
| 3 | +//! The POPCNT and LZCNT instructions have their own CPUID bits to indicate support. |
8 | 4 | //!
|
9 | 5 | //! The references are:
|
10 | 6 | //!
|
|
19 | 15 | ///
|
20 | 16 | /// When the operand is zero, it returns its size in bits.
|
21 | 17 | #[inline(always)]
|
22 |
| -#[target_feature = "+sse4.2"] |
| 18 | +#[target_feature = "+lzcnt"] |
23 | 19 | pub fn _lzcnt_u32(x: u32) -> u32 { x.leading_zeros() }
|
24 | 20 |
|
25 | 21 | /// Counts the leading most significant zero bits.
|
26 | 22 | ///
|
27 | 23 | /// When the operand is zero, it returns its size in bits.
|
28 | 24 | #[inline(always)]
|
29 |
| -#[target_feature = "+sse4.2"] |
| 25 | +#[target_feature = "+lzcnt"] |
30 | 26 | pub fn _lzcnt_u64(x: u64) -> u64 { x.leading_zeros() as u64 }
|
31 | 27 |
|
32 | 28 | /// Counts the bits that are set.
|
33 | 29 | #[inline(always)]
|
34 |
| -#[target_feature = "+sse4.2"] |
| 30 | +#[target_feature = "+popcnt"] |
35 | 31 | pub fn _popcnt32(x: u32) -> u32 { x.count_ones() }
|
36 | 32 |
|
37 | 33 | /// Counts the bits that are set.
|
38 | 34 | #[inline(always)]
|
39 |
| -#[target_feature = "+sse4.2"] |
| 35 | +#[target_feature = "+popcnt"] |
40 | 36 | pub fn _popcnt64(x: u64) -> u64 { x.count_ones() as u64 }
|
41 | 37 |
|
42 |
| -#[cfg(all(test, target_feature = "sse4.2", any(target_arch = "x86", target_arch = "x86_64")))] |
| 38 | +#[cfg(all(test, target_feature = "bmi", any(target_arch = "x86", target_arch = "x86_64")))] |
43 | 39 | mod tests {
|
44 | 40 | use x86::abm;
|
45 | 41 |
|
46 | 42 | #[test]
|
47 |
| - #[target_feature = "+sse4.2"] |
| 43 | + #[target_feature = "+lzcnt"] |
48 | 44 | fn _lzcnt_u32() {
|
49 | 45 | assert_eq!(abm::_lzcnt_u32(0b0101_1010u32), 25u32);
|
50 | 46 | }
|
51 | 47 |
|
52 | 48 | #[test]
|
53 |
| - #[target_feature = "+sse4.2"] |
| 49 | + #[target_feature = "+lzcnt"] |
54 | 50 | fn _lzcnt_u64() {
|
55 | 51 | assert_eq!(abm::_lzcnt_u64(0b0101_1010u64), 57u64);
|
56 | 52 | }
|
57 | 53 |
|
58 | 54 | #[test]
|
59 |
| - #[target_feature = "+sse4.2"] |
| 55 | + #[target_feature = "+popcnt"] |
60 | 56 | fn _popcnt32() {
|
61 | 57 | assert_eq!(abm::_popcnt32(0b0101_1010u32), 4);
|
62 | 58 | }
|
63 | 59 |
|
64 | 60 | #[test]
|
65 |
| - #[target_feature = "+sse4.2"] |
| 61 | + #[target_feature = "+popcnt"] |
66 | 62 | fn _popcnt64() {
|
67 | 63 | assert_eq!(abm::_popcnt64(0b0101_1010u64), 4);
|
68 | 64 | }
|
|
0 commit comments