Commit cc9699d
Auto merge of #3674 - TDecking:bmi, r=RalfJung
Implement LLVM x86 bmi intrinsics

This implements the intrinsics for both the BMI1 and BMI2 ISA extensions. As far as LLVM is concerned, all of these intrinsics live in the same `llvm.x86.bmi` namespace, which is why it is arguably better to bundle the implementations of the two extensions.
2 parents 314f7f2 + 459eada commit cc9699d
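
For context, the LLVM link names handled here all share the `llvm.x86.bmi.` prefix and end in a `.32` or `.64` operand-width suffix (e.g. `llvm.x86.bmi.bextr.32`, `llvm.x86.bmi.pdep.64`). A minimal standalone sketch of that name splitting, mirroring the suffix handling in the shim below (the helper name is invented for illustration):

    // Hypothetical helper: split an `llvm.x86.bmi.*` link name into (operation, is_64_bit).
    fn split_bmi_name(link_name: &str) -> Option<(&str, bool)> {
        // Strip the shared namespace prefix, then the operand-width suffix.
        let rest = link_name.strip_prefix("llvm.x86.bmi.")?;
        if let Some(op) = rest.strip_suffix(".64") {
            Some((op, true))
        } else if let Some(op) = rest.strip_suffix(".32") {
            Some((op, false))
        } else {
            None
        }
    }

    fn main() {
        assert_eq!(split_bmi_name("llvm.x86.bmi.bextr.32"), Some(("bextr", false)));
        assert_eq!(split_bmi_name("llvm.x86.bmi.pdep.64"), Some(("pdep", true)));
    }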

3 files changed: +330 -0 lines changed

src/shims/x86/bmi.rs

Lines changed: 108 additions & 0 deletions
@@ -0,0 +1,108 @@
use rustc_span::Symbol;
use rustc_target::spec::abi::Abi;

use crate::*;

impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
    fn emulate_x86_bmi_intrinsic(
        &mut self,
        link_name: Symbol,
        abi: Abi,
        args: &[OpTy<'tcx>],
        dest: &MPlaceTy<'tcx>,
    ) -> InterpResult<'tcx, EmulateItemResult> {
        let this = self.eval_context_mut();

        // Prefix should have already been checked.
        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.bmi.").unwrap();

        // The intrinsics are suffixed with the bit size of their operands.
        let (is_64_bit, unprefixed_name) = if unprefixed_name.ends_with("64") {
            (true, unprefixed_name.strip_suffix(".64").unwrap_or(""))
        } else {
            (false, unprefixed_name.strip_suffix(".32").unwrap_or(""))
        };

        // All intrinsics of the "bmi" namespace belong to the "bmi2" ISA extension.
        // The exception is "bextr", which belongs to "bmi1".
        let target_feature = if unprefixed_name == "bextr" { "bmi1" } else { "bmi2" };
        this.expect_target_feature_for_intrinsic(link_name, target_feature)?;

        if is_64_bit && this.tcx.sess.target.arch != "x86_64" {
            return Ok(EmulateItemResult::NotSupported);
        }

        let [left, right] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
        let left = this.read_scalar(left)?;
        let right = this.read_scalar(right)?;

        let left = if is_64_bit { left.to_u64()? } else { u64::from(left.to_u32()?) };
        let right = if is_64_bit { right.to_u64()? } else { u64::from(right.to_u32()?) };

        let result = match unprefixed_name {
            // Extract a contiguous range of bits from an unsigned integer.
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bextr_u32
            "bextr" => {
                let start = u32::try_from(right & 0xff).unwrap();
                let len = u32::try_from((right >> 8) & 0xff).unwrap();
                let shifted = left.checked_shr(start).unwrap_or(0);
                // Keep the `len` lowest bits of `shifted`, or all bits if `len` is too big.
                if len >= 64 { shifted } else { shifted & 1u64.wrapping_shl(len).wrapping_sub(1) }
            }
            // Create a copy of an unsigned integer with bits above a certain index cleared.
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bzhi_u32
            "bzhi" => {
                let index = u32::try_from(right & 0xff).unwrap();
                // Keep the `index` lowest bits of `left`, or all bits if `index` is too big.
                if index >= 64 { left } else { left & 1u64.wrapping_shl(index).wrapping_sub(1) }
            }
            // Extract bit values of an unsigned integer at positions marked by a mask.
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pext_u32
            "pext" => {
                let mut mask = right;
                let mut i = 0u32;
                let mut result = 0;
                // Iterate over the mask one 1-bit at a time, from
                // the least significant bit to the most significant bit.
                while mask != 0 {
                    // Extract the bit marked by the mask's least significant set bit
                    // and put it at position `i` of the result.
                    result |= u64::from(left & (1 << mask.trailing_zeros()) != 0) << i;
                    i = i.wrapping_add(1);
                    // Clear the least significant set bit.
                    mask &= mask.wrapping_sub(1);
                }
                result
            }
            // Deposit bit values of an unsigned integer to positions marked by a mask.
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pdep_u32
            "pdep" => {
                let mut mask = right;
                let mut set = left;
                let mut result = 0;
                // Iterate over the mask one 1-bit at a time, from
                // the least significant bit to the most significant bit.
                while mask != 0 {
                    // Put the rightmost bit of `set` at the position of the current `mask` bit.
                    result |= (set & 1) << mask.trailing_zeros();
                    // Go to the next bit of `set`.
                    set >>= 1;
                    // Clear the least significant set bit.
                    mask &= mask.wrapping_sub(1);
                }
                result
            }
            _ => return Ok(EmulateItemResult::NotSupported),
        };

        let result = if is_64_bit {
            Scalar::from_u64(result)
        } else {
            Scalar::from_u32(u32::try_from(result).unwrap())
        };
        this.write_scalar(result, dest)?;

        Ok(EmulateItemResult::NeedsReturn)
    }
}
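
To make the `pext`/`pdep` loops above easier to follow outside the interpreter machinery, here is a plain-Rust sketch of the same bit-by-bit algorithm (function names invented for illustration, not part of the shim); it also demonstrates the round-trip identity pdep(pext(x, m), m) == x & m:

    // Reference model of the pext loop: gather the bits of `src` selected by `mask`
    // into the low bits of the result, preserving their order.
    fn pext_model(src: u64, mut mask: u64) -> u64 {
        let mut i = 0u32;
        let mut result = 0u64;
        while mask != 0 {
            // Take the bit of `src` at the mask's least significant set bit
            // and place it at position `i` of the result.
            result |= u64::from((src & (1 << mask.trailing_zeros())) != 0) << i;
            i += 1;
            // Clear the least significant set bit of the mask.
            mask &= mask - 1;
        }
        result
    }

    // Reference model of the pdep loop: scatter the low bits of `src`
    // to the positions selected by `mask`.
    fn pdep_model(mut src: u64, mut mask: u64) -> u64 {
        let mut result = 0u64;
        while mask != 0 {
            // Place the lowest remaining bit of `src` at the mask's least significant set bit.
            result |= (src & 1) << mask.trailing_zeros();
            src >>= 1;
            mask &= mask - 1;
        }
        result
    }

    fn main() {
        let x = 0b1011_1110_1001_0011u64;
        let m = 0b0110_0011_1000_0101u64;
        // Same values as the 16-bit pext test case below.
        assert_eq!(pext_model(x, m), 0b011_0101);
        // Depositing the extracted bits back through the same mask recovers `x & m`.
        assert_eq!(pdep_model(pext_model(x, m), m), x & m);
    }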

src/shims/x86/mod.rs

Lines changed: 6 additions & 0 deletions
@@ -14,6 +14,7 @@ use helpers::bool_to_simd_element;
 mod aesni;
 mod avx;
 mod avx2;
+mod bmi;
 mod sse;
 mod sse2;
 mod sse3;
@@ -113,6 +114,11 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 pclmulqdq(this, left, right, imm, dest)?;
             }
 
+            name if name.starts_with("bmi.") => {
+                return bmi::EvalContextExt::emulate_x86_bmi_intrinsic(
+                    this, link_name, abi, args, dest,
+                );
+            }
             name if name.starts_with("sse.") => {
                 return sse::EvalContextExt::emulate_x86_sse_intrinsic(
                     this, link_name, abi, args, dest,
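
Note that this dispatcher matches on the name with the common `llvm.x86.` prefix already stripped (hence the bare `bmi.` prefix), while the full `link_name` is still forwarded to the new module, which strips `llvm.x86.bmi.` itself. A minimal sketch of this routing-by-prefix pattern (illustrative only, not the real dispatcher):

    // Hypothetical stand-in for the real dispatcher: route by the remaining prefix.
    fn route(unprefixed_name: &str) -> &'static str {
        match unprefixed_name {
            name if name.starts_with("bmi.") => "x86 BMI shim",
            name if name.starts_with("sse.") => "x86 SSE shim",
            _ => "not supported",
        }
    }

    fn main() {
        assert_eq!(route("bmi.pdep.64"), "x86 BMI shim");
        assert_eq!(route("bmi.bextr.32"), "x86 BMI shim");
        assert_eq!(route("sse.min.ss"), "x86 SSE shim");
    }
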
Lines changed: 216 additions & 0 deletions
@@ -0,0 +1,216 @@
// Ignore everything except x86 and x86_64
// Any new targets that are added to CI should be ignored here.
// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.)
//@ignore-target-aarch64
//@ignore-target-arm
//@ignore-target-avr
//@ignore-target-s390x
//@ignore-target-thumbv7em
//@ignore-target-wasm32
//@compile-flags: -C target-feature=+bmi1,+bmi2

#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

fn main() {
    // BMI1 and BMI2 are independent of each other, so both must be checked.
    assert!(is_x86_feature_detected!("bmi1"));
    assert!(is_x86_feature_detected!("bmi2"));

    unsafe {
        test_bmi_32();
        test_bmi_64();
    }
}

/// Test the 32-bit variants of the intrinsics.
unsafe fn test_bmi_32() {
    unsafe fn test_bextr_u32() {
        let r = _bextr_u32(0b0101_0000u32, 4, 4);
        assert_eq!(r, 0b0000_0101u32);

        for i in 0..16 {
            assert_eq!(_bextr_u32(u32::MAX, i, 4), 0b1111);
            assert_eq!(_bextr_u32(u32::MAX, 4, i), (1 << i) - 1);
        }

        // Ensure that indices larger than the bit count are covered.
        // It is important to go above 32 in order to verify the bit selection
        // of the instruction.

        for i in 0..256 {
            // If the index is out of bounds, the original input won't be changed, thus the `min(32)`.
            assert_eq!(_bextr_u32(u32::MAX, 0, i).count_ones(), i.min(32));
        }

        for i in 0..256 {
            assert_eq!(_bextr_u32(u32::MAX, i, 0), 0);
        }

        // Test cases with completely random values. These cases also test
        // that the function works even if upper bits of the control value are set.
        assert_eq!(_bextr2_u32(0x7408a392, 0x54ef705), 0x3a0451c);
        assert_eq!(_bextr2_u32(0xbc5a3494, 0xdd193203), 0x178b4692);
        assert_eq!(_bextr2_u32(0xc0332325, 0xf96e207), 0x1806646);
    }
    test_bextr_u32();

    unsafe fn test_pext_u32() {
        let n = 0b1011_1110_1001_0011u32;

        let m0 = 0b0110_0011_1000_0101u32;
        let s0 = 0b0000_0000_0011_0101u32;

        let m1 = 0b1110_1011_1110_1111u32;
        let s1 = 0b0001_0111_0100_0011u32;

        // Testing of random values.
        assert_eq!(_pext_u32(n, m0), s0);
        assert_eq!(_pext_u32(n, m1), s1);
        assert_eq!(_pext_u32(0x12345678, 0xff00fff0), 0x00012567);

        // Testing of various identities.
        assert_eq!(_pext_u32(u32::MAX, u32::MAX), u32::MAX);
        assert_eq!(_pext_u32(u32::MAX, 0), 0);
        assert_eq!(_pext_u32(0, u32::MAX), 0);
    }
    test_pext_u32();

    unsafe fn test_pdep_u32() {
        let n = 0b1011_1110_1001_0011u32;

        let m0 = 0b0110_0011_1000_0101u32;
        let s0 = 0b0000_0010_0000_0101u32;

        let m1 = 0b1110_1011_1110_1111u32;
        let s1 = 0b1110_1001_0010_0011u32;

        // Testing of random values.
        assert_eq!(_pdep_u32(n, m0), s0);
        assert_eq!(_pdep_u32(n, m1), s1);
        assert_eq!(_pdep_u32(0x00012567, 0xff00fff0), 0x12005670);

        // Testing of various identities.
        assert_eq!(_pdep_u32(u32::MAX, u32::MAX), u32::MAX);
        assert_eq!(_pdep_u32(0, u32::MAX), 0);
        assert_eq!(_pdep_u32(u32::MAX, 0), 0);
    }
    test_pdep_u32();

    unsafe fn test_bzhi_u32() {
        let n = 0b1111_0010u32;
        let s = 0b0001_0010u32;
        assert_eq!(_bzhi_u32(n, 5), s);

        // Ensure that indices larger than the bit count are covered.
        // It is important to go above 32 in order to verify the bit selection
        // of the instruction.
        for i in 0..=512 {
            // The instruction only takes the lowest eight bits to generate the index, hence `i & 0xff`.
            // If the index is out of bounds, the original input won't be changed, thus the `min(32)`.
            let expected = 1u32.checked_shl((i & 0xff).min(32)).unwrap_or(0).wrapping_sub(1);
            let actual = _bzhi_u32(u32::MAX, i);
            assert_eq!(expected, actual);
        }
    }
    test_bzhi_u32();
}

#[cfg(not(target_arch = "x86_64"))]
unsafe fn test_bmi_64() {}

/// Test the 64-bit variants of the intrinsics.
#[cfg(target_arch = "x86_64")]
unsafe fn test_bmi_64() {
    unsafe fn test_bextr_u64() {
        let r = _bextr_u64(0b0101_0000u64, 4, 4);
        assert_eq!(r, 0b0000_0101u64);

        for i in 0..16 {
            assert_eq!(_bextr_u64(u64::MAX, i, 4), 0b1111);
            assert_eq!(_bextr_u64(u64::MAX, 32, i), (1 << i) - 1);
        }

        // Ensure that indices larger than the bit count are covered.
        // It is important to go above 64 in order to verify the bit selection
        // of the instruction.

        for i in 0..256 {
            // If the index is out of bounds, the original input won't be changed, thus the `min(64)`.
            assert_eq!(_bextr_u64(u64::MAX, 0, i).count_ones(), i.min(64));
        }

        for i in 0..256 {
            assert_eq!(_bextr_u64(u64::MAX, i, 0), 0);
        }

        // Test cases with completely random values. These cases also test
        // that the function works even if upper bits of the control value are set.
        assert_eq!(_bextr2_u64(0x4ff6cfbcea75f055, 0x216642e228425719), 0x27fb67de75);
        assert_eq!(_bextr2_u64(0xb05e991e6f6e1b6, 0xc76dd5d7f67dfc14), 0xb05e991e6f);
        assert_eq!(_bextr2_u64(0x5a3a629e323d848f, 0x95ac507d20e7719), 0x2d1d314f19);
    }
    test_bextr_u64();

    unsafe fn test_pext_u64() {
        let n = 0b1011_1110_1001_0011u64;

        let m0 = 0b0110_0011_1000_0101u64;
        let s0 = 0b0000_0000_0011_0101u64;

        let m1 = 0b1110_1011_1110_1111u64;
        let s1 = 0b0001_0111_0100_0011u64;

        // Testing of random values.
        assert_eq!(_pext_u64(n, m0), s0);
        assert_eq!(_pext_u64(n, m1), s1);
        assert_eq!(_pext_u64(0x12345678, 0xff00fff0), 0x00012567);

        // Testing of various identities.
        assert_eq!(_pext_u64(u64::MAX, u64::MAX), u64::MAX);
        assert_eq!(_pext_u64(u64::MAX, 0), 0);
        assert_eq!(_pext_u64(0, u64::MAX), 0);
    }
    test_pext_u64();

    unsafe fn test_pdep_u64() {
        let n = 0b1011_1110_1001_0011u64;

        let m0 = 0b0110_0011_1000_0101u64;
        let s0 = 0b0000_0010_0000_0101u64;

        let m1 = 0b1110_1011_1110_1111u64;
        let s1 = 0b1110_1001_0010_0011u64;

        // Testing of random values.
        assert_eq!(_pdep_u64(n, m0), s0);
        assert_eq!(_pdep_u64(n, m1), s1);
        assert_eq!(_pdep_u64(0x00012567, 0xff00fff0), 0x12005670);

        // Testing of various identities.
        assert_eq!(_pdep_u64(u64::MAX, u64::MAX), u64::MAX);
        assert_eq!(_pdep_u64(0, u64::MAX), 0);
        assert_eq!(_pdep_u64(u64::MAX, 0), 0);
    }
    test_pdep_u64();

    unsafe fn test_bzhi_u64() {
        let n = 0b1111_0010u64;
        let s = 0b0001_0010u64;
        assert_eq!(_bzhi_u64(n, 5), s);

        // Ensure that indices larger than the bit count are covered.
        // It is important to go above 255 in order to verify the bit selection
        // of the instruction.
        for i in 0..=512 {
            // The instruction only takes the lowest eight bits to generate the index, hence `i & 0xff`.
            // If the index is out of bounds, the original input won't be changed, thus the `min(64)`.
            let expected = 1u64.checked_shl((i & 0xff).min(64)).unwrap_or(0).wrapping_sub(1);
            let actual = _bzhi_u64(u64::MAX, i);
            assert_eq!(expected, actual);
        }
    }
    test_bzhi_u64();
}
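
As a sanity check on the `_bextr2_*` cases with random control values: the control word packs the start index into bits 0..8 and the extraction length into bits 8..16 (the layout the shim decodes). Decoding the first 32-bit case by hand, as a standalone sketch without intrinsics:

    fn main() {
        let src: u32 = 0x7408a392;
        let control: u32 = 0x054ef705;
        let start = control & 0xff;       // 0x05
        let len = (control >> 8) & 0xff;  // 0xf7 = 247, i.e. >= 32, so no bits are masked off
        let shifted = src >> start;       // 0x7408a392 >> 5 == 0x03a0451c
        let result = if len >= 32 { shifted } else { shifted & ((1u32 << len) - 1) };
        // Matches the expected value asserted in the test above.
        assert_eq!(result, 0x3a0451c);
    }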
