@@ -1,8 +1,7 @@
 //! Masks that take up full SIMD vector registers.
 
-use super::{to_bitmask::ToBitMaskArray, MaskElement};
 use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
+use crate::simd::{LaneCount, MaskElement, Simd, SupportedLaneCount};
 
 #[repr(transparent)]
 pub struct Mask<T, const N: usize>(Simd<T, N>)
@@ -143,95 +142,105 @@ where
     }
 
     #[inline]
-    #[must_use = "method returns a new array and does not mutate the original value"]
-    pub fn to_bitmask_array<const M: usize>(self) -> [u8; M]
-    where
-        super::Mask<T, N>: ToBitMaskArray,
-    {
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    pub fn to_bitmask_vector(self) -> Simd<T, N> {
+        let mut bitmask = Self::splat(false).to_int();
+
        // Safety: Bytes is the right size array
        unsafe {
            // Compute the bitmask
-            let bitmask: <super::Mask<T, N> as ToBitMaskArray>::BitMaskArray =
+            let mut bytes: <LaneCount<N> as SupportedLaneCount>::BitMask =
                intrinsics::simd_bitmask(self.0);
 
-            // Transmute to the return type
-            let mut bitmask: [u8; M] = core::mem::transmute_copy(&bitmask);
-
            // LLVM assumes bit order should match endianness
            if cfg!(target_endian = "big") {
-                for x in bitmask.as_mut() {
-                    *x = x.reverse_bits();
+                for x in bytes.as_mut() {
+                    *x = x.reverse_bits()
                }
-            };
+            }
 
-            bitmask
+            assert!(
+                core::mem::size_of::<Simd<T, N>>()
+                    >= core::mem::size_of::<<LaneCount<N> as SupportedLaneCount>::BitMask>()
+            );
+            core::ptr::copy_nonoverlapping(
+                bytes.as_ref().as_ptr(),
+                bitmask.as_mut_array().as_mut_ptr() as _,
+                bytes.as_ref().len(),
+            );
        }
+
+        bitmask
    }
 
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn from_bitmask_array<const M: usize>(mut bitmask: [u8; M]) -> Self
-    where
-        super::Mask<T, N>: ToBitMaskArray,
-    {
+    pub fn from_bitmask_vector(bitmask: Simd<T, N>) -> Self {
+        let mut bytes = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
+
        // Safety: Bytes is the right size array
        unsafe {
+            assert!(
+                core::mem::size_of::<Simd<T, N>>()
+                    >= core::mem::size_of::<<LaneCount<N> as SupportedLaneCount>::BitMask>()
+            );
+            core::ptr::copy_nonoverlapping(
+                bitmask.as_array().as_ptr() as _,
+                bytes.as_mut().as_mut_ptr(),
+                bytes.as_mut().len(),
+            );
+
            // LLVM assumes bit order should match endianness
            if cfg!(target_endian = "big") {
-                for x in bitmask.as_mut() {
+                for x in bytes.as_mut() {
                    *x = x.reverse_bits();
                }
            }
 
-            // Transmute to the bitmask
-            let bitmask: <super::Mask<T, N> as ToBitMaskArray>::BitMaskArray =
-                core::mem::transmute_copy(&bitmask);
-
            // Compute the regular mask
            Self::from_int_unchecked(intrinsics::simd_select_bitmask(
-                bitmask,
+                bytes,
                Self::splat(true).to_int(),
                Self::splat(false).to_int(),
            ))
        }
    }
 
    #[inline]
-    pub(crate) fn to_bitmask_integer<U: ReverseBits>(self) -> U
-    where
-        super::Mask<T, N>: ToBitMask<BitMask = U>,
-    {
-        // Safety: U is required to be the appropriate bitmask type
-        let bitmask: U = unsafe { intrinsics::simd_bitmask(self.0) };
+    pub(crate) fn to_bitmask_integer(self) -> u64 {
+        let resized = self.to_int().extend::<64>(T::FALSE);
+
+        // SAFETY: `resized` is an integer vector with length 64
+        let bitmask: u64 = unsafe { intrinsics::simd_bitmask(resized) };
 
        // LLVM assumes bit order should match endianness
        if cfg!(target_endian = "big") {
-            bitmask.reverse_bits(N)
+            bitmask.reverse_bits()
        } else {
            bitmask
        }
    }
 
    #[inline]
-    pub(crate) fn from_bitmask_integer<U: ReverseBits>(bitmask: U) -> Self
-    where
-        super::Mask<T, N>: ToBitMask<BitMask = U>,
-    {
+    pub(crate) fn from_bitmask_integer(bitmask: u64) -> Self {
        // LLVM assumes bit order should match endianness
        let bitmask = if cfg!(target_endian = "big") {
-            bitmask.reverse_bits(N)
+            bitmask.reverse_bits()
        } else {
            bitmask
        };
 
-        // Safety: U is required to be the appropriate bitmask type
-        unsafe {
-            Self::from_int_unchecked(intrinsics::simd_select_bitmask(
+        // SAFETY: `mask` is the correct bitmask type for a u64 bitmask
+        let mask: Simd<T, 64> = unsafe {
+            intrinsics::simd_select_bitmask(
                bitmask,
-                Self::splat(true).to_int(),
-                Self::splat(false).to_int(),
-            ))
-        }
+                Simd::<T, 64>::splat(T::TRUE),
+                Simd::<T, 64>::splat(T::FALSE),
+            )
+        };
+
+        // SAFETY: `mask` only contains `T::TRUE` or `T::FALSE`
+        unsafe { Self::from_int_unchecked(mask.extend::<N>(T::FALSE)) }
    }
 
    #[inline]
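
Similarly, the rewritten `to_bitmask_integer`/`from_bitmask_integer` widen the mask to 64 lanes and, on big-endian targets, apply `u64::reverse_bits` so that lane `i` stays at bit `i`. A small plain-Rust illustration of that bit reversal, independent of the library code:

fn main() {
    // Suppose lanes 0 and 2 are set, so the canonical bitmask is 0b0101.
    let bitmask: u64 = 0b0101;

    // `u64::reverse_bits` moves bit `i` to bit `63 - i`, so the two set bits
    // end up at positions 63 and 61.
    let reversed = bitmask.reverse_bits();
    assert_eq!(reversed, 0xA000_0000_0000_0000);

    // Reversing twice round-trips, which is why both directions of the
    // conversion can apply the same fix-up.
    assert_eq!(reversed.reverse_bits(), bitmask);
}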