From 4c40ff6a2472124cd061721463f329184ca76fa3 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 3 Nov 2018 15:48:29 +0100 Subject: [PATCH] Implement rotate using funnel shift on LLVM >= 7 Implement the rotate_left and rotate_right operations using llvm.fshl and llvm.fshr if they are available (LLVM >= 7). Originally I wanted to expose the funnel_shift_left and funnel_shift_right intrinsics and implement rotate_left and rotate_right on top of them. However, emulation of funnel shifts requires emitting a conditional to check for zero shift amount, which is not necessary for rotates. I was uncomfortable doing that here, as I don't want to rely on LLVM to optimize away that conditional (and for variable rotates, I'm not sure it can). We should revisit that question when we raise our minimum version requirement to LLVM 7 and don't need emulation code anymore. --- src/libcore/intrinsics.rs | 14 +++++++++++ src/libcore/num/mod.rs | 14 +++++++++-- src/librustc_codegen_llvm/context.rs | 12 +++++++++ .../debuginfo/metadata.rs | 6 ++--- src/librustc_codegen_llvm/intrinsic.rs | 25 ++++++++++++++++++- src/librustc_codegen_llvm/llvm_util.rs | 4 +++ src/librustc_codegen_llvm/mir/mod.rs | 3 ++- src/librustc_mir/interpret/intrinsics.rs | 18 +++++++++++++ src/librustc_mir/transform/qualify_consts.rs | 2 ++ src/librustc_typeck/check/intrinsic.rs | 3 ++- 10 files changed, 93 insertions(+), 8 deletions(-) diff --git a/src/libcore/intrinsics.rs b/src/libcore/intrinsics.rs index cceae9249e456..7ed6e4a8f51eb 100644 --- a/src/libcore/intrinsics.rs +++ b/src/libcore/intrinsics.rs @@ -1465,6 +1465,20 @@ extern "rust-intrinsic" { /// y < 0 or y >= N, where N is the width of T in bits. pub fn unchecked_shr<T>(x: T, y: T) -> T; + /// Performs rotate left. + /// The stabilized versions of this intrinsic are available on the integer + /// primitives via the `rotate_left` method. 
For example, + /// [`std::u32::rotate_left`](../../std/primitive.u32.html#method.rotate_left) + #[cfg(not(stage0))] + pub fn rotate_left<T>(x: T, y: T) -> T; + + /// Performs rotate right. + /// The stabilized versions of this intrinsic are available on the integer + /// primitives via the `rotate_right` method. For example, + /// [`std::u32::rotate_right`](../../std/primitive.u32.html#method.rotate_right) + #[cfg(not(stage0))] + pub fn rotate_right<T>(x: T, y: T) -> T; + /// Returns (a + b) mod 2<sup>N</sup>, where N is the width of T in bits. /// The stabilized versions of this intrinsic are available on the integer /// primitives via the `wrapping_add` method. For example, diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs index c6cbeea5a0ea6..090147c9fe4fa 100644 --- a/src/libcore/num/mod.rs +++ b/src/libcore/num/mod.rs @@ -2301,7 +2301,12 @@ assert_eq!(n.rotate_left(", $rot, "), m); #[rustc_const_unstable(feature = "const_int_rotate")] #[inline] pub const fn rotate_left(self, n: u32) -> Self { - (self << (n % $BITS)) | (self >> (($BITS - (n % $BITS)) % $BITS)) + #[cfg(not(stage0))] { + unsafe { intrinsics::rotate_left(self, n as $SelfT) } + } + #[cfg(stage0)] { + (self << (n % $BITS)) | (self >> (($BITS - (n % $BITS)) % $BITS)) + } } } @@ -2326,7 +2331,12 @@ assert_eq!(n.rotate_right(", $rot, "), m); #[rustc_const_unstable(feature = "const_int_rotate")] #[inline] pub const fn rotate_right(self, n: u32) -> Self { - (self >> (n % $BITS)) | (self << (($BITS - (n % $BITS)) % $BITS)) + #[cfg(not(stage0))] { + unsafe { intrinsics::rotate_right(self, n as $SelfT) } + } + #[cfg(stage0)] { + (self >> (n % $BITS)) | (self << (($BITS - (n % $BITS)) % $BITS)) + } } } diff --git a/src/librustc_codegen_llvm/context.rs b/src/librustc_codegen_llvm/context.rs index 241f7989e1681..db5a3937e5226 100644 --- a/src/librustc_codegen_llvm/context.rs +++ b/src/librustc_codegen_llvm/context.rs @@ -726,6 +726,18 @@ fn declare_intrinsic(cx: &CodegenCx<'ll, '_>, key: &str) -> Option<&'ll Value> 
{ ifn!("llvm.bitreverse.i64", fn(t_i64) -> t_i64); ifn!("llvm.bitreverse.i128", fn(t_i128) -> t_i128); + ifn!("llvm.fshl.i8", fn(t_i8, t_i8, t_i8) -> t_i8); + ifn!("llvm.fshl.i16", fn(t_i16, t_i16, t_i16) -> t_i16); + ifn!("llvm.fshl.i32", fn(t_i32, t_i32, t_i32) -> t_i32); + ifn!("llvm.fshl.i64", fn(t_i64, t_i64, t_i64) -> t_i64); + ifn!("llvm.fshl.i128", fn(t_i128, t_i128, t_i128) -> t_i128); + + ifn!("llvm.fshr.i8", fn(t_i8, t_i8, t_i8) -> t_i8); + ifn!("llvm.fshr.i16", fn(t_i16, t_i16, t_i16) -> t_i16); + ifn!("llvm.fshr.i32", fn(t_i32, t_i32, t_i32) -> t_i32); + ifn!("llvm.fshr.i64", fn(t_i64, t_i64, t_i64) -> t_i64); + ifn!("llvm.fshr.i128", fn(t_i128, t_i128, t_i128) -> t_i128); + ifn!("llvm.sadd.with.overflow.i8", fn(t_i8, t_i8) -> mk_struct!{t_i8, i1}); ifn!("llvm.sadd.with.overflow.i16", fn(t_i16, t_i16) -> mk_struct!{t_i16, i1}); ifn!("llvm.sadd.with.overflow.i32", fn(t_i32, t_i32) -> mk_struct!{t_i32, i1}); diff --git a/src/librustc_codegen_llvm/debuginfo/metadata.rs b/src/librustc_codegen_llvm/debuginfo/metadata.rs index ba1e3f5960c85..00f06645930c2 100644 --- a/src/librustc_codegen_llvm/debuginfo/metadata.rs +++ b/src/librustc_codegen_llvm/debuginfo/metadata.rs @@ -23,6 +23,7 @@ use value::Value; use llvm; use llvm::debuginfo::{DIType, DIFile, DIScope, DIDescriptor, DICompositeType, DILexicalBlock, DIFlags}; +use llvm_util; use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; use rustc::hir::CodegenFnAttrFlags; @@ -1169,9 +1170,8 @@ fn prepare_union_metadata( fn use_enum_fallback(cx: &CodegenCx) -> bool { // On MSVC we have to use the fallback mode, because LLVM doesn't // lower variant parts to PDB. 
- return cx.sess().target.target.options.is_like_msvc || unsafe { - llvm::LLVMRustVersionMajor() < 7 - }; + return cx.sess().target.target.options.is_like_msvc + || llvm_util::get_major_version() < 7; } // Describes the members of an enum value: An enum is described as a union of diff --git a/src/librustc_codegen_llvm/intrinsic.rs b/src/librustc_codegen_llvm/intrinsic.rs index 03244c18ac3e4..bdafa8b50ba31 100644 --- a/src/librustc_codegen_llvm/intrinsic.rs +++ b/src/librustc_codegen_llvm/intrinsic.rs @@ -13,6 +13,7 @@ use attributes; use intrinsics::{self, Intrinsic}; use llvm::{self, TypeKind}; +use llvm_util; use abi::{Abi, FnType, LlvmType, PassMode}; use mir::place::PlaceRef; use mir::operand::{OperandRef, OperandValue}; @@ -284,7 +285,8 @@ pub fn codegen_intrinsic_call( "ctlz" | "ctlz_nonzero" | "cttz" | "cttz_nonzero" | "ctpop" | "bswap" | "bitreverse" | "add_with_overflow" | "sub_with_overflow" | "mul_with_overflow" | "overflowing_add" | "overflowing_sub" | "overflowing_mul" | - "unchecked_div" | "unchecked_rem" | "unchecked_shl" | "unchecked_shr" | "exact_div" => { + "unchecked_div" | "unchecked_rem" | "unchecked_shl" | "unchecked_shr" | "exact_div" | + "rotate_left" | "rotate_right" => { let ty = arg_tys[0]; match int_type_width_signed(ty, cx) { Some((width, signed)) => @@ -363,6 +365,27 @@ pub fn codegen_intrinsic_call( } else { bx.lshr(args[0].immediate(), args[1].immediate()) }, + "rotate_left" | "rotate_right" => { + let is_left = name == "rotate_left"; + let val = args[0].immediate(); + let raw_shift = args[1].immediate(); + if llvm_util::get_major_version() >= 7 { + // rotate = funnel shift with first two args the same + let llvm_name = &format!("llvm.fsh{}.i{}", + if is_left { 'l' } else { 'r' }, width); + let llfn = cx.get_intrinsic(llvm_name); + bx.call(llfn, &[val, val, raw_shift], None) + } else { + // rotate_left: (X << (S % BW)) | (X >> ((BW - S) % BW)) + // rotate_right: (X << ((BW - S) % BW)) | (X >> (S % BW)) + let width = 
C_uint(Type::ix(cx, width), width); + let shift = bx.urem(raw_shift, width); + let inv_shift = bx.urem(bx.sub(width, raw_shift), width); + let shift1 = bx.shl(val, if is_left { shift } else { inv_shift }); + let shift2 = bx.lshr(val, if !is_left { shift } else { inv_shift }); + bx.or(shift1, shift2) + } + }, _ => bug!(), }, None => { diff --git a/src/librustc_codegen_llvm/llvm_util.rs b/src/librustc_codegen_llvm/llvm_util.rs index 0a80fdddbf9fd..126b19c0c83fa 100644 --- a/src/librustc_codegen_llvm/llvm_util.rs +++ b/src/librustc_codegen_llvm/llvm_util.rs @@ -256,6 +256,10 @@ pub fn print_version() { } } +pub fn get_major_version() -> u32 { + unsafe { llvm::LLVMRustVersionMajor() } +} + pub fn print_passes() { // Can be called without initializing LLVM unsafe { llvm::LLVMRustPrintPasses(); } diff --git a/src/librustc_codegen_llvm/mir/mod.rs b/src/librustc_codegen_llvm/mir/mod.rs index a6e2ccf92e4e3..e5b25ea068b3b 100644 --- a/src/librustc_codegen_llvm/mir/mod.rs +++ b/src/librustc_codegen_llvm/mir/mod.rs @@ -12,6 +12,7 @@ use common::{C_i32, C_null}; use libc::c_uint; use llvm::{self, BasicBlock}; use llvm::debuginfo::DIScope; +use llvm_util; use rustc::ty::{self, Ty, TypeFoldable, UpvarSubsts}; use rustc::ty::layout::{LayoutOf, TyLayout}; use rustc::mir::{self, Mir}; @@ -612,7 +613,7 @@ fn arg_local_refs( // doesn't actually strip the offset when splitting the closure // environment into its components so it ends up out of bounds. // (cuviper) It seems to be fine without the alloca on LLVM 6 and later. 
- let env_alloca = !env_ref && unsafe { llvm::LLVMRustVersionMajor() < 6 }; + let env_alloca = !env_ref && llvm_util::get_major_version() < 6; let env_ptr = if env_alloca { let scratch = PlaceRef::alloca(bx, bx.cx.layout_of(tcx.mk_mut_ptr(arg.layout.ty)), diff --git a/src/librustc_mir/interpret/intrinsics.rs b/src/librustc_mir/interpret/intrinsics.rs index 5fa0fef36935d..e4c4bfa7d6588 100644 --- a/src/librustc_mir/interpret/intrinsics.rs +++ b/src/librustc_mir/interpret/intrinsics.rs @@ -150,6 +150,24 @@ impl<'a, 'mir, 'tcx, M: Machine<'a, 'mir, 'tcx>> EvalContext<'a, 'mir, 'tcx, M> } self.write_scalar(val, dest)?; } + "rotate_left" | "rotate_right" => { + // rotate_left: (X << S') | (X >> ((BW - S') % BW)), where S' = S % BW + // rotate_right: (X << ((BW - S') % BW)) | (X >> S'); BW - S' cannot underflow + let layout = self.layout_of(substs.type_at(0))?; + let val_bits = self.read_scalar(args[0])?.to_bits(layout.size)?; + let raw_shift_bits = self.read_scalar(args[1])?.to_bits(layout.size)?; + let width_bits = layout.size.bits() as u128; + let shift_bits = raw_shift_bits % width_bits; + let inv_shift_bits = (width_bits - shift_bits) % width_bits; + let result_bits = if intrinsic_name == "rotate_left" { + (val_bits << shift_bits) | (val_bits >> inv_shift_bits) + } else { + (val_bits >> shift_bits) | (val_bits << inv_shift_bits) + }; + let truncated_bits = self.truncate(result_bits, layout); + let result = Scalar::from_uint(truncated_bits, layout.size); + self.write_scalar(result, dest)?; + } "transmute" => { self.copy_op_transmute(args[0], dest)?; } diff --git a/src/librustc_mir/transform/qualify_consts.rs b/src/librustc_mir/transform/qualify_consts.rs index ca9c4eb9b8bb9..03497be03087b 100644 --- a/src/librustc_mir/transform/qualify_consts.rs +++ b/src/librustc_mir/transform/qualify_consts.rs @@ -869,6 +869,8 @@ impl<'a, 'tcx> Visitor<'tcx> for Qualifier<'a, 'tcx, 'tcx> { | "overflowing_mul" | "unchecked_shl" | "unchecked_shr" + | "rotate_left" + | "rotate_right" | "add_with_overflow" | 
"sub_with_overflow" | "mul_with_overflow" diff --git a/src/librustc_typeck/check/intrinsic.rs b/src/librustc_typeck/check/intrinsic.rs index 3156458b4aa4a..84967aaf72f57 100644 --- a/src/librustc_typeck/check/intrinsic.rs +++ b/src/librustc_typeck/check/intrinsic.rs @@ -292,7 +292,8 @@ pub fn check_intrinsic_type<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, "unchecked_div" | "unchecked_rem" | "exact_div" => (1, vec![param(0), param(0)], param(0)), - "unchecked_shl" | "unchecked_shr" => + "unchecked_shl" | "unchecked_shr" | + "rotate_left" | "rotate_right" => (1, vec![param(0), param(0)], param(0)), "overflowing_add" | "overflowing_sub" | "overflowing_mul" =>