From c6c76214b592bd7311080e208b1d2df8888e4755 Mon Sep 17 00:00:00 2001
From: Joe Richey
Date: Tue, 30 Jun 2020 23:00:09 -0700
Subject: [PATCH 01/10] mem: Move mem* functions to separate directory

Signed-off-by: Joe Richey
---
 src/{mem.rs => mem/mod.rs} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename src/{mem.rs => mem/mod.rs} (100%)

diff --git a/src/mem.rs b/src/mem/mod.rs
similarity index 100%
rename from src/mem.rs
rename to src/mem/mod.rs

From 80b7c01914e71deb20b0562e2b0c774ef632797b Mon Sep 17 00:00:00 2001
From: Joe Richey
Date: Tue, 30 Jun 2020 23:10:40 -0700
Subject: [PATCH 02/10] memcpy: Create separate memcpy.rs file

Signed-off-by: Joe Richey
---
 src/mem/memcpy.rs | 41 +++++++++++++++++++++++++++++++++++++++++
 src/mem/mod.rs    | 42 +++---------------------------------------
 2 files changed, 44 insertions(+), 39 deletions(-)
 create mode 100644 src/mem/memcpy.rs

diff --git a/src/mem/memcpy.rs b/src/mem/memcpy.rs
new file mode 100644
index 000000000..8fada9bca
--- /dev/null
+++ b/src/mem/memcpy.rs
@@ -0,0 +1,41 @@
+use super::c_int;
+
+#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
+pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 {
+    let mut i = 0;
+    while i < n {
+        *dest.offset(i as isize) = *src.offset(i as isize);
+        i += 1;
+    }
+    dest
+}
+
+#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
+pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 {
+    if src < dest as *const u8 {
+        // copy from end
+        let mut i = n;
+        while i != 0 {
+            i -= 1;
+            *dest.offset(i as isize) = *src.offset(i as isize);
+        }
+    } else {
+        // copy from beginning
+        let mut i = 0;
+        while i < n {
+            *dest.offset(i as isize) = *src.offset(i as isize);
+            i += 1;
+        }
+    }
+    dest
+}
+
+#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
+pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 {
+    let mut i = 0;
+    while i < n {
+        *s.offset(i as isize) = c as u8;
+        i += 1;
+    }
+    s
+}

diff --git a/src/mem/mod.rs b/src/mem/mod.rs
index 24552ed85..7a02a6e14 100644
--- a/src/mem/mod.rs
+++ b/src/mem/mod.rs
@@ -9,45 +9,9 @@ use core::intrinsics::{atomic_load_unordered, atomic_store_unordered, exact_div}
 use core::mem;
 use core::ops::{BitOr, Shl};
 
-#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
-pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 {
-    let mut i = 0;
-    while i < n {
-        *dest.offset(i as isize) = *src.offset(i as isize);
-        i += 1;
-    }
-    dest
-}
-
-#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
-pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 {
-    if src < dest as *const u8 {
-        // copy from end
-        let mut i = n;
-        while i != 0 {
-            i -= 1;
-            *dest.offset(i as isize) = *src.offset(i as isize);
-        }
-    } else {
-        // copy from beginning
-        let mut i = 0;
-        while i < n {
-            *dest.offset(i as isize) = *src.offset(i as isize);
-            i += 1;
-        }
-    }
-    dest
-}
-
-#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
-pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 {
-    let mut i = 0;
-    while i < n {
-        *s.offset(i as isize) = c as u8;
-        i += 1;
-    }
-    s
-}
+// memcpy/memmove/memset have optimized implementations on some architectures
+mod memcpy;
+pub use self::memcpy::*;
 
 #[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
 pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 {
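[Editor's note, not part of the series] The fallback memmove above has to pick a copy direction because the two regions may overlap: when dest starts inside the source range, a plain front-to-back copy clobbers source bytes before they are read. A minimal standalone sketch of the failure (buffer contents made up for illustration):

    fn main() {
        let mut buf = *b"abcdef";
        // Naive forward copy of buf[0..4] onto buf[2..6] (overlapping ranges):
        for i in 0..4 {
            buf[2 + i] = buf[i]; // reads bytes already overwritten by earlier steps
        }
        assert_eq!(&buf, b"ababab"); // corrupted
        // A correct memmove copies back-to-front here and would yield "ababcd".
    }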
From ee5478257da8010c41efb4bc15fd242c07cc7be6 Mon Sep 17 00:00:00 2001
From: Joe Richey
Date: Wed, 8 Jul 2020 16:23:45 -0700
Subject: [PATCH 03/10] benches: Add benchmarks for mem* functions

This allows comparing the "normal" implementations to the
implementations provided by this crate.

Signed-off-by: Joe Richey
---
 testcrate/benches/mem.rs | 154 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 154 insertions(+)
 create mode 100644 testcrate/benches/mem.rs

diff --git a/testcrate/benches/mem.rs b/testcrate/benches/mem.rs
new file mode 100644
index 000000000..dc593f571
--- /dev/null
+++ b/testcrate/benches/mem.rs
@@ -0,0 +1,154 @@
+#![feature(test)]
+
+extern crate test;
+use test::{black_box, Bencher};
+
+extern crate compiler_builtins;
+use compiler_builtins::mem::{memcmp, memcpy, memmove, memset};
+
+fn memcpy_builtin(b: &mut Bencher, n: usize) {
+    let v1 = vec![1u8; n];
+    let mut v2 = vec![0u8; n];
+    b.iter(|| {
+        let src: &[u8] = black_box(&v1);
+        let dst: &mut [u8] = black_box(&mut v2);
+        dst.copy_from_slice(src);
+    })
+}
+
+fn memcpy_rust(b: &mut Bencher, n: usize) {
+    let v1 = vec![1u8; n];
+    let mut v2 = vec![0u8; n];
+    b.iter(|| {
+        let src: &[u8] = black_box(&v1);
+        let dst: &mut [u8] = black_box(&mut v2);
+        unsafe { memcpy(dst.as_mut_ptr(), src.as_ptr(), n) }
+    })
+}
+
+fn memset_builtin(b: &mut Bencher, n: usize) {
+    let mut v1 = vec![0u8; n];
+    b.iter(|| {
+        let dst: &mut [u8] = black_box(&mut v1);
+        let val: u8 = black_box(27);
+        for b in dst {
+            *b = val;
+        }
+    })
+}
+
+fn memset_rust(b: &mut Bencher, n: usize) {
+    let mut v1 = vec![0u8; n];
+    b.iter(|| {
+        let dst: &mut [u8] = black_box(&mut v1);
+        let val = black_box(27);
+        unsafe { memset(dst.as_mut_ptr(), val, n) }
+    })
+}
+
+fn memcmp_builtin(b: &mut Bencher, n: usize) {
+    let v1 = vec![0u8; n];
+    let mut v2 = vec![0u8; n];
+    v2[n - 1] = 1;
+    b.iter(|| {
+        let s1: &[u8] = black_box(&v1);
+        let s2: &[u8] = black_box(&v2);
+        s1.cmp(s2)
+    })
+}
+
+fn memcmp_rust(b: &mut Bencher, n: usize) {
+    let v1 = vec![0u8; n];
+    let mut v2 = vec![0u8; n];
+    v2[n - 1] = 1;
+    b.iter(|| {
+        let s1: &[u8] = black_box(&v1);
+        let s2: &[u8] = black_box(&v2);
+        unsafe { memcmp(s1.as_ptr(), s2.as_ptr(), n) }
+    })
+}
+
+fn memmove_builtin(b: &mut Bencher, n: usize) {
+    let mut v = vec![0u8; n + n / 2];
+    b.iter(|| {
+        let s: &mut [u8] = black_box(&mut v);
+        s.copy_within(0..n, n / 2);
+    })
+}
+
+fn memmove_rust(b: &mut Bencher, n: usize) {
+    let mut v = vec![0u8; n + n / 2];
+    b.iter(|| {
+        let dst: *mut u8 = black_box(&mut v[n / 2..]).as_mut_ptr();
+        let src: *const u8 = black_box(&v).as_ptr();
+        unsafe { memmove(dst, src, n) };
+    })
+}
+
+#[bench]
+fn memcpy_builtin_4096(b: &mut Bencher) {
+    memcpy_builtin(b, 4096)
+}
+#[bench]
+fn memcpy_rust_4096(b: &mut Bencher) {
+    memcpy_rust(b, 4096)
+}
+#[bench]
+fn memcpy_builtin_1048576(b: &mut Bencher) {
+    memcpy_builtin(b, 1048576)
+}
+#[bench]
+fn memcpy_rust_1048576(b: &mut Bencher) {
+    memcpy_rust(b, 1048576)
+}
+
+#[bench]
+fn memset_builtin_4096(b: &mut Bencher) {
+    memset_builtin(b, 4096)
+}
+#[bench]
+fn memset_rust_4096(b: &mut Bencher) {
+    memset_rust(b, 4096)
+}
+#[bench]
+fn memset_builtin_1048576(b: &mut Bencher) {
+    memset_builtin(b, 1048576)
+}
+#[bench]
+fn memset_rust_1048576(b: &mut Bencher) {
+    memset_rust(b, 1048576)
+}
+
+#[bench]
+fn memcmp_builtin_4096(b: &mut Bencher) {
+    memcmp_builtin(b, 4096)
+}
+#[bench]
+fn memcmp_rust_4096(b: &mut Bencher) {
+    memcmp_rust(b, 4096)
+}
+#[bench]
+fn memcmp_builtin_1048576(b: &mut Bencher) {
+    memcmp_builtin(b, 1048576)
+}
+#[bench]
+fn memcmp_rust_1048576(b: &mut Bencher) {
+    memcmp_rust(b, 1048576)
+}
+
+#[bench]
+fn memmove_builtin_4096(b: &mut Bencher) {
+    memmove_builtin(b, 4096)
+}
+#[bench]
+fn memmove_rust_4096(b: &mut Bencher) {
+    memmove_rust(b, 4096)
+}
+#[bench]
+fn memmove_builtin_1048576(b: &mut Bencher) {
+    memmove_builtin(b, 1048576)
+}
+#[bench]
+fn memmove_rust_1048576(b: &mut Bencher) {
+    memmove_rust(b, 1048576)
+}
From fb03d26e079d3b96571a67a319037b44fe58b5e8 Mon Sep 17 00:00:00 2001
From: Joe Richey
Date: Wed, 1 Jul 2020 01:07:30 -0700
Subject: [PATCH 04/10] mem: Add REP MOVSB/STOSB implementations

The assembly generated seems correct:
https://rust.godbolt.org/z/GGnec8

Signed-off-by: Joe Richey
---
 src/mem/mod.rs    |  1 +
 src/mem/x86_64.rs | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 src/mem/x86_64.rs

diff --git a/src/mem/mod.rs b/src/mem/mod.rs
index 7a02a6e14..aa9d4b61d 100644
--- a/src/mem/mod.rs
+++ b/src/mem/mod.rs
@@ -10,6 +10,7 @@ use core::mem;
 use core::ops::{BitOr, Shl};
 
 // memcpy/memmove/memset have optimized implementations on some architectures
+#[cfg_attr(all(feature = "asm", target_arch = "x86_64"), path = "x86_64.rs")]
 mod memcpy;
 pub use self::memcpy::*;
 
diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs
new file mode 100644
index 000000000..42e3598cd
--- /dev/null
+++ b/src/mem/x86_64.rs
@@ -0,0 +1,46 @@
+use super::c_int;
+
+#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
+pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, count: usize) -> *mut u8 {
+    asm!(
+        "rep movsb [rdi], [rsi]",
+        inout("rcx") count => _,
+        inout("rdi") dest => _,
+        inout("rsi") src => _,
+        options(nostack, preserves_flags)
+    );
+    dest
+}
+
+#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
+pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, count: usize) -> *mut u8 {
+    let delta = dest as usize - src as usize;
+    if delta >= count {
+        // We can copy forwards because either dest is far enough ahead of src,
+        // or src is ahead of dest (and delta overflowed).
+        return self::memcpy(dest, src, count);
+    }
+    // copy backwards
+    asm!(
+        "std",
+        "rep movsb [rdi], [rsi]",
+        "cld",
+        inout("rcx") count => _,
+        inout("rdi") dest.add(count).sub(1) => _,
+        inout("rsi") src.add(count).sub(1) => _,
+        options(nostack, preserves_flags)
+    );
+    dest
+}
+
+#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
+pub unsafe extern "C" fn memset(dest: *mut u8, c: c_int, count: usize) -> *mut u8 {
+    asm!(
+        "rep stosb [rdi], al",
+        inout("rcx") count => _,
+        inout("rdi") dest => _,
+        in("al") c as u8,
+        options(nostack, preserves_flags)
+    );
+    dest
+}

From 2a0132cfae362f429b6da7b5ba206ee20ec9aa1f Mon Sep 17 00:00:00 2001
From: Joe Richey
Date: Tue, 7 Jul 2020 23:58:55 -0700
Subject: [PATCH 05/10] mem: Add documentation for REP string instructions

Signed-off-by: Joe Richey
---
 src/mem/x86_64.rs | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs
index 42e3598cd..2d28505f4 100644
--- a/src/mem/x86_64.rs
+++ b/src/mem/x86_64.rs
@@ -1,5 +1,21 @@
 use super::c_int;
 
+// On most modern Intel and AMD processors, "rep movsq" and "rep stosq" have
+// been enhanced to perform better than a simple qword loop, making them ideal
+// for implementing memcpy/memset. Note that "rep cmps" has received no such
+// enhancement, so it is not used to implement memcmp.
+//
+// On certain recent Intel processors, "rep movsb" and "rep stosb" have been
+// further enhanced to automatically select the best microarchitectural
+// implementation based on length and alignment. See the following features from
+// the "Intel® 64 and IA-32 Architectures Optimization Reference Manual":
+// - ERMSB - Enhanced REP MOVSB and STOSB (Ivy Bridge and later)
+// - FSRM - Fast Short REP MOV (Ice Lake and later)
+// - Fast Zero-Length MOVSB (On no current hardware)
+// - Fast Short STOSB (On no current hardware)
+// However, to avoid run-time feature detection, we don't use these byte-based
+// instructions for most of the copying, preferring the qword variants.
+
 #[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
 pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, count: usize) -> *mut u8 {
     asm!(
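[Editor's note, not part of the series] For readers who don't have the REP string semantics memorized: "rep movsb" copies rcx bytes from [rsi] to [rdi], stepping both pointers forward when the direction flag is clear (cld) and backward when it is set (std). That is why the backward path above executes std first and points rdi/rsi at the last byte of each buffer. A rough Rust model of the instruction (illustrative only; register names kept for clarity):

    unsafe fn rep_movsb_model(mut rdi: *mut u8, mut rsi: *const u8, mut rcx: usize, df: bool) {
        while rcx != 0 {
            *rdi = *rsi; // one byte per iteration
            if df {
                rdi = rdi.sub(1); // std: pointers walk down
                rsi = rsi.sub(1);
            } else {
                rdi = rdi.add(1); // cld: pointers walk up
                rsi = rsi.add(1);
            }
            rcx -= 1;
        }
    }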
From aa75260e068348b11f36dfa90c06be1794a0d67d Mon Sep 17 00:00:00 2001
From: Joe Richey
Date: Fri, 10 Jul 2020 01:34:01 -0700
Subject: [PATCH 06/10] Use quad-word rep string instructions

Signed-off-by: Joe Richey
---
 src/mem/x86_64.rs | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs
index 2d28505f4..db6bb1fc7 100644
--- a/src/mem/x86_64.rs
+++ b/src/mem/x86_64.rs
@@ -18,9 +18,14 @@ use super::c_int;
 
 #[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
 pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, count: usize) -> *mut u8 {
+    let qword_count = count >> 3;
+    let byte_count = count & 0b111;
     asm!(
+        "rep movsq [rdi], [rsi]",
+        "mov ecx, {byte_count:e}",
         "rep movsb [rdi], [rsi]",
-        inout("rcx") count => _,
+        byte_count = in(reg) byte_count,
+        inout("rcx") qword_count => _,
         inout("rdi") dest => _,
         inout("rsi") src => _,
         options(nostack, preserves_flags)
@@ -37,25 +42,37 @@ pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, count: usize) ->
         return self::memcpy(dest, src, count);
     }
     // copy backwards
+    let qword_count = count >> 3;
+    let byte_count = count & 0b111;
     asm!(
         "std",
+        "rep movsq [rdi], [rsi]",
+        "mov ecx, {byte_count:e}",
+        "add rdi, 7",
+        "add rsi, 7",
         "rep movsb [rdi], [rsi]",
         "cld",
-        inout("rcx") count => _,
-        inout("rdi") dest.add(count).sub(1) => _,
-        inout("rsi") src.add(count).sub(1) => _,
-        options(nostack, preserves_flags)
+        byte_count = in(reg) byte_count,
+        inout("rcx") qword_count => _,
+        inout("rdi") dest.offset(count as isize).wrapping_sub(8) => _,
+        inout("rsi") src.offset(count as isize).wrapping_sub(8) => _,
+        options(nostack)
     );
     dest
 }
 
 #[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
 pub unsafe extern "C" fn memset(dest: *mut u8, c: c_int, count: usize) -> *mut u8 {
+    let qword_count = count >> 3;
+    let byte_count = count & 0b111;
     asm!(
+        "rep stosq [rdi], rax",
+        "mov ecx, {byte_count:e}",
         "rep stosb [rdi], al",
-        inout("rcx") count => _,
+        byte_count = in(reg) byte_count,
+        inout("rcx") qword_count => _,
         inout("rdi") dest => _,
-        in("al") c as u8,
+        in("rax") (c as u8 as u64) * 0x0101010101010101,
         options(nostack, preserves_flags)
     );
     dest
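[Editor's note, not part of the series] Two bits of arithmetic above are worth spelling out: the count is split into 8-byte chunks plus a tail, and memset widens its fill byte to a full qword by multiplying with 0x0101010101010101, which replicates the low byte into all eight lanes of rax. A standalone check of both (values chosen arbitrarily):

    fn main() {
        // Only the LSB of the c_int argument matters for memset.
        let c: i32 = 0x2009;
        let splat = (c as u8 as u64) * 0x0101010101010101;
        assert_eq!(splat, 0x0909090909090909);

        // The qword/byte split driving "rep movsq" + "rep movsb".
        let count: usize = 20;
        assert_eq!(count >> 3, 2); // qword_count: two 8-byte chunks
        assert_eq!(count & 0b111, 4); // byte_count: 4-byte tail
    }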
From de4ed289ff88631de7d76fd985f846d768320381 Mon Sep 17 00:00:00 2001
From: Joe Richey
Date: Fri, 23 Oct 2020 01:31:14 -0700
Subject: [PATCH 07/10] Prevent panic when compiled in debug mode

Signed-off-by: Joe Richey
---
 src/mem/x86_64.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs
index db6bb1fc7..1ecffce45 100644
--- a/src/mem/x86_64.rs
+++ b/src/mem/x86_64.rs
@@ -35,7 +35,7 @@ pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, count: usize) ->
 
 #[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
 pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, count: usize) -> *mut u8 {
-    let delta = dest as usize - src as usize;
+    let delta = (dest as usize).wrapping_sub(src as usize);
     if delta >= count {
         // We can copy forwards because either dest is far enough ahead of src,
         // or src is ahead of dest (and delta overflowed).
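[Editor's note, not part of the series] The one-character fix above matters because the overlap test deliberately relies on unsigned wraparound: when src is ahead of dest, the subtraction wraps to a huge value that is >= count for any sane buffer, so the forward path is taken. A plain `-` panics on that overflow in debug builds; `wrapping_sub` encodes the intent. A standalone illustration with made-up addresses:

    fn can_copy_forwards(dest: usize, src: usize, count: usize) -> bool {
        // dest >= src + count, or dest < src (difference wraps to a huge value)
        dest.wrapping_sub(src) >= count
    }

    fn main() {
        assert!(can_copy_forwards(0x2000, 0x1000, 0x800)); // disjoint, dest ahead
        assert!(can_copy_forwards(0x1000, 0x2000, 0x800)); // src ahead: wraps, forward copy is safe
        assert!(!can_copy_forwards(0x1400, 0x1000, 0x800)); // dest inside src..src+count
    }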
From fe71a12173682a633add9292bd84035234d3bf9a Mon Sep 17 00:00:00 2001
From: Joe Richey
Date: Fri, 23 Oct 2020 01:31:33 -0700
Subject: [PATCH 08/10] Add tests for mem* functions

Signed-off-by: Joe Richey
---
 testcrate/tests/mem.rs | 133 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)
 create mode 100644 testcrate/tests/mem.rs

diff --git a/testcrate/tests/mem.rs b/testcrate/tests/mem.rs
new file mode 100644
index 000000000..a5596b281
--- /dev/null
+++ b/testcrate/tests/mem.rs
@@ -0,0 +1,133 @@
+extern crate compiler_builtins;
+use compiler_builtins::mem::{memcmp, memcpy, memmove, memset};
+
+#[test]
+fn memcpy_3() {
+    let mut arr: [u8; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
+    unsafe {
+        let src = arr.as_ptr().offset(9);
+        let dst = arr.as_mut_ptr().offset(1);
+        assert_eq!(memcpy(dst, src, 3), dst);
+        assert_eq!(arr, [0, 9, 10, 11, 4, 5, 6, 7, 8, 9, 10, 11]);
+    }
+    arr = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
+    unsafe {
+        let src = arr.as_ptr().offset(1);
+        let dst = arr.as_mut_ptr().offset(9);
+        assert_eq!(memcpy(dst, src, 3), dst);
+        assert_eq!(arr, [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3]);
+    }
+}
+
+#[test]
+fn memcpy_10() {
+    let arr: [u8; 18] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17];
+    let mut dst: [u8; 12] = [0; 12];
+    unsafe {
+        let src = arr.as_ptr().offset(1);
+        assert_eq!(memcpy(dst.as_mut_ptr(), src, 10), dst.as_mut_ptr());
+        assert_eq!(dst, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0]);
+    }
+    unsafe {
+        let src = arr.as_ptr().offset(8);
+        assert_eq!(memcpy(dst.as_mut_ptr(), src, 10), dst.as_mut_ptr());
+        assert_eq!(dst, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 0, 0]);
+    }
+}
+
+#[test]
+fn memcpy_big() {
+    // Make the arrays cross 3 pages
+    const SIZE: usize = 8193;
+    let src: [u8; SIZE] = [22; SIZE];
+    struct Dst {
+        start: usize,
+        buf: [u8; SIZE],
+        end: usize,
+    }
+
+    let mut dst = Dst {
+        start: 0,
+        buf: [0; SIZE],
+        end: 0,
+    };
+    unsafe {
+        assert_eq!(
+            memcpy(dst.buf.as_mut_ptr(), src.as_ptr(), SIZE),
+            dst.buf.as_mut_ptr()
+        );
+        assert_eq!(dst.start, 0);
+        assert_eq!(dst.buf, [22; SIZE]);
+        assert_eq!(dst.end, 0);
+    }
+}
+
+#[test]
+fn memmove_forward() {
+    let mut arr: [u8; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
+    unsafe {
+        let src = arr.as_ptr().offset(6);
+        let dst = arr.as_mut_ptr().offset(3);
+        assert_eq!(memmove(dst, src, 5), dst);
+        assert_eq!(arr, [0, 1, 2, 6, 7, 8, 9, 10, 8, 9, 10, 11]);
+    }
+}
+
+#[test]
+fn memmove_backward() {
+    let mut arr: [u8; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
+    unsafe {
+        let src = arr.as_ptr().offset(3);
+        let dst = arr.as_mut_ptr().offset(6);
+        assert_eq!(memmove(dst, src, 5), dst);
+        assert_eq!(arr, [0, 1, 2, 3, 4, 5, 3, 4, 5, 6, 7, 11]);
+    }
+}
+
+#[test]
+fn memset_zero() {
+    let mut arr: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
+    unsafe {
+        let ptr = arr.as_mut_ptr().offset(5);
+        assert_eq!(memset(ptr, 0, 2), ptr);
+        assert_eq!(arr, [0, 1, 2, 3, 4, 0, 0, 7]);
+
+        // Only the LSB matters for a memset
+        assert_eq!(memset(arr.as_mut_ptr(), 0x2000, 8), arr.as_mut_ptr());
+        assert_eq!(arr, [0, 0, 0, 0, 0, 0, 0, 0]);
+    }
+}
+
+#[test]
+fn memset_nonzero() {
+    let mut arr: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
+    unsafe {
+        let ptr = arr.as_mut_ptr().offset(2);
+        assert_eq!(memset(ptr, 22, 3), ptr);
+        assert_eq!(arr, [0, 1, 22, 22, 22, 5, 6, 7]);
+
+        // Only the LSB matters for a memset
+        assert_eq!(memset(arr.as_mut_ptr(), 0x2009, 8), arr.as_mut_ptr());
+        assert_eq!(arr, [9, 9, 9, 9, 9, 9, 9, 9]);
+    }
+}
+
+#[test]
+fn memcmp_eq() {
+    let arr1: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
+    let arr2: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
+    unsafe {
+        assert_eq!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 8), 0);
+        assert_eq!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 3), 0);
+    }
+}
+
+#[test]
+fn memcmp_ne() {
+    let arr1: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
+    let arr2: [u8; 8] = [0, 1, 2, 3, 4, 5, 7, 7];
+    unsafe {
+        assert!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 8) < 0);
+        assert!(memcmp(arr2.as_ptr(), arr1.as_ptr(), 8) > 0);
+    }
+}

From aa326a3abd0d82ff3c9cc2881d9dd07a0cb815ba Mon Sep 17 00:00:00 2001
From: Joe Richey
Date: Sat, 24 Oct 2020 03:15:41 -0700
Subject: [PATCH 09/10] Add build/test with the "asm" feature

Signed-off-by: Joe Richey
---
 ci/run.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/ci/run.sh b/ci/run.sh
index 3c9dc0247..9d1632333 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -12,12 +12,16 @@ else
     $run --release
     $run --features c
     $run --features c --release
+    $run --features asm
+    $run --features asm --release
 fi
 
 cargo build --target $1
 cargo build --target $1 --release
 cargo build --target $1 --features c
 cargo build --target $1 --release --features c
+cargo build --target $1 --features asm
+cargo build --target $1 --release --features asm
 
 PREFIX=$(echo $1 | sed -e 's/unknown-//')-
 case $1 in
From d4a180ad739cb2e61fbab7903b390cae98d6035d Mon Sep 17 00:00:00 2001
From: Joe Richey
Date: Sat, 24 Oct 2020 03:16:18 -0700
Subject: [PATCH 10/10] Add byte length to Bencher

Signed-off-by: Joe Richey
---
 testcrate/benches/mem.rs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/testcrate/benches/mem.rs b/testcrate/benches/mem.rs
index dc593f571..57d575086 100644
--- a/testcrate/benches/mem.rs
+++ b/testcrate/benches/mem.rs
@@ -9,6 +9,7 @@ use compiler_builtins::mem::{memcmp, memcpy, memmove, memset};
 fn memcpy_builtin(b: &mut Bencher, n: usize) {
     let v1 = vec![1u8; n];
     let mut v2 = vec![0u8; n];
+    b.bytes = n as u64;
     b.iter(|| {
         let src: &[u8] = black_box(&v1);
         let dst: &mut [u8] = black_box(&mut v2);
@@ -19,6 +20,7 @@ fn memcpy_rust(b: &mut Bencher, n: usize) {
     let v1 = vec![1u8; n];
     let mut v2 = vec![0u8; n];
+    b.bytes = n as u64;
     b.iter(|| {
         let src: &[u8] = black_box(&v1);
         let dst: &mut [u8] = black_box(&mut v2);
@@ -28,6 +30,7 @@ fn memset_builtin(b: &mut Bencher, n: usize) {
     let mut v1 = vec![0u8; n];
+    b.bytes = n as u64;
     b.iter(|| {
         let dst: &mut [u8] = black_box(&mut v1);
         let val: u8 = black_box(27);
@@ -39,6 +42,7 @@ fn memset_rust(b: &mut Bencher, n: usize) {
     let mut v1 = vec![0u8; n];
+    b.bytes = n as u64;
     b.iter(|| {
         let dst: &mut [u8] = black_box(&mut v1);
         let val = black_box(27);
@@ -50,6 +54,7 @@ fn memcmp_builtin(b: &mut Bencher, n: usize) {
     let v1 = vec![0u8; n];
     let mut v2 = vec![0u8; n];
     v2[n - 1] = 1;
+    b.bytes = n as u64;
     b.iter(|| {
         let s1: &[u8] = black_box(&v1);
         let s2: &[u8] = black_box(&v2);
@@ -61,6 +66,7 @@ fn memcmp_rust(b: &mut Bencher, n: usize) {
     let v1 = vec![0u8; n];
     let mut v2 = vec![0u8; n];
     v2[n - 1] = 1;
+    b.bytes = n as u64;
     b.iter(|| {
         let s1: &[u8] = black_box(&v1);
         let s2: &[u8] = black_box(&v2);
@@ -70,6 +76,7 @@ fn memmove_builtin(b: &mut Bencher, n: usize) {
     let mut v = vec![0u8; n + n / 2];
+    b.bytes = n as u64;
     b.iter(|| {
         let s: &mut [u8] = black_box(&mut v);
         s.copy_within(0..n, n / 2);
@@ -78,6 +85,7 @@ fn memmove_rust(b: &mut Bencher, n: usize) {
     let mut v = vec![0u8; n + n / 2];
+    b.bytes = n as u64;
     b.iter(|| {
         let dst: *mut u8 = black_box(&mut v[n / 2..]).as_mut_ptr();
         let src: *const u8 = black_box(&v).as_ptr();