2
2
3
3
use core:: mem;
4
4
use v128:: * ;
5
+ use x86:: * ;
5
6
6
7
#[ cfg( test) ]
7
8
use stdsimd_test:: assert_instr;
@@ -13,9 +14,9 @@ extern "C" {
13
14
#[ link_name = "llvm.x86.sse4a.insertq" ]
14
15
fn insertq ( x : i64x2 , y : i64x2 ) -> i64x2 ;
15
16
#[ link_name = "llvm.x86.sse4a.movnt.sd" ]
16
- fn movntsd ( x : * mut f64 , y : f64x2 ) ;
17
+ fn movntsd ( x : * mut f64 , y : __m128d ) ;
17
18
#[ link_name = "llvm.x86.sse4a.movnt.ss" ]
18
- fn movntss ( x : * mut f32 , y : f32x4 ) ;
19
+ fn movntss ( x : * mut f32 , y : __m128 ) ;
19
20
}
20
21
21
22
// FIXME(blocked on #248): _mm_extracti_si64(x, len, idx) // EXTRQ
@@ -35,8 +36,8 @@ extern "C" {
35
36
// Diff hunk for `_mm_extract_si64`: the `-` lines show the previous signature,
// which took and returned `i64x2`; the `+` lines show the replacement, which
// takes and returns `__m128i`.
#[ inline( always) ]
36
37
#[ target_feature( enable = "sse4a" ) ]
37
38
#[ cfg_attr( test, assert_instr( extrq) ) ]
38
- pub unsafe fn _mm_extract_si64 ( x : i64x2 , y : i64x2 ) -> i64x2 {
39
// Old body: `x` was forwarded unchanged and only `y` was transmuted to the
// type `extrq` expects for its second argument.
- extrq ( x , mem:: transmute ( y ) )
39
+ pub unsafe fn _mm_extract_si64 ( x : __m128i , y : __m128i ) -> __m128i {
40
// New body: `x` is handed to `extrq` as `i64x2` and `y` as `i8x16`; the
// value `extrq` returns is transmuted to the declared `__m128i` return type.
+ mem:: transmute ( extrq ( x . as_i64x2 ( ) , y . as_i8x16 ( ) ) )
40
41
}
41
42
42
43
/// Inserts the `[length:0]` bits of `y` into `x` at `index`.
@@ -51,59 +52,58 @@ pub unsafe fn _mm_extract_si64(x: i64x2, y: i64x2) -> i64x2 {
51
52
// Diff hunk for `_mm_insert_si64`: the `-` lines show the previous signature,
// which took and returned `i64x2`; the `+` lines show the replacement, which
// takes and returns `__m128i`.
#[ inline( always) ]
52
53
#[ target_feature( enable = "sse4a" ) ]
53
54
#[ cfg_attr( test, assert_instr( insertq) ) ]
54
- pub unsafe fn _mm_insert_si64 ( x : i64x2 , y : i64x2 ) -> i64x2 {
55
// Old body: arguments already had the `i64x2` type `insertq` is declared with,
// so they were forwarded as-is.
- insertq ( x, y)
55
+ pub unsafe fn _mm_insert_si64 ( x : __m128i , y : __m128i ) -> __m128i {
56
// New body: both operands are viewed as `i64x2` to match the `insertq`
// declaration, and its `i64x2` result is transmuted to `__m128i`.
+ mem :: transmute ( insertq ( x. as_i64x2 ( ) , y. as_i64x2 ( ) ) )
56
57
}
57
58
58
59
/// Non-temporal store of `a.0` into `p`.
///
/// `a.0` denotes the first lane of `a`: the accompanying test stores `3.0`
/// from a vector built with `_mm_setr_pd(3.0, 4.0)` and observes that the
/// next `f64` in memory is left unchanged.
// NOTE(review): `p` is a raw pointer that the intrinsic writes through;
// presumably it must be valid for a write of one `f64` — confirm the exact
// validity/alignment contract before relying on it.
59
60
#[ inline( always) ]
60
61
#[ target_feature( enable = "sse4a" ) ]
61
62
#[ cfg_attr( test, assert_instr( movntsd) ) ]
62
// The operand type changes from `f64x2` (`-` line) to `__m128d` (`+` line),
// matching the updated `movntsd` declaration in the `extern` block.
- pub unsafe fn _mm_stream_sd ( p : * mut f64 , a : f64x2 ) {
63
+ pub unsafe fn _mm_stream_sd ( p : * mut f64 , a : __m128d ) {
63
64
// Forwards both arguments unchanged to the `llvm.x86.sse4a.movnt.sd` binding.
movntsd ( p, a) ;
64
65
}
65
66
66
67
/// Non-temporal store of `a.0` into `p`.
///
/// `a.0` denotes the first lane of `a`: the accompanying test stores `5.0`
/// from a vector built with `_mm_setr_ps(5.0, 6.0, 7.0, 8.0)` and observes
/// that the next `f32` in memory is left unchanged.
// NOTE(review): `p` is a raw pointer that the intrinsic writes through;
// presumably it must be valid for a write of one `f32` — confirm the exact
// validity/alignment contract before relying on it.
67
68
#[ inline( always) ]
68
69
#[ target_feature( enable = "sse4a" ) ]
69
70
#[ cfg_attr( test, assert_instr( movntss) ) ]
70
// The operand type changes from `f32x4` (`-` line) to `__m128` (`+` line),
// matching the updated `movntss` declaration in the `extern` block.
- pub unsafe fn _mm_stream_ss ( p : * mut f32 , a : f32x4 ) {
71
+ pub unsafe fn _mm_stream_ss ( p : * mut f32 , a : __m128 ) {
71
72
// Forwards both arguments unchanged to the `llvm.x86.sse4a.movnt.ss` binding.
movntss ( p, a) ;
72
73
}
73
74
74
75
#[ cfg( test) ]
75
76
mod tests {
76
77
use stdsimd_test:: simd_test;
77
- use x86:: i686:: sse4a;
78
- use v128:: * ;
78
+ use x86:: * ;
79
79
80
80
// Checks that `_mm_extract_si64` pulls a 4-bit field starting at bit 8 out of
// the low 64-bit lane of `x`.
#[ simd_test = "sse4a" ]
81
// The test is renamed with a `test` prefix; presumably so that the now
// unqualified call to `_mm_extract_si64` below does not resolve to the test
// function itself — confirm against the module's imports.
- unsafe fn _mm_extract_si64 ( ) {
81
+ unsafe fn test_mm_extract_si64 ( ) {
82
82
let b = 0b0110_0000_0000_i64 ;
83
83
// bits [11:8] of `b` (value 0110) are the bit range extracted
84
- let x = i64x2 :: new ( b, 0 ) ;
84
+ let x = _mm_setr_epi64x ( b, 0 ) ;
85
85
let v = 0b001000___00___000100_i64 ;
86
86
// control word: bits [13:8] = idx (0b001000 = 2^3 = 8),
// bits [5:0] = length (0b000100 = 2^2 = 4)
87
- let y = i64x2 :: new ( v, 0 ) ;
88
// Expected result: the extracted field 0110 in the low bits, upper lane 0.
- let e = i64x2 :: new ( 0b0110_i64 , 0 ) ;
89
- let r = sse4a :: _mm_extract_si64 ( x, y) ;
87
+ let y = _mm_setr_epi64x ( v, 0 ) ;
88
+ let e = _mm_setr_epi64x ( 0b0110_i64 , 0 ) ;
89
+ let r = _mm_extract_si64 ( x, y) ;
90
90
assert_eq ! ( r, e) ;
91
91
}
92
92
93
93
// Checks that `_mm_insert_si64` overwrites a 4-bit field starting at bit 8 of
// the low 64-bit lane of `x` with the low bits of `y`'s first lane.
#[ simd_test = "sse4a" ]
94
// The test is renamed with a `test` prefix; presumably so that the now
// unqualified call to `_mm_insert_si64` below does not resolve to the test
// function itself — confirm against the module's imports.
- unsafe fn _mm_insert_si64 ( ) {
94
+ unsafe fn test_mm_insert_si64 ( ) {
95
95
let i = 0b0110_i64 ;
96
96
// the four bits 0110 of `i` are the bit range inserted
97
97
let z = 0b1010_1010_1010i64 ;
98
98
// bits [11:8] of `z` (value 1010) are the bit range replaced
99
99
let e = 0b0110_1010_1010i64 ;
100
100
// same as `z` except bits [11:8]: replaced 1010 with 0110
101
- let x = i64x2 :: new ( z, 0 ) ;
102
- let expected = i64x2 :: new ( e, 0 ) ;
101
+ let x = _mm_setr_epi64x ( z, 0 ) ;
102
+ let expected = _mm_setr_epi64x ( e, 0 ) ;
103
103
let v = 0b001000___00___000100_i64 ;
104
104
// control word: bits [13:8] = idx (0b001000 = 2^3 = 8),
// bits [5:0] = length (0b000100 = 2^2 = 4)
105
// `y` carries the value to insert (`i`) as its first element and the
// control word (`v`) as its second element.
- let y = i64x2 :: new ( i, v) ;
106
- let r = sse4a :: _mm_insert_si64 ( x, y) ;
105
+ let y = _mm_setr_epi64x ( i, v) ;
106
+ let r = _mm_insert_si64 ( x, y) ;
107
107
assert_eq ! ( r, expected) ;
108
108
}
109
109
@@ -113,17 +113,17 @@ mod tests {
113
113
}
114
114
115
115
#[ simd_test = "sse4a" ]
116
- unsafe fn _mm_stream_sd ( ) {
116
+ unsafe fn test_mm_stream_sd ( ) {
117
117
let mut mem = MemoryF64 {
118
118
data : [ 1.0_f64 , 2.0 ] ,
119
119
} ;
120
120
{
121
121
let vals = & mut mem. data ;
122
122
let d = vals. as_mut_ptr ( ) ;
123
123
124
- let x = f64x2 :: new ( 3.0 , 4.0 ) ;
124
+ let x = _mm_setr_pd ( 3.0 , 4.0 ) ;
125
125
126
- sse4a :: _mm_stream_sd ( d, x) ;
126
+ _mm_stream_sd ( d, x) ;
127
127
}
128
128
assert_eq ! ( mem. data[ 0 ] , 3.0 ) ;
129
129
assert_eq ! ( mem. data[ 1 ] , 2.0 ) ;
@@ -135,17 +135,17 @@ mod tests {
135
135
}
136
136
137
137
#[ simd_test = "sse4a" ]
138
- unsafe fn _mm_stream_ss ( ) {
138
+ unsafe fn test_mm_stream_ss ( ) {
139
139
let mut mem = MemoryF32 {
140
140
data : [ 1.0_f32 , 2.0 , 3.0 , 4.0 ] ,
141
141
} ;
142
142
{
143
143
let vals = & mut mem. data ;
144
144
let d = vals. as_mut_ptr ( ) ;
145
145
146
- let x = f32x4 :: new ( 5.0 , 6.0 , 7.0 , 8.0 ) ;
146
+ let x = _mm_setr_ps ( 5.0 , 6.0 , 7.0 , 8.0 ) ;
147
147
148
- sse4a :: _mm_stream_ss ( d, x) ;
148
+ _mm_stream_ss ( d, x) ;
149
149
}
150
150
assert_eq ! ( mem. data[ 0 ] , 5.0 ) ;
151
151
assert_eq ! ( mem. data[ 1 ] , 2.0 ) ;
0 commit comments