1use crate::{
4 core_arch::{simd::*, x86::*},
5 intrinsics::simd::*,
6 intrinsics::sqrtf32,
7 mem, ptr,
8};
9
10#[cfg(test)]
11use stdarch_test::assert_instr;
12
13#[inline]
18#[target_feature(enable = "sse")]
19#[cfg_attr(test, assert_instr(addss))]
20#[stable(feature = "simd_x86", since = "1.27.0")]
21pub fn _mm_add_ss(a: __m128, b: __m128) -> __m128 {
22 unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) + _mm_cvtss_f32(b)) }
23}
24
25#[inline]
30#[target_feature(enable = "sse")]
31#[cfg_attr(test, assert_instr(addps))]
32#[stable(feature = "simd_x86", since = "1.27.0")]
33pub fn _mm_add_ps(a: __m128, b: __m128) -> __m128 {
34 unsafe { simd_add(a, b) }
35}
36
37#[inline]
42#[target_feature(enable = "sse")]
43#[cfg_attr(test, assert_instr(subss))]
44#[stable(feature = "simd_x86", since = "1.27.0")]
45pub fn _mm_sub_ss(a: __m128, b: __m128) -> __m128 {
46 unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) - _mm_cvtss_f32(b)) }
47}
48
49#[inline]
54#[target_feature(enable = "sse")]
55#[cfg_attr(test, assert_instr(subps))]
56#[stable(feature = "simd_x86", since = "1.27.0")]
57pub fn _mm_sub_ps(a: __m128, b: __m128) -> __m128 {
58 unsafe { simd_sub(a, b) }
59}
60
61#[inline]
66#[target_feature(enable = "sse")]
67#[cfg_attr(test, assert_instr(mulss))]
68#[stable(feature = "simd_x86", since = "1.27.0")]
69pub fn _mm_mul_ss(a: __m128, b: __m128) -> __m128 {
70 unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) * _mm_cvtss_f32(b)) }
71}
72
73#[inline]
78#[target_feature(enable = "sse")]
79#[cfg_attr(test, assert_instr(mulps))]
80#[stable(feature = "simd_x86", since = "1.27.0")]
81pub fn _mm_mul_ps(a: __m128, b: __m128) -> __m128 {
82 unsafe { simd_mul(a, b) }
83}
84
85#[inline]
90#[target_feature(enable = "sse")]
91#[cfg_attr(test, assert_instr(divss))]
92#[stable(feature = "simd_x86", since = "1.27.0")]
93pub fn _mm_div_ss(a: __m128, b: __m128) -> __m128 {
94 unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) / _mm_cvtss_f32(b)) }
95}
96
97#[inline]
102#[target_feature(enable = "sse")]
103#[cfg_attr(test, assert_instr(divps))]
104#[stable(feature = "simd_x86", since = "1.27.0")]
105pub fn _mm_div_ps(a: __m128, b: __m128) -> __m128 {
106 unsafe { simd_div(a, b) }
107}
108
109#[inline]
114#[target_feature(enable = "sse")]
115#[cfg_attr(test, assert_instr(sqrtss))]
116#[stable(feature = "simd_x86", since = "1.27.0")]
117pub fn _mm_sqrt_ss(a: __m128) -> __m128 {
118 unsafe { simd_insert!(a, 0, sqrtf32(_mm_cvtss_f32(a))) }
119}
120
121#[inline]
126#[target_feature(enable = "sse")]
127#[cfg_attr(test, assert_instr(sqrtps))]
128#[stable(feature = "simd_x86", since = "1.27.0")]
129pub fn _mm_sqrt_ps(a: __m128) -> __m128 {
130 unsafe { simd_fsqrt(a) }
131}
132
133#[inline]
138#[target_feature(enable = "sse")]
139#[cfg_attr(test, assert_instr(rcpss))]
140#[stable(feature = "simd_x86", since = "1.27.0")]
141pub fn _mm_rcp_ss(a: __m128) -> __m128 {
142 unsafe { rcpss(a) }
143}
144
145#[inline]
150#[target_feature(enable = "sse")]
151#[cfg_attr(test, assert_instr(rcpps))]
152#[stable(feature = "simd_x86", since = "1.27.0")]
153pub fn _mm_rcp_ps(a: __m128) -> __m128 {
154 unsafe { rcpps(a) }
155}
156
157#[inline]
162#[target_feature(enable = "sse")]
163#[cfg_attr(test, assert_instr(rsqrtss))]
164#[stable(feature = "simd_x86", since = "1.27.0")]
165pub fn _mm_rsqrt_ss(a: __m128) -> __m128 {
166 unsafe { rsqrtss(a) }
167}
168
169#[inline]
174#[target_feature(enable = "sse")]
175#[cfg_attr(test, assert_instr(rsqrtps))]
176#[stable(feature = "simd_x86", since = "1.27.0")]
177pub fn _mm_rsqrt_ps(a: __m128) -> __m128 {
178 unsafe { rsqrtps(a) }
179}
180
181#[inline]
187#[target_feature(enable = "sse")]
188#[cfg_attr(test, assert_instr(minss))]
189#[stable(feature = "simd_x86", since = "1.27.0")]
190pub fn _mm_min_ss(a: __m128, b: __m128) -> __m128 {
191 unsafe { minss(a, b) }
192}
193
194#[inline]
199#[target_feature(enable = "sse")]
200#[cfg_attr(test, assert_instr(minps))]
201#[stable(feature = "simd_x86", since = "1.27.0")]
202pub fn _mm_min_ps(a: __m128, b: __m128) -> __m128 {
203 unsafe { minps(a, b) }
205}
206
207#[inline]
213#[target_feature(enable = "sse")]
214#[cfg_attr(test, assert_instr(maxss))]
215#[stable(feature = "simd_x86", since = "1.27.0")]
216pub fn _mm_max_ss(a: __m128, b: __m128) -> __m128 {
217 unsafe { maxss(a, b) }
218}
219
220#[inline]
225#[target_feature(enable = "sse")]
226#[cfg_attr(test, assert_instr(maxps))]
227#[stable(feature = "simd_x86", since = "1.27.0")]
228pub fn _mm_max_ps(a: __m128, b: __m128) -> __m128 {
229 unsafe { maxps(a, b) }
231}
232
233#[inline]
237#[target_feature(enable = "sse")]
238#[cfg_attr(
240 all(test, any(target_arch = "x86_64", target_feature = "sse2")),
241 assert_instr(andps)
242)]
243#[stable(feature = "simd_x86", since = "1.27.0")]
244pub fn _mm_and_ps(a: __m128, b: __m128) -> __m128 {
245 unsafe {
246 let a: __m128i = mem::transmute(a);
247 let b: __m128i = mem::transmute(b);
248 mem::transmute(simd_and(a, b))
249 }
250}
251
252#[inline]
259#[target_feature(enable = "sse")]
260#[cfg_attr(
263 all(test, any(target_arch = "x86_64", target_feature = "sse2")),
264 assert_instr(andnps)
265)]
266#[stable(feature = "simd_x86", since = "1.27.0")]
267pub fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 {
268 unsafe {
269 let a: __m128i = mem::transmute(a);
270 let b: __m128i = mem::transmute(b);
271 let mask: __m128i = mem::transmute(i32x4::splat(-1));
272 mem::transmute(simd_and(simd_xor(mask, a), b))
273 }
274}
275
276#[inline]
280#[target_feature(enable = "sse")]
281#[cfg_attr(
283 all(test, any(target_arch = "x86_64", target_feature = "sse2")),
284 assert_instr(orps)
285)]
286#[stable(feature = "simd_x86", since = "1.27.0")]
287pub fn _mm_or_ps(a: __m128, b: __m128) -> __m128 {
288 unsafe {
289 let a: __m128i = mem::transmute(a);
290 let b: __m128i = mem::transmute(b);
291 mem::transmute(simd_or(a, b))
292 }
293}
294
295#[inline]
300#[target_feature(enable = "sse")]
301#[cfg_attr(
303 all(test, any(target_arch = "x86_64", target_feature = "sse2")),
304 assert_instr(xorps)
305)]
306#[stable(feature = "simd_x86", since = "1.27.0")]
307pub fn _mm_xor_ps(a: __m128, b: __m128) -> __m128 {
308 unsafe {
309 let a: __m128i = mem::transmute(a);
310 let b: __m128i = mem::transmute(b);
311 mem::transmute(simd_xor(a, b))
312 }
313}
314
315#[inline]
321#[target_feature(enable = "sse")]
322#[cfg_attr(test, assert_instr(cmpeqss))]
323#[stable(feature = "simd_x86", since = "1.27.0")]
324pub fn _mm_cmpeq_ss(a: __m128, b: __m128) -> __m128 {
325 unsafe { cmpss(a, b, 0) }
326}
327
328#[inline]
335#[target_feature(enable = "sse")]
336#[cfg_attr(test, assert_instr(cmpltss))]
337#[stable(feature = "simd_x86", since = "1.27.0")]
338pub fn _mm_cmplt_ss(a: __m128, b: __m128) -> __m128 {
339 unsafe { cmpss(a, b, 1) }
340}
341
342#[inline]
349#[target_feature(enable = "sse")]
350#[cfg_attr(test, assert_instr(cmpless))]
351#[stable(feature = "simd_x86", since = "1.27.0")]
352pub fn _mm_cmple_ss(a: __m128, b: __m128) -> __m128 {
353 unsafe { cmpss(a, b, 2) }
354}
355
356#[inline]
363#[target_feature(enable = "sse")]
364#[cfg_attr(test, assert_instr(cmpltss))]
365#[stable(feature = "simd_x86", since = "1.27.0")]
366pub fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128 {
367 unsafe { simd_shuffle!(a, cmpss(b, a, 1), [4, 1, 2, 3]) }
368}
369
370#[inline]
377#[target_feature(enable = "sse")]
378#[cfg_attr(test, assert_instr(cmpless))]
379#[stable(feature = "simd_x86", since = "1.27.0")]
380pub fn _mm_cmpge_ss(a: __m128, b: __m128) -> __m128 {
381 unsafe { simd_shuffle!(a, cmpss(b, a, 2), [4, 1, 2, 3]) }
382}
383
384#[inline]
391#[target_feature(enable = "sse")]
392#[cfg_attr(test, assert_instr(cmpneqss))]
393#[stable(feature = "simd_x86", since = "1.27.0")]
394pub fn _mm_cmpneq_ss(a: __m128, b: __m128) -> __m128 {
395 unsafe { cmpss(a, b, 4) }
396}
397
398#[inline]
405#[target_feature(enable = "sse")]
406#[cfg_attr(test, assert_instr(cmpnltss))]
407#[stable(feature = "simd_x86", since = "1.27.0")]
408pub fn _mm_cmpnlt_ss(a: __m128, b: __m128) -> __m128 {
409 unsafe { cmpss(a, b, 5) }
410}
411
412#[inline]
419#[target_feature(enable = "sse")]
420#[cfg_attr(test, assert_instr(cmpnless))]
421#[stable(feature = "simd_x86", since = "1.27.0")]
422pub fn _mm_cmpnle_ss(a: __m128, b: __m128) -> __m128 {
423 unsafe { cmpss(a, b, 6) }
424}
425
426#[inline]
433#[target_feature(enable = "sse")]
434#[cfg_attr(test, assert_instr(cmpnltss))]
435#[stable(feature = "simd_x86", since = "1.27.0")]
436pub fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128 {
437 unsafe { simd_shuffle!(a, cmpss(b, a, 5), [4, 1, 2, 3]) }
438}
439
440#[inline]
447#[target_feature(enable = "sse")]
448#[cfg_attr(test, assert_instr(cmpnless))]
449#[stable(feature = "simd_x86", since = "1.27.0")]
450pub fn _mm_cmpnge_ss(a: __m128, b: __m128) -> __m128 {
451 unsafe { simd_shuffle!(a, cmpss(b, a, 6), [4, 1, 2, 3]) }
452}
453
454#[inline]
461#[target_feature(enable = "sse")]
462#[cfg_attr(test, assert_instr(cmpordss))]
463#[stable(feature = "simd_x86", since = "1.27.0")]
464pub fn _mm_cmpord_ss(a: __m128, b: __m128) -> __m128 {
465 unsafe { cmpss(a, b, 7) }
466}
467
468#[inline]
475#[target_feature(enable = "sse")]
476#[cfg_attr(test, assert_instr(cmpunordss))]
477#[stable(feature = "simd_x86", since = "1.27.0")]
478pub fn _mm_cmpunord_ss(a: __m128, b: __m128) -> __m128 {
479 unsafe { cmpss(a, b, 3) }
480}
481
482#[inline]
488#[target_feature(enable = "sse")]
489#[cfg_attr(test, assert_instr(cmpeqps))]
490#[stable(feature = "simd_x86", since = "1.27.0")]
491pub fn _mm_cmpeq_ps(a: __m128, b: __m128) -> __m128 {
492 unsafe { cmpps(a, b, 0) }
493}
494
495#[inline]
501#[target_feature(enable = "sse")]
502#[cfg_attr(test, assert_instr(cmpltps))]
503#[stable(feature = "simd_x86", since = "1.27.0")]
504pub fn _mm_cmplt_ps(a: __m128, b: __m128) -> __m128 {
505 unsafe { cmpps(a, b, 1) }
506}
507
508#[inline]
515#[target_feature(enable = "sse")]
516#[cfg_attr(test, assert_instr(cmpleps))]
517#[stable(feature = "simd_x86", since = "1.27.0")]
518pub fn _mm_cmple_ps(a: __m128, b: __m128) -> __m128 {
519 unsafe { cmpps(a, b, 2) }
520}
521
522#[inline]
528#[target_feature(enable = "sse")]
529#[cfg_attr(test, assert_instr(cmpltps))]
530#[stable(feature = "simd_x86", since = "1.27.0")]
531pub fn _mm_cmpgt_ps(a: __m128, b: __m128) -> __m128 {
532 unsafe { cmpps(b, a, 1) }
533}
534
535#[inline]
542#[target_feature(enable = "sse")]
543#[cfg_attr(test, assert_instr(cmpleps))]
544#[stable(feature = "simd_x86", since = "1.27.0")]
545pub fn _mm_cmpge_ps(a: __m128, b: __m128) -> __m128 {
546 unsafe { cmpps(b, a, 2) }
547}
548
549#[inline]
555#[target_feature(enable = "sse")]
556#[cfg_attr(test, assert_instr(cmpneqps))]
557#[stable(feature = "simd_x86", since = "1.27.0")]
558pub fn _mm_cmpneq_ps(a: __m128, b: __m128) -> __m128 {
559 unsafe { cmpps(a, b, 4) }
560}
561
562#[inline]
569#[target_feature(enable = "sse")]
570#[cfg_attr(test, assert_instr(cmpnltps))]
571#[stable(feature = "simd_x86", since = "1.27.0")]
572pub fn _mm_cmpnlt_ps(a: __m128, b: __m128) -> __m128 {
573 unsafe { cmpps(a, b, 5) }
574}
575
576#[inline]
583#[target_feature(enable = "sse")]
584#[cfg_attr(test, assert_instr(cmpnleps))]
585#[stable(feature = "simd_x86", since = "1.27.0")]
586pub fn _mm_cmpnle_ps(a: __m128, b: __m128) -> __m128 {
587 unsafe { cmpps(a, b, 6) }
588}
589
590#[inline]
597#[target_feature(enable = "sse")]
598#[cfg_attr(test, assert_instr(cmpnltps))]
599#[stable(feature = "simd_x86", since = "1.27.0")]
600pub fn _mm_cmpngt_ps(a: __m128, b: __m128) -> __m128 {
601 unsafe { cmpps(b, a, 5) }
602}
603
604#[inline]
611#[target_feature(enable = "sse")]
612#[cfg_attr(test, assert_instr(cmpnleps))]
613#[stable(feature = "simd_x86", since = "1.27.0")]
614pub fn _mm_cmpnge_ps(a: __m128, b: __m128) -> __m128 {
615 unsafe { cmpps(b, a, 6) }
616}
617
618#[inline]
625#[target_feature(enable = "sse")]
626#[cfg_attr(test, assert_instr(cmpordps))]
627#[stable(feature = "simd_x86", since = "1.27.0")]
628pub fn _mm_cmpord_ps(a: __m128, b: __m128) -> __m128 {
629 unsafe { cmpps(b, a, 7) }
630}
631
632#[inline]
639#[target_feature(enable = "sse")]
640#[cfg_attr(test, assert_instr(cmpunordps))]
641#[stable(feature = "simd_x86", since = "1.27.0")]
642pub fn _mm_cmpunord_ps(a: __m128, b: __m128) -> __m128 {
643 unsafe { cmpps(b, a, 3) }
644}
645
646#[inline]
651#[target_feature(enable = "sse")]
652#[cfg_attr(test, assert_instr(comiss))]
653#[stable(feature = "simd_x86", since = "1.27.0")]
654pub fn _mm_comieq_ss(a: __m128, b: __m128) -> i32 {
655 unsafe { comieq_ss(a, b) }
656}
657
658#[inline]
663#[target_feature(enable = "sse")]
664#[cfg_attr(test, assert_instr(comiss))]
665#[stable(feature = "simd_x86", since = "1.27.0")]
666pub fn _mm_comilt_ss(a: __m128, b: __m128) -> i32 {
667 unsafe { comilt_ss(a, b) }
668}
669
670#[inline]
676#[target_feature(enable = "sse")]
677#[cfg_attr(test, assert_instr(comiss))]
678#[stable(feature = "simd_x86", since = "1.27.0")]
679pub fn _mm_comile_ss(a: __m128, b: __m128) -> i32 {
680 unsafe { comile_ss(a, b) }
681}
682
683#[inline]
689#[target_feature(enable = "sse")]
690#[cfg_attr(test, assert_instr(comiss))]
691#[stable(feature = "simd_x86", since = "1.27.0")]
692pub fn _mm_comigt_ss(a: __m128, b: __m128) -> i32 {
693 unsafe { comigt_ss(a, b) }
694}
695
696#[inline]
702#[target_feature(enable = "sse")]
703#[cfg_attr(test, assert_instr(comiss))]
704#[stable(feature = "simd_x86", since = "1.27.0")]
705pub fn _mm_comige_ss(a: __m128, b: __m128) -> i32 {
706 unsafe { comige_ss(a, b) }
707}
708
709#[inline]
714#[target_feature(enable = "sse")]
715#[cfg_attr(test, assert_instr(comiss))]
716#[stable(feature = "simd_x86", since = "1.27.0")]
717pub fn _mm_comineq_ss(a: __m128, b: __m128) -> i32 {
718 unsafe { comineq_ss(a, b) }
719}
720
721#[inline]
727#[target_feature(enable = "sse")]
728#[cfg_attr(test, assert_instr(ucomiss))]
729#[stable(feature = "simd_x86", since = "1.27.0")]
730pub fn _mm_ucomieq_ss(a: __m128, b: __m128) -> i32 {
731 unsafe { ucomieq_ss(a, b) }
732}
733
734#[inline]
741#[target_feature(enable = "sse")]
742#[cfg_attr(test, assert_instr(ucomiss))]
743#[stable(feature = "simd_x86", since = "1.27.0")]
744pub fn _mm_ucomilt_ss(a: __m128, b: __m128) -> i32 {
745 unsafe { ucomilt_ss(a, b) }
746}
747
748#[inline]
755#[target_feature(enable = "sse")]
756#[cfg_attr(test, assert_instr(ucomiss))]
757#[stable(feature = "simd_x86", since = "1.27.0")]
758pub fn _mm_ucomile_ss(a: __m128, b: __m128) -> i32 {
759 unsafe { ucomile_ss(a, b) }
760}
761
762#[inline]
769#[target_feature(enable = "sse")]
770#[cfg_attr(test, assert_instr(ucomiss))]
771#[stable(feature = "simd_x86", since = "1.27.0")]
772pub fn _mm_ucomigt_ss(a: __m128, b: __m128) -> i32 {
773 unsafe { ucomigt_ss(a, b) }
774}
775
776#[inline]
783#[target_feature(enable = "sse")]
784#[cfg_attr(test, assert_instr(ucomiss))]
785#[stable(feature = "simd_x86", since = "1.27.0")]
786pub fn _mm_ucomige_ss(a: __m128, b: __m128) -> i32 {
787 unsafe { ucomige_ss(a, b) }
788}
789
790#[inline]
796#[target_feature(enable = "sse")]
797#[cfg_attr(test, assert_instr(ucomiss))]
798#[stable(feature = "simd_x86", since = "1.27.0")]
799pub fn _mm_ucomineq_ss(a: __m128, b: __m128) -> i32 {
800 unsafe { ucomineq_ss(a, b) }
801}
802
803#[inline]
813#[target_feature(enable = "sse")]
814#[cfg_attr(test, assert_instr(cvtss2si))]
815#[stable(feature = "simd_x86", since = "1.27.0")]
816pub fn _mm_cvtss_si32(a: __m128) -> i32 {
817 unsafe { cvtss2si(a) }
818}
819
820#[inline]
824#[target_feature(enable = "sse")]
825#[cfg_attr(test, assert_instr(cvtss2si))]
826#[stable(feature = "simd_x86", since = "1.27.0")]
827pub fn _mm_cvt_ss2si(a: __m128) -> i32 {
828 _mm_cvtss_si32(a)
829}
830
831#[inline]
843#[target_feature(enable = "sse")]
844#[cfg_attr(test, assert_instr(cvttss2si))]
845#[stable(feature = "simd_x86", since = "1.27.0")]
846pub fn _mm_cvttss_si32(a: __m128) -> i32 {
847 unsafe { cvttss2si(a) }
848}
849
850#[inline]
854#[target_feature(enable = "sse")]
855#[cfg_attr(test, assert_instr(cvttss2si))]
856#[stable(feature = "simd_x86", since = "1.27.0")]
857pub fn _mm_cvtt_ss2si(a: __m128) -> i32 {
858 _mm_cvttss_si32(a)
859}
860
861#[inline]
865#[target_feature(enable = "sse")]
866#[stable(feature = "simd_x86", since = "1.27.0")]
869pub fn _mm_cvtss_f32(a: __m128) -> f32 {
870 unsafe { simd_extract!(a, 0) }
871}
872
873#[inline]
881#[target_feature(enable = "sse")]
882#[cfg_attr(test, assert_instr(cvtsi2ss))]
883#[stable(feature = "simd_x86", since = "1.27.0")]
884pub fn _mm_cvtsi32_ss(a: __m128, b: i32) -> __m128 {
885 unsafe { simd_insert!(a, 0, b as f32) }
886}
887
888#[inline]
892#[target_feature(enable = "sse")]
893#[cfg_attr(test, assert_instr(cvtsi2ss))]
894#[stable(feature = "simd_x86", since = "1.27.0")]
895pub fn _mm_cvt_si2ss(a: __m128, b: i32) -> __m128 {
896 _mm_cvtsi32_ss(a, b)
897}
898
899#[inline]
904#[target_feature(enable = "sse")]
905#[cfg_attr(test, assert_instr(movss))]
906#[stable(feature = "simd_x86", since = "1.27.0")]
907pub fn _mm_set_ss(a: f32) -> __m128 {
908 __m128([a, 0.0, 0.0, 0.0])
909}
910
911#[inline]
915#[target_feature(enable = "sse")]
916#[cfg_attr(test, assert_instr(shufps))]
917#[stable(feature = "simd_x86", since = "1.27.0")]
918pub fn _mm_set1_ps(a: f32) -> __m128 {
919 __m128([a, a, a, a])
920}
921
922#[inline]
926#[target_feature(enable = "sse")]
927#[cfg_attr(test, assert_instr(shufps))]
928#[stable(feature = "simd_x86", since = "1.27.0")]
929pub fn _mm_set_ps1(a: f32) -> __m128 {
930 _mm_set1_ps(a)
931}
932
933#[inline]
953#[target_feature(enable = "sse")]
954#[cfg_attr(test, assert_instr(unpcklps))]
955#[stable(feature = "simd_x86", since = "1.27.0")]
956pub fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
957 __m128([d, c, b, a])
958}
959
960#[inline]
971#[target_feature(enable = "sse")]
972#[cfg_attr(
973 all(test, any(target_env = "msvc", target_arch = "x86_64")),
974 assert_instr(unpcklps)
975)]
976#[cfg_attr(
978 all(test, all(not(target_env = "msvc"), target_arch = "x86")),
979 assert_instr(movaps)
980)]
981#[stable(feature = "simd_x86", since = "1.27.0")]
982pub fn _mm_setr_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
983 __m128([a, b, c, d])
984}
985
986#[inline]
990#[target_feature(enable = "sse")]
991#[cfg_attr(test, assert_instr(xorps))]
992#[stable(feature = "simd_x86", since = "1.27.0")]
993pub fn _mm_setzero_ps() -> __m128 {
994 const { unsafe { mem::zeroed() } }
995}
996
997#[inline]
1000#[allow(non_snake_case)]
1001#[unstable(feature = "stdarch_x86_mm_shuffle", issue = "111147")]
1002pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 {
1003 ((z << 6) | (y << 4) | (x << 2) | w) as i32
1004}
1005
1006#[inline]
1020#[target_feature(enable = "sse")]
1021#[cfg_attr(test, assert_instr(shufps, MASK = 3))]
1022#[rustc_legacy_const_generics(2)]
1023#[stable(feature = "simd_x86", since = "1.27.0")]
1024pub fn _mm_shuffle_ps<const MASK: i32>(a: __m128, b: __m128) -> __m128 {
1025 static_assert_uimm_bits!(MASK, 8);
1026 unsafe {
1027 simd_shuffle!(
1028 a,
1029 b,
1030 [
1031 MASK as u32 & 0b11,
1032 (MASK as u32 >> 2) & 0b11,
1033 ((MASK as u32 >> 4) & 0b11) + 4,
1034 ((MASK as u32 >> 6) & 0b11) + 4,
1035 ],
1036 )
1037 }
1038}
1039
1040#[inline]
1045#[target_feature(enable = "sse")]
1046#[cfg_attr(test, assert_instr(unpckhps))]
1047#[stable(feature = "simd_x86", since = "1.27.0")]
1048pub fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 {
1049 unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
1050}
1051
1052#[inline]
1057#[target_feature(enable = "sse")]
1058#[cfg_attr(test, assert_instr(unpcklps))]
1059#[stable(feature = "simd_x86", since = "1.27.0")]
1060pub fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 {
1061 unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
1062}
1063
1064#[inline]
1069#[target_feature(enable = "sse")]
1070#[cfg_attr(test, assert_instr(movhlps))]
1071#[stable(feature = "simd_x86", since = "1.27.0")]
1072pub fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 {
1073 unsafe { simd_shuffle!(a, b, [6, 7, 2, 3]) }
1075}
1076
1077#[inline]
1082#[target_feature(enable = "sse")]
1083#[cfg_attr(test, assert_instr(movlhps))]
1084#[stable(feature = "simd_x86", since = "1.27.0")]
1085pub fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 {
1086 unsafe { simd_shuffle!(a, b, [0, 1, 4, 5]) }
1087}
1088
1089#[inline]
1096#[target_feature(enable = "sse")]
1097#[cfg_attr(test, assert_instr(movmskps))]
1098#[stable(feature = "simd_x86", since = "1.27.0")]
1099pub fn _mm_movemask_ps(a: __m128) -> i32 {
1100 unsafe {
1103 let mask: i32x4 = simd_lt(transmute(a), i32x4::ZERO);
1104 simd_bitmask::<i32x4, u8>(mask).into()
1105 }
1106}
1107
1108#[inline]
1115#[target_feature(enable = "sse")]
1116#[cfg_attr(test, assert_instr(movss))]
1117#[stable(feature = "simd_x86", since = "1.27.0")]
1118pub unsafe fn _mm_load_ss(p: *const f32) -> __m128 {
1119 __m128([*p, 0.0, 0.0, 0.0])
1120}
1121
1122#[inline]
1130#[target_feature(enable = "sse")]
1131#[cfg_attr(test, assert_instr(movss))]
1132#[stable(feature = "simd_x86", since = "1.27.0")]
1133pub unsafe fn _mm_load1_ps(p: *const f32) -> __m128 {
1134 let a = *p;
1135 __m128([a, a, a, a])
1136}
1137
1138#[inline]
1142#[target_feature(enable = "sse")]
1143#[cfg_attr(test, assert_instr(movss))]
1144#[stable(feature = "simd_x86", since = "1.27.0")]
1145pub unsafe fn _mm_load_ps1(p: *const f32) -> __m128 {
1146 _mm_load1_ps(p)
1147}
1148
1149#[inline]
1160#[target_feature(enable = "sse")]
1161#[cfg_attr(
1164 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1165 assert_instr(movaps)
1166)]
1167#[stable(feature = "simd_x86", since = "1.27.0")]
1168#[allow(clippy::cast_ptr_alignment)]
1169pub unsafe fn _mm_load_ps(p: *const f32) -> __m128 {
1170 *(p as *const __m128)
1171}
1172
1173#[inline]
1183#[target_feature(enable = "sse")]
1184#[cfg_attr(test, assert_instr(movups))]
1185#[stable(feature = "simd_x86", since = "1.27.0")]
1186pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 {
1187 let mut dst = _mm_undefined_ps();
1190 ptr::copy_nonoverlapping(
1191 p as *const u8,
1192 ptr::addr_of_mut!(dst) as *mut u8,
1193 mem::size_of::<__m128>(),
1194 );
1195 dst
1196}
1197
1198#[inline]
1220#[target_feature(enable = "sse")]
1221#[cfg_attr(
1222 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1223 assert_instr(movaps)
1224)]
1225#[stable(feature = "simd_x86", since = "1.27.0")]
1226pub unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 {
1227 let a = _mm_load_ps(p);
1228 simd_shuffle!(a, a, [3, 2, 1, 0])
1229}
1230
1231#[inline]
1237#[target_feature(enable = "sse")]
1238#[cfg_attr(test, assert_instr(movss))]
1239#[stable(feature = "simd_x86", since = "1.27.0")]
1240pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) {
1241 *p = simd_extract!(a, 0);
1242}
1243
1244#[inline]
1263#[target_feature(enable = "sse")]
1264#[cfg_attr(
1265 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1266 assert_instr(movaps)
1267)]
1268#[stable(feature = "simd_x86", since = "1.27.0")]
1269#[allow(clippy::cast_ptr_alignment)]
1270pub unsafe fn _mm_store1_ps(p: *mut f32, a: __m128) {
1271 let b: __m128 = simd_shuffle!(a, a, [0, 0, 0, 0]);
1272 *(p as *mut __m128) = b;
1273}
1274
1275#[inline]
1279#[target_feature(enable = "sse")]
1280#[cfg_attr(
1281 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1282 assert_instr(movaps)
1283)]
1284#[stable(feature = "simd_x86", since = "1.27.0")]
1285pub unsafe fn _mm_store_ps1(p: *mut f32, a: __m128) {
1286 _mm_store1_ps(p, a);
1287}
1288
1289#[inline]
1301#[target_feature(enable = "sse")]
1302#[cfg_attr(
1303 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1304 assert_instr(movaps)
1305)]
1306#[stable(feature = "simd_x86", since = "1.27.0")]
1307#[allow(clippy::cast_ptr_alignment)]
1308pub unsafe fn _mm_store_ps(p: *mut f32, a: __m128) {
1309 *(p as *mut __m128) = a;
1310}
1311
1312#[inline]
1320#[target_feature(enable = "sse")]
1321#[cfg_attr(test, assert_instr(movups))]
1322#[stable(feature = "simd_x86", since = "1.27.0")]
1323pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) {
1324 ptr::copy_nonoverlapping(
1325 ptr::addr_of!(a) as *const u8,
1326 p as *mut u8,
1327 mem::size_of::<__m128>(),
1328 );
1329}
1330
1331#[inline]
1348#[target_feature(enable = "sse")]
1349#[cfg_attr(
1350 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1351 assert_instr(movaps)
1352)]
1353#[stable(feature = "simd_x86", since = "1.27.0")]
1354#[allow(clippy::cast_ptr_alignment)]
1355pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) {
1356 let b: __m128 = simd_shuffle!(a, a, [3, 2, 1, 0]);
1357 *(p as *mut __m128) = b;
1358}
1359
1360#[inline]
1370#[target_feature(enable = "sse")]
1371#[cfg_attr(test, assert_instr(movss))]
1372#[stable(feature = "simd_x86", since = "1.27.0")]
1373pub fn _mm_move_ss(a: __m128, b: __m128) -> __m128 {
1374 unsafe { simd_shuffle!(a, b, [4, 1, 2, 3]) }
1375}
1376
1377#[inline]
1445#[target_feature(enable = "sse")]
1446#[cfg_attr(test, assert_instr(sfence))]
1447#[stable(feature = "simd_x86", since = "1.27.0")]
1448pub fn _mm_sfence() {
1449 unsafe { sfence() }
1450}
1451
1452#[inline]
1467#[target_feature(enable = "sse")]
1468#[cfg_attr(test, assert_instr(stmxcsr))]
1469#[stable(feature = "simd_x86", since = "1.27.0")]
1470#[deprecated(
1471 since = "1.75.0",
1472 note = "see `_mm_getcsr` documentation - use inline assembly instead"
1473)]
1474pub unsafe fn _mm_getcsr() -> u32 {
1475 unsafe {
1476 let mut result = 0_i32;
1477 stmxcsr(ptr::addr_of_mut!(result) as *mut i8);
1478 result as u32
1479 }
1480}
1481
1482#[inline]
1616#[target_feature(enable = "sse")]
1617#[cfg_attr(test, assert_instr(ldmxcsr))]
1618#[stable(feature = "simd_x86", since = "1.27.0")]
1619#[deprecated(
1620 since = "1.75.0",
1621 note = "see `_mm_setcsr` documentation - use inline assembly instead"
1622)]
1623pub unsafe fn _mm_setcsr(val: u32) {
1624 ldmxcsr(ptr::addr_of!(val) as *const i8);
1625}
1626
1627#[stable(feature = "simd_x86", since = "1.27.0")]
1629pub const _MM_EXCEPT_INVALID: u32 = 0x0001;
1630#[stable(feature = "simd_x86", since = "1.27.0")]
1632pub const _MM_EXCEPT_DENORM: u32 = 0x0002;
1633#[stable(feature = "simd_x86", since = "1.27.0")]
1635pub const _MM_EXCEPT_DIV_ZERO: u32 = 0x0004;
1636#[stable(feature = "simd_x86", since = "1.27.0")]
1638pub const _MM_EXCEPT_OVERFLOW: u32 = 0x0008;
1639#[stable(feature = "simd_x86", since = "1.27.0")]
1641pub const _MM_EXCEPT_UNDERFLOW: u32 = 0x0010;
1642#[stable(feature = "simd_x86", since = "1.27.0")]
1644pub const _MM_EXCEPT_INEXACT: u32 = 0x0020;
1645#[stable(feature = "simd_x86", since = "1.27.0")]
1647pub const _MM_EXCEPT_MASK: u32 = 0x003f;
1648
1649#[stable(feature = "simd_x86", since = "1.27.0")]
1651pub const _MM_MASK_INVALID: u32 = 0x0080;
1652#[stable(feature = "simd_x86", since = "1.27.0")]
1654pub const _MM_MASK_DENORM: u32 = 0x0100;
1655#[stable(feature = "simd_x86", since = "1.27.0")]
1657pub const _MM_MASK_DIV_ZERO: u32 = 0x0200;
1658#[stable(feature = "simd_x86", since = "1.27.0")]
1660pub const _MM_MASK_OVERFLOW: u32 = 0x0400;
1661#[stable(feature = "simd_x86", since = "1.27.0")]
1663pub const _MM_MASK_UNDERFLOW: u32 = 0x0800;
1664#[stable(feature = "simd_x86", since = "1.27.0")]
1666pub const _MM_MASK_INEXACT: u32 = 0x1000;
1667#[stable(feature = "simd_x86", since = "1.27.0")]
1669pub const _MM_MASK_MASK: u32 = 0x1f80;
1670
1671#[stable(feature = "simd_x86", since = "1.27.0")]
1673pub const _MM_ROUND_NEAREST: u32 = 0x0000;
1674#[stable(feature = "simd_x86", since = "1.27.0")]
1676pub const _MM_ROUND_DOWN: u32 = 0x2000;
1677#[stable(feature = "simd_x86", since = "1.27.0")]
1679pub const _MM_ROUND_UP: u32 = 0x4000;
1680#[stable(feature = "simd_x86", since = "1.27.0")]
1682pub const _MM_ROUND_TOWARD_ZERO: u32 = 0x6000;
1683
1684#[stable(feature = "simd_x86", since = "1.27.0")]
1686pub const _MM_ROUND_MASK: u32 = 0x6000;
1687
1688#[stable(feature = "simd_x86", since = "1.27.0")]
1690pub const _MM_FLUSH_ZERO_MASK: u32 = 0x8000;
1691#[stable(feature = "simd_x86", since = "1.27.0")]
1693pub const _MM_FLUSH_ZERO_ON: u32 = 0x8000;
1694#[stable(feature = "simd_x86", since = "1.27.0")]
1696pub const _MM_FLUSH_ZERO_OFF: u32 = 0x0000;
1697
1698#[inline]
1702#[allow(deprecated)] #[allow(non_snake_case)]
1704#[target_feature(enable = "sse")]
1705#[stable(feature = "simd_x86", since = "1.27.0")]
1706#[deprecated(
1707 since = "1.75.0",
1708 note = "see `_mm_getcsr` documentation - use inline assembly instead"
1709)]
1710pub unsafe fn _MM_GET_EXCEPTION_MASK() -> u32 {
1711 _mm_getcsr() & _MM_MASK_MASK
1712}
1713
1714#[inline]
1718#[allow(deprecated)] #[allow(non_snake_case)]
1720#[target_feature(enable = "sse")]
1721#[stable(feature = "simd_x86", since = "1.27.0")]
1722#[deprecated(
1723 since = "1.75.0",
1724 note = "see `_mm_getcsr` documentation - use inline assembly instead"
1725)]
1726pub unsafe fn _MM_GET_EXCEPTION_STATE() -> u32 {
1727 _mm_getcsr() & _MM_EXCEPT_MASK
1728}
1729
1730#[inline]
1734#[allow(deprecated)] #[allow(non_snake_case)]
1736#[target_feature(enable = "sse")]
1737#[stable(feature = "simd_x86", since = "1.27.0")]
1738#[deprecated(
1739 since = "1.75.0",
1740 note = "see `_mm_getcsr` documentation - use inline assembly instead"
1741)]
1742pub unsafe fn _MM_GET_FLUSH_ZERO_MODE() -> u32 {
1743 _mm_getcsr() & _MM_FLUSH_ZERO_MASK
1744}
1745
1746#[inline]
1750#[allow(deprecated)] #[allow(non_snake_case)]
1752#[target_feature(enable = "sse")]
1753#[stable(feature = "simd_x86", since = "1.27.0")]
1754#[deprecated(
1755 since = "1.75.0",
1756 note = "see `_mm_getcsr` documentation - use inline assembly instead"
1757)]
1758pub unsafe fn _MM_GET_ROUNDING_MODE() -> u32 {
1759 _mm_getcsr() & _MM_ROUND_MASK
1760}
1761
1762#[inline]
1766#[allow(deprecated)] #[allow(non_snake_case)]
1768#[target_feature(enable = "sse")]
1769#[stable(feature = "simd_x86", since = "1.27.0")]
1770#[deprecated(
1771 since = "1.75.0",
1772 note = "see `_mm_setcsr` documentation - use inline assembly instead"
1773)]
1774pub unsafe fn _MM_SET_EXCEPTION_MASK(x: u32) {
1775 _mm_setcsr((_mm_getcsr() & !_MM_MASK_MASK) | (x & _MM_MASK_MASK))
1776}
1777
1778#[inline]
1782#[allow(deprecated)] #[allow(non_snake_case)]
1784#[target_feature(enable = "sse")]
1785#[stable(feature = "simd_x86", since = "1.27.0")]
1786#[deprecated(
1787 since = "1.75.0",
1788 note = "see `_mm_setcsr` documentation - use inline assembly instead"
1789)]
1790pub unsafe fn _MM_SET_EXCEPTION_STATE(x: u32) {
1791 _mm_setcsr((_mm_getcsr() & !_MM_EXCEPT_MASK) | (x & _MM_EXCEPT_MASK))
1792}
1793
1794#[inline]
1798#[allow(deprecated)] #[allow(non_snake_case)]
1800#[target_feature(enable = "sse")]
1801#[stable(feature = "simd_x86", since = "1.27.0")]
1802#[deprecated(
1803 since = "1.75.0",
1804 note = "see `_mm_setcsr` documentation - use inline assembly instead"
1805)]
1806pub unsafe fn _MM_SET_FLUSH_ZERO_MODE(x: u32) {
1807 _mm_setcsr((_mm_getcsr() & !_MM_FLUSH_ZERO_MASK) | (x & _MM_FLUSH_ZERO_MASK))
1808}
1809
1810#[inline]
1814#[allow(deprecated)] #[allow(non_snake_case)]
1816#[target_feature(enable = "sse")]
1817#[stable(feature = "simd_x86", since = "1.27.0")]
1818#[deprecated(
1819 since = "1.75.0",
1820 note = "see `_mm_setcsr` documentation - use inline assembly instead"
1821)]
1822pub unsafe fn _MM_SET_ROUNDING_MODE(x: u32) {
1823 _mm_setcsr((_mm_getcsr() & !_MM_ROUND_MASK) | (x & _MM_ROUND_MASK))
1824}
1825
1826#[stable(feature = "simd_x86", since = "1.27.0")]
1828pub const _MM_HINT_T0: i32 = 3;
1829
1830#[stable(feature = "simd_x86", since = "1.27.0")]
1832pub const _MM_HINT_T1: i32 = 2;
1833
1834#[stable(feature = "simd_x86", since = "1.27.0")]
1836pub const _MM_HINT_T2: i32 = 1;
1837
1838#[stable(feature = "simd_x86", since = "1.27.0")]
1840pub const _MM_HINT_NTA: i32 = 0;
1841
1842#[stable(feature = "simd_x86", since = "1.27.0")]
1844pub const _MM_HINT_ET0: i32 = 7;
1845
1846#[stable(feature = "simd_x86", since = "1.27.0")]
1848pub const _MM_HINT_ET1: i32 = 6;
1849
/// Issues a prefetch for the address `p` using the strategy encoded in `STRATEGY`.
///
/// `STRATEGY` must fit in three bits (checked at compile time) and is expected to be
/// one of the `_MM_HINT_*` constants. Bit 2 is passed to `llvm.prefetch` as the
/// `rw` argument and bits 0-1 as the `locality` argument.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(prefetcht0, STRATEGY = _MM_HINT_T0))]
#[cfg_attr(test, assert_instr(prefetcht1, STRATEGY = _MM_HINT_T1))]
#[cfg_attr(test, assert_instr(prefetcht2, STRATEGY = _MM_HINT_T2))]
#[cfg_attr(test, assert_instr(prefetchnta, STRATEGY = _MM_HINT_NTA))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_prefetch<const STRATEGY: i32>(p: *const i8) {
    // Reject any strategy value that does not fit in 3 unsigned bits.
    static_assert_uimm_bits!(STRATEGY, 3);
    unsafe {
        // Arguments: pointer, rw (bit 2 of STRATEGY), locality (bits 0-1), and a
        // constant 1 for the last `ty` parameter of `llvm.prefetch`.
        prefetch(p, (STRATEGY >> 2) & 1, STRATEGY & 3, 1);
    }
}
1910
1911#[inline]
1918#[target_feature(enable = "sse")]
1919#[stable(feature = "simd_x86", since = "1.27.0")]
1920pub fn _mm_undefined_ps() -> __m128 {
1921 const { unsafe { mem::zeroed() } }
1922}
1923
1924#[inline]
1928#[allow(non_snake_case)]
1929#[target_feature(enable = "sse")]
1930#[stable(feature = "simd_x86", since = "1.27.0")]
1931pub fn _MM_TRANSPOSE4_PS(
1932 row0: &mut __m128,
1933 row1: &mut __m128,
1934 row2: &mut __m128,
1935 row3: &mut __m128,
1936) {
1937 let tmp0 = _mm_unpacklo_ps(*row0, *row1);
1938 let tmp2 = _mm_unpacklo_ps(*row2, *row3);
1939 let tmp1 = _mm_unpackhi_ps(*row0, *row1);
1940 let tmp3 = _mm_unpackhi_ps(*row2, *row3);
1941
1942 *row0 = _mm_movelh_ps(tmp0, tmp2);
1943 *row1 = _mm_movehl_ps(tmp2, tmp0);
1944 *row2 = _mm_movelh_ps(tmp1, tmp3);
1945 *row3 = _mm_movehl_ps(tmp3, tmp1);
1946}
1947
// Declarations of the LLVM intrinsics used by this module. Each function is bound to
// its intrinsic through `link_name`; nothing here is implemented in Rust.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Reciprocal and reciprocal-square-root intrinsics (`ss` and `ps` forms).
    #[link_name = "llvm.x86.sse.rcp.ss"]
    fn rcpss(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rcp.ps"]
    fn rcpps(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rsqrt.ss"]
    fn rsqrtss(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rsqrt.ps"]
    fn rsqrtps(a: __m128) -> __m128;
    // Minimum / maximum intrinsics (`ss` and `ps` forms).
    #[link_name = "llvm.x86.sse.min.ss"]
    fn minss(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.min.ps"]
    fn minps(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.max.ss"]
    fn maxss(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.max.ps"]
    fn maxps(a: __m128, b: __m128) -> __m128;
    // Packed comparison; `imm8` selects the predicate and the result is a vector.
    #[link_name = "llvm.x86.sse.cmp.ps"]
    fn cmpps(a: __m128, b: __m128, imm8: i8) -> __m128;
    // `comi*` comparisons returning an `i32` result.
    #[link_name = "llvm.x86.sse.comieq.ss"]
    fn comieq_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comilt.ss"]
    fn comilt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comile.ss"]
    fn comile_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comigt.ss"]
    fn comigt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comige.ss"]
    fn comige_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comineq.ss"]
    fn comineq_ss(a: __m128, b: __m128) -> i32;
    // `ucomi*` comparisons returning an `i32` result.
    #[link_name = "llvm.x86.sse.ucomieq.ss"]
    fn ucomieq_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomilt.ss"]
    fn ucomilt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomile.ss"]
    fn ucomile_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomigt.ss"]
    fn ucomigt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomige.ss"]
    fn ucomige_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomineq.ss"]
    fn ucomineq_ss(a: __m128, b: __m128) -> i32;
    // Float-to-`i32` conversions (`cvtss2si` and `cvttss2si`).
    #[link_name = "llvm.x86.sse.cvtss2si"]
    fn cvtss2si(a: __m128) -> i32;
    #[link_name = "llvm.x86.sse.cvttss2si"]
    fn cvttss2si(a: __m128) -> i32;
    // Store fence.
    #[link_name = "llvm.x86.sse.sfence"]
    fn sfence();
    // MXCSR store / load through a pointer.
    #[link_name = "llvm.x86.sse.stmxcsr"]
    fn stmxcsr(p: *mut i8);
    #[link_name = "llvm.x86.sse.ldmxcsr"]
    fn ldmxcsr(p: *const i8);
    // Generic LLVM prefetch; called by `_mm_prefetch` with constant `rw`, `loc`, `ty`.
    #[link_name = "llvm.prefetch"]
    fn prefetch(p: *const i8, rw: i32, loc: i32, ty: i32);
    // Scalar comparison; `imm8` selects the predicate and the result is a vector.
    #[link_name = "llvm.x86.sse.cmp.ss"]
    fn cmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
}
2007
/// Stores `a` to the address `mem_addr` by emitting a `movntps` instruction through
/// inline assembly.
///
/// # Safety
///
/// NOTE(review): presumably `mem_addr` must be valid for a 16-byte write and aligned
/// as `movntps` requires — confirm against the instruction reference before relying
/// on this.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movntps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) {
    crate::arch::asm!(
        // `vps!` builds the instruction template; it presumably supplies the `{p}`
        // memory operand — confirm against the macro definition.
        vps!("movntps", ",{a}"),
        // `p`: destination address, `a`: the vector to store.
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
2037
2038#[cfg(test)]
2039mod tests {
2040 use crate::{hint::black_box, mem::transmute, ptr};
2041 use std::boxed;
2042 use stdarch_test::simd_test;
2043
2044 use crate::core_arch::{simd::*, x86::*};
2045
2046 const NAN: f32 = f32::NAN;
2047
2048 #[simd_test(enable = "sse")]
2049 unsafe fn test_mm_add_ps() {
2050 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2051 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2052 let r = _mm_add_ps(a, b);
2053 assert_eq_m128(r, _mm_setr_ps(-101.0, 25.0, 0.0, -15.0));
2054 }
2055
2056 #[simd_test(enable = "sse")]
2057 unsafe fn test_mm_add_ss() {
2058 let a = _mm_set_ps(-1.0, 5.0, 0.0, -10.0);
2059 let b = _mm_set_ps(-100.0, 20.0, 0.0, -5.0);
2060 let r = _mm_add_ss(a, b);
2061 assert_eq_m128(r, _mm_set_ps(-1.0, 5.0, 0.0, -15.0));
2062 }
2063
2064 #[simd_test(enable = "sse")]
2065 unsafe fn test_mm_sub_ps() {
2066 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2067 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2068 let r = _mm_sub_ps(a, b);
2069 assert_eq_m128(r, _mm_setr_ps(99.0, -15.0, 0.0, -5.0));
2070 }
2071
2072 #[simd_test(enable = "sse")]
2073 unsafe fn test_mm_sub_ss() {
2074 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2075 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2076 let r = _mm_sub_ss(a, b);
2077 assert_eq_m128(r, _mm_setr_ps(99.0, 5.0, 0.0, -10.0));
2078 }
2079
2080 #[simd_test(enable = "sse")]
2081 unsafe fn test_mm_mul_ps() {
2082 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2083 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2084 let r = _mm_mul_ps(a, b);
2085 assert_eq_m128(r, _mm_setr_ps(100.0, 100.0, 0.0, 50.0));
2086 }
2087
2088 #[simd_test(enable = "sse")]
2089 unsafe fn test_mm_mul_ss() {
2090 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2091 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2092 let r = _mm_mul_ss(a, b);
2093 assert_eq_m128(r, _mm_setr_ps(100.0, 5.0, 0.0, -10.0));
2094 }
2095
2096 #[simd_test(enable = "sse")]
2097 unsafe fn test_mm_div_ps() {
2098 let a = _mm_setr_ps(-1.0, 5.0, 2.0, -10.0);
2099 let b = _mm_setr_ps(-100.0, 20.0, 0.2, -5.0);
2100 let r = _mm_div_ps(a, b);
2101 assert_eq_m128(r, _mm_setr_ps(0.01, 0.25, 10.0, 2.0));
2102 }
2103
2104 #[simd_test(enable = "sse")]
2105 unsafe fn test_mm_div_ss() {
2106 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2107 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2108 let r = _mm_div_ss(a, b);
2109 assert_eq_m128(r, _mm_setr_ps(0.01, 5.0, 0.0, -10.0));
2110 }
2111
2112 #[simd_test(enable = "sse")]
2113 unsafe fn test_mm_sqrt_ss() {
2114 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2115 let r = _mm_sqrt_ss(a);
2116 let e = _mm_setr_ps(2.0, 13.0, 16.0, 100.0);
2117 assert_eq_m128(r, e);
2118 }
2119
2120 #[simd_test(enable = "sse")]
2121 unsafe fn test_mm_sqrt_ps() {
2122 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2123 let r = _mm_sqrt_ps(a);
2124 let e = _mm_setr_ps(2.0, 3.6055512, 4.0, 10.0);
2125 assert_eq_m128(r, e);
2126 }
2127
2128 #[simd_test(enable = "sse")]
2129 unsafe fn test_mm_rcp_ss() {
2130 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2131 let r = _mm_rcp_ss(a);
2132 let e = _mm_setr_ps(0.24993896, 13.0, 16.0, 100.0);
2133 let rel_err = 0.00048828125;
2134 assert_approx_eq!(get_m128(r, 0), get_m128(e, 0), 2. * rel_err);
2135 for i in 1..4 {
2136 assert_eq!(get_m128(r, i), get_m128(e, i));
2137 }
2138 }
2139
2140 #[simd_test(enable = "sse")]
2141 unsafe fn test_mm_rcp_ps() {
2142 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2143 let r = _mm_rcp_ps(a);
2144 let e = _mm_setr_ps(0.24993896, 0.0769043, 0.06248474, 0.0099983215);
2145 let rel_err = 0.00048828125;
2146 for i in 0..4 {
2147 assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err);
2148 }
2149 }
2150
2151 #[simd_test(enable = "sse")]
2152 unsafe fn test_mm_rsqrt_ss() {
2153 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2154 let r = _mm_rsqrt_ss(a);
2155 let e = _mm_setr_ps(0.49987793, 13.0, 16.0, 100.0);
2156 let rel_err = 0.00048828125;
2157 for i in 0..4 {
2158 assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err);
2159 }
2160 }
2161
2162 #[simd_test(enable = "sse")]
2163 unsafe fn test_mm_rsqrt_ps() {
2164 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2165 let r = _mm_rsqrt_ps(a);
2166 let e = _mm_setr_ps(0.49987793, 0.2772827, 0.24993896, 0.099990845);
2167 let rel_err = 0.00048828125;
2168 for i in 0..4 {
2169 assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err);
2170 }
2171 }
2172
2173 #[simd_test(enable = "sse")]
2174 unsafe fn test_mm_min_ss() {
2175 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2176 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2177 let r = _mm_min_ss(a, b);
2178 assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
2179 }
2180
2181 #[simd_test(enable = "sse")]
2182 unsafe fn test_mm_min_ps() {
2183 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2184 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2185 let r = _mm_min_ps(a, b);
2186 assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
2187
2188 let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
2194 let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
2195 let r1: [u8; 16] = transmute(_mm_min_ps(a, b));
2196 let r2: [u8; 16] = transmute(_mm_min_ps(b, a));
2197 let a: [u8; 16] = transmute(a);
2198 let b: [u8; 16] = transmute(b);
2199 assert_eq!(r1, b);
2200 assert_eq!(r2, a);
2201 assert_ne!(a, b); }
2203
2204 #[simd_test(enable = "sse")]
2205 unsafe fn test_mm_max_ss() {
2206 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2207 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2208 let r = _mm_max_ss(a, b);
2209 assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, -10.0));
2210 }
2211
2212 #[simd_test(enable = "sse")]
2213 unsafe fn test_mm_max_ps() {
2214 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2215 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2216 let r = _mm_max_ps(a, b);
2217 assert_eq_m128(r, _mm_setr_ps(-1.0, 20.0, 0.0, -5.0));
2218
2219 let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
2221 let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
2222 let r1: [u8; 16] = transmute(_mm_max_ps(a, b));
2223 let r2: [u8; 16] = transmute(_mm_max_ps(b, a));
2224 let a: [u8; 16] = transmute(a);
2225 let b: [u8; 16] = transmute(b);
2226 assert_eq!(r1, b);
2227 assert_eq!(r2, a);
2228 assert_ne!(a, b); }
2230
2231 #[simd_test(enable = "sse")]
2232 unsafe fn test_mm_and_ps() {
2233 let a = transmute(u32x4::splat(0b0011));
2234 let b = transmute(u32x4::splat(0b0101));
2235 let r = _mm_and_ps(*black_box(&a), *black_box(&b));
2236 let e = transmute(u32x4::splat(0b0001));
2237 assert_eq_m128(r, e);
2238 }
2239
2240 #[simd_test(enable = "sse")]
2241 unsafe fn test_mm_andnot_ps() {
2242 let a = transmute(u32x4::splat(0b0011));
2243 let b = transmute(u32x4::splat(0b0101));
2244 let r = _mm_andnot_ps(*black_box(&a), *black_box(&b));
2245 let e = transmute(u32x4::splat(0b0100));
2246 assert_eq_m128(r, e);
2247 }
2248
2249 #[simd_test(enable = "sse")]
2250 unsafe fn test_mm_or_ps() {
2251 let a = transmute(u32x4::splat(0b0011));
2252 let b = transmute(u32x4::splat(0b0101));
2253 let r = _mm_or_ps(*black_box(&a), *black_box(&b));
2254 let e = transmute(u32x4::splat(0b0111));
2255 assert_eq_m128(r, e);
2256 }
2257
2258 #[simd_test(enable = "sse")]
2259 unsafe fn test_mm_xor_ps() {
2260 let a = transmute(u32x4::splat(0b0011));
2261 let b = transmute(u32x4::splat(0b0101));
2262 let r = _mm_xor_ps(*black_box(&a), *black_box(&b));
2263 let e = transmute(u32x4::splat(0b0110));
2264 assert_eq_m128(r, e);
2265 }
2266
2267 #[simd_test(enable = "sse")]
2268 unsafe fn test_mm_cmpeq_ss() {
2269 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2270 let b = _mm_setr_ps(-1.0, 5.0, 6.0, 7.0);
2271 let r: u32x4 = transmute(_mm_cmpeq_ss(a, b));
2272 let e: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0), 2.0, 3.0, 4.0));
2273 assert_eq!(r, e);
2274
2275 let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2276 let r2: u32x4 = transmute(_mm_cmpeq_ss(a, b2));
2277 let e2: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0xffffffff), 2.0, 3.0, 4.0));
2278 assert_eq!(r2, e2);
2279 }
2280
2281 #[simd_test(enable = "sse")]
2282 unsafe fn test_mm_cmplt_ss() {
2283 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2284 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2285 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2286 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2287
2288 let b1 = 0u32; let c1 = 0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmplt_ss(a, b));
2293 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2294 assert_eq!(rb, eb);
2295
2296 let rc: u32x4 = transmute(_mm_cmplt_ss(a, c));
2297 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2298 assert_eq!(rc, ec);
2299
2300 let rd: u32x4 = transmute(_mm_cmplt_ss(a, d));
2301 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2302 assert_eq!(rd, ed);
2303 }
2304
2305 #[simd_test(enable = "sse")]
2306 unsafe fn test_mm_cmple_ss() {
2307 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2308 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2309 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2310 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2311
2312 let b1 = 0u32; let c1 = !0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmple_ss(a, b));
2317 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2318 assert_eq!(rb, eb);
2319
2320 let rc: u32x4 = transmute(_mm_cmple_ss(a, c));
2321 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2322 assert_eq!(rc, ec);
2323
2324 let rd: u32x4 = transmute(_mm_cmple_ss(a, d));
2325 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2326 assert_eq!(rd, ed);
2327 }
2328
2329 #[simd_test(enable = "sse")]
2330 unsafe fn test_mm_cmpgt_ss() {
2331 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2332 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2333 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2334 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2335
2336 let b1 = !0u32; let c1 = 0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpgt_ss(a, b));
2341 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2342 assert_eq!(rb, eb);
2343
2344 let rc: u32x4 = transmute(_mm_cmpgt_ss(a, c));
2345 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2346 assert_eq!(rc, ec);
2347
2348 let rd: u32x4 = transmute(_mm_cmpgt_ss(a, d));
2349 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2350 assert_eq!(rd, ed);
2351 }
2352
2353 #[simd_test(enable = "sse")]
2354 unsafe fn test_mm_cmpge_ss() {
2355 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2356 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2357 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2358 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2359
2360 let b1 = !0u32; let c1 = !0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpge_ss(a, b));
2365 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2366 assert_eq!(rb, eb);
2367
2368 let rc: u32x4 = transmute(_mm_cmpge_ss(a, c));
2369 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2370 assert_eq!(rc, ec);
2371
2372 let rd: u32x4 = transmute(_mm_cmpge_ss(a, d));
2373 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2374 assert_eq!(rd, ed);
2375 }
2376
2377 #[simd_test(enable = "sse")]
2378 unsafe fn test_mm_cmpneq_ss() {
2379 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2380 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2381 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2382 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2383
2384 let b1 = !0u32; let c1 = 0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmpneq_ss(a, b));
2389 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2390 assert_eq!(rb, eb);
2391
2392 let rc: u32x4 = transmute(_mm_cmpneq_ss(a, c));
2393 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2394 assert_eq!(rc, ec);
2395
2396 let rd: u32x4 = transmute(_mm_cmpneq_ss(a, d));
2397 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2398 assert_eq!(rd, ed);
2399 }
2400
2401 #[simd_test(enable = "sse")]
2402 unsafe fn test_mm_cmpnlt_ss() {
2403 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2409 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2410 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2411 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2412
2413 let b1 = !0u32; let c1 = !0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpnlt_ss(a, b));
2418 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2419 assert_eq!(rb, eb);
2420
2421 let rc: u32x4 = transmute(_mm_cmpnlt_ss(a, c));
2422 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2423 assert_eq!(rc, ec);
2424
2425 let rd: u32x4 = transmute(_mm_cmpnlt_ss(a, d));
2426 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2427 assert_eq!(rd, ed);
2428 }
2429
2430 #[simd_test(enable = "sse")]
2431 unsafe fn test_mm_cmpnle_ss() {
2432 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2438 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2439 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2440 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2441
2442 let b1 = !0u32; let c1 = 0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpnle_ss(a, b));
2447 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2448 assert_eq!(rb, eb);
2449
2450 let rc: u32x4 = transmute(_mm_cmpnle_ss(a, c));
2451 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2452 assert_eq!(rc, ec);
2453
2454 let rd: u32x4 = transmute(_mm_cmpnle_ss(a, d));
2455 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2456 assert_eq!(rd, ed);
2457 }
2458
2459 #[simd_test(enable = "sse")]
2460 unsafe fn test_mm_cmpngt_ss() {
2461 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2467 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2468 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2469 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2470
2471 let b1 = 0u32; let c1 = !0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmpngt_ss(a, b));
2476 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2477 assert_eq!(rb, eb);
2478
2479 let rc: u32x4 = transmute(_mm_cmpngt_ss(a, c));
2480 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2481 assert_eq!(rc, ec);
2482
2483 let rd: u32x4 = transmute(_mm_cmpngt_ss(a, d));
2484 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2485 assert_eq!(rd, ed);
2486 }
2487
2488 #[simd_test(enable = "sse")]
2489 unsafe fn test_mm_cmpnge_ss() {
2490 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2496 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2497 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2498 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2499
2500 let b1 = 0u32; let c1 = 0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmpnge_ss(a, b));
2505 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2506 assert_eq!(rb, eb);
2507
2508 let rc: u32x4 = transmute(_mm_cmpnge_ss(a, c));
2509 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2510 assert_eq!(rc, ec);
2511
2512 let rd: u32x4 = transmute(_mm_cmpnge_ss(a, d));
2513 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2514 assert_eq!(rd, ed);
2515 }
2516
2517 #[simd_test(enable = "sse")]
2518 unsafe fn test_mm_cmpord_ss() {
2519 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2520 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2521 let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
2522 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2523
2524 let b1 = !0u32; let c1 = 0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmpord_ss(a, b));
2529 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2530 assert_eq!(rb, eb);
2531
2532 let rc: u32x4 = transmute(_mm_cmpord_ss(a, c));
2533 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2534 assert_eq!(rc, ec);
2535
2536 let rd: u32x4 = transmute(_mm_cmpord_ss(a, d));
2537 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2538 assert_eq!(rd, ed);
2539 }
2540
2541 #[simd_test(enable = "sse")]
2542 unsafe fn test_mm_cmpunord_ss() {
2543 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2544 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2545 let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
2546 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2547
2548 let b1 = 0u32; let c1 = !0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpunord_ss(a, b));
2553 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2554 assert_eq!(rb, eb);
2555
2556 let rc: u32x4 = transmute(_mm_cmpunord_ss(a, c));
2557 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2558 assert_eq!(rc, ec);
2559
2560 let rd: u32x4 = transmute(_mm_cmpunord_ss(a, d));
2561 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2562 assert_eq!(rd, ed);
2563 }
2564
2565 #[simd_test(enable = "sse")]
2566 unsafe fn test_mm_cmpeq_ps() {
2567 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2568 let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2569 let tru = !0u32;
2570 let fls = 0u32;
2571
2572 let e = u32x4::new(fls, fls, tru, fls);
2573 let r: u32x4 = transmute(_mm_cmpeq_ps(a, b));
2574 assert_eq!(r, e);
2575 }
2576
2577 #[simd_test(enable = "sse")]
2578 unsafe fn test_mm_cmplt_ps() {
2579 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2580 let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2581 let tru = !0u32;
2582 let fls = 0u32;
2583
2584 let e = u32x4::new(tru, fls, fls, fls);
2585 let r: u32x4 = transmute(_mm_cmplt_ps(a, b));
2586 assert_eq!(r, e);
2587 }
2588
2589 #[simd_test(enable = "sse")]
2590 unsafe fn test_mm_cmple_ps() {
2591 let a = _mm_setr_ps(10.0, 50.0, 1.0, 4.0);
2592 let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2593 let tru = !0u32;
2594 let fls = 0u32;
2595
2596 let e = u32x4::new(tru, fls, tru, fls);
2597 let r: u32x4 = transmute(_mm_cmple_ps(a, b));
2598 assert_eq!(r, e);
2599 }
2600
2601 #[simd_test(enable = "sse")]
2602 unsafe fn test_mm_cmpgt_ps() {
2603 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2604 let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
2605 let tru = !0u32;
2606 let fls = 0u32;
2607
2608 let e = u32x4::new(fls, tru, fls, fls);
2609 let r: u32x4 = transmute(_mm_cmpgt_ps(a, b));
2610 assert_eq!(r, e);
2611 }
2612
2613 #[simd_test(enable = "sse")]
2614 unsafe fn test_mm_cmpge_ps() {
2615 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2616 let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
2617 let tru = !0u32;
2618 let fls = 0u32;
2619
2620 let e = u32x4::new(fls, tru, tru, fls);
2621 let r: u32x4 = transmute(_mm_cmpge_ps(a, b));
2622 assert_eq!(r, e);
2623 }
2624
2625 #[simd_test(enable = "sse")]
2626 unsafe fn test_mm_cmpneq_ps() {
2627 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2628 let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2629 let tru = !0u32;
2630 let fls = 0u32;
2631
2632 let e = u32x4::new(tru, tru, fls, tru);
2633 let r: u32x4 = transmute(_mm_cmpneq_ps(a, b));
2634 assert_eq!(r, e);
2635 }
2636
2637 #[simd_test(enable = "sse")]
2638 unsafe fn test_mm_cmpnlt_ps() {
2639 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2640 let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2641 let tru = !0u32;
2642 let fls = 0u32;
2643
2644 let e = u32x4::new(fls, tru, tru, tru);
2645 let r: u32x4 = transmute(_mm_cmpnlt_ps(a, b));
2646 assert_eq!(r, e);
2647 }
2648
2649 #[simd_test(enable = "sse")]
2650 unsafe fn test_mm_cmpnle_ps() {
2651 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2652 let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2653 let tru = !0u32;
2654 let fls = 0u32;
2655
2656 let e = u32x4::new(fls, tru, fls, tru);
2657 let r: u32x4 = transmute(_mm_cmpnle_ps(a, b));
2658 assert_eq!(r, e);
2659 }
2660
2661 #[simd_test(enable = "sse")]
2662 unsafe fn test_mm_cmpngt_ps() {
2663 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2664 let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2665 let tru = !0u32;
2666 let fls = 0u32;
2667
2668 let e = u32x4::new(tru, fls, tru, tru);
2669 let r: u32x4 = transmute(_mm_cmpngt_ps(a, b));
2670 assert_eq!(r, e);
2671 }
2672
2673 #[simd_test(enable = "sse")]
2674 unsafe fn test_mm_cmpnge_ps() {
2675 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2676 let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2677 let tru = !0u32;
2678 let fls = 0u32;
2679
2680 let e = u32x4::new(tru, fls, fls, tru);
2681 let r: u32x4 = transmute(_mm_cmpnge_ps(a, b));
2682 assert_eq!(r, e);
2683 }
2684
2685 #[simd_test(enable = "sse")]
2686 unsafe fn test_mm_cmpord_ps() {
2687 let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
2688 let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
2689 let tru = !0u32;
2690 let fls = 0u32;
2691
2692 let e = u32x4::new(tru, fls, fls, fls);
2693 let r: u32x4 = transmute(_mm_cmpord_ps(a, b));
2694 assert_eq!(r, e);
2695 }
2696
2697 #[simd_test(enable = "sse")]
2698 unsafe fn test_mm_cmpunord_ps() {
2699 let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
2700 let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
2701 let tru = !0u32;
2702 let fls = 0u32;
2703
2704 let e = u32x4::new(fls, tru, tru, tru);
2705 let r: u32x4 = transmute(_mm_cmpunord_ps(a, b));
2706 assert_eq!(r, e);
2707 }
2708
2709 #[simd_test(enable = "sse")]
2710 unsafe fn test_mm_comieq_ss() {
2711 let aa = &[3.0f32, 12.0, 23.0, NAN];
2712 let bb = &[3.0f32, 47.5, 1.5, NAN];
2713
2714 let ee = &[1i32, 0, 0, 0];
2715
2716 for i in 0..4 {
2717 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2718 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2719
2720 let r = _mm_comieq_ss(a, b);
2721
2722 assert_eq!(
2723 ee[i], r,
2724 "_mm_comieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2725 a, b, r, ee[i], i
2726 );
2727 }
2728 }
2729
2730 #[simd_test(enable = "sse")]
2731 unsafe fn test_mm_comilt_ss() {
2732 let aa = &[3.0f32, 12.0, 23.0, NAN];
2733 let bb = &[3.0f32, 47.5, 1.5, NAN];
2734
2735 let ee = &[0i32, 1, 0, 0];
2736
2737 for i in 0..4 {
2738 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2739 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2740
2741 let r = _mm_comilt_ss(a, b);
2742
2743 assert_eq!(
2744 ee[i], r,
2745 "_mm_comilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
2746 a, b, r, ee[i], i
2747 );
2748 }
2749 }
2750
2751 #[simd_test(enable = "sse")]
2752 unsafe fn test_mm_comile_ss() {
2753 let aa = &[3.0f32, 12.0, 23.0, NAN];
2754 let bb = &[3.0f32, 47.5, 1.5, NAN];
2755
2756 let ee = &[1i32, 1, 0, 0];
2757
2758 for i in 0..4 {
2759 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2760 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2761
2762 let r = _mm_comile_ss(a, b);
2763
2764 assert_eq!(
2765 ee[i], r,
2766 "_mm_comile_ss({:?}, {:?}) = {}, expected: {} (i={})",
2767 a, b, r, ee[i], i
2768 );
2769 }
2770 }
2771
2772 #[simd_test(enable = "sse")]
2773 unsafe fn test_mm_comigt_ss() {
2774 let aa = &[3.0f32, 12.0, 23.0, NAN];
2775 let bb = &[3.0f32, 47.5, 1.5, NAN];
2776
2777 let ee = &[1i32, 0, 1, 0];
2778
2779 for i in 0..4 {
2780 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2781 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2782
2783 let r = _mm_comige_ss(a, b);
2784
2785 assert_eq!(
2786 ee[i], r,
2787 "_mm_comige_ss({:?}, {:?}) = {}, expected: {} (i={})",
2788 a, b, r, ee[i], i
2789 );
2790 }
2791 }
2792
2793 #[simd_test(enable = "sse")]
2794 unsafe fn test_mm_comineq_ss() {
2795 let aa = &[3.0f32, 12.0, 23.0, NAN];
2796 let bb = &[3.0f32, 47.5, 1.5, NAN];
2797
2798 let ee = &[0i32, 1, 1, 1];
2799
2800 for i in 0..4 {
2801 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2802 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2803
2804 let r = _mm_comineq_ss(a, b);
2805
2806 assert_eq!(
2807 ee[i], r,
2808 "_mm_comineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2809 a, b, r, ee[i], i
2810 );
2811 }
2812 }
2813
2814 #[simd_test(enable = "sse")]
2815 unsafe fn test_mm_ucomieq_ss() {
2816 let aa = &[3.0f32, 12.0, 23.0, NAN];
2817 let bb = &[3.0f32, 47.5, 1.5, NAN];
2818
2819 let ee = &[1i32, 0, 0, 0];
2820
2821 for i in 0..4 {
2822 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2823 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2824
2825 let r = _mm_ucomieq_ss(a, b);
2826
2827 assert_eq!(
2828 ee[i], r,
2829 "_mm_ucomieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2830 a, b, r, ee[i], i
2831 );
2832 }
2833 }
2834
2835 #[simd_test(enable = "sse")]
2836 unsafe fn test_mm_ucomilt_ss() {
2837 let aa = &[3.0f32, 12.0, 23.0, NAN];
2838 let bb = &[3.0f32, 47.5, 1.5, NAN];
2839
2840 let ee = &[0i32, 1, 0, 0];
2841
2842 for i in 0..4 {
2843 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2844 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2845
2846 let r = _mm_ucomilt_ss(a, b);
2847
2848 assert_eq!(
2849 ee[i], r,
2850 "_mm_ucomilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
2851 a, b, r, ee[i], i
2852 );
2853 }
2854 }
2855
2856 #[simd_test(enable = "sse")]
2857 unsafe fn test_mm_ucomile_ss() {
2858 let aa = &[3.0f32, 12.0, 23.0, NAN];
2859 let bb = &[3.0f32, 47.5, 1.5, NAN];
2860
2861 let ee = &[1i32, 1, 0, 0];
2862
2863 for i in 0..4 {
2864 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2865 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2866
2867 let r = _mm_ucomile_ss(a, b);
2868
2869 assert_eq!(
2870 ee[i], r,
2871 "_mm_ucomile_ss({:?}, {:?}) = {}, expected: {} (i={})",
2872 a, b, r, ee[i], i
2873 );
2874 }
2875 }
2876
2877 #[simd_test(enable = "sse")]
2878 unsafe fn test_mm_ucomigt_ss() {
2879 let aa = &[3.0f32, 12.0, 23.0, NAN];
2880 let bb = &[3.0f32, 47.5, 1.5, NAN];
2881
2882 let ee = &[0i32, 0, 1, 0];
2883
2884 for i in 0..4 {
2885 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2886 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2887
2888 let r = _mm_ucomigt_ss(a, b);
2889
2890 assert_eq!(
2891 ee[i], r,
2892 "_mm_ucomigt_ss({:?}, {:?}) = {}, expected: {} (i={})",
2893 a, b, r, ee[i], i
2894 );
2895 }
2896 }
2897
2898 #[simd_test(enable = "sse")]
2899 unsafe fn test_mm_ucomige_ss() {
2900 let aa = &[3.0f32, 12.0, 23.0, NAN];
2901 let bb = &[3.0f32, 47.5, 1.5, NAN];
2902
2903 let ee = &[1i32, 0, 1, 0];
2904
2905 for i in 0..4 {
2906 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2907 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2908
2909 let r = _mm_ucomige_ss(a, b);
2910
2911 assert_eq!(
2912 ee[i], r,
2913 "_mm_ucomige_ss({:?}, {:?}) = {}, expected: {} (i={})",
2914 a, b, r, ee[i], i
2915 );
2916 }
2917 }
2918
2919 #[simd_test(enable = "sse")]
2920 unsafe fn test_mm_ucomineq_ss() {
2921 let aa = &[3.0f32, 12.0, 23.0, NAN];
2922 let bb = &[3.0f32, 47.5, 1.5, NAN];
2923
2924 let ee = &[0i32, 1, 1, 1];
2925
2926 for i in 0..4 {
2927 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2928 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2929
2930 let r = _mm_ucomineq_ss(a, b);
2931
2932 assert_eq!(
2933 ee[i], r,
2934 "_mm_ucomineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2935 a, b, r, ee[i], i
2936 );
2937 }
2938 }
2939
2940 #[simd_test(enable = "sse")]
2941 unsafe fn test_mm_cvtss_si32() {
2942 let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1];
2943 let result = &[42i32, -3, i32::MIN, 0, i32::MIN, 2147483520];
2944 for i in 0..inputs.len() {
2945 let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0);
2946 let e = result[i];
2947 let r = _mm_cvtss_si32(x);
2948 assert_eq!(
2949 e, r,
2950 "TestCase #{} _mm_cvtss_si32({:?}) = {}, expected: {}",
2951 i, x, r, e
2952 );
2953 }
2954 }
2955
2956 #[simd_test(enable = "sse")]
2957 unsafe fn test_mm_cvttss_si32() {
2958 let inputs = &[
2959 (42.0f32, 42i32),
2960 (-31.4, -31),
2961 (-33.5, -33),
2962 (-34.5, -34),
2963 (10.999, 10),
2964 (-5.99, -5),
2965 (4.0e10, i32::MIN),
2966 (4.0e-10, 0),
2967 (NAN, i32::MIN),
2968 (2147483500.1, 2147483520),
2969 ];
2970 for (i, &(xi, e)) in inputs.iter().enumerate() {
2971 let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
2972 let r = _mm_cvttss_si32(x);
2973 assert_eq!(
2974 e, r,
2975 "TestCase #{} _mm_cvttss_si32({:?}) = {}, expected: {}",
2976 i, x, r, e
2977 );
2978 }
2979 }
2980
2981 #[simd_test(enable = "sse")]
2982 unsafe fn test_mm_cvtsi32_ss() {
2983 let inputs = &[
2984 (4555i32, 4555.0f32),
2985 (322223333, 322223330.0),
2986 (-432, -432.0),
2987 (-322223333, -322223330.0),
2988 ];
2989
2990 for &(x, f) in inputs.iter() {
2991 let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
2992 let r = _mm_cvtsi32_ss(a, x);
2993 let e = _mm_setr_ps(f, 6.0, 7.0, 8.0);
2994 assert_eq_m128(e, r);
2995 }
2996 }
2997
2998 #[simd_test(enable = "sse")]
2999 unsafe fn test_mm_cvtss_f32() {
3000 let a = _mm_setr_ps(312.0134, 5.0, 6.0, 7.0);
3001 assert_eq!(_mm_cvtss_f32(a), 312.0134);
3002 }
3003
3004 #[simd_test(enable = "sse")]
3005 unsafe fn test_mm_set_ss() {
3006 let r = _mm_set_ss(black_box(4.25));
3007 assert_eq_m128(r, _mm_setr_ps(4.25, 0.0, 0.0, 0.0));
3008 }
3009
3010 #[simd_test(enable = "sse")]
3011 unsafe fn test_mm_set1_ps() {
3012 let r1 = _mm_set1_ps(black_box(4.25));
3013 let r2 = _mm_set_ps1(black_box(4.25));
3014 assert_eq!(get_m128(r1, 0), 4.25);
3015 assert_eq!(get_m128(r1, 1), 4.25);
3016 assert_eq!(get_m128(r1, 2), 4.25);
3017 assert_eq!(get_m128(r1, 3), 4.25);
3018 assert_eq!(get_m128(r2, 0), 4.25);
3019 assert_eq!(get_m128(r2, 1), 4.25);
3020 assert_eq!(get_m128(r2, 2), 4.25);
3021 assert_eq!(get_m128(r2, 3), 4.25);
3022 }
3023
3024 #[simd_test(enable = "sse")]
3025 unsafe fn test_mm_set_ps() {
3026 let r = _mm_set_ps(
3027 black_box(1.0),
3028 black_box(2.0),
3029 black_box(3.0),
3030 black_box(4.0),
3031 );
3032 assert_eq!(get_m128(r, 0), 4.0);
3033 assert_eq!(get_m128(r, 1), 3.0);
3034 assert_eq!(get_m128(r, 2), 2.0);
3035 assert_eq!(get_m128(r, 3), 1.0);
3036 }
3037
3038 #[simd_test(enable = "sse")]
3039 unsafe fn test_mm_setr_ps() {
3040 let r = _mm_setr_ps(
3041 black_box(1.0),
3042 black_box(2.0),
3043 black_box(3.0),
3044 black_box(4.0),
3045 );
3046 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
3047 }
3048
3049 #[simd_test(enable = "sse")]
3050 unsafe fn test_mm_setzero_ps() {
3051 let r = *black_box(&_mm_setzero_ps());
3052 assert_eq_m128(r, _mm_set1_ps(0.0));
3053 }
3054
3055 #[simd_test(enable = "sse")]
3056 unsafe fn test_MM_SHUFFLE() {
3057 assert_eq!(_MM_SHUFFLE(0, 1, 1, 3), 0b00_01_01_11);
3058 assert_eq!(_MM_SHUFFLE(3, 1, 1, 0), 0b11_01_01_00);
3059 assert_eq!(_MM_SHUFFLE(1, 2, 2, 1), 0b01_10_10_01);
3060 }
3061
3062 #[simd_test(enable = "sse")]
3063 unsafe fn test_mm_shuffle_ps() {
3064 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3065 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3066 let r = _mm_shuffle_ps::<0b00_01_01_11>(a, b);
3067 assert_eq_m128(r, _mm_setr_ps(4.0, 2.0, 6.0, 5.0));
3068 }
3069
3070 #[simd_test(enable = "sse")]
3071 unsafe fn test_mm_unpackhi_ps() {
3072 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3073 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3074 let r = _mm_unpackhi_ps(a, b);
3075 assert_eq_m128(r, _mm_setr_ps(3.0, 7.0, 4.0, 8.0));
3076 }
3077
3078 #[simd_test(enable = "sse")]
3079 unsafe fn test_mm_unpacklo_ps() {
3080 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3081 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3082 let r = _mm_unpacklo_ps(a, b);
3083 assert_eq_m128(r, _mm_setr_ps(1.0, 5.0, 2.0, 6.0));
3084 }
3085
3086 #[simd_test(enable = "sse")]
3087 unsafe fn test_mm_movehl_ps() {
3088 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3089 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3090 let r = _mm_movehl_ps(a, b);
3091 assert_eq_m128(r, _mm_setr_ps(7.0, 8.0, 3.0, 4.0));
3092 }
3093
3094 #[simd_test(enable = "sse")]
3095 unsafe fn test_mm_movelh_ps() {
3096 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3097 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3098 let r = _mm_movelh_ps(a, b);
3099 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 5.0, 6.0));
3100 }
3101
3102 #[simd_test(enable = "sse")]
3103 unsafe fn test_mm_load_ss() {
3104 let a = 42.0f32;
3105 let r = _mm_load_ss(ptr::addr_of!(a));
3106 assert_eq_m128(r, _mm_setr_ps(42.0, 0.0, 0.0, 0.0));
3107 }
3108
3109 #[simd_test(enable = "sse")]
3110 unsafe fn test_mm_load1_ps() {
3111 let a = 42.0f32;
3112 let r = _mm_load1_ps(ptr::addr_of!(a));
3113 assert_eq_m128(r, _mm_setr_ps(42.0, 42.0, 42.0, 42.0));
3114 }
3115
3116 #[simd_test(enable = "sse")]
3117 unsafe fn test_mm_load_ps() {
3118 let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
3119
3120 let mut p = vals.as_ptr();
3121 let mut fixup = 0.0f32;
3122
3123 let unalignment = (p as usize) & 0xf;
3127 if unalignment != 0 {
3128 let delta = (16 - unalignment) >> 2;
3129 fixup = delta as f32;
3130 p = p.add(delta);
3131 }
3132
3133 let r = _mm_load_ps(p);
3134 let e = _mm_add_ps(_mm_setr_ps(1.0, 2.0, 3.0, 4.0), _mm_set1_ps(fixup));
3135 assert_eq_m128(r, e);
3136 }
3137
3138 #[simd_test(enable = "sse")]
3139 unsafe fn test_mm_loadu_ps() {
3140 let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
3141 let p = vals.as_ptr().add(3);
3142 let r = _mm_loadu_ps(black_box(p));
3143 assert_eq_m128(r, _mm_setr_ps(4.0, 5.0, 6.0, 7.0));
3144 }
3145
3146 #[simd_test(enable = "sse")]
3147 unsafe fn test_mm_loadr_ps() {
3148 let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
3149
3150 let mut p = vals.as_ptr();
3151 let mut fixup = 0.0f32;
3152
3153 let unalignment = (p as usize) & 0xf;
3157 if unalignment != 0 {
3158 let delta = (16 - unalignment) >> 2;
3159 fixup = delta as f32;
3160 p = p.add(delta);
3161 }
3162
3163 let r = _mm_loadr_ps(p);
3164 let e = _mm_add_ps(_mm_setr_ps(4.0, 3.0, 2.0, 1.0), _mm_set1_ps(fixup));
3165 assert_eq_m128(r, e);
3166 }
3167
3168 #[simd_test(enable = "sse")]
3169 unsafe fn test_mm_store_ss() {
3170 let mut vals = [0.0f32; 8];
3171 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3172 _mm_store_ss(vals.as_mut_ptr().add(1), a);
3173
3174 assert_eq!(vals[0], 0.0);
3175 assert_eq!(vals[1], 1.0);
3176 assert_eq!(vals[2], 0.0);
3177 }
3178
3179 #[simd_test(enable = "sse")]
3180 unsafe fn test_mm_store1_ps() {
3181 let mut vals = [0.0f32; 8];
3182 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3183
3184 let mut ofs = 0;
3185 let mut p = vals.as_mut_ptr();
3186
3187 if (p as usize) & 0xf != 0 {
3188 ofs = (16 - ((p as usize) & 0xf)) >> 2;
3189 p = p.add(ofs);
3190 }
3191
3192 _mm_store1_ps(p, *black_box(&a));
3193
3194 if ofs > 0 {
3195 assert_eq!(vals[ofs - 1], 0.0);
3196 }
3197 assert_eq!(vals[ofs + 0], 1.0);
3198 assert_eq!(vals[ofs + 1], 1.0);
3199 assert_eq!(vals[ofs + 2], 1.0);
3200 assert_eq!(vals[ofs + 3], 1.0);
3201 assert_eq!(vals[ofs + 4], 0.0);
3202 }
3203
3204 #[simd_test(enable = "sse")]
3205 unsafe fn test_mm_store_ps() {
3206 let mut vals = [0.0f32; 8];
3207 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3208
3209 let mut ofs = 0;
3210 let mut p = vals.as_mut_ptr();
3211
3212 if (p as usize) & 0xf != 0 {
3214 ofs = (16 - ((p as usize) & 0xf)) >> 2;
3215 p = p.add(ofs);
3216 }
3217
3218 _mm_store_ps(p, *black_box(&a));
3219
3220 if ofs > 0 {
3221 assert_eq!(vals[ofs - 1], 0.0);
3222 }
3223 assert_eq!(vals[ofs + 0], 1.0);
3224 assert_eq!(vals[ofs + 1], 2.0);
3225 assert_eq!(vals[ofs + 2], 3.0);
3226 assert_eq!(vals[ofs + 3], 4.0);
3227 assert_eq!(vals[ofs + 4], 0.0);
3228 }
3229
3230 #[simd_test(enable = "sse")]
3231 unsafe fn test_mm_storer_ps() {
3232 let mut vals = [0.0f32; 8];
3233 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3234
3235 let mut ofs = 0;
3236 let mut p = vals.as_mut_ptr();
3237
3238 if (p as usize) & 0xf != 0 {
3240 ofs = (16 - ((p as usize) & 0xf)) >> 2;
3241 p = p.add(ofs);
3242 }
3243
3244 _mm_storer_ps(p, *black_box(&a));
3245
3246 if ofs > 0 {
3247 assert_eq!(vals[ofs - 1], 0.0);
3248 }
3249 assert_eq!(vals[ofs + 0], 4.0);
3250 assert_eq!(vals[ofs + 1], 3.0);
3251 assert_eq!(vals[ofs + 2], 2.0);
3252 assert_eq!(vals[ofs + 3], 1.0);
3253 assert_eq!(vals[ofs + 4], 0.0);
3254 }
3255
3256 #[simd_test(enable = "sse")]
3257 unsafe fn test_mm_storeu_ps() {
3258 let mut vals = [0.0f32; 8];
3259 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3260
3261 let mut ofs = 0;
3262 let mut p = vals.as_mut_ptr();
3263
3264 if (p as usize) & 0xf == 0 {
3266 ofs = 1;
3267 p = p.add(1);
3268 }
3269
3270 _mm_storeu_ps(p, *black_box(&a));
3271
3272 if ofs > 0 {
3273 assert_eq!(vals[ofs - 1], 0.0);
3274 }
3275 assert_eq!(vals[ofs + 0], 1.0);
3276 assert_eq!(vals[ofs + 1], 2.0);
3277 assert_eq!(vals[ofs + 2], 3.0);
3278 assert_eq!(vals[ofs + 3], 4.0);
3279 assert_eq!(vals[ofs + 4], 0.0);
3280 }
3281
3282 #[simd_test(enable = "sse")]
3283 unsafe fn test_mm_move_ss() {
3284 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3285 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3286
3287 let r = _mm_move_ss(a, b);
3288 let e = _mm_setr_ps(5.0, 2.0, 3.0, 4.0);
3289 assert_eq_m128(e, r);
3290 }
3291
3292 #[simd_test(enable = "sse")]
3293 unsafe fn test_mm_movemask_ps() {
3294 let r = _mm_movemask_ps(_mm_setr_ps(-1.0, 5.0, -5.0, 0.0));
3295 assert_eq!(r, 0b0101);
3296
3297 let r = _mm_movemask_ps(_mm_setr_ps(-1.0, -5.0, -5.0, 0.0));
3298 assert_eq!(r, 0b0111);
3299 }
3300
3301 #[simd_test(enable = "sse")]
3302 #[cfg_attr(miri, ignore)]
3304 unsafe fn test_mm_sfence() {
3305 _mm_sfence();
3306 }
3307
3308 #[simd_test(enable = "sse")]
3309 unsafe fn test_MM_TRANSPOSE4_PS() {
3310 let mut a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3311 let mut b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3312 let mut c = _mm_setr_ps(9.0, 10.0, 11.0, 12.0);
3313 let mut d = _mm_setr_ps(13.0, 14.0, 15.0, 16.0);
3314
3315 _MM_TRANSPOSE4_PS(&mut a, &mut b, &mut c, &mut d);
3316
3317 assert_eq_m128(a, _mm_setr_ps(1.0, 5.0, 9.0, 13.0));
3318 assert_eq_m128(b, _mm_setr_ps(2.0, 6.0, 10.0, 14.0));
3319 assert_eq_m128(c, _mm_setr_ps(3.0, 7.0, 11.0, 15.0));
3320 assert_eq_m128(d, _mm_setr_ps(4.0, 8.0, 12.0, 16.0));
3321 }
3322
// Test-only buffer of four `f32`s whose start address is forced onto a
// 16-byte boundary by the `align(16)` representation.
#[repr(align(16))]
struct Memory {
    // The four floats; the first element sits at the struct's aligned address.
    pub data: [f32; 4],
}
3327
3328 #[simd_test(enable = "sse")]
3329 #[cfg_attr(miri, ignore)]
3332 unsafe fn test_mm_stream_ps() {
3333 let a = _mm_set1_ps(7.0);
3334 let mut mem = Memory { data: [-1.0; 4] };
3335
3336 _mm_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
3337 _mm_sfence();
3338 for i in 0..4 {
3339 assert_eq!(mem.data[i], get_m128(a, i));
3340 }
3341 }
3342}