use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    mem, ptr,
};

#[cfg(test)]
use stdarch_test::assert_instr;

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_add(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_add(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let a: u64x4 = transmute(a);
        let b: u64x4 = transmute(b);
        transmute(simd_and(a, b))
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let a: u32x8 = transmute(a);
        let b: u32x8 = transmute(b);
        transmute(simd_and(a, b))
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let a: u64x4 = transmute(a);
        let b: u64x4 = transmute(b);
        transmute(simd_or(a, b))
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let a: u32x8 = transmute(a);
        let b: u32x8 = transmute(b);
        transmute(simd_or(a, b))
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(MASK, 8);
    unsafe {
        simd_shuffle!(
            a,
            b,
            [
                MASK as u32 & 0b1,
                ((MASK as u32 >> 1) & 0b1) + 4,
                ((MASK as u32 >> 2) & 0b1) + 2,
                ((MASK as u32 >> 3) & 0b1) + 6,
            ],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(MASK, 8);
    unsafe {
        simd_shuffle!(
            a,
            b,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11) + 8,
                ((MASK as u32 >> 6) & 0b11) + 8,
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 12,
                ((MASK as u32 >> 6) & 0b11) + 12,
            ],
        )
    }
}
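
// A hedged usage sketch, not part of the original source (the helper name is
// ours): `_mm256_shuffle_ps` shuffles each 128-bit half independently, taking
// the low two result lanes of a half from `a` and the high two from `b`. With
// MASK = 0 every two-bit selector picks element 0, so the result broadcasts
// the first element of each half: [a0, a0, b0, b0 | a4, a4, b4, b4].
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn shuffle_ps_broadcast_lane0_sketch(a: __m256, b: __m256) -> __m256 {
    _mm256_shuffle_ps::<0>(a, b)
}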

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandnp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let a: u64x4 = transmute(a);
        let b: u64x4 = transmute(b);
        transmute(simd_and(simd_xor(u64x4::splat(!(0_u64)), a), b))
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let a: u32x8 = transmute(a);
        let b: u32x8 = transmute(b);
        transmute(simd_and(simd_xor(u32x8::splat(!(0_u32)), a), b))
    }
}
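
// A hedged usage sketch, not part of the original source (the helper name is
// ours): because these are plain bitwise ops on the IEEE-754 representation,
// `andnot` with a `-0.0` mask computes `!sign_bit & x`, clearing every lane's
// sign bit: a branch-free lane-wise absolute value.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn abs_ps_sketch(x: __m256) -> __m256 {
    _mm256_andnot_ps(_mm256_set1_ps(-0.0), x)
}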

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaxpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { vmaxpd(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaxps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { vmaxps(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vminpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { vminpd(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vminps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { vminps(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_mul(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmulps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_mul(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let a = a.as_f64x4();
        let b = b.as_f64x4();
        let add = simd_add(a, b);
        let sub = simd_sub(a, b);
        simd_shuffle!(add, sub, [4, 1, 6, 3])
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let a = a.as_f32x8();
        let b = b.as_f32x8();
        let add = simd_add(a, b);
        let sub = simd_sub(a, b);
        simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
    }
}
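
// A hedged usage sketch, not part of the original source (the helper name is
// ours): `addsub` subtracts in even lanes and adds in odd lanes, which is the
// key step of the classic interleaved complex multiply. With four complex
// numbers stored as [re, im, re, im, ...] in both inputs, this computes the
// lane-wise complex product (ar*br - ai*bi) + (ar*bi + ai*br)i.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn complex_mul_sketch(a: __m256, b: __m256) -> __m256 {
    let re = _mm256_moveldup_ps(b); // duplicate real parts of b: [br, br, ...]
    let im = _mm256_movehdup_ps(b); // duplicate imaginary parts of b: [bi, bi, ...]
    let t = _mm256_mul_ps(a, re); // [ar*br, ai*br, ...]
    let sw = _mm256_permute_ps::<0b10_11_00_01>(a); // swap re/im within each pair
    let u = _mm256_mul_ps(sw, im); // [ai*bi, ar*bi, ...]
    _mm256_addsub_ps(t, u) // even: sub, odd: add
}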

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_sub(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_sub(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdivps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_div(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_div(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd, ROUNDING = 0x3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_round_pd<const ROUNDING: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(ROUNDING, 4);
    unsafe { roundpd256(a, ROUNDING) }
}
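
// A hedged usage sketch, not part of the original source (the helper name is
// ours): ROUNDING uses the SSE4.1 `_MM_FROUND_*` encoding, e.g. rounding
// half-to-even while suppressing precision exceptions.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn round_nearest_sketch(a: __m256d) -> __m256d {
    _mm256_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a)
}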

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_ceil_pd(a: __m256d) -> __m256d {
    unsafe { simd_ceil(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_floor_pd(a: __m256d) -> __m256d {
    unsafe { simd_floor(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps, ROUNDING = 0x00))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_round_ps<const ROUNDING: i32>(a: __m256) -> __m256 {
    static_assert_uimm_bits!(ROUNDING, 4);
    unsafe { roundps256(a, ROUNDING) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_ceil_ps(a: __m256) -> __m256 {
    unsafe { simd_ceil(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_floor_ps(a: __m256) -> __m256 {
    unsafe { simd_floor(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sqrt_ps(a: __m256) -> __m256 {
    unsafe { simd_fsqrt(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
    unsafe { simd_fsqrt(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_pd<const IMM4: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM4, 4);
    unsafe {
        simd_shuffle!(
            a,
            b,
            [
                ((IMM4 as u32 >> 0) & 1) * 4 + 0,
                ((IMM4 as u32 >> 1) & 1) * 4 + 1,
                ((IMM4 as u32 >> 2) & 1) * 4 + 2,
                ((IMM4 as u32 >> 3) & 1) * 4 + 3,
            ],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        simd_shuffle!(
            a,
            b,
            [
                ((IMM8 as u32 >> 0) & 1) * 8 + 0,
                ((IMM8 as u32 >> 1) & 1) * 8 + 1,
                ((IMM8 as u32 >> 2) & 1) * 8 + 2,
                ((IMM8 as u32 >> 3) & 1) * 8 + 3,
                ((IMM8 as u32 >> 4) & 1) * 8 + 4,
                ((IMM8 as u32 >> 5) & 1) * 8 + 5,
                ((IMM8 as u32 >> 6) & 1) * 8 + 6,
                ((IMM8 as u32 >> 7) & 1) * 8 + 7,
            ],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendvpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe {
        let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::ZERO);
        transmute(simd_select(mask, b.as_f64x4(), a.as_f64x4()))
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendvps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe {
        let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::ZERO);
        transmute(simd_select(mask, b.as_f32x8(), a.as_f32x8()))
    }
}
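
// A hedged usage sketch, not part of the original source (the helper name is
// ours): only the sign bit of each mask lane matters, so passing `x` itself as
// the mask selects the replacement exactly in the negative lanes, clamping
// them to zero.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn clamp_negative_to_zero_sketch(x: __m256) -> __m256 {
    _mm256_blendv_ps(x, _mm256_setzero_ps(), x)
}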

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdpps, IMM8 = 0x0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_dp_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { vdpps(a, b, IMM8 as i8) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhaddpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let even = simd_shuffle!(a, b, [0, 4, 2, 6]);
        let odd = simd_shuffle!(a, b, [1, 5, 3, 7]);
        simd_add(even, odd)
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhaddps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let even = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
        let odd = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
        simd_add(even, odd)
    }
}
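
// A hedged usage sketch, not part of the original source (the helper name is
// ours): horizontal add is lane-local, so summing all eight f32 lanes takes
// two rounds of `hadd` (leaving partial sums duplicated within each 128-bit
// half) plus one cross-half add.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn hsum_ps_sketch(v: __m256) -> f32 {
    let s = _mm256_hadd_ps(v, v); // [v01, v23, v01, v23 | v45, v67, v45, v67]
    let s = _mm256_hadd_ps(s, s); // [v0123 x4 | v4567 x4]
    let lo = _mm256_castps256_ps128(s);
    let hi = _mm256_extractf128_ps::<1>(s);
    _mm_cvtss_f32(_mm_add_ps(lo, hi))
}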

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let even = simd_shuffle!(a, b, [0, 4, 2, 6]);
        let odd = simd_shuffle!(a, b, [1, 5, 3, 7]);
        simd_sub(even, odd)
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let even = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
        let odd = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
        simd_sub(even, odd)
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let a: u64x4 = transmute(a);
        let b: u64x4 = transmute(b);
        transmute(simd_xor(a, b))
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let a: u32x8 = transmute(a);
        let b: u32x8 = transmute(b);
        transmute(simd_xor(a, b))
    }
}

/// Equal (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_OQ: i32 = 0x00;
/// Less-than (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LT_OS: i32 = 0x01;
/// Less-than-or-equal (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LE_OS: i32 = 0x02;
/// Unordered (non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_UNORD_Q: i32 = 0x03;
/// Not-equal (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_UQ: i32 = 0x04;
/// Not-less-than (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLT_US: i32 = 0x05;
/// Not-less-than-or-equal (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLE_US: i32 = 0x06;
/// Ordered (non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_ORD_Q: i32 = 0x07;
/// Equal (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_UQ: i32 = 0x08;
/// Not-greater-than-or-equal (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGE_US: i32 = 0x09;
/// Not-greater-than (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGT_US: i32 = 0x0a;
/// False (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_FALSE_OQ: i32 = 0x0b;
/// Not-equal (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_OQ: i32 = 0x0c;
/// Greater-than-or-equal (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GE_OS: i32 = 0x0d;
/// Greater-than (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GT_OS: i32 = 0x0e;
/// True (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_TRUE_UQ: i32 = 0x0f;
/// Equal (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_OS: i32 = 0x10;
/// Less-than (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LT_OQ: i32 = 0x11;
/// Less-than-or-equal (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LE_OQ: i32 = 0x12;
/// Unordered (signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_UNORD_S: i32 = 0x13;
/// Not-equal (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_US: i32 = 0x14;
/// Not-less-than (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLT_UQ: i32 = 0x15;
/// Not-less-than-or-equal (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLE_UQ: i32 = 0x16;
/// Ordered (signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_ORD_S: i32 = 0x17;
/// Equal (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_US: i32 = 0x18;
/// Not-greater-than-or-equal (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGE_UQ: i32 = 0x19;
/// Not-greater-than (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGT_UQ: i32 = 0x1a;
/// False (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_FALSE_OS: i32 = 0x1b;
/// Not-equal (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_OS: i32 = 0x1c;
/// Greater-than-or-equal (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GE_OQ: i32 = 0x1d;
/// Greater-than (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GT_OQ: i32 = 0x1e;
/// True (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_TRUE_US: i32 = 0x1f;

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmp_pd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM5, 5);
    unsafe { vcmppd(a, b, const { IMM5 as i8 }) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmp_pd<const IMM5: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM5, 5);
    unsafe { vcmppd256(a, b, IMM5 as u8) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmp_ps<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM5, 5);
    unsafe { vcmpps(a, b, const { IMM5 as i8 }) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmp_ps<const IMM5: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM5, 5);
    unsafe { vcmpps256(a, b, const { IMM5 as u8 }) }
}
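
// A hedged usage sketch, not part of the original source (the helper name is
// ours): counting lanes of `a` strictly below `b` with an ordered,
// non-signaling less-than predicate, then popcounting the sign-bit mask.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn count_lt_sketch(a: __m256, b: __m256) -> u32 {
    let m = _mm256_cmp_ps::<_CMP_LT_OQ>(a, b);
    _mm256_movemask_ps(m).count_ones()
}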

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqsd, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmp_sd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM5, 5);
    unsafe { vcmpsd(a, b, IMM5 as i8) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqss, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmp_ss<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM5, 5);
    unsafe { vcmpss(a, b, IMM5 as i8) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d {
    unsafe { simd_cast(a.as_i32x4()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 {
    unsafe { simd_cast(a.as_i32x8()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtpd_ps(a: __m256d) -> __m128 {
    unsafe { simd_cast(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtps_epi32(a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2dq(a)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtps_pd(a: __m128) -> __m256d {
    unsafe { simd_cast(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtsd_f64(a: __m256d) -> f64 {
    unsafe { simd_extract!(a, 0) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2dq(a)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i {
    unsafe { transmute(vcvtpd2dq(a)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvttps_epi32(a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2dq(a)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extractf128_ps<const IMM1: i32>(a: __m256) -> __m128 {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        simd_shuffle!(
            a,
            _mm256_undefined_ps(),
            [[0, 1, 2, 3], [4, 5, 6, 7]][IMM1 as usize],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [[0, 1], [2, 3]][IMM1 as usize]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let dst: i64x2 = simd_shuffle!(a.as_i64x4(), i64x4::ZERO, [[0, 1], [2, 3]][IMM1 as usize]);
        transmute(dst)
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 {
    static_assert_uimm_bits!(INDEX, 3);
    unsafe { simd_extract!(a.as_i32x8(), INDEX as u32) }
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
    unsafe { simd_extract!(a.as_i32x8(), 0) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vzeroall))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_zeroall() {
    unsafe { vzeroall() }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vzeroupper))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_zeroupper() {
    unsafe { vzeroupper() }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 {
    unsafe { vpermilps256(a, b.as_i32x8()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 {
    unsafe { vpermilps(a, b.as_i32x4()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        simd_shuffle!(
            a,
            _mm256_undefined_ps(),
            [
                (IMM8 as u32 >> 0) & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
                ((IMM8 as u32 >> 0) & 0b11) + 4,
                ((IMM8 as u32 >> 2) & 0b11) + 4,
                ((IMM8 as u32 >> 4) & 0b11) + 4,
                ((IMM8 as u32 >> 6) & 0b11) + 4,
            ],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        simd_shuffle!(
            a,
            _mm_undefined_ps(),
            [
                (IMM8 as u32 >> 0) & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
            ],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d {
    unsafe { vpermilpd256(a, b.as_i64x4()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d {
    unsafe { vpermilpd(a, b.as_i64x2()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufpd, IMM4 = 0x1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM4, 4);
    unsafe {
        simd_shuffle!(
            a,
            _mm256_undefined_pd(),
            [
                ((IMM4 as u32 >> 0) & 1),
                ((IMM4 as u32 >> 1) & 1),
                ((IMM4 as u32 >> 2) & 1) + 2,
                ((IMM4 as u32 >> 3) & 1) + 2,
            ],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0x1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM2, 2);
    unsafe {
        simd_shuffle!(
            a,
            _mm_undefined_pd(),
            [(IMM2 as u32) & 1, (IMM2 as u32 >> 1) & 1],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x5))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_castsi256_ps(_mm256_permute2f128_si256::<IMM8>(
        _mm256_castps_si256(a),
        _mm256_castps_si256(b),
    ))
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_castsi256_pd(_mm256_permute2f128_si256::<IMM8>(
        _mm256_castpd_si256(a),
        _mm256_castpd_si256(b),
    ))
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    const fn idx(imm8: i32, pos: u32) -> u32 {
        let part = if pos < 2 {
            imm8 & 0xf
        } else {
            (imm8 & 0xf0) >> 4
        };
        2 * (part as u32 & 0b11) + (pos & 1)
    }
    const fn idx0(imm8: i32, pos: u32) -> u32 {
        let part = if pos < 2 {
            imm8 & 0xf
        } else {
            (imm8 & 0xf0) >> 4
        };
        if part & 0b1000 != 0 { 4 } else { pos }
    }
    unsafe {
        let r = simd_shuffle!(
            a.as_i64x4(),
            b.as_i64x4(),
            [idx(IMM8, 0), idx(IMM8, 1), idx(IMM8, 2), idx(IMM8, 3)]
        );
        let r: i64x4 = simd_shuffle!(
            r,
            i64x4::ZERO,
            [idx0(IMM8, 0), idx0(IMM8, 1), idx0(IMM8, 2), idx0(IMM8, 3)]
        );
        r.as_m256i()
    }
}
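
// A hedged usage sketch, not part of the original source (the helper name is
// ours): each nibble of IMM8 picks a 128-bit source half (0..=3 across `a`
// then `b`), and setting bit 3 of a nibble zeroes that half instead. With
// IMM8 = 0x01 the low result half takes `a`'s high half and the high result
// half takes `a`'s low half, i.e. the two halves of one vector are swapped.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn swap_halves_sketch(a: __m256i) -> __m256i {
    _mm256_permute2f128_si256::<0x01>(a, a)
}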

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub fn _mm256_broadcast_ss(f: &f32) -> __m256 {
    _mm256_set1_ps(*f)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub fn _mm_broadcast_ss(f: &f32) -> __m128 {
    _mm_set1_ps(*f)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub fn _mm256_broadcast_sd(f: &f64) -> __m256d {
    _mm256_set1_pd(*f)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcast_ps(a: &__m128) -> __m256 {
    unsafe { simd_shuffle!(*a, _mm_setzero_ps(), [0, 1, 2, 3, 0, 1, 2, 3]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcast_pd(a: &__m128d) -> __m256d {
    unsafe { simd_shuffle!(*a, _mm_setzero_pd(), [0, 1, 0, 1]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        simd_shuffle!(
            a,
            _mm256_castps128_ps256(b),
            [[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9, 10, 11]][IMM1 as usize],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        simd_shuffle!(
            a,
            _mm256_castpd128_pd256(b),
            [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let dst: i64x4 = simd_shuffle!(
            a.as_i64x4(),
            _mm256_castsi128_si256(b).as_i64x4(),
            [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
        );
        transmute(dst)
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_insert_epi8<const INDEX: i32>(a: __m256i, i: i8) -> __m256i {
    static_assert_uimm_bits!(INDEX, 5);
    unsafe { transmute(simd_insert!(a.as_i8x32(), INDEX as u32, i)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_insert_epi16<const INDEX: i32>(a: __m256i, i: i16) -> __m256i {
    static_assert_uimm_bits!(INDEX, 4);
    unsafe { transmute(simd_insert!(a.as_i16x16(), INDEX as u32, i)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_insert_epi32<const INDEX: i32>(a: __m256i, i: i32) -> __m256i {
    static_assert_uimm_bits!(INDEX, 3);
    unsafe { transmute(simd_insert!(a.as_i32x8(), INDEX as u32, i)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovap)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d {
    *(mem_addr as *const __m256d)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovap)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d) {
    *(mem_addr as *mut __m256d) = a;
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 {
    *(mem_addr as *const __m256)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256) {
    *(mem_addr as *mut __m256) = a;
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d {
    let mut dst = _mm256_undefined_pd();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m256d>(),
    );
    dst
}
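
// A hedged usage sketch, not part of the original source (the helper name is
// ours): unlike `_mm256_load_pd`, the unaligned load has no alignment
// requirement, so it can read a vector from any window of a slice. The caller
// must still guarantee at least four readable elements.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn loadu_from_slice_sketch(s: &[f64]) -> __m256d {
    debug_assert!(s.len() >= 4);
    _mm256_loadu_pd(s.as_ptr())
}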

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) {
    mem_addr.cast::<__m256d>().write_unaligned(a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 {
    let mut dst = _mm256_undefined_ps();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m256>(),
    );
    dst
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) {
    mem_addr.cast::<__m256>().write_unaligned(a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i {
    *mem_addr
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) {
    *mem_addr = a;
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i {
    let mut dst = _mm256_undefined_si256();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m256i>(),
    );
    dst
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
    mem_addr.write_unaligned(a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d {
    maskloadpd256(mem_addr as *const i8, mask.as_i64x4())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) {
    maskstorepd256(mem_addr as *mut i8, mask.as_i64x4(), a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d {
    maskloadpd(mem_addr as *const i8, mask.as_i64x2())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
    maskstorepd(mem_addr as *mut i8, mask.as_i64x2(), a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 {
    maskloadps256(mem_addr as *const i8, mask.as_i32x8())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) {
    maskstoreps256(mem_addr as *mut i8, mask.as_i32x8(), a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
    maskloadps(mem_addr as *const i8, mask.as_i32x4())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
    maskstoreps(mem_addr as *mut i8, mask.as_i32x4(), a);
}
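
// A hedged usage sketch, not part of the original source (the helper name is
// ours): a lane participates in a masked load when the sign bit of its mask
// element is set, which makes maskload a natural tool for loop tails. Here the
// mask for the first `n` (0..=8) lanes is built with AVX-only ops by comparing
// a lane-index vector against `n`; masked-off lanes read as zero.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn maskload_prefix_sketch(p: *const f32, n: i32) -> __m256 {
    let idx = _mm256_cvtepi32_ps(_mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7));
    let lim = _mm256_set1_ps(n as f32);
    // lanes where i < n compare true, i.e. all-ones (sign bit set)
    let mask = _mm256_castps_si256(_mm256_cmp_ps::<_CMP_GT_OS>(lim, idx));
    _mm256_maskload_ps(p, mask)
}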

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovshdup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_movehdup_ps(a: __m256) -> __m256 {
    unsafe { simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovsldup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_moveldup_ps(a: __m256) -> __m256 {
    unsafe { simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_movedup_pd(a: __m256d) -> __m256d {
    unsafe { simd_shuffle!(a, a, [0, 0, 2, 2]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vlddqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i {
    transmute(vlddqu(mem_addr as *const i8))
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) {
    crate::arch::asm!(
        vps!("vmovntdq", ",{a}"),
        p = in(reg) mem_addr,
        a = in(ymm_reg) a,
        options(nostack, preserves_flags),
    );
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) {
    crate::arch::asm!(
        vps!("vmovntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(ymm_reg) a,
        options(nostack, preserves_flags),
    );
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256) {
    crate::arch::asm!(
        vps!("vmovntps", ",{a}"),
        p = in(reg) mem_addr,
        a = in(ymm_reg) a,
        options(nostack, preserves_flags),
    );
}
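
// A hedged usage note, not part of the original source (the helper name is
// ours): non-temporal stores bypass the cache and are weakly ordered, so after
// a run of `_mm256_stream_*` writes a producer should fence before publishing
// the data to another thread.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn stream_then_fence_sketch(dst: *mut f32, v: __m256) {
    _mm256_stream_ps(dst, v); // `dst` must be 32-byte aligned
    _mm_sfence(); // order the streaming store before subsequent stores
}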

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vrcpps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_rcp_ps(a: __m256) -> __m256 {
    unsafe { vrcpps(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vrsqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_rsqrt_ps(a: __m256) -> __m256 {
    unsafe { vrsqrtps(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_shuffle!(a, b, [2, 10, 3, 11, 6, 14, 7, 15]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 4, 12, 5, 13]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 {
    unsafe {
        let r = simd_and(a.as_i64x4(), b.as_i64x4());
        (0i64 == simd_reduce_or(r)) as i32
    }
}
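
// A hedged usage sketch, not part of the original source (the helper name is
// ours): a vector is all-zero bits exactly when ANDing it with itself is zero,
// so `testz(v, v)` returning 1 is a one-instruction emptiness check.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn is_all_zero_sketch(v: __m256i) -> bool {
    _mm256_testz_si256(v, v) == 1
}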

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 {
    unsafe {
        let r = simd_and(simd_xor(a.as_i64x4(), i64x4::splat(!0)), b.as_i64x4());
        (0i64 == simd_reduce_or(r)) as i32
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 {
    unsafe { ptestnzc256(a.as_i64x4(), b.as_i64x4()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 {
    unsafe { vtestzpd256(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 {
    unsafe { vtestcpd256(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 {
    unsafe { vtestnzcpd256(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 {
    unsafe {
        let r: i64x2 = simd_lt(transmute(_mm_and_pd(a, b)), i64x2::ZERO);
        (0i64 == simd_reduce_or(r)) as i32
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 {
    unsafe {
        let r: i64x2 = simd_lt(transmute(_mm_andnot_pd(a, b)), i64x2::ZERO);
        (0i64 == simd_reduce_or(r)) as i32
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 {
    unsafe { vtestnzcpd(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 {
    unsafe { vtestzps256(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 {
    unsafe { vtestcps256(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 {
    unsafe { vtestnzcps256(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_testz_ps(a: __m128, b: __m128) -> i32 {
    unsafe {
        let r: i32x4 = simd_lt(transmute(_mm_and_ps(a, b)), i32x4::ZERO);
        (0i32 == simd_reduce_or(r)) as i32
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_testc_ps(a: __m128, b: __m128) -> i32 {
    unsafe {
        let r: i32x4 = simd_lt(transmute(_mm_andnot_ps(a, b)), i32x4::ZERO);
        (0i32 == simd_reduce_or(r)) as i32
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 {
    unsafe { vtestnzcps(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovmskpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_movemask_pd(a: __m256d) -> i32 {
    unsafe {
        let mask: i64x4 = simd_lt(transmute(a), i64x4::ZERO);
        simd_bitmask::<i64x4, u8>(mask).into()
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovmskps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_movemask_ps(a: __m256) -> i32 {
    unsafe {
        let mask: i32x8 = simd_lt(transmute(a), i32x8::ZERO);
        simd_bitmask::<i32x8, u8>(mask).into()
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_setzero_pd() -> __m256d {
    const { unsafe { mem::zeroed() } }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_setzero_ps() -> __m256 {
    const { unsafe { mem::zeroed() } }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxor))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_setzero_si256() -> __m256i {
    const { unsafe { mem::zeroed() } }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
    _mm256_setr_pd(d, c, b, a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_set_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32) -> __m256 {
    _mm256_setr_ps(h, g, f, e, d, c, b, a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_set_epi8(
    e00: i8,
    e01: i8,
    e02: i8,
    e03: i8,
    e04: i8,
    e05: i8,
    e06: i8,
    e07: i8,
    e08: i8,
    e09: i8,
    e10: i8,
    e11: i8,
    e12: i8,
    e13: i8,
    e14: i8,
    e15: i8,
    e16: i8,
    e17: i8,
    e18: i8,
    e19: i8,
    e20: i8,
    e21: i8,
    e22: i8,
    e23: i8,
    e24: i8,
    e25: i8,
    e26: i8,
    e27: i8,
    e28: i8,
    e29: i8,
    e30: i8,
    e31: i8,
) -> __m256i {
    #[rustfmt::skip]
    _mm256_setr_epi8(
        e31, e30, e29, e28, e27, e26, e25, e24,
        e23, e22, e21, e20, e19, e18, e17, e16,
        e15, e14, e13, e12, e11, e10, e09, e08,
        e07, e06, e05, e04, e03, e02, e01, e00,
    )
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_set_epi16(
    e00: i16,
    e01: i16,
    e02: i16,
    e03: i16,
    e04: i16,
    e05: i16,
    e06: i16,
    e07: i16,
    e08: i16,
    e09: i16,
    e10: i16,
    e11: i16,
    e12: i16,
    e13: i16,
    e14: i16,
    e15: i16,
) -> __m256i {
    #[rustfmt::skip]
    _mm256_setr_epi16(
        e15, e14, e13, e12,
        e11, e10, e09, e08,
        e07, e06, e05, e04,
        e03, e02, e01, e00,
    )
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_set_epi32(
    e0: i32,
    e1: i32,
    e2: i32,
    e3: i32,
    e4: i32,
    e5: i32,
    e6: i32,
    e7: i32,
) -> __m256i {
    _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
    _mm256_setr_epi64x(d, c, b, a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
    __m256d([a, b, c, d])
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_setr_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32) -> __m256 {
    __m256([a, b, c, d, e, f, g, h])
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_setr_epi8(
    e00: i8,
    e01: i8,
    e02: i8,
    e03: i8,
    e04: i8,
    e05: i8,
    e06: i8,
    e07: i8,
    e08: i8,
    e09: i8,
    e10: i8,
    e11: i8,
    e12: i8,
    e13: i8,
    e14: i8,
    e15: i8,
    e16: i8,
    e17: i8,
    e18: i8,
    e19: i8,
    e20: i8,
    e21: i8,
    e22: i8,
    e23: i8,
    e24: i8,
    e25: i8,
    e26: i8,
    e27: i8,
    e28: i8,
    e29: i8,
    e30: i8,
    e31: i8,
) -> __m256i {
    unsafe {
        #[rustfmt::skip]
        transmute(i8x32::new(
            e00, e01, e02, e03, e04, e05, e06, e07,
            e08, e09, e10, e11, e12, e13, e14, e15,
            e16, e17, e18, e19, e20, e21, e22, e23,
            e24, e25, e26, e27, e28, e29, e30, e31,
        ))
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_setr_epi16(
    e00: i16,
    e01: i16,
    e02: i16,
    e03: i16,
    e04: i16,
    e05: i16,
    e06: i16,
    e07: i16,
    e08: i16,
    e09: i16,
    e10: i16,
    e11: i16,
    e12: i16,
    e13: i16,
    e14: i16,
    e15: i16,
) -> __m256i {
    unsafe {
        #[rustfmt::skip]
        transmute(i16x16::new(
            e00, e01, e02, e03,
            e04, e05, e06, e07,
            e08, e09, e10, e11,
            e12, e13, e14, e15,
        ))
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_setr_epi32(
    e0: i32,
    e1: i32,
    e2: i32,
    e3: i32,
    e4: i32,
    e5: i32,
    e6: i32,
    e7: i32,
) -> __m256i {
    unsafe { transmute(i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
    unsafe { transmute(i64x4::new(a, b, c, d)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_set1_pd(a: f64) -> __m256d {
    _mm256_setr_pd(a, a, a, a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_set1_ps(a: f32) -> __m256 {
    _mm256_setr_ps(a, a, a, a, a, a, a, a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_set1_epi8(a: i8) -> __m256i {
    #[rustfmt::skip]
    _mm256_setr_epi8(
        a, a, a, a, a, a, a, a,
        a, a, a, a, a, a, a, a,
        a, a, a, a, a, a, a, a,
        a, a, a, a, a, a, a, a,
    )
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_set1_epi16(a: i16) -> __m256i {
    _mm256_setr_epi16(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_set1_epi32(a: i32) -> __m256i {
    _mm256_setr_epi32(a, a, a, a, a, a, a, a)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(vinsertf128))]
#[cfg_attr(all(test, target_arch = "x86"), assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_set1_epi64x(a: i64) -> __m256i {
    _mm256_setr_epi64x(a, a, a, a)
}
2684
2685#[inline]
2689#[target_feature(enable = "avx")]
2690#[stable(feature = "simd_x86", since = "1.27.0")]
2693pub fn _mm256_castpd_ps(a: __m256d) -> __m256 {
2694 unsafe { transmute(a) }
2695}
2696
2697#[inline]
2701#[target_feature(enable = "avx")]
2702#[stable(feature = "simd_x86", since = "1.27.0")]
2705pub fn _mm256_castps_pd(a: __m256) -> __m256d {
2706 unsafe { transmute(a) }
2707}
2708
2709#[inline]
2713#[target_feature(enable = "avx")]
2714#[stable(feature = "simd_x86", since = "1.27.0")]
2717pub fn _mm256_castps_si256(a: __m256) -> __m256i {
2718 unsafe { transmute(a) }
2719}
2720
2721#[inline]
2725#[target_feature(enable = "avx")]
2726#[stable(feature = "simd_x86", since = "1.27.0")]
2729pub fn _mm256_castsi256_ps(a: __m256i) -> __m256 {
2730 unsafe { transmute(a) }
2731}
2732
2733#[inline]
2737#[target_feature(enable = "avx")]
2738#[stable(feature = "simd_x86", since = "1.27.0")]
2741pub fn _mm256_castpd_si256(a: __m256d) -> __m256i {
2742 unsafe { transmute(a) }
2743}
2744
2745#[inline]
2749#[target_feature(enable = "avx")]
2750#[stable(feature = "simd_x86", since = "1.27.0")]
2753pub fn _mm256_castsi256_pd(a: __m256i) -> __m256d {
2754 unsafe { transmute(a) }
2755}
2756
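/// Casts vector of type `__m256` to type `__m128`; the upper 128 bits of `a`
/// are dropped.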
2757#[inline]
2761#[target_feature(enable = "avx")]
2762#[stable(feature = "simd_x86", since = "1.27.0")]
2765pub fn _mm256_castps256_ps128(a: __m256) -> __m128 {
2766 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
2767}
2768
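/// Casts vector of type `__m256d` to type `__m128d`; the upper 128 bits of
/// `a` are dropped.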
2769#[inline]
2773#[target_feature(enable = "avx")]
2774#[stable(feature = "simd_x86", since = "1.27.0")]
2777pub fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
2778 unsafe { simd_shuffle!(a, a, [0, 1]) }
2779}
2780
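/// Casts vector of type `__m256i` to type `__m128i`; the upper 128 bits of
/// `a` are dropped.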
2781#[inline]
2785#[target_feature(enable = "avx")]
2786#[stable(feature = "simd_x86", since = "1.27.0")]
2789pub fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
2790 unsafe {
2791 let a = a.as_i64x4();
2792 let dst: i64x2 = simd_shuffle!(a, a, [0, 1]);
2793 transmute(dst)
2794 }
2795}
2796
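/// Casts vector of type `__m128` to type `__m256`; the upper 128 bits of the
/// result are undefined.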
2797#[inline]
2802#[target_feature(enable = "avx")]
2803#[stable(feature = "simd_x86", since = "1.27.0")]
2806pub fn _mm256_castps128_ps256(a: __m128) -> __m256 {
2807 unsafe { simd_shuffle!(a, _mm_undefined_ps(), [0, 1, 2, 3, 4, 4, 4, 4]) }
2808}
2809
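/// Casts vector of type `__m128d` to type `__m256d`; the upper 128 bits of
/// the result are undefined.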
2810#[inline]
2815#[target_feature(enable = "avx")]
2816#[stable(feature = "simd_x86", since = "1.27.0")]
2819pub fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
2820 unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2]) }
2821}
2822
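/// Casts vector of type `__m128i` to type `__m256i`; the upper 128 bits of
/// the result are undefined.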
2823#[inline]
2828#[target_feature(enable = "avx")]
2829#[stable(feature = "simd_x86", since = "1.27.0")]
2832pub fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
2833 unsafe {
2834 let a = a.as_i64x2();
2835 let undefined = i64x2::ZERO;
2836 let dst: i64x4 = simd_shuffle!(a, undefined, [0, 1, 2, 2]);
2837 transmute(dst)
2838 }
2839}
2840
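/// Constructs a 256-bit floating-point vector from a 128-bit floating-point
/// vector. The lower 128 bits contain the value of `a`; the upper 128 bits
/// are set to zero.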
2841#[inline]
2847#[target_feature(enable = "avx")]
2848#[stable(feature = "simd_x86", since = "1.27.0")]
2851pub fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
2852 unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7]) }
2853}
2854
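/// Constructs a 256-bit integer vector from a 128-bit integer vector. The
/// lower 128 bits contain the value of `a`; the upper 128 bits are set to
/// zero.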
2855#[inline]
2861#[target_feature(enable = "avx")]
2862#[stable(feature = "simd_x86", since = "1.27.0")]
2865pub fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
2866 unsafe {
2867 let b = i64x2::ZERO;
2868 let dst: i64x4 = simd_shuffle!(a.as_i64x2(), b, [0, 1, 2, 3]);
2869 transmute(dst)
2870 }
2871}
2872
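/// Constructs a 256-bit floating-point vector from a 128-bit floating-point
/// vector. The lower 128 bits contain the value of `a`; the upper 128 bits
/// are set to zero.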
2873#[inline]
2880#[target_feature(enable = "avx")]
2881#[stable(feature = "simd_x86", since = "1.27.0")]
2884pub fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
2885 unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0, 1, 2, 3]) }
2886}
2887
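/// Returns vector of type `__m256` with indeterminate elements. In this
/// implementation, as the body shows, a zeroed vector is returned.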
2888#[inline]
2895#[target_feature(enable = "avx")]
2896#[stable(feature = "simd_x86", since = "1.27.0")]
2898pub fn _mm256_undefined_ps() -> __m256 {
2899 const { unsafe { mem::zeroed() } }
2900}
2901
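/// Returns vector of type `__m256d` with indeterminate elements. In this
/// implementation, as the body shows, a zeroed vector is returned.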
2902#[inline]
2909#[target_feature(enable = "avx")]
2910#[stable(feature = "simd_x86", since = "1.27.0")]
2912pub fn _mm256_undefined_pd() -> __m256d {
2913 const { unsafe { mem::zeroed() } }
2914}
2915
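/// Returns vector of type `__m256i` with indeterminate elements. In this
/// implementation, as the body shows, a zeroed vector is returned.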
2916#[inline]
2923#[target_feature(enable = "avx")]
2924#[stable(feature = "simd_x86", since = "1.27.0")]
2926pub fn _mm256_undefined_si256() -> __m256i {
2927 const { unsafe { mem::zeroed() } }
2928}
2929
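/// Sets packed `__m256` returned vector with the supplied values: `hi`
/// becomes the high 128 bits and `lo` the low 128 bits.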
2930#[inline]
2934#[target_feature(enable = "avx")]
2935#[cfg_attr(test, assert_instr(vinsertf128))]
2936#[stable(feature = "simd_x86", since = "1.27.0")]
2937pub fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 {
2938 unsafe { simd_shuffle!(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7]) }
2939}
2940
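/// Sets packed `__m256d` returned vector with the supplied values: `hi`
/// becomes the high 128 bits and `lo` the low 128 bits.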
2941#[inline]
2945#[target_feature(enable = "avx")]
2946#[cfg_attr(test, assert_instr(vinsertf128))]
2947#[stable(feature = "simd_x86", since = "1.27.0")]
2948pub fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d {
2949 unsafe {
2950 let hi: __m128 = transmute(hi);
2951 let lo: __m128 = transmute(lo);
2952 transmute(_mm256_set_m128(hi, lo))
2953 }
2954}
2955
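/// Sets packed `__m256i` returned vector with the supplied values: `hi`
/// becomes the high 128 bits and `lo` the low 128 bits.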
2956#[inline]
2960#[target_feature(enable = "avx")]
2961#[cfg_attr(test, assert_instr(vinsertf128))]
2962#[stable(feature = "simd_x86", since = "1.27.0")]
2963pub fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
2964 unsafe {
2965 let hi: __m128 = transmute(hi);
2966 let lo: __m128 = transmute(lo);
2967 transmute(_mm256_set_m128(hi, lo))
2968 }
2969}
2970
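/// Sets packed `__m256` returned vector with the supplied values in reverse
/// order: `lo` becomes the low 128 bits and `hi` the high 128 bits.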
2971#[inline]
2975#[target_feature(enable = "avx")]
2976#[cfg_attr(test, assert_instr(vinsertf128))]
2977#[stable(feature = "simd_x86", since = "1.27.0")]
2978pub fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 {
2979 _mm256_set_m128(hi, lo)
2980}
2981
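/// Sets packed `__m256d` returned vector with the supplied values in reverse
/// order: `lo` becomes the low 128 bits and `hi` the high 128 bits.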
2982#[inline]
2986#[target_feature(enable = "avx")]
2987#[cfg_attr(test, assert_instr(vinsertf128))]
2988#[stable(feature = "simd_x86", since = "1.27.0")]
2989pub fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d {
2990 _mm256_set_m128d(hi, lo)
2991}
2992
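/// Sets packed `__m256i` returned vector with the supplied values in reverse
/// order: `lo` becomes the low 128 bits and `hi` the high 128 bits.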
2993#[inline]
2997#[target_feature(enable = "avx")]
2998#[cfg_attr(test, assert_instr(vinsertf128))]
2999#[stable(feature = "simd_x86", since = "1.27.0")]
3000pub fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
3001 _mm256_set_m128i(hi, lo)
3002}
3003
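/// Loads two 128-bit values (composed of 4 packed single-precision (32-bit)
/// floating-point elements) from memory, and combines them into a 256-bit
/// value. `hiaddr` and `loaddr` do not need to be aligned on any particular
/// boundary.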
3004#[inline]
3011#[target_feature(enable = "avx")]
3012#[stable(feature = "simd_x86", since = "1.27.0")]
3014pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m256 {
3015 let a = _mm256_castps128_ps256(_mm_loadu_ps(loaddr));
3016 _mm256_insertf128_ps::<1>(a, _mm_loadu_ps(hiaddr))
3017}
3018
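/// Loads two 128-bit values (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory, and combines them into a 256-bit
/// value. `hiaddr` and `loaddr` do not need to be aligned on any particular
/// boundary.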
3019#[inline]
3026#[target_feature(enable = "avx")]
3027#[stable(feature = "simd_x86", since = "1.27.0")]
3029pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m256d {
3030 let a = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr));
3031 _mm256_insertf128_pd::<1>(a, _mm_loadu_pd(hiaddr))
3032}
3033
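/// Loads two 128-bit values (composed of integer data) from memory, and
/// combines them into a 256-bit value. `hiaddr` and `loaddr` do not need to
/// be aligned on any particular boundary.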
3034#[inline]
3040#[target_feature(enable = "avx")]
3041#[stable(feature = "simd_x86", since = "1.27.0")]
3043pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i {
3044 let a = _mm256_castsi128_si256(_mm_loadu_si128(loaddr));
3045 _mm256_insertf128_si256::<1>(a, _mm_loadu_si128(hiaddr))
3046}
3047
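/// Stores the high and low 128-bit halves (each composed of 4 packed
/// single-precision (32-bit) floating-point elements) from `a` into memory at
/// two different 128-bit locations. `hiaddr` and `loaddr` do not need to be
/// aligned on any particular boundary.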
3048#[inline]
3055#[target_feature(enable = "avx")]
3056#[stable(feature = "simd_x86", since = "1.27.0")]
3058pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256) {
3059 let lo = _mm256_castps256_ps128(a);
3060 _mm_storeu_ps(loaddr, lo);
3061 let hi = _mm256_extractf128_ps::<1>(a);
3062 _mm_storeu_ps(hiaddr, hi);
3063}
3064
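/// Stores the high and low 128-bit halves (each composed of 2 packed
/// double-precision (64-bit) floating-point elements) from `a` into memory at
/// two different 128-bit locations. `hiaddr` and `loaddr` do not need to be
/// aligned on any particular boundary.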
3065#[inline]
3072#[target_feature(enable = "avx")]
3073#[stable(feature = "simd_x86", since = "1.27.0")]
3075pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256d) {
3076 let lo = _mm256_castpd256_pd128(a);
3077 _mm_storeu_pd(loaddr, lo);
3078 let hi = _mm256_extractf128_pd::<1>(a);
3079 _mm_storeu_pd(hiaddr, hi);
3080}
3081
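/// Stores the high and low 128-bit halves (each composed of integer data)
/// from `a` into memory at two different 128-bit locations. `hiaddr` and
/// `loaddr` do not need to be aligned on any particular boundary.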
3082#[inline]
3088#[target_feature(enable = "avx")]
3089#[stable(feature = "simd_x86", since = "1.27.0")]
3091pub unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i) {
3092 let lo = _mm256_castsi256_si128(a);
3093 _mm_storeu_si128(loaddr, lo);
3094 let hi = _mm256_extractf128_si256::<1>(a);
3095 _mm_storeu_si128(hiaddr, hi);
3096}
3097
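/// Returns the first element of the input vector of `[8 x float]`.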
3098#[inline]
3102#[target_feature(enable = "avx")]
3103#[stable(feature = "simd_x86", since = "1.27.0")]
3105pub fn _mm256_cvtss_f32(a: __m256) -> f32 {
3106 unsafe { simd_extract!(a, 0) }
3107}
3108
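// LLVM intrinsic declarations used by the implementations above.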
3109#[allow(improper_ctypes)]
3111unsafe extern "C" {
3112 #[link_name = "llvm.x86.avx.round.pd.256"]
3113 fn roundpd256(a: __m256d, b: i32) -> __m256d;
3114 #[link_name = "llvm.x86.avx.round.ps.256"]
3115 fn roundps256(a: __m256, b: i32) -> __m256;
3116 #[link_name = "llvm.x86.avx.dp.ps.256"]
3117 fn vdpps(a: __m256, b: __m256, imm8: i8) -> __m256;
3118 #[link_name = "llvm.x86.sse2.cmp.pd"]
3119 fn vcmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3120 #[link_name = "llvm.x86.avx.cmp.pd.256"]
3121 fn vcmppd256(a: __m256d, b: __m256d, imm8: u8) -> __m256d;
3122 #[link_name = "llvm.x86.sse.cmp.ps"]
3123 fn vcmpps(a: __m128, b: __m128, imm8: i8) -> __m128;
3124 #[link_name = "llvm.x86.avx.cmp.ps.256"]
3125 fn vcmpps256(a: __m256, b: __m256, imm8: u8) -> __m256;
3126 #[link_name = "llvm.x86.sse2.cmp.sd"]
3127 fn vcmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3128 #[link_name = "llvm.x86.sse.cmp.ss"]
3129 fn vcmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
3130 #[link_name = "llvm.x86.avx.cvt.ps2dq.256"]
3131 fn vcvtps2dq(a: __m256) -> i32x8;
3132 #[link_name = "llvm.x86.avx.cvtt.pd2dq.256"]
3133 fn vcvttpd2dq(a: __m256d) -> i32x4;
3134 #[link_name = "llvm.x86.avx.cvt.pd2dq.256"]
3135 fn vcvtpd2dq(a: __m256d) -> i32x4;
3136 #[link_name = "llvm.x86.avx.cvtt.ps2dq.256"]
3137 fn vcvttps2dq(a: __m256) -> i32x8;
3138 #[link_name = "llvm.x86.avx.vzeroall"]
3139 fn vzeroall();
3140 #[link_name = "llvm.x86.avx.vzeroupper"]
3141 fn vzeroupper();
3142 #[link_name = "llvm.x86.avx.vpermilvar.ps.256"]
3143 fn vpermilps256(a: __m256, b: i32x8) -> __m256;
3144 #[link_name = "llvm.x86.avx.vpermilvar.ps"]
3145 fn vpermilps(a: __m128, b: i32x4) -> __m128;
3146 #[link_name = "llvm.x86.avx.vpermilvar.pd.256"]
3147 fn vpermilpd256(a: __m256d, b: i64x4) -> __m256d;
3148 #[link_name = "llvm.x86.avx.vpermilvar.pd"]
3149 fn vpermilpd(a: __m128d, b: i64x2) -> __m128d;
3150 #[link_name = "llvm.x86.avx.maskload.pd.256"]
3151 fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> __m256d;
3152 #[link_name = "llvm.x86.avx.maskstore.pd.256"]
3153 fn maskstorepd256(mem_addr: *mut i8, mask: i64x4, a: __m256d);
3154 #[link_name = "llvm.x86.avx.maskload.pd"]
3155 fn maskloadpd(mem_addr: *const i8, mask: i64x2) -> __m128d;
3156 #[link_name = "llvm.x86.avx.maskstore.pd"]
3157 fn maskstorepd(mem_addr: *mut i8, mask: i64x2, a: __m128d);
3158 #[link_name = "llvm.x86.avx.maskload.ps.256"]
3159 fn maskloadps256(mem_addr: *const i8, mask: i32x8) -> __m256;
3160 #[link_name = "llvm.x86.avx.maskstore.ps.256"]
3161 fn maskstoreps256(mem_addr: *mut i8, mask: i32x8, a: __m256);
3162 #[link_name = "llvm.x86.avx.maskload.ps"]
3163 fn maskloadps(mem_addr: *const i8, mask: i32x4) -> __m128;
3164 #[link_name = "llvm.x86.avx.maskstore.ps"]
3165 fn maskstoreps(mem_addr: *mut i8, mask: i32x4, a: __m128);
3166 #[link_name = "llvm.x86.avx.ldu.dq.256"]
3167 fn vlddqu(mem_addr: *const i8) -> i8x32;
3168 #[link_name = "llvm.x86.avx.rcp.ps.256"]
3169 fn vrcpps(a: __m256) -> __m256;
3170 #[link_name = "llvm.x86.avx.rsqrt.ps.256"]
3171 fn vrsqrtps(a: __m256) -> __m256;
3172 #[link_name = "llvm.x86.avx.ptestnzc.256"]
3173 fn ptestnzc256(a: i64x4, b: i64x4) -> i32;
3174 #[link_name = "llvm.x86.avx.vtestz.pd.256"]
3175 fn vtestzpd256(a: __m256d, b: __m256d) -> i32;
3176 #[link_name = "llvm.x86.avx.vtestc.pd.256"]
3177 fn vtestcpd256(a: __m256d, b: __m256d) -> i32;
3178 #[link_name = "llvm.x86.avx.vtestnzc.pd.256"]
3179 fn vtestnzcpd256(a: __m256d, b: __m256d) -> i32;
3180 #[link_name = "llvm.x86.avx.vtestnzc.pd"]
3181 fn vtestnzcpd(a: __m128d, b: __m128d) -> i32;
3182 #[link_name = "llvm.x86.avx.vtestz.ps.256"]
3183 fn vtestzps256(a: __m256, b: __m256) -> i32;
3184 #[link_name = "llvm.x86.avx.vtestc.ps.256"]
3185 fn vtestcps256(a: __m256, b: __m256) -> i32;
3186 #[link_name = "llvm.x86.avx.vtestnzc.ps.256"]
3187 fn vtestnzcps256(a: __m256, b: __m256) -> i32;
3188 #[link_name = "llvm.x86.avx.vtestnzc.ps"]
3189 fn vtestnzcps(a: __m128, b: __m128) -> i32;
3190 #[link_name = "llvm.x86.avx.min.ps.256"]
3191 fn vminps(a: __m256, b: __m256) -> __m256;
3192 #[link_name = "llvm.x86.avx.max.ps.256"]
3193 fn vmaxps(a: __m256, b: __m256) -> __m256;
3194 #[link_name = "llvm.x86.avx.min.pd.256"]
3195 fn vminpd(a: __m256d, b: __m256d) -> __m256d;
3196 #[link_name = "llvm.x86.avx.max.pd.256"]
3197 fn vmaxpd(a: __m256d, b: __m256d) -> __m256d;
3198}
3199
3200#[cfg(test)]
3201mod tests {
3202 use crate::hint::black_box;
use crate::mem::transmute; // used for the bit-level assertions in the min/max tests below
3203 use crate::ptr;
3204 use stdarch_test::simd_test;
3205
3206 use crate::core_arch::x86::*;
3207
3208 #[simd_test(enable = "avx")]
3209 unsafe fn test_mm256_add_pd() {
3210 let a = _mm256_setr_pd(1., 2., 3., 4.);
3211 let b = _mm256_setr_pd(5., 6., 7., 8.);
3212 let r = _mm256_add_pd(a, b);
3213 let e = _mm256_setr_pd(6., 8., 10., 12.);
3214 assert_eq_m256d(r, e);
3215 }
3216
3217 #[simd_test(enable = "avx")]
3218 unsafe fn test_mm256_add_ps() {
3219 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
3220 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
3221 let r = _mm256_add_ps(a, b);
3222 let e = _mm256_setr_ps(10., 12., 14., 16., 18., 20., 22., 24.);
3223 assert_eq_m256(r, e);
3224 }
3225
3226 #[simd_test(enable = "avx")]
3227 unsafe fn test_mm256_and_pd() {
3228 let a = _mm256_set1_pd(1.);
3229 let b = _mm256_set1_pd(0.6);
3230 let r = _mm256_and_pd(a, b);
3231 let e = _mm256_set1_pd(0.5);
3232 assert_eq_m256d(r, e);
3233 }
3234
3235 #[simd_test(enable = "avx")]
3236 unsafe fn test_mm256_and_ps() {
3237 let a = _mm256_set1_ps(1.);
3238 let b = _mm256_set1_ps(0.6);
3239 let r = _mm256_and_ps(a, b);
3240 let e = _mm256_set1_ps(0.5);
3241 assert_eq_m256(r, e);
3242 }
3243
3244 #[simd_test(enable = "avx")]
3245 unsafe fn test_mm256_or_pd() {
3246 let a = _mm256_set1_pd(1.);
3247 let b = _mm256_set1_pd(0.6);
3248 let r = _mm256_or_pd(a, b);
3249 let e = _mm256_set1_pd(1.2);
3250 assert_eq_m256d(r, e);
3251 }
3252
3253 #[simd_test(enable = "avx")]
3254 unsafe fn test_mm256_or_ps() {
3255 let a = _mm256_set1_ps(1.);
3256 let b = _mm256_set1_ps(0.6);
3257 let r = _mm256_or_ps(a, b);
3258 let e = _mm256_set1_ps(1.2);
3259 assert_eq_m256(r, e);
3260 }
3261
3262 #[simd_test(enable = "avx")]
3263 unsafe fn test_mm256_shuffle_pd() {
3264 let a = _mm256_setr_pd(1., 4., 5., 8.);
3265 let b = _mm256_setr_pd(2., 3., 6., 7.);
3266 let r = _mm256_shuffle_pd::<0b11_11_11_11>(a, b);
3267 let e = _mm256_setr_pd(4., 3., 8., 7.);
3268 assert_eq_m256d(r, e);
3269 }
3270
3271 #[simd_test(enable = "avx")]
3272 unsafe fn test_mm256_shuffle_ps() {
3273 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3274 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3275 let r = _mm256_shuffle_ps::<0b00_00_11_11>(a, b);
3276 let e = _mm256_setr_ps(8., 8., 2., 2., 16., 16., 10., 10.);
3277 assert_eq_m256(r, e);
3278 }
3279
3280 #[simd_test(enable = "avx")]
3281 unsafe fn test_mm256_andnot_pd() {
3282 let a = _mm256_set1_pd(0.);
3283 let b = _mm256_set1_pd(0.6);
3284 let r = _mm256_andnot_pd(a, b);
3285 assert_eq_m256d(r, b);
3286 }
3287
3288 #[simd_test(enable = "avx")]
3289 unsafe fn test_mm256_andnot_ps() {
3290 let a = _mm256_set1_ps(0.);
3291 let b = _mm256_set1_ps(0.6);
3292 let r = _mm256_andnot_ps(a, b);
3293 assert_eq_m256(r, b);
3294 }
3295
3296 #[simd_test(enable = "avx")]
3297 unsafe fn test_mm256_max_pd() {
3298 let a = _mm256_setr_pd(1., 4., 5., 8.);
3299 let b = _mm256_setr_pd(2., 3., 6., 7.);
3300 let r = _mm256_max_pd(a, b);
3301 let e = _mm256_setr_pd(2., 4., 6., 8.);
3302 assert_eq_m256d(r, e);
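// If both operands are 0.0 (of either sign), vmaxpd returns the second
// operand, so the sign of the result follows `b`.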
3303 let w = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
3306 let x = _mm256_max_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
3307 let wu: [u64; 4] = transmute(w);
3308 let xu: [u64; 4] = transmute(x);
3309 assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
3310 assert_eq!(xu, [0u64; 4]);
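// If only one operand is NaN, vmaxpd returns the second (source) operand:
// a NaN in `b` propagates to the result, but a NaN in `a` does not.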
3311 let y = _mm256_max_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
3315 let z = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
3316 let yf: [f64; 4] = transmute(y);
3317 let zf: [f64; 4] = transmute(z);
3318 assert_eq!(yf, [0.0; 4]);
3319 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3320 }
3321
3322 #[simd_test(enable = "avx")]
3323 unsafe fn test_mm256_max_ps() {
3324 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3325 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3326 let r = _mm256_max_ps(a, b);
3327 let e = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.);
3328 assert_eq_m256(r, e);
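// If both operands are 0.0 (of either sign), vmaxps returns the second
// operand, so the sign of the result follows `b`.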
3329 let w = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
3332 let x = _mm256_max_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
3333 let wu: [u32; 8] = transmute(w);
3334 let xu: [u32; 8] = transmute(x);
3335 assert_eq!(wu, [0x8000_0000u32; 8]);
3336 assert_eq!(xu, [0u32; 8]);
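// If only one operand is NaN, vmaxps returns the second (source) operand:
// a NaN in `b` propagates to the result, but a NaN in `a` does not.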
3337 let y = _mm256_max_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
3341 let z = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
3342 let yf: [f32; 8] = transmute(y);
3343 let zf: [f32; 8] = transmute(z);
3344 assert_eq!(yf, [0.0; 8]);
3345 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3346 }
3347
3348 #[simd_test(enable = "avx")]
3349 unsafe fn test_mm256_min_pd() {
3350 let a = _mm256_setr_pd(1., 4., 5., 8.);
3351 let b = _mm256_setr_pd(2., 3., 6., 7.);
3352 let r = _mm256_min_pd(a, b);
3353 let e = _mm256_setr_pd(1., 3., 5., 7.);
3354 assert_eq_m256d(r, e);
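// If both operands are 0.0 (of either sign), vminpd returns the second
// operand, so the sign of the result follows `b`.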
3355 let w = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
3358 let x = _mm256_min_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
3359 let wu: [u64; 4] = transmute(w);
3360 let xu: [u64; 4] = transmute(x);
3361 assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
3362 assert_eq!(xu, [0u64; 4]);
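// If only one operand is NaN, vminpd returns the second (source) operand:
// a NaN in `b` propagates to the result, but a NaN in `a` does not.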
3363 let y = _mm256_min_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
3367 let z = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
3368 let yf: [f64; 4] = transmute(y);
3369 let zf: [f64; 4] = transmute(z);
3370 assert_eq!(yf, [0.0; 4]);
3371 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3372 }
3373
3374 #[simd_test(enable = "avx")]
3375 unsafe fn test_mm256_min_ps() {
3376 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3377 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3378 let r = _mm256_min_ps(a, b);
3379 let e = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.);
3380 assert_eq_m256(r, e);
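// If both operands are 0.0 (of either sign), vminps returns the second
// operand, so the sign of the result follows `b`.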
3381 let w = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
3384 let x = _mm256_min_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
3385 let wu: [u32; 8] = transmute(w);
3386 let xu: [u32; 8] = transmute(x);
3387 assert_eq!(wu, [0x8000_0000u32; 8]);
3388 assert_eq!(xu, [0u32; 8]);
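// If only one operand is NaN, vminps returns the second (source) operand:
// a NaN in `b` propagates to the result, but a NaN in `a` does not.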
3389 let y = _mm256_min_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
3393 let z = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
3394 let yf: [f32; 8] = transmute(y);
3395 let zf: [f32; 8] = transmute(z);
3396 assert_eq!(yf, [0.0; 8]);
3397 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3398 }
3399
3400 #[simd_test(enable = "avx")]
3401 unsafe fn test_mm256_mul_pd() {
3402 let a = _mm256_setr_pd(1., 2., 3., 4.);
3403 let b = _mm256_setr_pd(5., 6., 7., 8.);
3404 let r = _mm256_mul_pd(a, b);
3405 let e = _mm256_setr_pd(5., 12., 21., 32.);
3406 assert_eq_m256d(r, e);
3407 }
3408
3409 #[simd_test(enable = "avx")]
3410 unsafe fn test_mm256_mul_ps() {
3411 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
3412 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
3413 let r = _mm256_mul_ps(a, b);
3414 let e = _mm256_setr_ps(9., 20., 33., 48., 65., 84., 105., 128.);
3415 assert_eq_m256(r, e);
3416 }
3417
3418 #[simd_test(enable = "avx")]
3419 unsafe fn test_mm256_addsub_pd() {
3420 let a = _mm256_setr_pd(1., 2., 3., 4.);
3421 let b = _mm256_setr_pd(5., 6., 7., 8.);
3422 let r = _mm256_addsub_pd(a, b);
3423 let e = _mm256_setr_pd(-4., 8., -4., 12.);
3424 assert_eq_m256d(r, e);
3425 }
3426
3427 #[simd_test(enable = "avx")]
3428 unsafe fn test_mm256_addsub_ps() {
3429 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3430 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3431 let r = _mm256_addsub_ps(a, b);
3432 let e = _mm256_setr_ps(-4., 8., -4., 12., -4., 8., -4., 12.);
3433 assert_eq_m256(r, e);
3434 }
3435
3436 #[simd_test(enable = "avx")]
3437 unsafe fn test_mm256_sub_pd() {
3438 let a = _mm256_setr_pd(1., 2., 3., 4.);
3439 let b = _mm256_setr_pd(5., 6., 7., 8.);
3440 let r = _mm256_sub_pd(a, b);
3441 let e = _mm256_setr_pd(-4., -4., -4., -4.);
3442 assert_eq_m256d(r, e);
3443 }
3444
3445 #[simd_test(enable = "avx")]
3446 unsafe fn test_mm256_sub_ps() {
3447 let a = _mm256_setr_ps(1., 2., 3., 4., -1., -2., -3., -4.);
3448 let b = _mm256_setr_ps(5., 6., 7., 8., 3., 2., 1., 0.);
3449 let r = _mm256_sub_ps(a, b);
3450 let e = _mm256_setr_ps(-4., -4., -4., -4., -4., -4., -4., -4.);
3451 assert_eq_m256(r, e);
3452 }
3453
3454 #[simd_test(enable = "avx")]
3455 unsafe fn test_mm256_round_pd() {
3456 let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
3457 let result_closest = _mm256_round_pd::<0b0000>(a);
3458 let result_down = _mm256_round_pd::<0b0001>(a);
3459 let result_up = _mm256_round_pd::<0b0010>(a);
3460 let expected_closest = _mm256_setr_pd(2., 2., 4., -1.);
3461 let expected_down = _mm256_setr_pd(1., 2., 3., -2.);
3462 let expected_up = _mm256_setr_pd(2., 3., 4., -1.);
3463 assert_eq_m256d(result_closest, expected_closest);
3464 assert_eq_m256d(result_down, expected_down);
3465 assert_eq_m256d(result_up, expected_up);
3466 }
3467
3468 #[simd_test(enable = "avx")]
3469 unsafe fn test_mm256_floor_pd() {
3470 let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
3471 let result_down = _mm256_floor_pd(a);
3472 let expected_down = _mm256_setr_pd(1., 2., 3., -2.);
3473 assert_eq_m256d(result_down, expected_down);
3474 }
3475
3476 #[simd_test(enable = "avx")]
3477 unsafe fn test_mm256_ceil_pd() {
3478 let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
3479 let result_up = _mm256_ceil_pd(a);
3480 let expected_up = _mm256_setr_pd(2., 3., 4., -1.);
3481 assert_eq_m256d(result_up, expected_up);
3482 }
3483
3484 #[simd_test(enable = "avx")]
3485 unsafe fn test_mm256_round_ps() {
3486 let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
3487 let result_closest = _mm256_round_ps::<0b0000>(a);
3488 let result_down = _mm256_round_ps::<0b0001>(a);
3489 let result_up = _mm256_round_ps::<0b0010>(a);
3490 let expected_closest = _mm256_setr_ps(2., 2., 4., -1., 2., 2., 4., -1.);
3491 let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.);
3492 let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.);
3493 assert_eq_m256(result_closest, expected_closest);
3494 assert_eq_m256(result_down, expected_down);
3495 assert_eq_m256(result_up, expected_up);
3496 }
3497
3498 #[simd_test(enable = "avx")]
3499 unsafe fn test_mm256_floor_ps() {
3500 let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
3501 let result_down = _mm256_floor_ps(a);
3502 let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.);
3503 assert_eq_m256(result_down, expected_down);
3504 }
3505
3506 #[simd_test(enable = "avx")]
3507 unsafe fn test_mm256_ceil_ps() {
3508 let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
3509 let result_up = _mm256_ceil_ps(a);
3510 let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.);
3511 assert_eq_m256(result_up, expected_up);
3512 }
3513
3514 #[simd_test(enable = "avx")]
3515 unsafe fn test_mm256_sqrt_pd() {
3516 let a = _mm256_setr_pd(4., 9., 16., 25.);
3517 let r = _mm256_sqrt_pd(a);
3518 let e = _mm256_setr_pd(2., 3., 4., 5.);
3519 assert_eq_m256d(r, e);
3520 }
3521
3522 #[simd_test(enable = "avx")]
3523 unsafe fn test_mm256_sqrt_ps() {
3524 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3525 let r = _mm256_sqrt_ps(a);
3526 let e = _mm256_setr_ps(2., 3., 4., 5., 2., 3., 4., 5.);
3527 assert_eq_m256(r, e);
3528 }
3529
3530 #[simd_test(enable = "avx")]
3531 unsafe fn test_mm256_div_ps() {
3532 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3533 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3534 let r = _mm256_div_ps(a, b);
3535 let e = _mm256_setr_ps(1., 3., 8., 5., 0.5, 1., 0.25, 0.5);
3536 assert_eq_m256(r, e);
3537 }
3538
3539 #[simd_test(enable = "avx")]
3540 unsafe fn test_mm256_div_pd() {
3541 let a = _mm256_setr_pd(4., 9., 16., 25.);
3542 let b = _mm256_setr_pd(4., 3., 2., 5.);
3543 let r = _mm256_div_pd(a, b);
3544 let e = _mm256_setr_pd(1., 3., 8., 5.);
3545 assert_eq_m256d(r, e);
3546 }
3547
3548 #[simd_test(enable = "avx")]
3549 unsafe fn test_mm256_blend_pd() {
3550 let a = _mm256_setr_pd(4., 9., 16., 25.);
3551 let b = _mm256_setr_pd(4., 3., 2., 5.);
3552 let r = _mm256_blend_pd::<0x0>(a, b);
3553 assert_eq_m256d(r, _mm256_setr_pd(4., 9., 16., 25.));
3554 let r = _mm256_blend_pd::<0x3>(a, b);
3555 assert_eq_m256d(r, _mm256_setr_pd(4., 3., 16., 25.));
3556 let r = _mm256_blend_pd::<0xF>(a, b);
3557 assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 5.));
3558 }
3559
3560 #[simd_test(enable = "avx")]
3561 unsafe fn test_mm256_blend_ps() {
3562 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3563 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3564 let r = _mm256_blend_ps::<0x0>(a, b);
3565 assert_eq_m256(r, _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.));
3566 let r = _mm256_blend_ps::<0x3>(a, b);
3567 assert_eq_m256(r, _mm256_setr_ps(2., 3., 5., 8., 9., 12., 13., 16.));
3568 let r = _mm256_blend_ps::<0xF>(a, b);
3569 assert_eq_m256(r, _mm256_setr_ps(2., 3., 6., 7., 9., 12., 13., 16.));
3570 }
3571
3572 #[simd_test(enable = "avx")]
3573 unsafe fn test_mm256_blendv_pd() {
3574 let a = _mm256_setr_pd(4., 9., 16., 25.);
3575 let b = _mm256_setr_pd(4., 3., 2., 5.);
3576 let c = _mm256_setr_pd(0., 0., !0 as f64, !0 as f64);
3577 let r = _mm256_blendv_pd(a, b, c);
3578 let e = _mm256_setr_pd(4., 9., 2., 5.);
3579 assert_eq_m256d(r, e);
3580 }
3581
3582 #[simd_test(enable = "avx")]
3583 unsafe fn test_mm256_blendv_ps() {
3584 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3585 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3586 #[rustfmt::skip]
3587 let c = _mm256_setr_ps(
3588 0., 0., 0., 0., !0 as f32, !0 as f32, !0 as f32, !0 as f32,
3589 );
3590 let r = _mm256_blendv_ps(a, b, c);
3591 let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.);
3592 assert_eq_m256(r, e);
3593 }
3594
3595 #[simd_test(enable = "avx")]
3596 unsafe fn test_mm256_dp_ps() {
3597 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3598 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3599 let r = _mm256_dp_ps::<0xFF>(a, b);
3600 let e = _mm256_setr_ps(200., 200., 200., 200., 2387., 2387., 2387., 2387.);
3601 assert_eq_m256(r, e);
3602 }
3603
3604 #[simd_test(enable = "avx")]
3605 unsafe fn test_mm256_hadd_pd() {
3606 let a = _mm256_setr_pd(4., 9., 16., 25.);
3607 let b = _mm256_setr_pd(4., 3., 2., 5.);
3608 let r = _mm256_hadd_pd(a, b);
3609 let e = _mm256_setr_pd(13., 7., 41., 7.);
3610 assert_eq_m256d(r, e);
3611
3612 let a = _mm256_setr_pd(1., 2., 3., 4.);
3613 let b = _mm256_setr_pd(5., 6., 7., 8.);
3614 let r = _mm256_hadd_pd(a, b);
3615 let e = _mm256_setr_pd(3., 11., 7., 15.);
3616 assert_eq_m256d(r, e);
3617 }
3618
3619 #[simd_test(enable = "avx")]
3620 unsafe fn test_mm256_hadd_ps() {
3621 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3622 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3623 let r = _mm256_hadd_ps(a, b);
3624 let e = _mm256_setr_ps(13., 41., 7., 7., 13., 41., 17., 114.);
3625 assert_eq_m256(r, e);
3626
3627 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3628 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3629 let r = _mm256_hadd_ps(a, b);
3630 let e = _mm256_setr_ps(3., 7., 11., 15., 3., 7., 11., 15.);
3631 assert_eq_m256(r, e);
3632 }
3633
3634 #[simd_test(enable = "avx")]
3635 unsafe fn test_mm256_hsub_pd() {
3636 let a = _mm256_setr_pd(4., 9., 16., 25.);
3637 let b = _mm256_setr_pd(4., 3., 2., 5.);
3638 let r = _mm256_hsub_pd(a, b);
3639 let e = _mm256_setr_pd(-5., 1., -9., -3.);
3640 assert_eq_m256d(r, e);
3641
3642 let a = _mm256_setr_pd(1., 2., 3., 4.);
3643 let b = _mm256_setr_pd(5., 6., 7., 8.);
3644 let r = _mm256_hsub_pd(a, b);
3645 let e = _mm256_setr_pd(-1., -1., -1., -1.);
3646 assert_eq_m256d(r, e);
3647 }
3648
3649 #[simd_test(enable = "avx")]
3650 unsafe fn test_mm256_hsub_ps() {
3651 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3652 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3653 let r = _mm256_hsub_ps(a, b);
3654 let e = _mm256_setr_ps(-5., -9., 1., -3., -5., -9., -1., 14.);
3655 assert_eq_m256(r, e);
3656
3657 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3658 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3659 let r = _mm256_hsub_ps(a, b);
3660 let e = _mm256_setr_ps(-1., -1., -1., -1., -1., -1., -1., -1.);
3661 assert_eq_m256(r, e);
3662 }
3663
3664 #[simd_test(enable = "avx")]
3665 unsafe fn test_mm256_xor_pd() {
3666 let a = _mm256_setr_pd(4., 9., 16., 25.);
3667 let b = _mm256_set1_pd(0.);
3668 let r = _mm256_xor_pd(a, b);
3669 assert_eq_m256d(r, a);
3670 }
3671
3672 #[simd_test(enable = "avx")]
3673 unsafe fn test_mm256_xor_ps() {
3674 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3675 let b = _mm256_set1_ps(0.);
3676 let r = _mm256_xor_ps(a, b);
3677 assert_eq_m256(r, a);
3678 }
3679
3680 #[simd_test(enable = "avx")]
3681 unsafe fn test_mm_cmp_pd() {
3682 let a = _mm_setr_pd(4., 9.);
3683 let b = _mm_setr_pd(4., 3.);
3684 let r = _mm_cmp_pd::<_CMP_GE_OS>(a, b);
3685 assert!(get_m128d(r, 0).is_nan());
3686 assert!(get_m128d(r, 1).is_nan());
3687 }
3688
3689 #[simd_test(enable = "avx")]
3690 unsafe fn test_mm256_cmp_pd() {
3691 let a = _mm256_setr_pd(1., 2., 3., 4.);
3692 let b = _mm256_setr_pd(5., 6., 7., 8.);
3693 let r = _mm256_cmp_pd::<_CMP_GE_OS>(a, b);
3694 let e = _mm256_set1_pd(0.);
3695 assert_eq_m256d(r, e);
3696 }
3697
3698 #[simd_test(enable = "avx")]
3699 unsafe fn test_mm_cmp_ps() {
3700 let a = _mm_setr_ps(4., 3., 2., 5.);
3701 let b = _mm_setr_ps(4., 9., 16., 25.);
3702 let r = _mm_cmp_ps::<_CMP_GE_OS>(a, b);
3703 assert!(get_m128(r, 0).is_nan());
3704 assert_eq!(get_m128(r, 1), 0.);
3705 assert_eq!(get_m128(r, 2), 0.);
3706 assert_eq!(get_m128(r, 3), 0.);
3707 }
3708
3709 #[simd_test(enable = "avx")]
3710 unsafe fn test_mm256_cmp_ps() {
3711 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3712 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3713 let r = _mm256_cmp_ps::<_CMP_GE_OS>(a, b);
3714 let e = _mm256_set1_ps(0.);
3715 assert_eq_m256(r, e);
3716 }
3717
3718 #[simd_test(enable = "avx")]
3719 unsafe fn test_mm_cmp_sd() {
3720 let a = _mm_setr_pd(4., 9.);
3721 let b = _mm_setr_pd(4., 3.);
3722 let r = _mm_cmp_sd::<_CMP_GE_OS>(a, b);
3723 assert!(get_m128d(r, 0).is_nan());
3724 assert_eq!(get_m128d(r, 1), 9.);
3725 }
3726
3727 #[simd_test(enable = "avx")]
3728 unsafe fn test_mm_cmp_ss() {
3729 let a = _mm_setr_ps(4., 3., 2., 5.);
3730 let b = _mm_setr_ps(4., 9., 16., 25.);
3731 let r = _mm_cmp_ss::<_CMP_GE_OS>(a, b);
3732 assert!(get_m128(r, 0).is_nan());
3733 assert_eq!(get_m128(r, 1), 3.);
3734 assert_eq!(get_m128(r, 2), 2.);
3735 assert_eq!(get_m128(r, 3), 5.);
3736 }
3737
3738 #[simd_test(enable = "avx")]
3739 unsafe fn test_mm256_cvtepi32_pd() {
3740 let a = _mm_setr_epi32(4, 9, 16, 25);
3741 let r = _mm256_cvtepi32_pd(a);
3742 let e = _mm256_setr_pd(4., 9., 16., 25.);
3743 assert_eq_m256d(r, e);
3744 }
3745
3746 #[simd_test(enable = "avx")]
3747 unsafe fn test_mm256_cvtepi32_ps() {
3748 let a = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
3749 let r = _mm256_cvtepi32_ps(a);
3750 let e = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3751 assert_eq_m256(r, e);
3752 }
3753
3754 #[simd_test(enable = "avx")]
3755 unsafe fn test_mm256_cvtpd_ps() {
3756 let a = _mm256_setr_pd(4., 9., 16., 25.);
3757 let r = _mm256_cvtpd_ps(a);
3758 let e = _mm_setr_ps(4., 9., 16., 25.);
3759 assert_eq_m128(r, e);
3760 }
3761
3762 #[simd_test(enable = "avx")]
3763 unsafe fn test_mm256_cvtps_epi32() {
3764 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3765 let r = _mm256_cvtps_epi32(a);
3766 let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
3767 assert_eq_m256i(r, e);
3768 }
3769
3770 #[simd_test(enable = "avx")]
3771 unsafe fn test_mm256_cvtps_pd() {
3772 let a = _mm_setr_ps(4., 9., 16., 25.);
3773 let r = _mm256_cvtps_pd(a);
3774 let e = _mm256_setr_pd(4., 9., 16., 25.);
3775 assert_eq_m256d(r, e);
3776 }
3777
3778 #[simd_test(enable = "avx")]
3779 unsafe fn test_mm256_cvtsd_f64() {
3780 let a = _mm256_setr_pd(1., 2., 3., 4.);
3781 let r = _mm256_cvtsd_f64(a);
3782 assert_eq!(r, 1.);
3783 }
3784
3785 #[simd_test(enable = "avx")]
3786 unsafe fn test_mm256_cvttpd_epi32() {
3787 let a = _mm256_setr_pd(4., 9., 16., 25.);
3788 let r = _mm256_cvttpd_epi32(a);
3789 let e = _mm_setr_epi32(4, 9, 16, 25);
3790 assert_eq_m128i(r, e);
3791 }
3792
3793 #[simd_test(enable = "avx")]
3794 unsafe fn test_mm256_cvtpd_epi32() {
3795 let a = _mm256_setr_pd(4., 9., 16., 25.);
3796 let r = _mm256_cvtpd_epi32(a);
3797 let e = _mm_setr_epi32(4, 9, 16, 25);
3798 assert_eq_m128i(r, e);
3799 }
3800
3801 #[simd_test(enable = "avx")]
3802 unsafe fn test_mm256_cvttps_epi32() {
3803 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3804 let r = _mm256_cvttps_epi32(a);
3805 let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
3806 assert_eq_m256i(r, e);
3807 }
3808
3809 #[simd_test(enable = "avx")]
3810 unsafe fn test_mm256_extractf128_ps() {
3811 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3812 let r = _mm256_extractf128_ps::<0>(a);
3813 let e = _mm_setr_ps(4., 3., 2., 5.);
3814 assert_eq_m128(r, e);
3815 }
3816
3817 #[simd_test(enable = "avx")]
3818 unsafe fn test_mm256_extractf128_pd() {
3819 let a = _mm256_setr_pd(4., 3., 2., 5.);
3820 let r = _mm256_extractf128_pd::<0>(a);
3821 let e = _mm_setr_pd(4., 3.);
3822 assert_eq_m128d(r, e);
3823 }
3824
3825 #[simd_test(enable = "avx")]
3826 unsafe fn test_mm256_extractf128_si256() {
3827 let a = _mm256_setr_epi64x(4, 3, 2, 5);
3828 let r = _mm256_extractf128_si256::<0>(a);
3829 let e = _mm_setr_epi64x(4, 3);
3830 assert_eq_m128i(r, e);
3831 }
3832
3833 #[simd_test(enable = "avx")]
3834 unsafe fn test_mm256_extract_epi32() {
3835 let a = _mm256_setr_epi32(-1, 1, 2, 3, 4, 5, 6, 7);
3836 let r1 = _mm256_extract_epi32::<0>(a);
3837 let r2 = _mm256_extract_epi32::<3>(a);
3838 assert_eq!(r1, -1);
3839 assert_eq!(r2, 3);
3840 }
3841
3842 #[simd_test(enable = "avx")]
3843 unsafe fn test_mm256_cvtsi256_si32() {
3844 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3845 let r = _mm256_cvtsi256_si32(a);
3846 assert_eq!(r, 1);
3847 }
3848
3849 #[simd_test(enable = "avx")]
3850 #[cfg_attr(miri, ignore)]
3851 unsafe fn test_mm256_zeroall() {
3852 _mm256_zeroall();
3853 }
3854
3855 #[simd_test(enable = "avx")]
3856 #[cfg_attr(miri, ignore)]
3857 unsafe fn test_mm256_zeroupper() {
3858 _mm256_zeroupper();
3859 }
3860
3861 #[simd_test(enable = "avx")]
3862 unsafe fn test_mm256_permutevar_ps() {
3863 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3864 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3865 let r = _mm256_permutevar_ps(a, b);
3866 let e = _mm256_setr_ps(3., 2., 5., 4., 9., 64., 50., 8.);
3867 assert_eq_m256(r, e);
3868 }
3869
3870 #[simd_test(enable = "avx")]
3871 unsafe fn test_mm_permutevar_ps() {
3872 let a = _mm_setr_ps(4., 3., 2., 5.);
3873 let b = _mm_setr_epi32(1, 2, 3, 4);
3874 let r = _mm_permutevar_ps(a, b);
3875 let e = _mm_setr_ps(3., 2., 5., 4.);
3876 assert_eq_m128(r, e);
3877 }
3878
3879 #[simd_test(enable = "avx")]
3880 unsafe fn test_mm256_permute_ps() {
3881 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3882 let r = _mm256_permute_ps::<0x1b>(a);
3883 let e = _mm256_setr_ps(5., 2., 3., 4., 50., 64., 9., 8.);
3884 assert_eq_m256(r, e);
3885 }
3886
3887 #[simd_test(enable = "avx")]
3888 unsafe fn test_mm_permute_ps() {
3889 let a = _mm_setr_ps(4., 3., 2., 5.);
3890 let r = _mm_permute_ps::<0x1b>(a);
3891 let e = _mm_setr_ps(5., 2., 3., 4.);
3892 assert_eq_m128(r, e);
3893 }
3894
3895 #[simd_test(enable = "avx")]
3896 unsafe fn test_mm256_permutevar_pd() {
3897 let a = _mm256_setr_pd(4., 3., 2., 5.);
3898 let b = _mm256_setr_epi64x(1, 2, 3, 4);
3899 let r = _mm256_permutevar_pd(a, b);
3900 let e = _mm256_setr_pd(4., 3., 5., 2.);
3901 assert_eq_m256d(r, e);
3902 }
3903
3904 #[simd_test(enable = "avx")]
3905 unsafe fn test_mm_permutevar_pd() {
3906 let a = _mm_setr_pd(4., 3.);
3907 let b = _mm_setr_epi64x(3, 0);
3908 let r = _mm_permutevar_pd(a, b);
3909 let e = _mm_setr_pd(3., 4.);
3910 assert_eq_m128d(r, e);
3911 }
3912
3913 #[simd_test(enable = "avx")]
3914 unsafe fn test_mm256_permute_pd() {
3915 let a = _mm256_setr_pd(4., 3., 2., 5.);
3916 let r = _mm256_permute_pd::<5>(a);
3917 let e = _mm256_setr_pd(3., 4., 5., 2.);
3918 assert_eq_m256d(r, e);
3919 }
3920
3921 #[simd_test(enable = "avx")]
3922 unsafe fn test_mm_permute_pd() {
3923 let a = _mm_setr_pd(4., 3.);
3924 let r = _mm_permute_pd::<1>(a);
3925 let e = _mm_setr_pd(3., 4.);
3926 assert_eq_m128d(r, e);
3927 }
3928
3929 #[simd_test(enable = "avx")]
3930 unsafe fn test_mm256_permute2f128_ps() {
3931 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3932 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3933 let r = _mm256_permute2f128_ps::<0x13>(a, b);
3934 let e = _mm256_setr_ps(5., 6., 7., 8., 1., 2., 3., 4.);
3935 assert_eq_m256(r, e);
3936 }
3937
3938 #[simd_test(enable = "avx")]
3939 unsafe fn test_mm256_permute2f128_pd() {
3940 let a = _mm256_setr_pd(1., 2., 3., 4.);
3941 let b = _mm256_setr_pd(5., 6., 7., 8.);
3942 let r = _mm256_permute2f128_pd::<0x31>(a, b);
3943 let e = _mm256_setr_pd(3., 4., 7., 8.);
3944 assert_eq_m256d(r, e);
3945 }
3946
3947 #[simd_test(enable = "avx")]
3948 unsafe fn test_mm256_permute2f128_si256() {
3949 let a = _mm256_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4);
3950 let b = _mm256_setr_epi32(5, 6, 7, 8, 5, 6, 7, 8);
3951 let r = _mm256_permute2f128_si256::<0x20>(a, b);
3952 let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3953 assert_eq_m256i(r, e);
3954 }
3955
3956 #[simd_test(enable = "avx")]
3957 unsafe fn test_mm256_broadcast_ss() {
3958 let r = _mm256_broadcast_ss(&3.);
3959 let e = _mm256_set1_ps(3.);
3960 assert_eq_m256(r, e);
3961 }
3962
3963 #[simd_test(enable = "avx")]
3964 unsafe fn test_mm_broadcast_ss() {
3965 let r = _mm_broadcast_ss(&3.);
3966 let e = _mm_set1_ps(3.);
3967 assert_eq_m128(r, e);
3968 }
3969
3970 #[simd_test(enable = "avx")]
3971 unsafe fn test_mm256_broadcast_sd() {
3972 let r = _mm256_broadcast_sd(&3.);
3973 let e = _mm256_set1_pd(3.);
3974 assert_eq_m256d(r, e);
3975 }
3976
3977 #[simd_test(enable = "avx")]
3978 unsafe fn test_mm256_broadcast_ps() {
3979 let a = _mm_setr_ps(4., 3., 2., 5.);
3980 let r = _mm256_broadcast_ps(&a);
3981 let e = _mm256_setr_ps(4., 3., 2., 5., 4., 3., 2., 5.);
3982 assert_eq_m256(r, e);
3983 }
3984
3985 #[simd_test(enable = "avx")]
3986 unsafe fn test_mm256_broadcast_pd() {
3987 let a = _mm_setr_pd(4., 3.);
3988 let r = _mm256_broadcast_pd(&a);
3989 let e = _mm256_setr_pd(4., 3., 4., 3.);
3990 assert_eq_m256d(r, e);
3991 }
3992
3993 #[simd_test(enable = "avx")]
3994 unsafe fn test_mm256_insertf128_ps() {
3995 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3996 let b = _mm_setr_ps(4., 9., 16., 25.);
3997 let r = _mm256_insertf128_ps::<0>(a, b);
3998 let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.);
3999 assert_eq_m256(r, e);
4000 }
4001
4002 #[simd_test(enable = "avx")]
4003 unsafe fn test_mm256_insertf128_pd() {
4004 let a = _mm256_setr_pd(1., 2., 3., 4.);
4005 let b = _mm_setr_pd(5., 6.);
4006 let r = _mm256_insertf128_pd::<0>(a, b);
4007 let e = _mm256_setr_pd(5., 6., 3., 4.);
4008 assert_eq_m256d(r, e);
4009 }
4010
4011 #[simd_test(enable = "avx")]
4012 unsafe fn test_mm256_insertf128_si256() {
4013 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4014 let b = _mm_setr_epi64x(5, 6);
4015 let r = _mm256_insertf128_si256::<0>(a, b);
4016 let e = _mm256_setr_epi64x(5, 6, 3, 4);
4017 assert_eq_m256i(r, e);
4018 }
4019
4020 #[simd_test(enable = "avx")]
4021 unsafe fn test_mm256_insert_epi8() {
4022 #[rustfmt::skip]
4023 let a = _mm256_setr_epi8(
4024 1, 2, 3, 4, 5, 6, 7, 8,
4025 9, 10, 11, 12, 13, 14, 15, 16,
4026 17, 18, 19, 20, 21, 22, 23, 24,
4027 25, 26, 27, 28, 29, 30, 31, 32,
4028 );
4029 let r = _mm256_insert_epi8::<31>(a, 0);
4030 #[rustfmt::skip]
4031 let e = _mm256_setr_epi8(
4032 1, 2, 3, 4, 5, 6, 7, 8,
4033 9, 10, 11, 12, 13, 14, 15, 16,
4034 17, 18, 19, 20, 21, 22, 23, 24,
4035 25, 26, 27, 28, 29, 30, 31, 0,
4036 );
4037 assert_eq_m256i(r, e);
4038 }
4039
4040 #[simd_test(enable = "avx")]
4041 unsafe fn test_mm256_insert_epi16() {
4042 #[rustfmt::skip]
4043 let a = _mm256_setr_epi16(
4044 0, 1, 2, 3, 4, 5, 6, 7,
4045 8, 9, 10, 11, 12, 13, 14, 15,
4046 );
4047 let r = _mm256_insert_epi16::<15>(a, 0);
4048 #[rustfmt::skip]
4049 let e = _mm256_setr_epi16(
4050 0, 1, 2, 3, 4, 5, 6, 7,
4051 8, 9, 10, 11, 12, 13, 14, 0,
4052 );
4053 assert_eq_m256i(r, e);
4054 }
4055
4056 #[simd_test(enable = "avx")]
4057 unsafe fn test_mm256_insert_epi32() {
4058 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4059 let r = _mm256_insert_epi32::<7>(a, 0);
4060 let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
4061 assert_eq_m256i(r, e);
4062 }
4063
4064 #[simd_test(enable = "avx")]
4065 unsafe fn test_mm256_load_pd() {
4066 let a = _mm256_setr_pd(1., 2., 3., 4.);
4067 let p = ptr::addr_of!(a) as *const f64;
4068 let r = _mm256_load_pd(p);
4069 let e = _mm256_setr_pd(1., 2., 3., 4.);
4070 assert_eq_m256d(r, e);
4071 }
4072
4073 #[simd_test(enable = "avx")]
4074 unsafe fn test_mm256_store_pd() {
4075 let a = _mm256_setr_pd(1., 2., 3., 4.);
4076 let mut r = _mm256_undefined_pd();
4077 _mm256_store_pd(ptr::addr_of_mut!(r) as *mut f64, a);
4078 assert_eq_m256d(r, a);
4079 }
4080
4081 #[simd_test(enable = "avx")]
4082 unsafe fn test_mm256_load_ps() {
4083 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
4084 let p = ptr::addr_of!(a) as *const f32;
4085 let r = _mm256_load_ps(p);
4086 let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
4087 assert_eq_m256(r, e);
4088 }
4089
4090 #[simd_test(enable = "avx")]
4091 unsafe fn test_mm256_store_ps() {
4092 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
4093 let mut r = _mm256_undefined_ps();
4094 _mm256_store_ps(ptr::addr_of_mut!(r) as *mut f32, a);
4095 assert_eq_m256(r, a);
4096 }
4097
4098 #[simd_test(enable = "avx")]
4099 unsafe fn test_mm256_loadu_pd() {
4100 let a = &[1.0f64, 2., 3., 4.];
4101 let p = a.as_ptr();
4102 let r = _mm256_loadu_pd(black_box(p));
4103 let e = _mm256_setr_pd(1., 2., 3., 4.);
4104 assert_eq_m256d(r, e);
4105 }
4106
4107 #[simd_test(enable = "avx")]
4108 unsafe fn test_mm256_storeu_pd() {
4109 let a = _mm256_set1_pd(9.);
4110 let mut r = _mm256_undefined_pd();
4111 _mm256_storeu_pd(ptr::addr_of_mut!(r) as *mut f64, a);
4112 assert_eq_m256d(r, a);
4113 }
4114
4115 #[simd_test(enable = "avx")]
4116 unsafe fn test_mm256_loadu_ps() {
4117 let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
4118 let p = a.as_ptr();
4119 let r = _mm256_loadu_ps(black_box(p));
4120 let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
4121 assert_eq_m256(r, e);
4122 }
4123
4124 #[simd_test(enable = "avx")]
4125 unsafe fn test_mm256_storeu_ps() {
4126 let a = _mm256_set1_ps(9.);
4127 let mut r = _mm256_undefined_ps();
4128 _mm256_storeu_ps(ptr::addr_of_mut!(r) as *mut f32, a);
4129 assert_eq_m256(r, a);
4130 }
4131
4132 #[simd_test(enable = "avx")]
4133 unsafe fn test_mm256_load_si256() {
4134 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4135 let p = ptr::addr_of!(a);
4136 let r = _mm256_load_si256(p);
4137 let e = _mm256_setr_epi64x(1, 2, 3, 4);
4138 assert_eq_m256i(r, e);
4139 }
4140
4141 #[simd_test(enable = "avx")]
4142 unsafe fn test_mm256_store_si256() {
4143 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4144 let mut r = _mm256_undefined_si256();
4145 _mm256_store_si256(ptr::addr_of_mut!(r), a);
4146 assert_eq_m256i(r, a);
4147 }
4148
4149 #[simd_test(enable = "avx")]
4150 unsafe fn test_mm256_loadu_si256() {
4151 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4152 let p = ptr::addr_of!(a);
4153 let r = _mm256_loadu_si256(black_box(p));
4154 let e = _mm256_setr_epi64x(1, 2, 3, 4);
4155 assert_eq_m256i(r, e);
4156 }
4157
4158 #[simd_test(enable = "avx")]
4159 unsafe fn test_mm256_storeu_si256() {
4160 let a = _mm256_set1_epi8(9);
4161 let mut r = _mm256_undefined_si256();
4162 _mm256_storeu_si256(ptr::addr_of_mut!(r), a);
4163 assert_eq_m256i(r, a);
4164 }
4165
4166 #[simd_test(enable = "avx")]
4167 unsafe fn test_mm256_maskload_pd() {
4168 let a = &[1.0f64, 2., 3., 4.];
4169 let p = a.as_ptr();
4170 let mask = _mm256_setr_epi64x(0, !0, 0, !0);
4171 let r = _mm256_maskload_pd(black_box(p), mask);
4172 let e = _mm256_setr_pd(0., 2., 0., 4.);
4173 assert_eq_m256d(r, e);
4174 }
4175
4176 #[simd_test(enable = "avx")]
4177 unsafe fn test_mm256_maskstore_pd() {
4178 let mut r = _mm256_set1_pd(0.);
4179 let mask = _mm256_setr_epi64x(0, !0, 0, !0);
4180 let a = _mm256_setr_pd(1., 2., 3., 4.);
4181 _mm256_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
4182 let e = _mm256_setr_pd(0., 2., 0., 4.);
4183 assert_eq_m256d(r, e);
4184 }
4185
4186 #[simd_test(enable = "avx")]
4187 unsafe fn test_mm_maskload_pd() {
4188 let a = &[1.0f64, 2.];
4189 let p = a.as_ptr();
4190 let mask = _mm_setr_epi64x(0, !0);
4191 let r = _mm_maskload_pd(black_box(p), mask);
4192 let e = _mm_setr_pd(0., 2.);
4193 assert_eq_m128d(r, e);
4194 }
4195
4196 #[simd_test(enable = "avx")]
4197 unsafe fn test_mm_maskstore_pd() {
4198 let mut r = _mm_set1_pd(0.);
4199 let mask = _mm_setr_epi64x(0, !0);
4200 let a = _mm_setr_pd(1., 2.);
4201 _mm_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
4202 let e = _mm_setr_pd(0., 2.);
4203 assert_eq_m128d(r, e);
4204 }
4205
4206 #[simd_test(enable = "avx")]
4207 unsafe fn test_mm256_maskload_ps() {
4208 let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
4209 let p = a.as_ptr();
4210 let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
4211 let r = _mm256_maskload_ps(black_box(p), mask);
4212 let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
4213 assert_eq_m256(r, e);
4214 }
4215
4216 #[simd_test(enable = "avx")]
4217 unsafe fn test_mm256_maskstore_ps() {
4218 let mut r = _mm256_set1_ps(0.);
4219 let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
4220 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4221 _mm256_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
4222 let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
4223 assert_eq_m256(r, e);
4224 }
4225
4226 #[simd_test(enable = "avx")]
4227 unsafe fn test_mm_maskload_ps() {
4228 let a = &[1.0f32, 2., 3., 4.];
4229 let p = a.as_ptr();
4230 let mask = _mm_setr_epi32(0, !0, 0, !0);
4231 let r = _mm_maskload_ps(black_box(p), mask);
4232 let e = _mm_setr_ps(0., 2., 0., 4.);
4233 assert_eq_m128(r, e);
4234 }
4235
4236 #[simd_test(enable = "avx")]
4237 unsafe fn test_mm_maskstore_ps() {
4238 let mut r = _mm_set1_ps(0.);
4239 let mask = _mm_setr_epi32(0, !0, 0, !0);
4240 let a = _mm_setr_ps(1., 2., 3., 4.);
4241 _mm_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
4242 let e = _mm_setr_ps(0., 2., 0., 4.);
4243 assert_eq_m128(r, e);
4244 }
4245
4246 #[simd_test(enable = "avx")]
4247 unsafe fn test_mm256_movehdup_ps() {
4248 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4249 let r = _mm256_movehdup_ps(a);
4250 let e = _mm256_setr_ps(2., 2., 4., 4., 6., 6., 8., 8.);
4251 assert_eq_m256(r, e);
4252 }
4253
4254 #[simd_test(enable = "avx")]
4255 unsafe fn test_mm256_moveldup_ps() {
4256 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4257 let r = _mm256_moveldup_ps(a);
4258 let e = _mm256_setr_ps(1., 1., 3., 3., 5., 5., 7., 7.);
4259 assert_eq_m256(r, e);
4260 }
4261
4262 #[simd_test(enable = "avx")]
4263 unsafe fn test_mm256_movedup_pd() {
4264 let a = _mm256_setr_pd(1., 2., 3., 4.);
4265 let r = _mm256_movedup_pd(a);
4266 let e = _mm256_setr_pd(1., 1., 3., 3.);
4267 assert_eq_m256d(r, e);
4268 }
4269
4270 #[simd_test(enable = "avx")]
4271 unsafe fn test_mm256_lddqu_si256() {
4272 #[rustfmt::skip]
4273 let a = _mm256_setr_epi8(
4274 1, 2, 3, 4, 5, 6, 7, 8,
4275 9, 10, 11, 12, 13, 14, 15, 16,
4276 17, 18, 19, 20, 21, 22, 23, 24,
4277 25, 26, 27, 28, 29, 30, 31, 32,
4278 );
4279 let p = ptr::addr_of!(a);
4280 let r = _mm256_lddqu_si256(black_box(p));
4281 #[rustfmt::skip]
4282 let e = _mm256_setr_epi8(
4283 1, 2, 3, 4, 5, 6, 7, 8,
4284 9, 10, 11, 12, 13, 14, 15, 16,
4285 17, 18, 19, 20, 21, 22, 23, 24,
4286 25, 26, 27, 28, 29, 30, 31, 32,
4287 );
4288 assert_eq_m256i(r, e);
4289 }
4290
4291 #[simd_test(enable = "avx")]
4292 #[cfg_attr(miri, ignore)]
4293 unsafe fn test_mm256_stream_si256() {
4294 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4295 let mut r = _mm256_undefined_si256();
4296 _mm256_stream_si256(ptr::addr_of_mut!(r), a);
4297 _mm_sfence();
4298 assert_eq_m256i(r, a);
4299 }
4300
4301 #[simd_test(enable = "avx")]
4302 #[cfg_attr(miri, ignore)]
4303 unsafe fn test_mm256_stream_pd() {
4304 #[repr(align(32))]
4305 struct Memory {
4306 pub data: [f64; 4],
4307 }
4308 let a = _mm256_set1_pd(7.0);
4309 let mut mem = Memory { data: [-1.0; 4] };
4310
4311 _mm256_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
4312 _mm_sfence();
4313 for i in 0..4 {
4314 assert_eq!(mem.data[i], get_m256d(a, i));
4315 }
4316 }
4317
4318 #[simd_test(enable = "avx")]
4319 #[cfg_attr(miri, ignore)]
4320 unsafe fn test_mm256_stream_ps() {
4321 #[repr(align(32))]
4322 struct Memory {
4323 pub data: [f32; 8],
4324 }
4325 let a = _mm256_set1_ps(7.0);
4326 let mut mem = Memory { data: [-1.0; 8] };
4327
4328 _mm256_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
4329 _mm_sfence();
4330 for i in 0..8 {
4331 assert_eq!(mem.data[i], get_m256(a, i));
4332 }
4333 }
4334
4335 #[simd_test(enable = "avx")]
4336 unsafe fn test_mm256_rcp_ps() {
4337 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4338 let r = _mm256_rcp_ps(a);
4339 #[rustfmt::skip]
4340 let e = _mm256_setr_ps(
4341 0.99975586, 0.49987793, 0.33325195, 0.24993896,
4342 0.19995117, 0.16662598, 0.14282227, 0.12496948,
4343 );
4344 let rel_err = 0.00048828125;
4345 for i in 0..8 {
4346 assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. * rel_err);
4347 }
4348 }
4349
4350 #[simd_test(enable = "avx")]
4351 unsafe fn test_mm256_rsqrt_ps() {
4352 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4353 let r = _mm256_rsqrt_ps(a);
4354 #[rustfmt::skip]
4355 let e = _mm256_setr_ps(
4356 0.99975586, 0.7069092, 0.5772705, 0.49987793,
4357 0.44714355, 0.40820313, 0.3779297, 0.3534546,
4358 );
4359 let rel_err = 0.00048828125;
4360 for i in 0..8 {
4361 assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. * rel_err);
4362 }
4363 }
4364
4365 #[simd_test(enable = "avx")]
4366 unsafe fn test_mm256_unpackhi_pd() {
4367 let a = _mm256_setr_pd(1., 2., 3., 4.);
4368 let b = _mm256_setr_pd(5., 6., 7., 8.);
4369 let r = _mm256_unpackhi_pd(a, b);
4370 let e = _mm256_setr_pd(2., 6., 4., 8.);
4371 assert_eq_m256d(r, e);
4372 }
4373
4374 #[simd_test(enable = "avx")]
4375 unsafe fn test_mm256_unpackhi_ps() {
4376 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4377 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
4378 let r = _mm256_unpackhi_ps(a, b);
4379 let e = _mm256_setr_ps(3., 11., 4., 12., 7., 15., 8., 16.);
4380 assert_eq_m256(r, e);
4381 }
4382
4383 #[simd_test(enable = "avx")]
4384 unsafe fn test_mm256_unpacklo_pd() {
4385 let a = _mm256_setr_pd(1., 2., 3., 4.);
4386 let b = _mm256_setr_pd(5., 6., 7., 8.);
4387 let r = _mm256_unpacklo_pd(a, b);
4388 let e = _mm256_setr_pd(1., 5., 3., 7.);
4389 assert_eq_m256d(r, e);
4390 }
4391
4392 #[simd_test(enable = "avx")]
4393 unsafe fn test_mm256_unpacklo_ps() {
4394 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4395 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
4396 let r = _mm256_unpacklo_ps(a, b);
4397 let e = _mm256_setr_ps(1., 9., 2., 10., 5., 13., 6., 14.);
4398 assert_eq_m256(r, e);
4399 }
4400
4401 #[simd_test(enable = "avx")]
4402 unsafe fn test_mm256_testz_si256() {
4403 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4404 let b = _mm256_setr_epi64x(5, 6, 7, 8);
4405 let r = _mm256_testz_si256(a, b);
4406 assert_eq!(r, 0);
4407 let b = _mm256_set1_epi64x(0);
4408 let r = _mm256_testz_si256(a, b);
4409 assert_eq!(r, 1);
4410 }
4411
4412 #[simd_test(enable = "avx")]
4413 unsafe fn test_mm256_testc_si256() {
4414 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4415 let b = _mm256_setr_epi64x(5, 6, 7, 8);
4416 let r = _mm256_testc_si256(a, b);
4417 assert_eq!(r, 0);
4418 let b = _mm256_set1_epi64x(0);
4419 let r = _mm256_testc_si256(a, b);
4420 assert_eq!(r, 1);
4421 }
4422
4423 #[simd_test(enable = "avx")]
4424 unsafe fn test_mm256_testnzc_si256() {
4425 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4426 let b = _mm256_setr_epi64x(5, 6, 7, 8);
4427 let r = _mm256_testnzc_si256(a, b);
4428 assert_eq!(r, 1);
4429 let a = _mm256_setr_epi64x(0, 0, 0, 0);
4430 let b = _mm256_setr_epi64x(0, 0, 0, 0);
4431 let r = _mm256_testnzc_si256(a, b);
4432 assert_eq!(r, 0);
4433 }
4434
4435 #[simd_test(enable = "avx")]
4436 unsafe fn test_mm256_testz_pd() {
4437 let a = _mm256_setr_pd(1., 2., 3., 4.);
4438 let b = _mm256_setr_pd(5., 6., 7., 8.);
4439 let r = _mm256_testz_pd(a, b);
4440 assert_eq!(r, 1);
4441 let a = _mm256_set1_pd(-1.);
4442 let r = _mm256_testz_pd(a, a);
4443 assert_eq!(r, 0);
4444 }
4445
4446 #[simd_test(enable = "avx")]
4447 unsafe fn test_mm256_testc_pd() {
4448 let a = _mm256_setr_pd(1., 2., 3., 4.);
4449 let b = _mm256_setr_pd(5., 6., 7., 8.);
4450 let r = _mm256_testc_pd(a, b);
4451 assert_eq!(r, 1);
4452 let a = _mm256_set1_pd(1.);
4453 let b = _mm256_set1_pd(-1.);
4454 let r = _mm256_testc_pd(a, b);
4455 assert_eq!(r, 0);
4456 }
4457
4458 #[simd_test(enable = "avx")]
4459 unsafe fn test_mm256_testnzc_pd() {
4460 let a = _mm256_setr_pd(1., 2., 3., 4.);
4461 let b = _mm256_setr_pd(5., 6., 7., 8.);
4462 let r = _mm256_testnzc_pd(a, b);
4463 assert_eq!(r, 0);
4464 let a = _mm256_setr_pd(1., -1., -1., -1.);
4465 let b = _mm256_setr_pd(-1., -1., 1., 1.);
4466 let r = _mm256_testnzc_pd(a, b);
4467 assert_eq!(r, 1);
4468 }
4469
4470 #[simd_test(enable = "avx")]
4471 unsafe fn test_mm_testz_pd() {
4472 let a = _mm_setr_pd(1., 2.);
4473 let b = _mm_setr_pd(5., 6.);
4474 let r = _mm_testz_pd(a, b);
4475 assert_eq!(r, 1);
4476 let a = _mm_set1_pd(-1.);
4477 let r = _mm_testz_pd(a, a);
4478 assert_eq!(r, 0);
4479 }
4480
4481 #[simd_test(enable = "avx")]
4482 unsafe fn test_mm_testc_pd() {
4483 let a = _mm_setr_pd(1., 2.);
4484 let b = _mm_setr_pd(5., 6.);
4485 let r = _mm_testc_pd(a, b);
4486 assert_eq!(r, 1);
4487 let a = _mm_set1_pd(1.);
4488 let b = _mm_set1_pd(-1.);
4489 let r = _mm_testc_pd(a, b);
4490 assert_eq!(r, 0);
4491 }
4492
4493 #[simd_test(enable = "avx")]
4494 unsafe fn test_mm_testnzc_pd() {
4495 let a = _mm_setr_pd(1., 2.);
4496 let b = _mm_setr_pd(5., 6.);
4497 let r = _mm_testnzc_pd(a, b);
4498 assert_eq!(r, 0);
4499 let a = _mm_setr_pd(1., -1.);
4500 let b = _mm_setr_pd(-1., -1.);
4501 let r = _mm_testnzc_pd(a, b);
4502 assert_eq!(r, 1);
4503 }
4504
4505 #[simd_test(enable = "avx")]
4506 unsafe fn test_mm256_testz_ps() {
4507 let a = _mm256_set1_ps(1.);
4508 let r = _mm256_testz_ps(a, a);
4509 assert_eq!(r, 1);
4510 let a = _mm256_set1_ps(-1.);
4511 let r = _mm256_testz_ps(a, a);
4512 assert_eq!(r, 0);
4513 }
4514
4515 #[simd_test(enable = "avx")]
4516 unsafe fn test_mm256_testc_ps() {
4517 let a = _mm256_set1_ps(1.);
4518 let r = _mm256_testc_ps(a, a);
4519 assert_eq!(r, 1);
4520 let b = _mm256_set1_ps(-1.);
4521 let r = _mm256_testc_ps(a, b);
4522 assert_eq!(r, 0);
4523 }
4524
4525 #[simd_test(enable = "avx")]
4526 unsafe fn test_mm256_testnzc_ps() {
4527 let a = _mm256_set1_ps(1.);
4528 let r = _mm256_testnzc_ps(a, a);
4529 assert_eq!(r, 0);
4530 let a = _mm256_setr_ps(1., -1., -1., -1., -1., -1., -1., -1.);
4531 let b = _mm256_setr_ps(-1., -1., 1., 1., 1., 1., 1., 1.);
4532 let r = _mm256_testnzc_ps(a, b);
4533 assert_eq!(r, 1);
4534 }
4535
4536 #[simd_test(enable = "avx")]
4537 unsafe fn test_mm_testz_ps() {
4538 let a = _mm_set1_ps(1.);
4539 let r = _mm_testz_ps(a, a);
4540 assert_eq!(r, 1);
4541 let a = _mm_set1_ps(-1.);
4542 let r = _mm_testz_ps(a, a);
4543 assert_eq!(r, 0);
4544 }
4545
4546 #[simd_test(enable = "avx")]
4547 unsafe fn test_mm_testc_ps() {
4548 let a = _mm_set1_ps(1.);
4549 let r = _mm_testc_ps(a, a);
4550 assert_eq!(r, 1);
4551 let b = _mm_set1_ps(-1.);
4552 let r = _mm_testc_ps(a, b);
4553 assert_eq!(r, 0);
4554 }
4555
4556 #[simd_test(enable = "avx")]
4557 unsafe fn test_mm_testnzc_ps() {
4558 let a = _mm_set1_ps(1.);
4559 let r = _mm_testnzc_ps(a, a);
4560 assert_eq!(r, 0);
4561 let a = _mm_setr_ps(1., -1., -1., -1.);
4562 let b = _mm_setr_ps(-1., -1., 1., 1.);
4563 let r = _mm_testnzc_ps(a, b);
4564 assert_eq!(r, 1);
4565 }
4566
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_movemask_pd() {
        let a = _mm256_setr_pd(1., -2., 3., -4.);
        let r = _mm256_movemask_pd(a);
        assert_eq!(r, 0xA);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_movemask_ps() {
        let a = _mm256_setr_ps(1., -2., 3., -4., 1., -2., 3., -4.);
        let r = _mm256_movemask_ps(a);
        assert_eq!(r, 0xAA);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setzero_pd() {
        let r = _mm256_setzero_pd();
        assert_eq_m256d(r, _mm256_set1_pd(0.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setzero_ps() {
        let r = _mm256_setzero_ps();
        assert_eq_m256(r, _mm256_set1_ps(0.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setzero_si256() {
        let r = _mm256_setzero_si256();
        assert_eq_m256i(r, _mm256_set1_epi8(0));
    }

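    // `_mm256_set_*` takes arguments from the highest lane down to the lowest,
    // while `_mm256_setr_*` ("reversed") takes them in memory order, lowest
    // lane first. The tests below therefore expect `set(1, ..., n)` to equal
    // `setr(n, ..., 1)`.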
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_pd() {
        let r = _mm256_set_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_ps() {
        let r = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, _mm256_setr_ps(8., 7., 6., 5., 4., 3., 2., 1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi8() {
        #[rustfmt::skip]
        let r = _mm256_set_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            32, 31, 30, 29, 28, 27, 26, 25,
            24, 23, 22, 21, 20, 19, 18, 17,
            16, 15, 14, 13, 12, 11, 10, 9,
            8, 7, 6, 5, 4, 3, 2, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi16() {
        #[rustfmt::skip]
        let r = _mm256_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            16, 15, 14, 13, 12, 11, 10, 9,
            8, 7, 6, 5, 4, 3, 2, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi32() {
        let r = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, _mm256_setr_epi32(8, 7, 6, 5, 4, 3, 2, 1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi64x() {
        let r = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, _mm256_setr_epi64x(4, 3, 2, 1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_pd() {
        let r = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, _mm256_setr_pd(1., 2., 3., 4.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_ps() {
        let r = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi8() {
        #[rustfmt::skip]
        let r = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi16() {
        #[rustfmt::skip]
        let r = _mm256_setr_epi16(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi32() {
        let r = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi64x() {
        let r = _mm256_setr_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, _mm256_setr_epi64x(1, 2, 3, 4));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_pd() {
        let r = _mm256_set1_pd(1.);
        assert_eq_m256d(r, _mm256_set1_pd(1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_ps() {
        let r = _mm256_set1_ps(1.);
        assert_eq_m256(r, _mm256_set1_ps(1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi8() {
        let r = _mm256_set1_epi8(1);
        assert_eq_m256i(r, _mm256_set1_epi8(1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi16() {
        let r = _mm256_set1_epi16(1);
        assert_eq_m256i(r, _mm256_set1_epi16(1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi32() {
        let r = _mm256_set1_epi32(1);
        assert_eq_m256i(r, _mm256_set1_epi32(1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi64x() {
        let r = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, _mm256_set1_epi64x(1));
    }

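    // The `_mm256_cast*` intrinsics reinterpret the bits of a vector without
    // converting any values, so they compile to no instructions. For example,
    // f64 1.0 is 0x3FF0_0000_0000_0000; read as two little-endian f32 lanes
    // that is 0x0000_0000 (0.0) followed by 0x3FF0_0000 (1.875), which is
    // exactly what the first test below expects.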
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd_ps() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castpd_ps(a);
        let e = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps_pd() {
        let a = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
        let r = _mm256_castps_pd(a);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

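    // The same idea at byte granularity: f32 1.0 is 0x3F80_0000, which is the
    // little-endian byte sequence 0x00, 0x00, 0x80, 0x3F, i.e. the i8 lanes
    // (0, 0, -128, 63) at the start of the expected vectors below.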
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps_si256() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_castps_si256(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            0, 0, -128, 63, 0, 0, 0, 64,
            0, 0, 64, 64, 0, 0, -128, 64,
            0, 0, -96, 64, 0, 0, -64, 64,
            0, 0, -32, 64, 0, 0, 0, 65,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi256_ps() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 0, -128, 63, 0, 0, 0, 64,
            0, 0, 64, 64, 0, 0, -128, 64,
            0, 0, -96, 64, 0, 0, -64, 64,
            0, 0, -32, 64, 0, 0, 0, 65,
        );
        let r = _mm256_castsi256_ps(a);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd_si256() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castpd_si256(a);
        assert_eq_m256d(transmute(r), a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi256_pd() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let r = _mm256_castsi256_pd(a);
        assert_eq_m256d(r, transmute(a));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps256_ps128() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_castps256_ps128(a);
        assert_eq_m128(r, _mm_setr_ps(1., 2., 3., 4.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd256_pd128() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castpd256_pd128(a);
        assert_eq_m128d(r, _mm_setr_pd(1., 2.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi256_si128() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let r = _mm256_castsi256_si128(a);
        assert_eq_m128i(r, _mm_setr_epi64x(1, 2));
    }

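    // Casting a 128-bit vector up to 256 bits leaves the upper half undefined,
    // so the tests below only compare the low 128 bits of the result; the
    // `zext` variants further down are the ones that guarantee zeroed upper
    // lanes.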
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps128_ps256() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let r = _mm256_castps128_ps256(a);
        assert_eq_m128(_mm256_castps256_ps128(r), a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd128_pd256() {
        let a = _mm_setr_pd(1., 2.);
        let r = _mm256_castpd128_pd256(a);
        assert_eq_m128d(_mm256_castpd256_pd128(r), a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi128_si256() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let r = _mm256_castsi128_si256(a);
        assert_eq_m128i(_mm256_castsi256_si128(r), a);
    }

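    // `zext` widens a 128-bit vector to 256 bits with the upper half zeroed,
    // so the full 256-bit result can be compared directly.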
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_zextps128_ps256() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let r = _mm256_zextps128_ps256(a);
        let e = _mm256_setr_ps(1., 2., 3., 4., 0., 0., 0., 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_zextsi128_si256() {
        let a = _mm_setr_epi64x(1, 2);
        let r = _mm256_zextsi128_si256(a);
        let e = _mm256_setr_epi64x(1, 2, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_zextpd128_pd256() {
        let a = _mm_setr_pd(1., 2.);
        let r = _mm256_zextpd128_pd256(a);
        let e = _mm256_setr_pd(1., 2., 0., 0.);
        assert_eq_m256d(r, e);
    }

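    // `_mm256_set_m128*` concatenates two 128-bit vectors, with the first
    // argument becoming the high half; `_mm256_setr_m128*` takes them in the
    // opposite (memory) order, low half first.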
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_m128() {
        let hi = _mm_setr_ps(5., 6., 7., 8.);
        let lo = _mm_setr_ps(1., 2., 3., 4.);
        let r = _mm256_set_m128(hi, lo);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_m128d() {
        let hi = _mm_setr_pd(3., 4.);
        let lo = _mm_setr_pd(1., 2.);
        let r = _mm256_set_m128d(hi, lo);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_m128i() {
        #[rustfmt::skip]
        let hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let lo = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm256_set_m128i(hi, lo);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_m128() {
        let lo = _mm_setr_ps(1., 2., 3., 4.);
        let hi = _mm_setr_ps(5., 6., 7., 8.);
        let r = _mm256_setr_m128(lo, hi);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_m128d() {
        let lo = _mm_setr_pd(1., 2.);
        let hi = _mm_setr_pd(3., 4.);
        let r = _mm256_setr_m128d(lo, hi);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_m128i() {
        #[rustfmt::skip]
        let lo = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_setr_m128i(lo, hi);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }

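    // `_mm256_loadu2_m128*` performs two unaligned 128-bit loads and combines
    // them: the first pointer fills the high half of the result, the second
    // the low half.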
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu2_m128() {
        let hi = &[5., 6., 7., 8.];
        let hiaddr = hi.as_ptr();
        let lo = &[1., 2., 3., 4.];
        let loaddr = lo.as_ptr();
        let r = _mm256_loadu2_m128(hiaddr, loaddr);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu2_m128d() {
        let hi = &[3., 4.];
        let hiaddr = hi.as_ptr();
        let lo = &[1., 2.];
        let loaddr = lo.as_ptr();
        let r = _mm256_loadu2_m128d(hiaddr, loaddr);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu2_m128i() {
        #[rustfmt::skip]
        let hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let lo = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm256_loadu2_m128i(ptr::addr_of!(hi) as *const _, ptr::addr_of!(lo) as *const _);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }

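    // `_mm256_storeu2_m128*` is the inverse: it splits a 256-bit vector into
    // two unaligned 128-bit stores, high half to the first pointer, low half
    // to the second.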
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_storeu2_m128() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let mut hi = _mm_undefined_ps();
        let mut lo = _mm_undefined_ps();
        _mm256_storeu2_m128(
            ptr::addr_of_mut!(hi) as *mut f32,
            ptr::addr_of_mut!(lo) as *mut f32,
            a,
        );
        assert_eq_m128(hi, _mm_setr_ps(5., 6., 7., 8.));
        assert_eq_m128(lo, _mm_setr_ps(1., 2., 3., 4.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_storeu2_m128d() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let mut hi = _mm_undefined_pd();
        let mut lo = _mm_undefined_pd();
        _mm256_storeu2_m128d(
            ptr::addr_of_mut!(hi) as *mut f64,
            ptr::addr_of_mut!(lo) as *mut f64,
            a,
        );
        assert_eq_m128d(hi, _mm_setr_pd(3., 4.));
        assert_eq_m128d(lo, _mm_setr_pd(1., 2.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_storeu2_m128i() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let mut hi = _mm_undefined_si128();
        let mut lo = _mm_undefined_si128();
        _mm256_storeu2_m128i(ptr::addr_of_mut!(hi), ptr::addr_of_mut!(lo), a);
        #[rustfmt::skip]
        let e_hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let e_lo = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        assert_eq_m128i(hi, e_hi);
        assert_eq_m128i(lo, e_lo);
    }

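    // `_mm256_cvtss_f32` extracts the lowest f32 lane as a scalar.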
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_cvtss_f32() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_cvtss_f32(a);
        assert_eq!(r, 1.);
    }
}