use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::simd::*;

#[cfg(test)]
use stdarch_test::assert_instr;

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi32(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let r = simd_select::<m32x8, _>(simd_lt(a, i32x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi16(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let r = simd_select::<m16x16, _>(simd_lt(a, i16x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi8(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let r = simd_select::<m8x32, _>(simd_lt(a, i8x32::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16())) }
}
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);

    // Shifting by two full 128-bit lanes or more always produces zero.
    if IMM8 >= 32 {
        return _mm256_setzero_si256();
    }
    // Shifting by more than one lane but less than two is equivalent to
    // shifting zeroes in, so replace the pair (a, b) with (0, a).
    let (a, b) = if IMM8 > 16 {
        (_mm256_setzero_si256(), a)
    } else {
        (a, b)
    };
    unsafe {
        if IMM8 == 16 {
            return transmute(a);
        }
    }
    // Maps result byte `i` to an index into the shuffle input pair:
    // indices 0..32 select from `b`, 32..64 select from `a`, per 128-bit lane.
    const fn mask(shift: u32, i: u32) -> u32 {
        let shift = shift % 16;
        let mod_i = i % 16;
        if mod_i < (16 - shift) {
            i + shift
        } else {
            i + 16 + shift
        }
    }

    unsafe {
        let r: i8x32 = simd_shuffle!(
            b.as_i8x32(),
            a.as_i8x32(),
            [
                mask(IMM8 as u32, 0),
                mask(IMM8 as u32, 1),
                mask(IMM8 as u32, 2),
                mask(IMM8 as u32, 3),
                mask(IMM8 as u32, 4),
                mask(IMM8 as u32, 5),
                mask(IMM8 as u32, 6),
                mask(IMM8 as u32, 7),
                mask(IMM8 as u32, 8),
                mask(IMM8 as u32, 9),
                mask(IMM8 as u32, 10),
                mask(IMM8 as u32, 11),
                mask(IMM8 as u32, 12),
                mask(IMM8 as u32, 13),
                mask(IMM8 as u32, 14),
                mask(IMM8 as u32, 15),
                mask(IMM8 as u32, 16),
                mask(IMM8 as u32, 17),
                mask(IMM8 as u32, 18),
                mask(IMM8 as u32, 19),
                mask(IMM8 as u32, 20),
                mask(IMM8 as u32, 21),
                mask(IMM8 as u32, 22),
                mask(IMM8 as u32, 23),
                mask(IMM8 as u32, 24),
                mask(IMM8 as u32, 25),
                mask(IMM8 as u32, 26),
                mask(IMM8 as u32, 27),
                mask(IMM8 as u32, 28),
                mask(IMM8 as u32, 29),
                mask(IMM8 as u32, 30),
                mask(IMM8 as u32, 31),
            ],
        );
        transmute(r)
    }
}
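
// Illustrative usage sketch, not part of the upstream source: a caller built
// with AVX2 enabled can use `_mm256_alignr_epi8` to shift the concatenation of
// two vectors right by a byte count within each 128-bit lane. The helper name
// below is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
unsafe fn example_alignr_by_4(a: __m256i, b: __m256i) -> __m256i {
    // Per lane: bytes 4..16 of `b` followed by bytes 0..4 of `a`.
    _mm256_alignr_epi8::<4>(a, b)
}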

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_and(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let all_ones = _mm256_set1_epi8(-1);
        transmute(simd_and(
            simd_xor(a.as_i64x4(), all_ones.as_i64x4()),
            b.as_i64x4(),
        ))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u32x16>(a.as_u16x16());
        let b = simd_cast::<_, u32x16>(b.as_u16x16());
        let r = simd_shr(simd_add(simd_add(a, b), u32x16::splat(1)), u32x16::splat(1));
        transmute(simd_cast::<_, u16x16>(r))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u16x32>(a.as_u8x32());
        let b = simd_cast::<_, u16x32>(b.as_u8x32());
        let r = simd_shr(simd_add(simd_add(a, b), u16x32::splat(1)), u16x32::splat(1));
        transmute(simd_cast::<_, u8x32>(r))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM4, 4);
    unsafe {
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        let r: i32x4 = simd_shuffle!(
            a,
            b,
            [
                [0, 4, 0, 4][IMM4 as usize & 0b11],
                [1, 1, 5, 5][IMM4 as usize & 0b11],
                [2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11],
                [3, 3, 7, 7][(IMM4 as usize >> 2) & 0b11],
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        let r: i32x8 = simd_shuffle!(
            a,
            b,
            [
                [0, 8, 0, 8][IMM8 as usize & 0b11],
                [1, 1, 9, 9][IMM8 as usize & 0b11],
                [2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 11, 11][(IMM8 as usize >> 2) & 0b11],
                [4, 12, 4, 12][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 13, 13][(IMM8 as usize >> 4) & 0b11],
                [6, 14, 6, 14][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 15, 15][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}
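
// Illustrative usage sketch, not part of the upstream source: bit `i` of the
// immediate selects 32-bit element `i` from `b` (1) or `a` (0). The helper
// name below is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
unsafe fn example_blend_low_two(a: __m256i, b: __m256i) -> __m256i {
    // Elements 0 and 1 come from `b`, elements 2..8 from `a`.
    _mm256_blend_epi32::<0b0000_0011>(a, b)
}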

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendw, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();

        let r: i16x16 = simd_shuffle!(
            a,
            b,
            [
                [0, 16, 0, 16][IMM8 as usize & 0b11],
                [1, 1, 17, 17][IMM8 as usize & 0b11],
                [2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 19, 19][(IMM8 as usize >> 2) & 0b11],
                [4, 20, 4, 20][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 21, 21][(IMM8 as usize >> 4) & 0b11],
                [6, 22, 6, 22][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 23, 23][(IMM8 as usize >> 6) & 0b11],
                [8, 24, 8, 24][IMM8 as usize & 0b11],
                [9, 9, 25, 25][IMM8 as usize & 0b11],
                [10, 26, 10, 26][(IMM8 as usize >> 2) & 0b11],
                [11, 11, 27, 27][(IMM8 as usize >> 2) & 0b11],
                [12, 28, 12, 28][(IMM8 as usize >> 4) & 0b11],
                [13, 13, 29, 29][(IMM8 as usize >> 4) & 0b11],
                [14, 30, 14, 30][(IMM8 as usize >> 6) & 0b11],
                [15, 15, 31, 31][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
    unsafe {
        let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::ZERO);
        transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32()))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 16]);
        transmute::<i8x16, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 32]);
        transmute::<i8x32, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 4]);
        transmute::<i32x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 8]);
        transmute::<i32x8, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]);
        transmute::<i64x2, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]);
        transmute::<i64x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 2]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 4]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastss_ps(a: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 4]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 8]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 8]);
        transmute::<i16x8, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 16]);
        transmute::<i16x16, _>(ret)
    }
}
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_i16x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i16x8();
        let v64: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_i32x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_i8x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        let v32: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_u16x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u16x8();
        let v64: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_u32x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_u8x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        let v32: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        let b = i64x4::ZERO;
        let dst: i64x2 = simd_shuffle!(a, b, [[0, 1], [2, 3]][IMM1 as usize]);
        transmute(dst)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    unsafe {
        let even: i16x16 = simd_shuffle!(
            a,
            b,
            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
        );
        let odd: i16x16 = simd_shuffle!(
            a,
            b,
            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
        );
        simd_add(even, odd).as_m256i()
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i32x8();
    let b = b.as_i32x8();
    unsafe {
        let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
        let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
        simd_add(even, odd).as_m256i()
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phaddsw(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    unsafe {
        let even: i16x16 = simd_shuffle!(
            a,
            b,
            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
        );
        let odd: i16x16 = simd_shuffle!(
            a,
            b,
            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
        );
        simd_sub(even, odd).as_m256i()
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i32x8();
    let b = b.as_i32x8();
    unsafe {
        let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
        let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
        simd_sub(even, odd).as_m256i()
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phsubsw(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi32(-1).as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}
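
// Illustrative usage sketch, not part of the upstream source: gathers four
// i32 values from a slice at element indices 0, 2, 4 and 6, using SCALE = 4
// because the offsets are element indices into an i32 array. The helper name
// is hypothetical, and `data` is assumed to hold at least seven elements.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
unsafe fn example_gather_four_i32(data: &[i32]) -> __m128i {
    // Offsets are multiplied by the 4-byte scale to form byte addresses.
    let offsets = _mm_setr_epi32(0, 2, 4, 6);
    _mm_i32gather_epi32::<4>(data.as_ptr(), offsets)
}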

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x8::ZERO;
    let neg_one = _mm256_set1_epi32(-1).as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi32<const SCALE: i32>(
    src: __m256i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x8();
    let mask = mask.as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    let r = vpgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_ps();
    let neg_one = _mm256_set1_ps(-1.0);
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    vpgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_ps<const SCALE: i32>(
    src: __m256,
    slice: *const f32,
    offsets: __m256i,
    mask: __m256,
) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    vpgatherdps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x2::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi64<const SCALE: i32>(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x4::ZERO;
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi64<const SCALE: i32>(
    src: __m256i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = vpgatherdq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_pd();
    let neg_one = _mm_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_pd<const SCALE: i32>(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdpd(src, slice, offsets, mask, SCALE as i8)
}
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_pd<const SCALE: i32>(
    slice: *const f64,
    offsets: __m128i,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    let neg_one = _mm256_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_pd<const SCALE: i32>(
    src: __m256d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m256d,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m256i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m256i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x2::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    let r = pgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi64<const SCALE: i32>(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x4::ZERO;
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi64<const SCALE: i32>(
    src: __m256i,
    slice: *const i64,
    offsets: __m256i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_pd();
    let neg_one = _mm_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_pd<const SCALE: i32>(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_pd<const SCALE: i32>(
    slice: *const f64,
    offsets: __m256i,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    let neg_one = _mm256_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_pd<const SCALE: i32>(
    src: __m256d,
    slice: *const f64,
    offsets: __m256i,
    mask: __m256d,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        let b = _mm256_castsi128_si256(b).as_i64x4();
        let dst: i64x4 = simd_shuffle!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]);
        transmute(dst)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x16 = simd_mul(simd_cast(a.as_i16x16()), simd_cast(b.as_i16x16()));
        let even: i32x8 = simd_shuffle!(r, r, [0, 2, 4, 6, 8, 10, 12, 14]);
        let odd: i32x8 = simd_shuffle!(r, r, [1, 3, 5, 7, 9, 11, 13, 15]);
        simd_add(even, odd).as_m256i()
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
    transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4()))
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
    transmute(maskloadd256(mem_addr as *const i8, mask.as_i32x8()))
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
    transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2()))
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
    transmute(maskloadq256(mem_addr as *const i8, mask.as_i64x4()))
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
    maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
    maskstored256(mem_addr as *mut i8, mask.as_i32x8(), a.as_i32x8())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
    maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
    maskstoreq256(mem_addr as *mut i8, mask.as_i64x4(), a.as_i64x4())
}
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u32x8();
        let b = b.as_u32x8();
        transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u32x8();
        let b = b.as_u32x8();
        transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_movemask_epi8(a: __m256i) -> i32 {
    unsafe {
        let z = i8x32::ZERO;
        let m: i8x32 = simd_lt(a.as_i8x32(), z);
        simd_bitmask::<_, u32>(m) as i32
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmpsadbw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8 as i8)) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4()));
        let b = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4()));
        transmute(simd_mul(a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u64x4();
        let b = b.as_u64x4();
        let mask = u64x4::splat(u32::MAX.into());
        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, i32x16>(a.as_i16x16());
        let b = simd_cast::<_, i32x16>(b.as_i16x16());
        let r = simd_shr(simd_mul(a, b), i32x16::splat(16));
        transmute(simd_cast::<i32x16, i16x16>(r))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u32x16>(a.as_u16x16());
        let b = simd_cast::<_, u32x16>(b.as_u16x16());
        let r = simd_shr(simd_mul(a, b), u32x16::splat(16));
        transmute(simd_cast::<u32x16, u16x16>(r))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_mul(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_mul(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpacksswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packsswb(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackssdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packssdw(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackuswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packuswb(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackusdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packusdw(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(permd(a.as_u32x8(), b.as_u32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let zero = i64x4::ZERO;
        let r: i64x4 = simd_shuffle!(
            a.as_i64x4(),
            zero,
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
            ],
        );
        transmute(r)
    }
}
2322
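/// Shuffles 128-bit halves selected from `a` and `b` (or zeroed) into the
/// result according to the control in `IMM8`.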
2323#[inline]
2327#[target_feature(enable = "avx2")]
2328#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 9))]
2329#[rustc_legacy_const_generics(2)]
2330#[stable(feature = "simd_x86", since = "1.27.0")]
2331pub fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2332 static_assert_uimm_bits!(IMM8, 8);
2333 _mm256_permute2f128_si256::<IMM8>(a, b)
2334}
2335
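/// Permutes the four double-precision elements in `a` across lanes using
/// the four 2-bit selectors in `IMM8`.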
2336#[inline]
2341#[target_feature(enable = "avx2")]
2342#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 1))]
2343#[rustc_legacy_const_generics(1)]
2344#[stable(feature = "simd_x86", since = "1.27.0")]
2345pub fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
2346 static_assert_uimm_bits!(IMM8, 8);
2347 unsafe {
2348 simd_shuffle!(
2349 a,
2350 _mm256_undefined_pd(),
2351 [
2352 IMM8 as u32 & 0b11,
2353 (IMM8 as u32 >> 2) & 0b11,
2354 (IMM8 as u32 >> 4) & 0b11,
2355 (IMM8 as u32 >> 6) & 0b11,
2356 ],
2357 )
2358 }
2359}
2360
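/// Permutes the eight single-precision elements in `a` across lanes, using
/// the low 3 bits of each corresponding 32-bit integer in `idx` as the
/// source index.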
2361#[inline]
2366#[target_feature(enable = "avx2")]
2367#[cfg_attr(test, assert_instr(vpermps))]
2368#[stable(feature = "simd_x86", since = "1.27.0")]
2369pub fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 {
2370 unsafe { permps(a, idx.as_i32x8()) }
2371}
2372
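/// Computes the absolute differences of packed unsigned 8-bit integers in
/// `a` and `b`, then horizontally sums each run of eight differences to
/// produce four 16-bit sums, stored in the low 16 bits of the four 64-bit
/// elements of the result.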
2373#[inline]
2380#[target_feature(enable = "avx2")]
2381#[cfg_attr(test, assert_instr(vpsadbw))]
2382#[stable(feature = "simd_x86", since = "1.27.0")]
2383pub fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i {
2384 unsafe { transmute(psadbw(a.as_u8x32(), b.as_u8x32())) }
2385}
2386
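/// Shuffles bytes from `a` within each 128-bit lane according to the
/// corresponding control byte in `b`: if the high bit of the control byte
/// is set the output byte is zeroed, otherwise its low four bits select a
/// byte from the same lane of `a`.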
2387#[inline]
2418#[target_feature(enable = "avx2")]
2419#[cfg_attr(test, assert_instr(vpshufb))]
2420#[stable(feature = "simd_x86", since = "1.27.0")]
2421pub fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
2422 unsafe { transmute(pshufb(a.as_u8x32(), b.as_u8x32())) }
2423}
2424
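/// Shuffles 32-bit integers within each 128-bit lane of `a` using the four
/// 2-bit selectors in `MASK`; both lanes use the same selectors.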
2425#[inline]
2456#[target_feature(enable = "avx2")]
2457#[cfg_attr(test, assert_instr(vshufps, MASK = 9))]
2458#[rustc_legacy_const_generics(1)]
2459#[stable(feature = "simd_x86", since = "1.27.0")]
2460pub fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
2461 static_assert_uimm_bits!(MASK, 8);
2462 unsafe {
2463 let r: i32x8 = simd_shuffle!(
2464 a.as_i32x8(),
2465 a.as_i32x8(),
2466 [
2467 MASK as u32 & 0b11,
2468 (MASK as u32 >> 2) & 0b11,
2469 (MASK as u32 >> 4) & 0b11,
2470 (MASK as u32 >> 6) & 0b11,
2471 (MASK as u32 & 0b11) + 4,
2472 ((MASK as u32 >> 2) & 0b11) + 4,
2473 ((MASK as u32 >> 4) & 0b11) + 4,
2474 ((MASK as u32 >> 6) & 0b11) + 4,
2475 ],
2476 );
2477 transmute(r)
2478 }
2479}
2480
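/// Shuffles the 16-bit integers in the high 64 bits of each 128-bit lane
/// of `a` using the selectors in `IMM8`; the low 64 bits of each lane are
/// passed through unchanged.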
2481#[inline]
2487#[target_feature(enable = "avx2")]
2488#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 9))]
2489#[rustc_legacy_const_generics(1)]
2490#[stable(feature = "simd_x86", since = "1.27.0")]
2491pub fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2492 static_assert_uimm_bits!(IMM8, 8);
2493 unsafe {
2494 let a = a.as_i16x16();
2495 let r: i16x16 = simd_shuffle!(
2496 a,
2497 a,
2498 [
2499 0,
2500 1,
2501 2,
2502 3,
2503 4 + (IMM8 as u32 & 0b11),
2504 4 + ((IMM8 as u32 >> 2) & 0b11),
2505 4 + ((IMM8 as u32 >> 4) & 0b11),
2506 4 + ((IMM8 as u32 >> 6) & 0b11),
2507 8,
2508 9,
2509 10,
2510 11,
2511 12 + (IMM8 as u32 & 0b11),
2512 12 + ((IMM8 as u32 >> 2) & 0b11),
2513 12 + ((IMM8 as u32 >> 4) & 0b11),
2514 12 + ((IMM8 as u32 >> 6) & 0b11),
2515 ],
2516 );
2517 transmute(r)
2518 }
2519}
2520
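/// Shuffles the 16-bit integers in the low 64 bits of each 128-bit lane
/// of `a` using the selectors in `IMM8`; the high 64 bits of each lane are
/// passed through unchanged.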
2521#[inline]
2527#[target_feature(enable = "avx2")]
2528#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 9))]
2529#[rustc_legacy_const_generics(1)]
2530#[stable(feature = "simd_x86", since = "1.27.0")]
2531pub fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2532 static_assert_uimm_bits!(IMM8, 8);
2533 unsafe {
2534 let a = a.as_i16x16();
2535 let r: i16x16 = simd_shuffle!(
2536 a,
2537 a,
2538 [
2539 0 + (IMM8 as u32 & 0b11),
2540 0 + ((IMM8 as u32 >> 2) & 0b11),
2541 0 + ((IMM8 as u32 >> 4) & 0b11),
2542 0 + ((IMM8 as u32 >> 6) & 0b11),
2543 4,
2544 5,
2545 6,
2546 7,
2547 8 + (IMM8 as u32 & 0b11),
2548 8 + ((IMM8 as u32 >> 2) & 0b11),
2549 8 + ((IMM8 as u32 >> 4) & 0b11),
2550 8 + ((IMM8 as u32 >> 6) & 0b11),
2551 12,
2552 13,
2553 14,
2554 15,
2555 ],
2556 );
2557 transmute(r)
2558 }
2559}
2560
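/// Negates packed 16-bit integers in `a` where the corresponding element
/// in `b` is negative, zeroes them where the corresponding element in `b`
/// is zero, and passes them through otherwise.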
2561#[inline]
2567#[target_feature(enable = "avx2")]
2568#[cfg_attr(test, assert_instr(vpsignw))]
2569#[stable(feature = "simd_x86", since = "1.27.0")]
2570pub fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i {
2571 unsafe { transmute(psignw(a.as_i16x16(), b.as_i16x16())) }
2572}
2573
2574#[inline]
2580#[target_feature(enable = "avx2")]
2581#[cfg_attr(test, assert_instr(vpsignd))]
2582#[stable(feature = "simd_x86", since = "1.27.0")]
2583pub fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i {
2584 unsafe { transmute(psignd(a.as_i32x8(), b.as_i32x8())) }
2585}
2586
2587#[inline]
2593#[target_feature(enable = "avx2")]
2594#[cfg_attr(test, assert_instr(vpsignb))]
2595#[stable(feature = "simd_x86", since = "1.27.0")]
2596pub fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i {
2597 unsafe { transmute(psignb(a.as_i8x32(), b.as_i8x32())) }
2598}
2599
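/// Shifts packed 16-bit integers in `a` left by the amount given in the
/// low 64 bits of `count`, shifting in zeros.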
2600#[inline]
2605#[target_feature(enable = "avx2")]
2606#[cfg_attr(test, assert_instr(vpsllw))]
2607#[stable(feature = "simd_x86", since = "1.27.0")]
2608pub fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
2609 unsafe { transmute(psllw(a.as_i16x16(), count.as_i16x8())) }
2610}
2611
2612#[inline]
2617#[target_feature(enable = "avx2")]
2618#[cfg_attr(test, assert_instr(vpslld))]
2619#[stable(feature = "simd_x86", since = "1.27.0")]
2620pub fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
2621 unsafe { transmute(pslld(a.as_i32x8(), count.as_i32x4())) }
2622}
2623
2624#[inline]
2629#[target_feature(enable = "avx2")]
2630#[cfg_attr(test, assert_instr(vpsllq))]
2631#[stable(feature = "simd_x86", since = "1.27.0")]
2632pub fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
2633 unsafe { transmute(psllq(a.as_i64x4(), count.as_i64x2())) }
2634}
2635
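/// Shifts packed 16-bit integers in `a` left by `IMM8`, shifting in zeros;
/// the result is all zeros if `IMM8` is 16 or greater.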
2636#[inline]
2641#[target_feature(enable = "avx2")]
2642#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 7))]
2643#[rustc_legacy_const_generics(1)]
2644#[stable(feature = "simd_x86", since = "1.27.0")]
2645pub fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2646 static_assert_uimm_bits!(IMM8, 8);
2647 unsafe {
2648 if IMM8 >= 16 {
2649 _mm256_setzero_si256()
2650 } else {
2651 transmute(simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
2652 }
2653 }
2654}
2655
2656#[inline]
2661#[target_feature(enable = "avx2")]
2662#[cfg_attr(test, assert_instr(vpslld, IMM8 = 7))]
2663#[rustc_legacy_const_generics(1)]
2664#[stable(feature = "simd_x86", since = "1.27.0")]
2665pub fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2666 static_assert_uimm_bits!(IMM8, 8);
2667 unsafe {
2668 if IMM8 >= 32 {
2669 _mm256_setzero_si256()
2670 } else {
2671 transmute(simd_shl(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
2672 }
2673 }
2674}
2675
2676#[inline]
2681#[target_feature(enable = "avx2")]
2682#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 7))]
2683#[rustc_legacy_const_generics(1)]
2684#[stable(feature = "simd_x86", since = "1.27.0")]
2685pub fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2686 static_assert_uimm_bits!(IMM8, 8);
2687 unsafe {
2688 if IMM8 >= 64 {
2689 _mm256_setzero_si256()
2690 } else {
2691 transmute(simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
2692 }
2693 }
2694}
2695
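/// Shifts each 128-bit lane of `a` left by `IMM8` bytes, shifting in zeros
/// (alias of `_mm256_bslli_epi128`).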
2696#[inline]
2700#[target_feature(enable = "avx2")]
2701#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
2702#[rustc_legacy_const_generics(1)]
2703#[stable(feature = "simd_x86", since = "1.27.0")]
2704pub fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
2705 static_assert_uimm_bits!(IMM8, 8);
2706 _mm256_bslli_epi128::<IMM8>(a)
2707}
2708
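/// Shifts each 128-bit lane of `a` left by `IMM8` bytes, shifting in zeros.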
2709#[inline]
2713#[target_feature(enable = "avx2")]
2714#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
2715#[rustc_legacy_const_generics(1)]
2716#[stable(feature = "simd_x86", since = "1.27.0")]
2717pub fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
2718 static_assert_uimm_bits!(IMM8, 8);
2719 const fn mask(shift: i32, i: u32) -> u32 {
2720 let shift = shift as u32 & 0xff;
2721 if shift > 15 || i % 16 < shift {
2722 0
2723 } else {
2724 32 + (i - shift)
2725 }
2726 }
2727 unsafe {
2728 let a = a.as_i8x32();
2729 let r: i8x32 = simd_shuffle!(
2730 i8x32::ZERO,
2731 a,
2732 [
2733 mask(IMM8, 0),
2734 mask(IMM8, 1),
2735 mask(IMM8, 2),
2736 mask(IMM8, 3),
2737 mask(IMM8, 4),
2738 mask(IMM8, 5),
2739 mask(IMM8, 6),
2740 mask(IMM8, 7),
2741 mask(IMM8, 8),
2742 mask(IMM8, 9),
2743 mask(IMM8, 10),
2744 mask(IMM8, 11),
2745 mask(IMM8, 12),
2746 mask(IMM8, 13),
2747 mask(IMM8, 14),
2748 mask(IMM8, 15),
2749 mask(IMM8, 16),
2750 mask(IMM8, 17),
2751 mask(IMM8, 18),
2752 mask(IMM8, 19),
2753 mask(IMM8, 20),
2754 mask(IMM8, 21),
2755 mask(IMM8, 22),
2756 mask(IMM8, 23),
2757 mask(IMM8, 24),
2758 mask(IMM8, 25),
2759 mask(IMM8, 26),
2760 mask(IMM8, 27),
2761 mask(IMM8, 28),
2762 mask(IMM8, 29),
2763 mask(IMM8, 30),
2764 mask(IMM8, 31),
2765 ],
2766 );
2767 transmute(r)
2768 }
2769}
2770
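/// Shifts packed 32-bit integers in `a` left by the amount specified in
/// the corresponding element of `count`, shifting in zeros; elements whose
/// shift count is greater than 31 become zero.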
2771#[inline]
2777#[target_feature(enable = "avx2")]
2778#[cfg_attr(test, assert_instr(vpsllvd))]
2779#[stable(feature = "simd_x86", since = "1.27.0")]
2780pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
2781 unsafe { transmute(psllvd(a.as_i32x4(), count.as_i32x4())) }
2782}
2783
2784#[inline]
2790#[target_feature(enable = "avx2")]
2791#[cfg_attr(test, assert_instr(vpsllvd))]
2792#[stable(feature = "simd_x86", since = "1.27.0")]
2793pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
2794 unsafe { transmute(psllvd256(a.as_i32x8(), count.as_i32x8())) }
2795}
2796
2797#[inline]
2803#[target_feature(enable = "avx2")]
2804#[cfg_attr(test, assert_instr(vpsllvq))]
2805#[stable(feature = "simd_x86", since = "1.27.0")]
2806pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
2807 unsafe { transmute(psllvq(a.as_i64x2(), count.as_i64x2())) }
2808}
2809
2810#[inline]
2816#[target_feature(enable = "avx2")]
2817#[cfg_attr(test, assert_instr(vpsllvq))]
2818#[stable(feature = "simd_x86", since = "1.27.0")]
2819pub fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
2820 unsafe { transmute(psllvq256(a.as_i64x4(), count.as_i64x4())) }
2821}
2822
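/// Shifts packed 16-bit integers in `a` right by the amount given in the
/// low 64 bits of `count`, shifting in sign bits.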
2823#[inline]
2828#[target_feature(enable = "avx2")]
2829#[cfg_attr(test, assert_instr(vpsraw))]
2830#[stable(feature = "simd_x86", since = "1.27.0")]
2831pub fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
2832 unsafe { transmute(psraw(a.as_i16x16(), count.as_i16x8())) }
2833}
2834
2835#[inline]
2840#[target_feature(enable = "avx2")]
2841#[cfg_attr(test, assert_instr(vpsrad))]
2842#[stable(feature = "simd_x86", since = "1.27.0")]
2843pub fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
2844 unsafe { transmute(psrad(a.as_i32x8(), count.as_i32x4())) }
2845}
2846
2847#[inline]
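/// Shifts packed 16-bit integers in `a` right by `IMM8`, shifting in sign
/// bits; counts above 15 are clamped to 15.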
2852#[target_feature(enable = "avx2")]
2853#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 7))]
2854#[rustc_legacy_const_generics(1)]
2855#[stable(feature = "simd_x86", since = "1.27.0")]
2856pub fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2857 static_assert_uimm_bits!(IMM8, 8);
2858 unsafe { transmute(simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16))) }
2859}
2860
2861#[inline]
2866#[target_feature(enable = "avx2")]
2867#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 7))]
2868#[rustc_legacy_const_generics(1)]
2869#[stable(feature = "simd_x86", since = "1.27.0")]
2870pub fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2871 static_assert_uimm_bits!(IMM8, 8);
2872 unsafe { transmute(simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31)))) }
2873}
2874
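/// Shifts packed 32-bit integers in `a` right by the amount specified in
/// the corresponding element of `count`, shifting in sign bits.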
2875#[inline]
2880#[target_feature(enable = "avx2")]
2881#[cfg_attr(test, assert_instr(vpsravd))]
2882#[stable(feature = "simd_x86", since = "1.27.0")]
2883pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
2884 unsafe { transmute(psravd(a.as_i32x4(), count.as_i32x4())) }
2885}
2886
2887#[inline]
2892#[target_feature(enable = "avx2")]
2893#[cfg_attr(test, assert_instr(vpsravd))]
2894#[stable(feature = "simd_x86", since = "1.27.0")]
2895pub fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
2896 unsafe { transmute(psravd256(a.as_i32x8(), count.as_i32x8())) }
2897}
2898
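/// Shifts each 128-bit lane of `a` right by `IMM8` bytes, shifting in
/// zeros (alias of `_mm256_bsrli_epi128`).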
2899#[inline]
2903#[target_feature(enable = "avx2")]
2904#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
2905#[rustc_legacy_const_generics(1)]
2906#[stable(feature = "simd_x86", since = "1.27.0")]
2907pub fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
2908 static_assert_uimm_bits!(IMM8, 8);
2909 _mm256_bsrli_epi128::<IMM8>(a)
2910}
2911
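/// Shifts each 128-bit lane of `a` right by `IMM8` bytes, shifting in
/// zeros.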
2912#[inline]
2916#[target_feature(enable = "avx2")]
2917#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
2918#[rustc_legacy_const_generics(1)]
2919#[stable(feature = "simd_x86", since = "1.27.0")]
2920pub fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
2921 static_assert_uimm_bits!(IMM8, 8);
2922 const fn mask(shift: i32, i: u32) -> u32 {
2923 let shift = shift as u32 & 0xff;
2924 if shift > 15 || (15 - (i % 16)) < shift {
2925 0
2926 } else {
2927 32 + (i + shift)
2928 }
2929 }
2930 unsafe {
2931 let a = a.as_i8x32();
2932 let r: i8x32 = simd_shuffle!(
2933 i8x32::ZERO,
2934 a,
2935 [
2936 mask(IMM8, 0),
2937 mask(IMM8, 1),
2938 mask(IMM8, 2),
2939 mask(IMM8, 3),
2940 mask(IMM8, 4),
2941 mask(IMM8, 5),
2942 mask(IMM8, 6),
2943 mask(IMM8, 7),
2944 mask(IMM8, 8),
2945 mask(IMM8, 9),
2946 mask(IMM8, 10),
2947 mask(IMM8, 11),
2948 mask(IMM8, 12),
2949 mask(IMM8, 13),
2950 mask(IMM8, 14),
2951 mask(IMM8, 15),
2952 mask(IMM8, 16),
2953 mask(IMM8, 17),
2954 mask(IMM8, 18),
2955 mask(IMM8, 19),
2956 mask(IMM8, 20),
2957 mask(IMM8, 21),
2958 mask(IMM8, 22),
2959 mask(IMM8, 23),
2960 mask(IMM8, 24),
2961 mask(IMM8, 25),
2962 mask(IMM8, 26),
2963 mask(IMM8, 27),
2964 mask(IMM8, 28),
2965 mask(IMM8, 29),
2966 mask(IMM8, 30),
2967 mask(IMM8, 31),
2968 ],
2969 );
2970 transmute(r)
2971 }
2972}
2973
2974#[inline]
2979#[target_feature(enable = "avx2")]
2980#[cfg_attr(test, assert_instr(vpsrlw))]
2981#[stable(feature = "simd_x86", since = "1.27.0")]
2982pub fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
2983 unsafe { transmute(psrlw(a.as_i16x16(), count.as_i16x8())) }
2984}
2985
2986#[inline]
2991#[target_feature(enable = "avx2")]
2992#[cfg_attr(test, assert_instr(vpsrld))]
2993#[stable(feature = "simd_x86", since = "1.27.0")]
2994pub fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
2995 unsafe { transmute(psrld(a.as_i32x8(), count.as_i32x4())) }
2996}
2997
2998#[inline]
3003#[target_feature(enable = "avx2")]
3004#[cfg_attr(test, assert_instr(vpsrlq))]
3005#[stable(feature = "simd_x86", since = "1.27.0")]
3006pub fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
3007 unsafe { transmute(psrlq(a.as_i64x4(), count.as_i64x2())) }
3008}
3009
3010#[inline]
3015#[target_feature(enable = "avx2")]
3016#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 7))]
3017#[rustc_legacy_const_generics(1)]
3018#[stable(feature = "simd_x86", since = "1.27.0")]
3019pub fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
3020 static_assert_uimm_bits!(IMM8, 8);
3021 unsafe {
3022 if IMM8 >= 16 {
3023 _mm256_setzero_si256()
3024 } else {
3025 transmute(simd_shr(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
3026 }
3027 }
3028}
3029
3030#[inline]
3035#[target_feature(enable = "avx2")]
3036#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 7))]
3037#[rustc_legacy_const_generics(1)]
3038#[stable(feature = "simd_x86", since = "1.27.0")]
3039pub fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
3040 static_assert_uimm_bits!(IMM8, 8);
3041 unsafe {
3042 if IMM8 >= 32 {
3043 _mm256_setzero_si256()
3044 } else {
3045 transmute(simd_shr(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
3046 }
3047 }
3048}
3049
3050#[inline]
3055#[target_feature(enable = "avx2")]
3056#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 7))]
3057#[rustc_legacy_const_generics(1)]
3058#[stable(feature = "simd_x86", since = "1.27.0")]
3059pub fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
3060 static_assert_uimm_bits!(IMM8, 8);
3061 unsafe {
3062 if IMM8 >= 64 {
3063 _mm256_setzero_si256()
3064 } else {
3065 transmute(simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
3066 }
3067 }
3068}
3069
3070#[inline]
3075#[target_feature(enable = "avx2")]
3076#[cfg_attr(test, assert_instr(vpsrlvd))]
3077#[stable(feature = "simd_x86", since = "1.27.0")]
3078pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
3079 unsafe { transmute(psrlvd(a.as_i32x4(), count.as_i32x4())) }
3080}
3081
3082#[inline]
3087#[target_feature(enable = "avx2")]
3088#[cfg_attr(test, assert_instr(vpsrlvd))]
3089#[stable(feature = "simd_x86", since = "1.27.0")]
3090pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
3091 unsafe { transmute(psrlvd256(a.as_i32x8(), count.as_i32x8())) }
3092}
3093
3094#[inline]
3099#[target_feature(enable = "avx2")]
3100#[cfg_attr(test, assert_instr(vpsrlvq))]
3101#[stable(feature = "simd_x86", since = "1.27.0")]
3102pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
3103 unsafe { transmute(psrlvq(a.as_i64x2(), count.as_i64x2())) }
3104}
3105
3106#[inline]
3111#[target_feature(enable = "avx2")]
3112#[cfg_attr(test, assert_instr(vpsrlvq))]
3113#[stable(feature = "simd_x86", since = "1.27.0")]
3114pub fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
3115 unsafe { transmute(psrlvq256(a.as_i64x4(), count.as_i64x4())) }
3116}
3117
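/// Loads 256 bits of integer data from memory into the result using a
/// non-temporal memory hint. `mem_addr` must be aligned on a 32-byte
/// boundary or a general-protection exception may be generated.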
3118#[inline]
3124#[target_feature(enable = "avx2")]
3125#[cfg_attr(test, assert_instr(vmovntdqa))]
3126#[stable(feature = "simd_x86_updates", since = "1.82.0")]
3127pub unsafe fn _mm256_stream_load_si256(mem_addr: *const __m256i) -> __m256i {
3128 let dst: __m256i;
3129 crate::arch::asm!(
3130 vpl!("vmovntdqa {a}"),
3131 a = out(ymm_reg) dst,
3132 p = in(reg) mem_addr,
3133 options(pure, readonly, nostack, preserves_flags),
3134 );
3135 dst
3136}
3137
3138#[inline]
3142#[target_feature(enable = "avx2")]
3143#[cfg_attr(test, assert_instr(vpsubw))]
3144#[stable(feature = "simd_x86", since = "1.27.0")]
3145pub fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
3146 unsafe { transmute(simd_sub(a.as_i16x16(), b.as_i16x16())) }
3147}
3148
3149#[inline]
3153#[target_feature(enable = "avx2")]
3154#[cfg_attr(test, assert_instr(vpsubd))]
3155#[stable(feature = "simd_x86", since = "1.27.0")]
3156pub fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
3157 unsafe { transmute(simd_sub(a.as_i32x8(), b.as_i32x8())) }
3158}
3159
3160#[inline]
3164#[target_feature(enable = "avx2")]
3165#[cfg_attr(test, assert_instr(vpsubq))]
3166#[stable(feature = "simd_x86", since = "1.27.0")]
3167pub fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
3168 unsafe { transmute(simd_sub(a.as_i64x4(), b.as_i64x4())) }
3169}
3170
3171#[inline]
3175#[target_feature(enable = "avx2")]
3176#[cfg_attr(test, assert_instr(vpsubb))]
3177#[stable(feature = "simd_x86", since = "1.27.0")]
3178pub fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
3179 unsafe { transmute(simd_sub(a.as_i8x32(), b.as_i8x32())) }
3180}
3181
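/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in
/// `a` using signed saturation.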
3182#[inline]
3187#[target_feature(enable = "avx2")]
3188#[cfg_attr(test, assert_instr(vpsubsw))]
3189#[stable(feature = "simd_x86", since = "1.27.0")]
3190pub fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
3191 unsafe { transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16())) }
3192}
3193
3194#[inline]
3199#[target_feature(enable = "avx2")]
3200#[cfg_attr(test, assert_instr(vpsubsb))]
3201#[stable(feature = "simd_x86", since = "1.27.0")]
3202pub fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
3203 unsafe { transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32())) }
3204}
3205
3206#[inline]
3211#[target_feature(enable = "avx2")]
3212#[cfg_attr(test, assert_instr(vpsubusw))]
3213#[stable(feature = "simd_x86", since = "1.27.0")]
3214pub fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
3215 unsafe { transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16())) }
3216}
3217
3218#[inline]
3223#[target_feature(enable = "avx2")]
3224#[cfg_attr(test, assert_instr(vpsubusb))]
3225#[stable(feature = "simd_x86", since = "1.27.0")]
3226pub fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
3227 unsafe { transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32())) }
3228}
3229
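/// Unpacks and interleaves 8-bit integers from the high half of each
/// 128-bit lane of `a` and `b`.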
3230#[inline]
3270#[target_feature(enable = "avx2")]
3271#[cfg_attr(test, assert_instr(vpunpckhbw))]
3272#[stable(feature = "simd_x86", since = "1.27.0")]
3273pub fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
3274 unsafe {
3275 #[rustfmt::skip]
3276 let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3277 8, 40, 9, 41, 10, 42, 11, 43,
3278 12, 44, 13, 45, 14, 46, 15, 47,
3279 24, 56, 25, 57, 26, 58, 27, 59,
3280 28, 60, 29, 61, 30, 62, 31, 63,
3281 ]);
3282 transmute(r)
3283 }
3284}
3285
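/// Unpacks and interleaves 8-bit integers from the low half of each
/// 128-bit lane of `a` and `b`.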
3286#[inline]
3325#[target_feature(enable = "avx2")]
3326#[cfg_attr(test, assert_instr(vpunpcklbw))]
3327#[stable(feature = "simd_x86", since = "1.27.0")]
3328pub fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
3329 unsafe {
3330 #[rustfmt::skip]
3331 let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3332 0, 32, 1, 33, 2, 34, 3, 35,
3333 4, 36, 5, 37, 6, 38, 7, 39,
3334 16, 48, 17, 49, 18, 50, 19, 51,
3335 20, 52, 21, 53, 22, 54, 23, 55,
3336 ]);
3337 transmute(r)
3338 }
3339}
3340
3341#[inline]
3376#[target_feature(enable = "avx2")]
3377#[cfg_attr(test, assert_instr(vpunpckhwd))]
3378#[stable(feature = "simd_x86", since = "1.27.0")]
3379pub fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
3380 unsafe {
3381 let r: i16x16 = simd_shuffle!(
3382 a.as_i16x16(),
3383 b.as_i16x16(),
3384 [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
3385 );
3386 transmute(r)
3387 }
3388}
3389
3390#[inline]
3426#[target_feature(enable = "avx2")]
3427#[cfg_attr(test, assert_instr(vpunpcklwd))]
3428#[stable(feature = "simd_x86", since = "1.27.0")]
3429pub fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
3430 unsafe {
3431 let r: i16x16 = simd_shuffle!(
3432 a.as_i16x16(),
3433 b.as_i16x16(),
3434 [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
3435 );
3436 transmute(r)
3437 }
3438}
3439
3440#[inline]
3469#[target_feature(enable = "avx2")]
3470#[cfg_attr(test, assert_instr(vunpckhps))]
3471#[stable(feature = "simd_x86", since = "1.27.0")]
3472pub fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
3473 unsafe {
3474 let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
3475 transmute(r)
3476 }
3477}
3478
3479#[inline]
3508#[target_feature(enable = "avx2")]
3509#[cfg_attr(test, assert_instr(vunpcklps))]
3510#[stable(feature = "simd_x86", since = "1.27.0")]
3511pub fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
3512 unsafe {
3513 let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
3514 transmute(r)
3515 }
3516}
3517
3518#[inline]
3547#[target_feature(enable = "avx2")]
3548#[cfg_attr(test, assert_instr(vunpckhpd))]
3549#[stable(feature = "simd_x86", since = "1.27.0")]
3550pub fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
3551 unsafe {
3552 let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
3553 transmute(r)
3554 }
3555}
3556
3557#[inline]
3586#[target_feature(enable = "avx2")]
3587#[cfg_attr(test, assert_instr(vunpcklpd))]
3588#[stable(feature = "simd_x86", since = "1.27.0")]
3589pub fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
3590 unsafe {
3591 let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
3592 transmute(r)
3593 }
3594}
3595
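/// Computes the bitwise XOR of 256 bits (representing integer data) in `a`
/// and `b`.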
3596#[inline]
3601#[target_feature(enable = "avx2")]
3602#[cfg_attr(test, assert_instr(vxorps))]
3603#[stable(feature = "simd_x86", since = "1.27.0")]
3604pub fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
3605 unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
3606}
3607
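/// Extracts an 8-bit integer from `a`, selected with `INDEX`; the result
/// is zero-extended to 32 bits.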
3608#[inline]
3615#[target_feature(enable = "avx2")]
3616#[rustc_legacy_const_generics(1)]
3618#[stable(feature = "simd_x86", since = "1.27.0")]
3619pub fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
3620 static_assert_uimm_bits!(INDEX, 5);
3621 unsafe { simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32 }
3622}
3623
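/// Extracts a 16-bit integer from `a`, selected with `INDEX`; the result
/// is zero-extended to 32 bits.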
3624#[inline]
3631#[target_feature(enable = "avx2")]
3632#[rustc_legacy_const_generics(1)]
3634#[stable(feature = "simd_x86", since = "1.27.0")]
3635pub fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
3636 static_assert_uimm_bits!(INDEX, 4);
3637 unsafe { simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 }
3638}
3639
3640#[allow(improper_ctypes)]
3641unsafe extern "C" {
3642 #[link_name = "llvm.x86.avx2.phadd.sw"]
3643 fn phaddsw(a: i16x16, b: i16x16) -> i16x16;
3644 #[link_name = "llvm.x86.avx2.phsub.sw"]
3645 fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
3646 #[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
3647 fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16;
3648 #[link_name = "llvm.x86.avx2.maskload.d"]
3649 fn maskloadd(mem_addr: *const i8, mask: i32x4) -> i32x4;
3650 #[link_name = "llvm.x86.avx2.maskload.d.256"]
3651 fn maskloadd256(mem_addr: *const i8, mask: i32x8) -> i32x8;
3652 #[link_name = "llvm.x86.avx2.maskload.q"]
3653 fn maskloadq(mem_addr: *const i8, mask: i64x2) -> i64x2;
3654 #[link_name = "llvm.x86.avx2.maskload.q.256"]
3655 fn maskloadq256(mem_addr: *const i8, mask: i64x4) -> i64x4;
3656 #[link_name = "llvm.x86.avx2.maskstore.d"]
3657 fn maskstored(mem_addr: *mut i8, mask: i32x4, a: i32x4);
3658 #[link_name = "llvm.x86.avx2.maskstore.d.256"]
3659 fn maskstored256(mem_addr: *mut i8, mask: i32x8, a: i32x8);
3660 #[link_name = "llvm.x86.avx2.maskstore.q"]
3661 fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2);
3662 #[link_name = "llvm.x86.avx2.maskstore.q.256"]
3663 fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
3664 #[link_name = "llvm.x86.avx2.mpsadbw"]
3665 fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
3666 #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
3667 fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
3668 #[link_name = "llvm.x86.avx2.packsswb"]
3669 fn packsswb(a: i16x16, b: i16x16) -> i8x32;
3670 #[link_name = "llvm.x86.avx2.packssdw"]
3671 fn packssdw(a: i32x8, b: i32x8) -> i16x16;
3672 #[link_name = "llvm.x86.avx2.packuswb"]
3673 fn packuswb(a: i16x16, b: i16x16) -> u8x32;
3674 #[link_name = "llvm.x86.avx2.packusdw"]
3675 fn packusdw(a: i32x8, b: i32x8) -> u16x16;
3676 #[link_name = "llvm.x86.avx2.psad.bw"]
3677 fn psadbw(a: u8x32, b: u8x32) -> u64x4;
3678 #[link_name = "llvm.x86.avx2.psign.b"]
3679 fn psignb(a: i8x32, b: i8x32) -> i8x32;
3680 #[link_name = "llvm.x86.avx2.psign.w"]
3681 fn psignw(a: i16x16, b: i16x16) -> i16x16;
3682 #[link_name = "llvm.x86.avx2.psign.d"]
3683 fn psignd(a: i32x8, b: i32x8) -> i32x8;
3684 #[link_name = "llvm.x86.avx2.psll.w"]
3685 fn psllw(a: i16x16, count: i16x8) -> i16x16;
3686 #[link_name = "llvm.x86.avx2.psll.d"]
3687 fn pslld(a: i32x8, count: i32x4) -> i32x8;
3688 #[link_name = "llvm.x86.avx2.psll.q"]
3689 fn psllq(a: i64x4, count: i64x2) -> i64x4;
3690 #[link_name = "llvm.x86.avx2.psllv.d"]
3691 fn psllvd(a: i32x4, count: i32x4) -> i32x4;
3692 #[link_name = "llvm.x86.avx2.psllv.d.256"]
3693 fn psllvd256(a: i32x8, count: i32x8) -> i32x8;
3694 #[link_name = "llvm.x86.avx2.psllv.q"]
3695 fn psllvq(a: i64x2, count: i64x2) -> i64x2;
3696 #[link_name = "llvm.x86.avx2.psllv.q.256"]
3697 fn psllvq256(a: i64x4, count: i64x4) -> i64x4;
3698 #[link_name = "llvm.x86.avx2.psra.w"]
3699 fn psraw(a: i16x16, count: i16x8) -> i16x16;
3700 #[link_name = "llvm.x86.avx2.psra.d"]
3701 fn psrad(a: i32x8, count: i32x4) -> i32x8;
3702 #[link_name = "llvm.x86.avx2.psrav.d"]
3703 fn psravd(a: i32x4, count: i32x4) -> i32x4;
3704 #[link_name = "llvm.x86.avx2.psrav.d.256"]
3705 fn psravd256(a: i32x8, count: i32x8) -> i32x8;
3706 #[link_name = "llvm.x86.avx2.psrl.w"]
3707 fn psrlw(a: i16x16, count: i16x8) -> i16x16;
3708 #[link_name = "llvm.x86.avx2.psrl.d"]
3709 fn psrld(a: i32x8, count: i32x4) -> i32x8;
3710 #[link_name = "llvm.x86.avx2.psrl.q"]
3711 fn psrlq(a: i64x4, count: i64x2) -> i64x4;
3712 #[link_name = "llvm.x86.avx2.psrlv.d"]
3713 fn psrlvd(a: i32x4, count: i32x4) -> i32x4;
3714 #[link_name = "llvm.x86.avx2.psrlv.d.256"]
3715 fn psrlvd256(a: i32x8, count: i32x8) -> i32x8;
3716 #[link_name = "llvm.x86.avx2.psrlv.q"]
3717 fn psrlvq(a: i64x2, count: i64x2) -> i64x2;
3718 #[link_name = "llvm.x86.avx2.psrlv.q.256"]
3719 fn psrlvq256(a: i64x4, count: i64x4) -> i64x4;
3720 #[link_name = "llvm.x86.avx2.pshuf.b"]
3721 fn pshufb(a: u8x32, b: u8x32) -> u8x32;
3722 #[link_name = "llvm.x86.avx2.permd"]
3723 fn permd(a: u32x8, b: u32x8) -> u32x8;
3724 #[link_name = "llvm.x86.avx2.permps"]
3725 fn permps(a: __m256, b: i32x8) -> __m256;
3726 #[link_name = "llvm.x86.avx2.gather.d.d"]
3727 fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4;
3728 #[link_name = "llvm.x86.avx2.gather.d.d.256"]
3729 fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8;
3730 #[link_name = "llvm.x86.avx2.gather.d.q"]
3731 fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2;
3732 #[link_name = "llvm.x86.avx2.gather.d.q.256"]
3733 fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4;
3734 #[link_name = "llvm.x86.avx2.gather.q.d"]
3735 fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4;
3736 #[link_name = "llvm.x86.avx2.gather.q.d.256"]
3737 fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4;
3738 #[link_name = "llvm.x86.avx2.gather.q.q"]
3739 fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2;
3740 #[link_name = "llvm.x86.avx2.gather.q.q.256"]
3741 fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4;
3742 #[link_name = "llvm.x86.avx2.gather.d.pd"]
3743 fn pgatherdpd(
3744 src: __m128d,
3745 slice: *const i8,
3746 offsets: i32x4,
3747 mask: __m128d,
3748 scale: i8,
3749 ) -> __m128d;
3750 #[link_name = "llvm.x86.avx2.gather.d.pd.256"]
3751 fn vpgatherdpd(
3752 src: __m256d,
3753 slice: *const i8,
3754 offsets: i32x4,
3755 mask: __m256d,
3756 scale: i8,
3757 ) -> __m256d;
3758 #[link_name = "llvm.x86.avx2.gather.q.pd"]
3759 fn pgatherqpd(
3760 src: __m128d,
3761 slice: *const i8,
3762 offsets: i64x2,
3763 mask: __m128d,
3764 scale: i8,
3765 ) -> __m128d;
3766 #[link_name = "llvm.x86.avx2.gather.q.pd.256"]
3767 fn vpgatherqpd(
3768 src: __m256d,
3769 slice: *const i8,
3770 offsets: i64x4,
3771 mask: __m256d,
3772 scale: i8,
3773 ) -> __m256d;
3774 #[link_name = "llvm.x86.avx2.gather.d.ps"]
3775 fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8)
3776 -> __m128;
3777 #[link_name = "llvm.x86.avx2.gather.d.ps.256"]
3778 fn vpgatherdps(
3779 src: __m256,
3780 slice: *const i8,
3781 offsets: i32x8,
3782 mask: __m256,
3783 scale: i8,
3784 ) -> __m256;
3785 #[link_name = "llvm.x86.avx2.gather.q.ps"]
3786 fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8)
3787 -> __m128;
3788 #[link_name = "llvm.x86.avx2.gather.q.ps.256"]
3789 fn vpgatherqps(
3790 src: __m128,
3791 slice: *const i8,
3792 offsets: i64x4,
3793 mask: __m128,
3794 scale: i8,
3795 ) -> __m128;
3796}
3797
3798#[cfg(test)]
3799mod tests {
3800
3801 use stdarch_test::simd_test;
3802
3803 use crate::core_arch::x86::*;
3804
3805 #[simd_test(enable = "avx2")]
3806 unsafe fn test_mm256_abs_epi32() {
3807 #[rustfmt::skip]
3808 let a = _mm256_setr_epi32(
3809 0, 1, -1, i32::MAX,
3810 i32::MIN, 100, -100, -32,
3811 );
3812 let r = _mm256_abs_epi32(a);
3813 #[rustfmt::skip]
3814 let e = _mm256_setr_epi32(
3815 0, 1, 1, i32::MAX,
3816 i32::MAX.wrapping_add(1), 100, 100, 32,
3817 );
3818 assert_eq_m256i(r, e);
3819 }
3820
3821 #[simd_test(enable = "avx2")]
3822 unsafe fn test_mm256_abs_epi16() {
3823 #[rustfmt::skip]
3824 let a = _mm256_setr_epi16(
3825 0, 1, -1, 2, -2, 3, -3, 4,
3826 -4, 5, -5, i16::MAX, i16::MIN, 100, -100, -32,
3827 );
3828 let r = _mm256_abs_epi16(a);
3829 #[rustfmt::skip]
3830 let e = _mm256_setr_epi16(
3831 0, 1, 1, 2, 2, 3, 3, 4,
3832 4, 5, 5, i16::MAX, i16::MAX.wrapping_add(1), 100, 100, 32,
3833 );
3834 assert_eq_m256i(r, e);
3835 }
3836
3837 #[simd_test(enable = "avx2")]
3838 unsafe fn test_mm256_abs_epi8() {
3839 #[rustfmt::skip]
3840 let a = _mm256_setr_epi8(
3841 0, 1, -1, 2, -2, 3, -3, 4,
3842 -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
3843 0, 1, -1, 2, -2, 3, -3, 4,
3844 -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
3845 );
3846 let r = _mm256_abs_epi8(a);
3847 #[rustfmt::skip]
3848 let e = _mm256_setr_epi8(
3849 0, 1, 1, 2, 2, 3, 3, 4,
3850 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
3851 0, 1, 1, 2, 2, 3, 3, 4,
3852 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
3853 );
3854 assert_eq_m256i(r, e);
3855 }
3856
3857 #[simd_test(enable = "avx2")]
3858 unsafe fn test_mm256_add_epi64() {
3859 let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000);
3860 let b = _mm256_setr_epi64x(-1, 0, 1, 2);
3861 let r = _mm256_add_epi64(a, b);
3862 let e = _mm256_setr_epi64x(-11, 0, 101, 1_000_000_002);
3863 assert_eq_m256i(r, e);
3864 }
3865
3866 #[simd_test(enable = "avx2")]
3867 unsafe fn test_mm256_add_epi32() {
3868 let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6);
3869 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3870 let r = _mm256_add_epi32(a, b);
3871 let e = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
3872 assert_eq_m256i(r, e);
3873 }
3874
3875 #[simd_test(enable = "avx2")]
3876 unsafe fn test_mm256_add_epi16() {
3877 #[rustfmt::skip]
3878 let a = _mm256_setr_epi16(
3879 0, 1, 2, 3, 4, 5, 6, 7,
3880 8, 9, 10, 11, 12, 13, 14, 15,
3881 );
3882 #[rustfmt::skip]
3883 let b = _mm256_setr_epi16(
3884 0, 1, 2, 3, 4, 5, 6, 7,
3885 8, 9, 10, 11, 12, 13, 14, 15,
3886 );
3887 let r = _mm256_add_epi16(a, b);
3888 #[rustfmt::skip]
3889 let e = _mm256_setr_epi16(
3890 0, 2, 4, 6, 8, 10, 12, 14,
3891 16, 18, 20, 22, 24, 26, 28, 30,
3892 );
3893 assert_eq_m256i(r, e);
3894 }
3895
3896 #[simd_test(enable = "avx2")]
3897 unsafe fn test_mm256_add_epi8() {
3898 #[rustfmt::skip]
3899 let a = _mm256_setr_epi8(
3900 0, 1, 2, 3, 4, 5, 6, 7,
3901 8, 9, 10, 11, 12, 13, 14, 15,
3902 16, 17, 18, 19, 20, 21, 22, 23,
3903 24, 25, 26, 27, 28, 29, 30, 31,
3904 );
3905 #[rustfmt::skip]
3906 let b = _mm256_setr_epi8(
3907 0, 1, 2, 3, 4, 5, 6, 7,
3908 8, 9, 10, 11, 12, 13, 14, 15,
3909 16, 17, 18, 19, 20, 21, 22, 23,
3910 24, 25, 26, 27, 28, 29, 30, 31,
3911 );
3912 let r = _mm256_add_epi8(a, b);
3913 #[rustfmt::skip]
3914 let e = _mm256_setr_epi8(
3915 0, 2, 4, 6, 8, 10, 12, 14,
3916 16, 18, 20, 22, 24, 26, 28, 30,
3917 32, 34, 36, 38, 40, 42, 44, 46,
3918 48, 50, 52, 54, 56, 58, 60, 62,
3919 );
3920 assert_eq_m256i(r, e);
3921 }
3922
3923 #[simd_test(enable = "avx2")]
3924 unsafe fn test_mm256_adds_epi8() {
3925 #[rustfmt::skip]
3926 let a = _mm256_setr_epi8(
3927 0, 1, 2, 3, 4, 5, 6, 7,
3928 8, 9, 10, 11, 12, 13, 14, 15,
3929 16, 17, 18, 19, 20, 21, 22, 23,
3930 24, 25, 26, 27, 28, 29, 30, 31,
3931 );
3932 #[rustfmt::skip]
3933 let b = _mm256_setr_epi8(
3934 32, 33, 34, 35, 36, 37, 38, 39,
3935 40, 41, 42, 43, 44, 45, 46, 47,
3936 48, 49, 50, 51, 52, 53, 54, 55,
3937 56, 57, 58, 59, 60, 61, 62, 63,
3938 );
3939 let r = _mm256_adds_epi8(a, b);
3940 #[rustfmt::skip]
3941 let e = _mm256_setr_epi8(
3942 32, 34, 36, 38, 40, 42, 44, 46,
3943 48, 50, 52, 54, 56, 58, 60, 62,
3944 64, 66, 68, 70, 72, 74, 76, 78,
3945 80, 82, 84, 86, 88, 90, 92, 94,
3946 );
3947 assert_eq_m256i(r, e);
3948 }
3949
3950 #[simd_test(enable = "avx2")]
3951 unsafe fn test_mm256_adds_epi8_saturate_positive() {
3952 let a = _mm256_set1_epi8(0x7F);
3953 let b = _mm256_set1_epi8(1);
3954 let r = _mm256_adds_epi8(a, b);
3955 assert_eq_m256i(r, a);
3956 }
3957
3958 #[simd_test(enable = "avx2")]
3959 unsafe fn test_mm256_adds_epi8_saturate_negative() {
3960 let a = _mm256_set1_epi8(-0x80);
3961 let b = _mm256_set1_epi8(-1);
3962 let r = _mm256_adds_epi8(a, b);
3963 assert_eq_m256i(r, a);
3964 }
3965
3966 #[simd_test(enable = "avx2")]
3967 unsafe fn test_mm256_adds_epi16() {
3968 #[rustfmt::skip]
3969 let a = _mm256_setr_epi16(
3970 0, 1, 2, 3, 4, 5, 6, 7,
3971 8, 9, 10, 11, 12, 13, 14, 15,
3972 );
3973 #[rustfmt::skip]
3974 let b = _mm256_setr_epi16(
3975 32, 33, 34, 35, 36, 37, 38, 39,
3976 40, 41, 42, 43, 44, 45, 46, 47,
3977 );
3978 let r = _mm256_adds_epi16(a, b);
3979 #[rustfmt::skip]
3980 let e = _mm256_setr_epi16(
3981 32, 34, 36, 38, 40, 42, 44, 46,
3982 48, 50, 52, 54, 56, 58, 60, 62,
3983 );
3984
3985 assert_eq_m256i(r, e);
3986 }
3987
3988 #[simd_test(enable = "avx2")]
3989 unsafe fn test_mm256_adds_epi16_saturate_positive() {
3990 let a = _mm256_set1_epi16(0x7FFF);
3991 let b = _mm256_set1_epi16(1);
3992 let r = _mm256_adds_epi16(a, b);
3993 assert_eq_m256i(r, a);
3994 }
3995
3996 #[simd_test(enable = "avx2")]
3997 unsafe fn test_mm256_adds_epi16_saturate_negative() {
3998 let a = _mm256_set1_epi16(-0x8000);
3999 let b = _mm256_set1_epi16(-1);
4000 let r = _mm256_adds_epi16(a, b);
4001 assert_eq_m256i(r, a);
4002 }
4003
4004 #[simd_test(enable = "avx2")]
4005 unsafe fn test_mm256_adds_epu8() {
4006 #[rustfmt::skip]
4007 let a = _mm256_setr_epi8(
4008 0, 1, 2, 3, 4, 5, 6, 7,
4009 8, 9, 10, 11, 12, 13, 14, 15,
4010 16, 17, 18, 19, 20, 21, 22, 23,
4011 24, 25, 26, 27, 28, 29, 30, 31,
4012 );
4013 #[rustfmt::skip]
4014 let b = _mm256_setr_epi8(
4015 32, 33, 34, 35, 36, 37, 38, 39,
4016 40, 41, 42, 43, 44, 45, 46, 47,
4017 48, 49, 50, 51, 52, 53, 54, 55,
4018 56, 57, 58, 59, 60, 61, 62, 63,
4019 );
4020 let r = _mm256_adds_epu8(a, b);
4021 #[rustfmt::skip]
4022 let e = _mm256_setr_epi8(
4023 32, 34, 36, 38, 40, 42, 44, 46,
4024 48, 50, 52, 54, 56, 58, 60, 62,
4025 64, 66, 68, 70, 72, 74, 76, 78,
4026 80, 82, 84, 86, 88, 90, 92, 94,
4027 );
4028 assert_eq_m256i(r, e);
4029 }
4030
4031 #[simd_test(enable = "avx2")]
4032 unsafe fn test_mm256_adds_epu8_saturate() {
4033 let a = _mm256_set1_epi8(!0);
4034 let b = _mm256_set1_epi8(1);
4035 let r = _mm256_adds_epu8(a, b);
4036 assert_eq_m256i(r, a);
4037 }
4038
4039 #[simd_test(enable = "avx2")]
4040 unsafe fn test_mm256_adds_epu16() {
4041 #[rustfmt::skip]
4042 let a = _mm256_setr_epi16(
4043 0, 1, 2, 3, 4, 5, 6, 7,
4044 8, 9, 10, 11, 12, 13, 14, 15,
4045 );
4046 #[rustfmt::skip]
4047 let b = _mm256_setr_epi16(
4048 32, 33, 34, 35, 36, 37, 38, 39,
4049 40, 41, 42, 43, 44, 45, 46, 47,
4050 );
4051 let r = _mm256_adds_epu16(a, b);
4052 #[rustfmt::skip]
4053 let e = _mm256_setr_epi16(
4054 32, 34, 36, 38, 40, 42, 44, 46,
4055 48, 50, 52, 54, 56, 58, 60, 62,
4056 );
4057
4058 assert_eq_m256i(r, e);
4059 }
4060
4061 #[simd_test(enable = "avx2")]
4062 unsafe fn test_mm256_adds_epu16_saturate() {
4063 let a = _mm256_set1_epi16(!0);
4064 let b = _mm256_set1_epi16(1);
4065 let r = _mm256_adds_epu16(a, b);
4066 assert_eq_m256i(r, a);
4067 }
4068
4069 #[simd_test(enable = "avx2")]
4070 unsafe fn test_mm256_and_si256() {
4071 let a = _mm256_set1_epi8(5);
4072 let b = _mm256_set1_epi8(3);
4073 let got = _mm256_and_si256(a, b);
4074 assert_eq_m256i(got, _mm256_set1_epi8(1));
4075 }
4076
4077 #[simd_test(enable = "avx2")]
4078 unsafe fn test_mm256_andnot_si256() {
4079 let a = _mm256_set1_epi8(5);
4080 let b = _mm256_set1_epi8(3);
4081 let got = _mm256_andnot_si256(a, b);
4082 assert_eq_m256i(got, _mm256_set1_epi8(2));
4083 }
4084
4085 #[simd_test(enable = "avx2")]
4086 unsafe fn test_mm256_avg_epu8() {
4087 let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9));
4088 let r = _mm256_avg_epu8(a, b);
4089 assert_eq_m256i(r, _mm256_set1_epi8(6));
4090 }
4091
4092 #[simd_test(enable = "avx2")]
4093 unsafe fn test_mm256_avg_epu16() {
4094 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4095 let r = _mm256_avg_epu16(a, b);
4096 assert_eq_m256i(r, _mm256_set1_epi16(6));
4097 }
4098
4099 #[simd_test(enable = "avx2")]
4100 unsafe fn test_mm_blend_epi32() {
4101 let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
4102 let e = _mm_setr_epi32(9, 3, 3, 3);
4103 let r = _mm_blend_epi32::<0x01>(a, b);
4104 assert_eq_m128i(r, e);
4105
4106 let r = _mm_blend_epi32::<0x0E>(b, a);
4107 assert_eq_m128i(r, e);
4108 }
4109
4110 #[simd_test(enable = "avx2")]
4111 unsafe fn test_mm256_blend_epi32() {
4112 let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9));
4113 let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3);
4114 let r = _mm256_blend_epi32::<0x01>(a, b);
4115 assert_eq_m256i(r, e);
4116
4117 let e = _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9);
4118 let r = _mm256_blend_epi32::<0x82>(a, b);
4119 assert_eq_m256i(r, e);
4120
4121 let e = _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3);
4122 let r = _mm256_blend_epi32::<0x7C>(a, b);
4123 assert_eq_m256i(r, e);
4124 }
4125
4126 #[simd_test(enable = "avx2")]
4127 unsafe fn test_mm256_blend_epi16() {
4128 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4129 let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3);
4130 let r = _mm256_blend_epi16::<0x01>(a, b);
4131 assert_eq_m256i(r, e);
4132
4133 let r = _mm256_blend_epi16::<0xFE>(b, a);
4134 assert_eq_m256i(r, e);
4135 }
4136
4137 #[simd_test(enable = "avx2")]
4138 unsafe fn test_mm256_blendv_epi8() {
4139 let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2));
4140 let mask = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1);
4141 let e = _mm256_insert_epi8::<2>(_mm256_set1_epi8(4), 2);
4142 let r = _mm256_blendv_epi8(a, b, mask);
4143 assert_eq_m256i(r, e);
4144 }
4145
4146 #[simd_test(enable = "avx2")]
4147 unsafe fn test_mm_broadcastb_epi8() {
4148 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4149 let res = _mm_broadcastb_epi8(a);
4150 assert_eq_m128i(res, _mm_set1_epi8(0x2a));
4151 }
4152
4153 #[simd_test(enable = "avx2")]
4154 unsafe fn test_mm256_broadcastb_epi8() {
4155 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4156 let res = _mm256_broadcastb_epi8(a);
4157 assert_eq_m256i(res, _mm256_set1_epi8(0x2a));
4158 }
4159
4160 #[simd_test(enable = "avx2")]
4161 unsafe fn test_mm_broadcastd_epi32() {
4162 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4163 let res = _mm_broadcastd_epi32(a);
4164 assert_eq_m128i(res, _mm_set1_epi32(0x2a));
4165 }
4166
4167 #[simd_test(enable = "avx2")]
4168 unsafe fn test_mm256_broadcastd_epi32() {
4169 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4170 let res = _mm256_broadcastd_epi32(a);
4171 assert_eq_m256i(res, _mm256_set1_epi32(0x2a));
4172 }
4173
4174 #[simd_test(enable = "avx2")]
4175 unsafe fn test_mm_broadcastq_epi64() {
4176 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4177 let res = _mm_broadcastq_epi64(a);
4178 assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff));
4179 }
4180
4181 #[simd_test(enable = "avx2")]
4182 unsafe fn test_mm256_broadcastq_epi64() {
4183 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4184 let res = _mm256_broadcastq_epi64(a);
4185 assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff));
4186 }
4187
4188 #[simd_test(enable = "avx2")]
4189 unsafe fn test_mm_broadcastsd_pd() {
4190 let a = _mm_setr_pd(6.88, 3.44);
4191 let res = _mm_broadcastsd_pd(a);
4192 assert_eq_m128d(res, _mm_set1_pd(6.88));
4193 }
4194
4195 #[simd_test(enable = "avx2")]
4196 unsafe fn test_mm256_broadcastsd_pd() {
4197 let a = _mm_setr_pd(6.88, 3.44);
4198 let res = _mm256_broadcastsd_pd(a);
4199 assert_eq_m256d(res, _mm256_set1_pd(6.88f64));
4200 }
4201
4202 #[simd_test(enable = "avx2")]
4203 unsafe fn test_mm_broadcastsi128_si256() {
4204 let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
4205 let res = _mm_broadcastsi128_si256(a);
4206 let retval = _mm256_setr_epi64x(
4207 0x0987654321012334,
4208 0x5678909876543210,
4209 0x0987654321012334,
4210 0x5678909876543210,
4211 );
4212 assert_eq_m256i(res, retval);
4213 }
4214
4215 #[simd_test(enable = "avx2")]
4216 unsafe fn test_mm256_broadcastsi128_si256() {
4217 let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
4218 let res = _mm256_broadcastsi128_si256(a);
4219 let retval = _mm256_setr_epi64x(
4220 0x0987654321012334,
4221 0x5678909876543210,
4222 0x0987654321012334,
4223 0x5678909876543210,
4224 );
4225 assert_eq_m256i(res, retval);
4226 }
4227
4228 #[simd_test(enable = "avx2")]
4229 unsafe fn test_mm_broadcastss_ps() {
4230 let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
4231 let res = _mm_broadcastss_ps(a);
4232 assert_eq_m128(res, _mm_set1_ps(6.88));
4233 }
4234
4235 #[simd_test(enable = "avx2")]
4236 unsafe fn test_mm256_broadcastss_ps() {
4237 let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
4238 let res = _mm256_broadcastss_ps(a);
4239 assert_eq_m256(res, _mm256_set1_ps(6.88));
4240 }
4241
4242 #[simd_test(enable = "avx2")]
4243 unsafe fn test_mm_broadcastw_epi16() {
4244 let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4245 let res = _mm_broadcastw_epi16(a);
4246 assert_eq_m128i(res, _mm_set1_epi16(0x22b));
4247 }
4248
4249 #[simd_test(enable = "avx2")]
4250 unsafe fn test_mm256_broadcastw_epi16() {
4251 let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4252 let res = _mm256_broadcastw_epi16(a);
4253 assert_eq_m256i(res, _mm256_set1_epi16(0x22b));
4254 }
4255
4256 #[simd_test(enable = "avx2")]
4257 unsafe fn test_mm256_cmpeq_epi8() {
4258 #[rustfmt::skip]
4259 let a = _mm256_setr_epi8(
4260 0, 1, 2, 3, 4, 5, 6, 7,
4261 8, 9, 10, 11, 12, 13, 14, 15,
4262 16, 17, 18, 19, 20, 21, 22, 23,
4263 24, 25, 26, 27, 28, 29, 30, 31,
4264 );
4265 #[rustfmt::skip]
4266 let b = _mm256_setr_epi8(
4267 31, 30, 2, 28, 27, 26, 25, 24,
4268 23, 22, 21, 20, 19, 18, 17, 16,
4269 15, 14, 13, 12, 11, 10, 9, 8,
4270 7, 6, 5, 4, 3, 2, 1, 0,
4271 );
4272 let r = _mm256_cmpeq_epi8(a, b);
4273 assert_eq_m256i(r, _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), !0));
4274 }
4275
4276 #[simd_test(enable = "avx2")]
4277 unsafe fn test_mm256_cmpeq_epi16() {
4278 #[rustfmt::skip]
4279 let a = _mm256_setr_epi16(
4280 0, 1, 2, 3, 4, 5, 6, 7,
4281 8, 9, 10, 11, 12, 13, 14, 15,
4282 );
4283 #[rustfmt::skip]
4284 let b = _mm256_setr_epi16(
4285 15, 14, 2, 12, 11, 10, 9, 8,
4286 7, 6, 5, 4, 3, 2, 1, 0,
4287 );
4288 let r = _mm256_cmpeq_epi16(a, b);
4289 assert_eq_m256i(r, _mm256_insert_epi16::<2>(_mm256_set1_epi16(0), !0));
4290 }
4291
4292 #[simd_test(enable = "avx2")]
4293 unsafe fn test_mm256_cmpeq_epi32() {
4294 let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4295 let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0);
4296 let r = _mm256_cmpeq_epi32(a, b);
4297 let e = _mm256_set1_epi32(0);
4298 let e = _mm256_insert_epi32::<2>(e, !0);
4299 assert_eq_m256i(r, e);
4300 }
4301
4302 #[simd_test(enable = "avx2")]
4303 unsafe fn test_mm256_cmpeq_epi64() {
4304 let a = _mm256_setr_epi64x(0, 1, 2, 3);
4305 let b = _mm256_setr_epi64x(3, 2, 2, 0);
4306 let r = _mm256_cmpeq_epi64(a, b);
4307 assert_eq_m256i(r, _mm256_insert_epi64::<2>(_mm256_set1_epi64x(0), !0));
4308 }
4309
4310 #[simd_test(enable = "avx2")]
4311 unsafe fn test_mm256_cmpgt_epi8() {
4312 let a = _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), 5);
4313 let b = _mm256_set1_epi8(0);
4314 let r = _mm256_cmpgt_epi8(a, b);
4315 assert_eq_m256i(r, _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), !0));
4316 }
4317
4318 #[simd_test(enable = "avx2")]
4319 unsafe fn test_mm256_cmpgt_epi16() {
4320 let a = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 5);
4321 let b = _mm256_set1_epi16(0);
4322 let r = _mm256_cmpgt_epi16(a, b);
4323 assert_eq_m256i(r, _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), !0));
4324 }
4325
4326 #[simd_test(enable = "avx2")]
4327 unsafe fn test_mm256_cmpgt_epi32() {
4328 let a = _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), 5);
4329 let b = _mm256_set1_epi32(0);
4330 let r = _mm256_cmpgt_epi32(a, b);
4331 assert_eq_m256i(r, _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), !0));
4332 }
4333
4334 #[simd_test(enable = "avx2")]
4335 unsafe fn test_mm256_cmpgt_epi64() {
4336 let a = _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), 5);
4337 let b = _mm256_set1_epi64x(0);
4338 let r = _mm256_cmpgt_epi64(a, b);
4339 assert_eq_m256i(r, _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), !0));
4340 }
4341
4342 #[simd_test(enable = "avx2")]
4343 unsafe fn test_mm256_cvtepi8_epi16() {
4344 #[rustfmt::skip]
4345 let a = _mm_setr_epi8(
4346 0, 0, -1, 1, -2, 2, -3, 3,
4347 -4, 4, -5, 5, -6, 6, -7, 7,
4348 );
4349 #[rustfmt::skip]
4350 let r = _mm256_setr_epi16(
4351 0, 0, -1, 1, -2, 2, -3, 3,
4352 -4, 4, -5, 5, -6, 6, -7, 7,
4353 );
4354 assert_eq_m256i(r, _mm256_cvtepi8_epi16(a));
4355 }
4356
4357 #[simd_test(enable = "avx2")]
4358 unsafe fn test_mm256_cvtepi8_epi32() {
4359 #[rustfmt::skip]
4360 let a = _mm_setr_epi8(
4361 0, 0, -1, 1, -2, 2, -3, 3,
4362 -4, 4, -5, 5, -6, 6, -7, 7,
4363 );
4364 let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
4365 assert_eq_m256i(r, _mm256_cvtepi8_epi32(a));
4366 }
4367
4368 #[simd_test(enable = "avx2")]
4369 unsafe fn test_mm256_cvtepi8_epi64() {
4370 #[rustfmt::skip]
4371 let a = _mm_setr_epi8(
4372 0, 0, -1, 1, -2, 2, -3, 3,
4373 -4, 4, -5, 5, -6, 6, -7, 7,
4374 );
4375 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4376 assert_eq_m256i(r, _mm256_cvtepi8_epi64(a));
4377 }
4378
4379 #[simd_test(enable = "avx2")]
4380 unsafe fn test_mm256_cvtepi16_epi32() {
4381 let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4382 let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
4383 assert_eq_m256i(r, _mm256_cvtepi16_epi32(a));
4384 }
4385
4386 #[simd_test(enable = "avx2")]
4387 unsafe fn test_mm256_cvtepi16_epi64() {
4388 let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4389 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4390 assert_eq_m256i(r, _mm256_cvtepi16_epi64(a));
4391 }
4392
4393 #[simd_test(enable = "avx2")]
4394 unsafe fn test_mm256_cvtepi32_epi64() {
4395 let a = _mm_setr_epi32(0, 0, -1, 1);
4396 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4397 assert_eq_m256i(r, _mm256_cvtepi32_epi64(a));
4398 }
4399
4400 #[simd_test(enable = "avx2")]
4401 unsafe fn test_mm256_cvtepu16_epi32() {
4402 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4403 let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4404 assert_eq_m256i(r, _mm256_cvtepu16_epi32(a));
4405 }
4406
4407 #[simd_test(enable = "avx2")]
4408 unsafe fn test_mm256_cvtepu16_epi64() {
4409 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4410 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4411 assert_eq_m256i(r, _mm256_cvtepu16_epi64(a));
4412 }
4413
4414 #[simd_test(enable = "avx2")]
4415 unsafe fn test_mm256_cvtepu32_epi64() {
4416 let a = _mm_setr_epi32(0, 1, 2, 3);
4417 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4418 assert_eq_m256i(r, _mm256_cvtepu32_epi64(a));
4419 }
4420
4421 #[simd_test(enable = "avx2")]
4422 unsafe fn test_mm256_cvtepu8_epi16() {
4423 #[rustfmt::skip]
4424 let a = _mm_setr_epi8(
4425 0, 1, 2, 3, 4, 5, 6, 7,
4426 8, 9, 10, 11, 12, 13, 14, 15,
4427 );
4428 #[rustfmt::skip]
4429 let r = _mm256_setr_epi16(
4430 0, 1, 2, 3, 4, 5, 6, 7,
4431 8, 9, 10, 11, 12, 13, 14, 15,
4432 );
4433 assert_eq_m256i(r, _mm256_cvtepu8_epi16(a));
4434 }
4435
4436 #[simd_test(enable = "avx2")]
4437 unsafe fn test_mm256_cvtepu8_epi32() {
4438 #[rustfmt::skip]
4439 let a = _mm_setr_epi8(
4440 0, 1, 2, 3, 4, 5, 6, 7,
4441 8, 9, 10, 11, 12, 13, 14, 15,
4442 );
4443 let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4444 assert_eq_m256i(r, _mm256_cvtepu8_epi32(a));
4445 }
4446
4447 #[simd_test(enable = "avx2")]
4448 unsafe fn test_mm256_cvtepu8_epi64() {
4449 #[rustfmt::skip]
4450 let a = _mm_setr_epi8(
4451 0, 1, 2, 3, 4, 5, 6, 7,
4452 8, 9, 10, 11, 12, 13, 14, 15,
4453 );
4454 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4455 assert_eq_m256i(r, _mm256_cvtepu8_epi64(a));
4456 }
4457
4458 #[simd_test(enable = "avx2")]
4459 unsafe fn test_mm256_extracti128_si256() {
4460 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4461 let r = _mm256_extracti128_si256::<1>(a);
4462 let e = _mm_setr_epi64x(3, 4);
4463 assert_eq_m128i(r, e);
4464 }
4465
4466 #[simd_test(enable = "avx2")]
4467 unsafe fn test_mm256_hadd_epi16() {
4468 let a = _mm256_set1_epi16(2);
4469 let b = _mm256_set1_epi16(4);
4470 let r = _mm256_hadd_epi16(a, b);
4471 let e = _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
4472 assert_eq_m256i(r, e);
4473 }
4474
4475 #[simd_test(enable = "avx2")]
4476 unsafe fn test_mm256_hadd_epi32() {
4477 let a = _mm256_set1_epi32(2);
4478 let b = _mm256_set1_epi32(4);
4479 let r = _mm256_hadd_epi32(a, b);
4480 let e = _mm256_setr_epi32(4, 4, 8, 8, 4, 4, 8, 8);
4481 assert_eq_m256i(r, e);
4482 }
4483
4484 #[simd_test(enable = "avx2")]
4485 unsafe fn test_mm256_hadds_epi16() {
4486 let a = _mm256_set1_epi16(2);
4487 let a = _mm256_insert_epi16::<0>(a, 0x7fff);
4488 let a = _mm256_insert_epi16::<1>(a, 1);
4489 let b = _mm256_set1_epi16(4);
4490 let r = _mm256_hadds_epi16(a, b);
4491 #[rustfmt::skip]
4492 let e = _mm256_setr_epi16(
4493 0x7FFF, 4, 4, 4, 8, 8, 8, 8,
4494 4, 4, 4, 4, 8, 8, 8, 8,
4495 );
4496 assert_eq_m256i(r, e);
4497 }
4498
4499 #[simd_test(enable = "avx2")]
4500 unsafe fn test_mm256_hsub_epi16() {
4501 let a = _mm256_set1_epi16(2);
4502 let b = _mm256_set1_epi16(4);
4503 let r = _mm256_hsub_epi16(a, b);
4504 let e = _mm256_set1_epi16(0);
4505 assert_eq_m256i(r, e);
4506 }
4507
4508 #[simd_test(enable = "avx2")]
4509 unsafe fn test_mm256_hsub_epi32() {
4510 let a = _mm256_set1_epi32(2);
4511 let b = _mm256_set1_epi32(4);
4512 let r = _mm256_hsub_epi32(a, b);
4513 let e = _mm256_set1_epi32(0);
4514 assert_eq_m256i(r, e);
4515 }
4516
4517 #[simd_test(enable = "avx2")]
4518 unsafe fn test_mm256_hsubs_epi16() {
4519 let a = _mm256_set1_epi16(2);
4520 let a = _mm256_insert_epi16::<0>(a, 0x7fff);
4521 let a = _mm256_insert_epi16::<1>(a, -1);
4522 let b = _mm256_set1_epi16(4);
4523 let r = _mm256_hsubs_epi16(a, b);
4524 let e = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 0x7FFF);
4525 assert_eq_m256i(r, e);
4526 }
4527
4528 #[simd_test(enable = "avx2")]
4529 unsafe fn test_mm256_madd_epi16() {
4530 let a = _mm256_set1_epi16(2);
4531 let b = _mm256_set1_epi16(4);
4532 let r = _mm256_madd_epi16(a, b);
4533 let e = _mm256_set1_epi32(16);
4534 assert_eq_m256i(r, e);
4535 }
4536
4537 #[simd_test(enable = "avx2")]
4538 unsafe fn test_mm256_inserti128_si256() {
4539 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4540 let b = _mm_setr_epi64x(7, 8);
4541 let r = _mm256_inserti128_si256::<1>(a, b);
4542 let e = _mm256_setr_epi64x(1, 2, 7, 8);
4543 assert_eq_m256i(r, e);
4544 }
4545
4546 #[simd_test(enable = "avx2")]
4547 unsafe fn test_mm256_maddubs_epi16() {
4548 let a = _mm256_set1_epi8(2);
4549 let b = _mm256_set1_epi8(4);
4550 let r = _mm256_maddubs_epi16(a, b);
4551 let e = _mm256_set1_epi16(16);
4552 assert_eq_m256i(r, e);
4553 }
4554
4555 #[simd_test(enable = "avx2")]
4556 unsafe fn test_mm_maskload_epi32() {
4557 let nums = [1, 2, 3, 4];
4558 let a = &nums as *const i32;
4559 let mask = _mm_setr_epi32(-1, 0, 0, -1);
4560 let r = _mm_maskload_epi32(a, mask);
4561 let e = _mm_setr_epi32(1, 0, 0, 4);
4562 assert_eq_m128i(r, e);
4563 }
4564
4565 #[simd_test(enable = "avx2")]
4566 unsafe fn test_mm256_maskload_epi32() {
4567 let nums = [1, 2, 3, 4, 5, 6, 7, 8];
4568 let a = &nums as *const i32;
4569 let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
4570 let r = _mm256_maskload_epi32(a, mask);
4571 let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0);
4572 assert_eq_m256i(r, e);
4573 }
4574
4575 #[simd_test(enable = "avx2")]
4576 unsafe fn test_mm_maskload_epi64() {
4577 let nums = [1_i64, 2_i64];
4578 let a = &nums as *const i64;
4579 let mask = _mm_setr_epi64x(0, -1);
4580 let r = _mm_maskload_epi64(a, mask);
4581 let e = _mm_setr_epi64x(0, 2);
4582 assert_eq_m128i(r, e);
4583 }
4584
4585 #[simd_test(enable = "avx2")]
4586 unsafe fn test_mm256_maskload_epi64() {
4587 let nums = [1_i64, 2_i64, 3_i64, 4_i64];
4588 let a = &nums as *const i64;
4589 let mask = _mm256_setr_epi64x(0, -1, -1, 0);
4590 let r = _mm256_maskload_epi64(a, mask);
4591 let e = _mm256_setr_epi64x(0, 2, 3, 0);
4592 assert_eq_m256i(r, e);
4593 }
4594
4595 #[simd_test(enable = "avx2")]
4596 unsafe fn test_mm_maskstore_epi32() {
4597 let a = _mm_setr_epi32(1, 2, 3, 4);
4598 let mut arr = [-1, -1, -1, -1];
4599 let mask = _mm_setr_epi32(-1, 0, 0, -1);
4600 _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4601 let e = [1, -1, -1, 4];
4602 assert_eq!(arr, e);
4603 }
4604
4605 #[simd_test(enable = "avx2")]
4606 unsafe fn test_mm256_maskstore_epi32() {
4607 let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
4608 let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
4609 let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
4610 _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4611 let e = [1, -1, -1, 42, -1, 6, 7, -1];
4612 assert_eq!(arr, e);
4613 }
4614
4615 #[simd_test(enable = "avx2")]
4616 unsafe fn test_mm_maskstore_epi64() {
4617 let a = _mm_setr_epi64x(1_i64, 2_i64);
4618 let mut arr = [-1_i64, -1_i64];
4619 let mask = _mm_setr_epi64x(0, -1);
4620 _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4621 let e = [-1, 2];
4622 assert_eq!(arr, e);
4623 }
4624
4625 #[simd_test(enable = "avx2")]
4626 unsafe fn test_mm256_maskstore_epi64() {
4627 let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
4628 let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64];
4629 let mask = _mm256_setr_epi64x(0, -1, -1, 0);
4630 _mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4631 let e = [-1, 2, 3, -1];
4632 assert_eq!(arr, e);
4633 }
4634
4635 #[simd_test(enable = "avx2")]
4636 unsafe fn test_mm256_max_epi16() {
4637 let a = _mm256_set1_epi16(2);
4638 let b = _mm256_set1_epi16(4);
4639 let r = _mm256_max_epi16(a, b);
4640 assert_eq_m256i(r, b);
4641 }
4642
4643 #[simd_test(enable = "avx2")]
4644 unsafe fn test_mm256_max_epi32() {
4645 let a = _mm256_set1_epi32(2);
4646 let b = _mm256_set1_epi32(4);
4647 let r = _mm256_max_epi32(a, b);
4648 assert_eq_m256i(r, b);
4649 }
4650
4651 #[simd_test(enable = "avx2")]
4652 unsafe fn test_mm256_max_epi8() {
4653 let a = _mm256_set1_epi8(2);
4654 let b = _mm256_set1_epi8(4);
4655 let r = _mm256_max_epi8(a, b);
4656 assert_eq_m256i(r, b);
4657 }
4658
4659 #[simd_test(enable = "avx2")]
4660 unsafe fn test_mm256_max_epu16() {
4661 let a = _mm256_set1_epi16(2);
4662 let b = _mm256_set1_epi16(4);
4663 let r = _mm256_max_epu16(a, b);
4664 assert_eq_m256i(r, b);
4665 }
4666
4667 #[simd_test(enable = "avx2")]
4668 unsafe fn test_mm256_max_epu32() {
4669 let a = _mm256_set1_epi32(2);
4670 let b = _mm256_set1_epi32(4);
4671 let r = _mm256_max_epu32(a, b);
4672 assert_eq_m256i(r, b);
4673 }
4674
4675 #[simd_test(enable = "avx2")]
4676 unsafe fn test_mm256_max_epu8() {
4677 let a = _mm256_set1_epi8(2);
4678 let b = _mm256_set1_epi8(4);
4679 let r = _mm256_max_epu8(a, b);
4680 assert_eq_m256i(r, b);
4681 }
4682
4683 #[simd_test(enable = "avx2")]
4684 unsafe fn test_mm256_min_epi16() {
4685 let a = _mm256_set1_epi16(2);
4686 let b = _mm256_set1_epi16(4);
4687 let r = _mm256_min_epi16(a, b);
4688 assert_eq_m256i(r, a);
4689 }
4690
4691 #[simd_test(enable = "avx2")]
4692 unsafe fn test_mm256_min_epi32() {
4693 let a = _mm256_set1_epi32(2);
4694 let b = _mm256_set1_epi32(4);
4695 let r = _mm256_min_epi32(a, b);
4696 assert_eq_m256i(r, a);
4697 }
4698
4699 #[simd_test(enable = "avx2")]
4700 unsafe fn test_mm256_min_epi8() {
4701 let a = _mm256_set1_epi8(2);
4702 let b = _mm256_set1_epi8(4);
4703 let r = _mm256_min_epi8(a, b);
4704 assert_eq_m256i(r, a);
4705 }
4706
4707 #[simd_test(enable = "avx2")]
4708 unsafe fn test_mm256_min_epu16() {
4709 let a = _mm256_set1_epi16(2);
4710 let b = _mm256_set1_epi16(4);
4711 let r = _mm256_min_epu16(a, b);
4712 assert_eq_m256i(r, a);
4713 }
4714
4715 #[simd_test(enable = "avx2")]
4716 unsafe fn test_mm256_min_epu32() {
4717 let a = _mm256_set1_epi32(2);
4718 let b = _mm256_set1_epi32(4);
4719 let r = _mm256_min_epu32(a, b);
4720 assert_eq_m256i(r, a);
4721 }
4722
4723 #[simd_test(enable = "avx2")]
4724 unsafe fn test_mm256_min_epu8() {
4725 let a = _mm256_set1_epi8(2);
4726 let b = _mm256_set1_epi8(4);
4727 let r = _mm256_min_epu8(a, b);
4728 assert_eq_m256i(r, a);
4729 }
4730
4731 #[simd_test(enable = "avx2")]
4732 unsafe fn test_mm256_movemask_epi8() {
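// Every byte has its sign bit set, so all 32 mask bits are set and the result as an i32 is -1.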
4733 let a = _mm256_set1_epi8(-1);
4734 let r = _mm256_movemask_epi8(a);
4735 let e = -1;
4736 assert_eq!(r, e);
4737 }
4738
4739 #[simd_test(enable = "avx2")]
4740 unsafe fn test_mm256_mpsadbw_epu8() {
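// With an offset of 0, each result lane is a sum of four absolute differences: 4 * |2 - 4| = 8.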
4741 let a = _mm256_set1_epi8(2);
4742 let b = _mm256_set1_epi8(4);
4743 let r = _mm256_mpsadbw_epu8::<0>(a, b);
4744 let e = _mm256_set1_epi16(8);
4745 assert_eq_m256i(r, e);
4746 }
4747
4748 #[simd_test(enable = "avx2")]
4749 unsafe fn test_mm256_mul_epi32() {
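// Only the even-indexed 32-bit elements (0, 2, 4, 6) are multiplied, producing sign-extended 64-bit results.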
4750 let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
4751 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4752 let r = _mm256_mul_epi32(a, b);
4753 let e = _mm256_setr_epi64x(0, 0, 10, 14);
4754 assert_eq_m256i(r, e);
4755 }
4756
4757 #[simd_test(enable = "avx2")]
4758 unsafe fn test_mm256_mul_epu32() {
4759 let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
4760 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4761 let r = _mm256_mul_epu32(a, b);
4762 let e = _mm256_setr_epi64x(0, 0, 10, 14);
4763 assert_eq_m256i(r, e);
4764 }
4765
4766 #[simd_test(enable = "avx2")]
4767 unsafe fn test_mm256_mulhi_epi16() {
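// 6535 * 6535 = 42706225; the high 16 bits of the 32-bit product are 42706225 >> 16 = 651.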
4768 let a = _mm256_set1_epi16(6535);
4769 let b = _mm256_set1_epi16(6535);
4770 let r = _mm256_mulhi_epi16(a, b);
4771 let e = _mm256_set1_epi16(651);
4772 assert_eq_m256i(r, e);
4773 }
4774
4775 #[simd_test(enable = "avx2")]
4776 unsafe fn test_mm256_mulhi_epu16() {
4777 let a = _mm256_set1_epi16(6535);
4778 let b = _mm256_set1_epi16(6535);
4779 let r = _mm256_mulhi_epu16(a, b);
4780 let e = _mm256_set1_epi16(651);
4781 assert_eq_m256i(r, e);
4782 }
4783
4784 #[simd_test(enable = "avx2")]
4785 unsafe fn test_mm256_mullo_epi16() {
4786 let a = _mm256_set1_epi16(2);
4787 let b = _mm256_set1_epi16(4);
4788 let r = _mm256_mullo_epi16(a, b);
4789 let e = _mm256_set1_epi16(8);
4790 assert_eq_m256i(r, e);
4791 }
4792
4793 #[simd_test(enable = "avx2")]
4794 unsafe fn test_mm256_mullo_epi32() {
4795 let a = _mm256_set1_epi32(2);
4796 let b = _mm256_set1_epi32(4);
4797 let r = _mm256_mullo_epi32(a, b);
4798 let e = _mm256_set1_epi32(8);
4799 assert_eq_m256i(r, e);
4800 }
4801
4802 #[simd_test(enable = "avx2")]
4803 unsafe fn test_mm256_mulhrs_epi16() {
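// _mm256_mulhrs_epi16 computes round((a * b) / 2^15) per lane: 0x2000 * 0x4000 = 2^27, which scales down to 0x1000.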
4804 let a = _mm256_set1_epi16(0x2000);
4805 let b = _mm256_set1_epi16(0x4000);
4806 let r = _mm256_mulhrs_epi16(a, b);
4807 let e = _mm256_set1_epi16(0x1000);
4808 assert_eq_m256i(r, e);
4809 }
4810
4811 #[simd_test(enable = "avx2")]
4812 unsafe fn test_mm256_or_si256() {
4813 let a = _mm256_set1_epi8(-1);
4814 let b = _mm256_set1_epi8(0);
4815 let r = _mm256_or_si256(a, b);
4816 assert_eq_m256i(r, a);
4817 }
4818
4819 #[simd_test(enable = "avx2")]
4820 unsafe fn test_mm256_packs_epi16() {
4821 let a = _mm256_set1_epi16(2);
4822 let b = _mm256_set1_epi16(4);
4823 let r = _mm256_packs_epi16(a, b);
4824 #[rustfmt::skip]
4825 let e = _mm256_setr_epi8(
4826 2, 2, 2, 2, 2, 2, 2, 2,
4827 4, 4, 4, 4, 4, 4, 4, 4,
4828 2, 2, 2, 2, 2, 2, 2, 2,
4829 4, 4, 4, 4, 4, 4, 4, 4,
4830 );
4831
4832 assert_eq_m256i(r, e);
4833 }
4834
4835 #[simd_test(enable = "avx2")]
4836 unsafe fn test_mm256_packs_epi32() {
4837 let a = _mm256_set1_epi32(2);
4838 let b = _mm256_set1_epi32(4);
4839 let r = _mm256_packs_epi32(a, b);
4840 let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
4841
4842 assert_eq_m256i(r, e);
4843 }
4844
4845 #[simd_test(enable = "avx2")]
4846 unsafe fn test_mm256_packus_epi16() {
4847 let a = _mm256_set1_epi16(2);
4848 let b = _mm256_set1_epi16(4);
4849 let r = _mm256_packus_epi16(a, b);
4850 #[rustfmt::skip]
4851 let e = _mm256_setr_epi8(
4852 2, 2, 2, 2, 2, 2, 2, 2,
4853 4, 4, 4, 4, 4, 4, 4, 4,
4854 2, 2, 2, 2, 2, 2, 2, 2,
4855 4, 4, 4, 4, 4, 4, 4, 4,
4856 );
4857
4858 assert_eq_m256i(r, e);
4859 }
4860
4861 #[simd_test(enable = "avx2")]
4862 unsafe fn test_mm256_packus_epi32() {
4863 let a = _mm256_set1_epi32(2);
4864 let b = _mm256_set1_epi32(4);
4865 let r = _mm256_packus_epi32(a, b);
4866 let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
4867
4868 assert_eq_m256i(r, e);
4869 }
4870
4871 #[simd_test(enable = "avx2")]
4872 unsafe fn test_mm256_sad_epu8() {
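// Each 64-bit result is the sum of absolute differences over 8 bytes: 8 * |2 - 4| = 16.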
4873 let a = _mm256_set1_epi8(2);
4874 let b = _mm256_set1_epi8(4);
4875 let r = _mm256_sad_epu8(a, b);
4876 let e = _mm256_set1_epi64x(16);
4877 assert_eq_m256i(r, e);
4878 }
4879
4880 #[simd_test(enable = "avx2")]
4881 unsafe fn test_mm256_shufflehi_epi16() {
4882 #[rustfmt::skip]
4883 let a = _mm256_setr_epi16(
4884 0, 1, 2, 3, 11, 22, 33, 44,
4885 4, 5, 6, 7, 55, 66, 77, 88,
4886 );
4887 #[rustfmt::skip]
4888 let e = _mm256_setr_epi16(
4889 0, 1, 2, 3, 44, 22, 22, 11,
4890 4, 5, 6, 7, 88, 66, 66, 55,
4891 );
4892 let r = _mm256_shufflehi_epi16::<0b00_01_01_11>(a);
4893 assert_eq_m256i(r, e);
4894 }
4895
4896 #[simd_test(enable = "avx2")]
4897 unsafe fn test_mm256_shufflelo_epi16() {
4898 #[rustfmt::skip]
4899 let a = _mm256_setr_epi16(
4900 11, 22, 33, 44, 0, 1, 2, 3,
4901 55, 66, 77, 88, 4, 5, 6, 7,
4902 );
4903 #[rustfmt::skip]
4904 let e = _mm256_setr_epi16(
4905 44, 22, 22, 11, 0, 1, 2, 3,
4906 88, 66, 66, 55, 4, 5, 6, 7,
4907 );
4908 let r = _mm256_shufflelo_epi16::<0b00_01_01_11>(a);
4909 assert_eq_m256i(r, e);
4910 }
4911
4912 #[simd_test(enable = "avx2")]
4913 unsafe fn test_mm256_sign_epi16() {
4914 let a = _mm256_set1_epi16(2);
4915 let b = _mm256_set1_epi16(-1);
4916 let r = _mm256_sign_epi16(a, b);
4917 let e = _mm256_set1_epi16(-2);
4918 assert_eq_m256i(r, e);
4919 }
4920
4921 #[simd_test(enable = "avx2")]
4922 unsafe fn test_mm256_sign_epi32() {
4923 let a = _mm256_set1_epi32(2);
4924 let b = _mm256_set1_epi32(-1);
4925 let r = _mm256_sign_epi32(a, b);
4926 let e = _mm256_set1_epi32(-2);
4927 assert_eq_m256i(r, e);
4928 }
4929
4930 #[simd_test(enable = "avx2")]
4931 unsafe fn test_mm256_sign_epi8() {
4932 let a = _mm256_set1_epi8(2);
4933 let b = _mm256_set1_epi8(-1);
4934 let r = _mm256_sign_epi8(a, b);
4935 let e = _mm256_set1_epi8(-2);
4936 assert_eq_m256i(r, e);
4937 }
4938
4939 #[simd_test(enable = "avx2")]
4940 unsafe fn test_mm256_sll_epi16() {
4941 let a = _mm256_set1_epi16(0xFF);
4942 let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
4943 let r = _mm256_sll_epi16(a, b);
4944 assert_eq_m256i(r, _mm256_set1_epi16(0xFF0));
4945 }
4946
4947 #[simd_test(enable = "avx2")]
4948 unsafe fn test_mm256_sll_epi32() {
4949 let a = _mm256_set1_epi32(0xFFFF);
4950 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
4951 let r = _mm256_sll_epi32(a, b);
4952 assert_eq_m256i(r, _mm256_set1_epi32(0xFFFF0));
4953 }
4954
4955 #[simd_test(enable = "avx2")]
4956 unsafe fn test_mm256_sll_epi64() {
4957 let a = _mm256_set1_epi64x(0xFFFFFFFF);
4958 let b = _mm_insert_epi64::<0>(_mm_set1_epi64x(0), 4);
4959 let r = _mm256_sll_epi64(a, b);
4960 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF0));
4961 }
4962
4963 #[simd_test(enable = "avx2")]
4964 unsafe fn test_mm256_slli_epi16() {
4965 assert_eq_m256i(
4966 _mm256_slli_epi16::<4>(_mm256_set1_epi16(0xFF)),
4967 _mm256_set1_epi16(0xFF0),
4968 );
4969 }
4970
4971 #[simd_test(enable = "avx2")]
4972 unsafe fn test_mm256_slli_epi32() {
4973 assert_eq_m256i(
4974 _mm256_slli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
4975 _mm256_set1_epi32(0xFFFF0),
4976 );
4977 }
4978
4979 #[simd_test(enable = "avx2")]
4980 unsafe fn test_mm256_slli_epi64() {
4981 assert_eq_m256i(
4982 _mm256_slli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
4983 _mm256_set1_epi64x(0xFFFFFFFF0),
4984 );
4985 }
4986
4987 #[simd_test(enable = "avx2")]
4988 unsafe fn test_mm256_slli_si256() {
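// The byte shift is performed independently within each 128-bit lane.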
4989 let a = _mm256_set1_epi64x(0xFFFFFFFF);
4990 let r = _mm256_slli_si256::<3>(a);
4991 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
4992 }
4993
4994 #[simd_test(enable = "avx2")]
4995 unsafe fn test_mm_sllv_epi32() {
4996 let a = _mm_set1_epi32(2);
4997 let b = _mm_set1_epi32(1);
4998 let r = _mm_sllv_epi32(a, b);
4999 let e = _mm_set1_epi32(4);
5000 assert_eq_m128i(r, e);
5001 }
5002
5003 #[simd_test(enable = "avx2")]
5004 unsafe fn test_mm256_sllv_epi32() {
5005 let a = _mm256_set1_epi32(2);
5006 let b = _mm256_set1_epi32(1);
5007 let r = _mm256_sllv_epi32(a, b);
5008 let e = _mm256_set1_epi32(4);
5009 assert_eq_m256i(r, e);
5010 }
5011
5012 #[simd_test(enable = "avx2")]
5013 unsafe fn test_mm_sllv_epi64() {
5014 let a = _mm_set1_epi64x(2);
5015 let b = _mm_set1_epi64x(1);
5016 let r = _mm_sllv_epi64(a, b);
5017 let e = _mm_set1_epi64x(4);
5018 assert_eq_m128i(r, e);
5019 }
5020
5021 #[simd_test(enable = "avx2")]
5022 unsafe fn test_mm256_sllv_epi64() {
5023 let a = _mm256_set1_epi64x(2);
5024 let b = _mm256_set1_epi64x(1);
5025 let r = _mm256_sllv_epi64(a, b);
5026 let e = _mm256_set1_epi64x(4);
5027 assert_eq_m256i(r, e);
5028 }
5029
5030 #[simd_test(enable = "avx2")]
5031 unsafe fn test_mm256_sra_epi16() {
5032 let a = _mm256_set1_epi16(-1);
5033 let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
5034 let r = _mm256_sra_epi16(a, b);
5035 assert_eq_m256i(r, _mm256_set1_epi16(-1));
5036 }
5037
5038 #[simd_test(enable = "avx2")]
5039 unsafe fn test_mm256_sra_epi32() {
5040 let a = _mm256_set1_epi32(-1);
5041 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1);
5042 let r = _mm256_sra_epi32(a, b);
5043 assert_eq_m256i(r, _mm256_set1_epi32(-1));
5044 }
5045
5046 #[simd_test(enable = "avx2")]
5047 unsafe fn test_mm256_srai_epi16() {
5048 assert_eq_m256i(
5049 _mm256_srai_epi16::<1>(_mm256_set1_epi16(-1)),
5050 _mm256_set1_epi16(-1),
5051 );
5052 }
5053
5054 #[simd_test(enable = "avx2")]
5055 unsafe fn test_mm256_srai_epi32() {
5056 assert_eq_m256i(
5057 _mm256_srai_epi32::<1>(_mm256_set1_epi32(-1)),
5058 _mm256_set1_epi32(-1),
5059 );
5060 }
5061
5062 #[simd_test(enable = "avx2")]
5063 unsafe fn test_mm_srav_epi32() {
5064 let a = _mm_set1_epi32(4);
5065 let count = _mm_set1_epi32(1);
5066 let r = _mm_srav_epi32(a, count);
5067 let e = _mm_set1_epi32(2);
5068 assert_eq_m128i(r, e);
5069 }
5070
5071 #[simd_test(enable = "avx2")]
5072 unsafe fn test_mm256_srav_epi32() {
5073 let a = _mm256_set1_epi32(4);
5074 let count = _mm256_set1_epi32(1);
5075 let r = _mm256_srav_epi32(a, count);
5076 let e = _mm256_set1_epi32(2);
5077 assert_eq_m256i(r, e);
5078 }
5079
5080 #[simd_test(enable = "avx2")]
5081 unsafe fn test_mm256_srli_si256() {
5082 #[rustfmt::skip]
5083 let a = _mm256_setr_epi8(
5084 1, 2, 3, 4, 5, 6, 7, 8,
5085 9, 10, 11, 12, 13, 14, 15, 16,
5086 17, 18, 19, 20, 21, 22, 23, 24,
5087 25, 26, 27, 28, 29, 30, 31, 32,
5088 );
5089 let r = _mm256_srli_si256::<3>(a);
5090 #[rustfmt::skip]
5091 let e = _mm256_setr_epi8(
5092 4, 5, 6, 7, 8, 9, 10, 11,
5093 12, 13, 14, 15, 16, 0, 0, 0,
5094 20, 21, 22, 23, 24, 25, 26, 27,
5095 28, 29, 30, 31, 32, 0, 0, 0,
5096 );
5097 assert_eq_m256i(r, e);
5098 }
5099
5100 #[simd_test(enable = "avx2")]
5101 unsafe fn test_mm256_srl_epi16() {
5102 let a = _mm256_set1_epi16(0xFF);
5103 let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
5104 let r = _mm256_srl_epi16(a, b);
5105 assert_eq_m256i(r, _mm256_set1_epi16(0xF));
5106 }
5107
5108 #[simd_test(enable = "avx2")]
5109 unsafe fn test_mm256_srl_epi32() {
5110 let a = _mm256_set1_epi32(0xFFFF);
5111 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
5112 let r = _mm256_srl_epi32(a, b);
5113 assert_eq_m256i(r, _mm256_set1_epi32(0xFFF));
5114 }
5115
5116 #[simd_test(enable = "avx2")]
5117 unsafe fn test_mm256_srl_epi64() {
5118 let a = _mm256_set1_epi64x(0xFFFFFFFF);
5119 let b = _mm_setr_epi64x(4, 0);
5120 let r = _mm256_srl_epi64(a, b);
5121 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFF));
5122 }
5123
5124 #[simd_test(enable = "avx2")]
5125 unsafe fn test_mm256_srli_epi16() {
5126 assert_eq_m256i(
5127 _mm256_srli_epi16::<4>(_mm256_set1_epi16(0xFF)),
5128 _mm256_set1_epi16(0xF),
5129 );
5130 }
5131
5132 #[simd_test(enable = "avx2")]
5133 unsafe fn test_mm256_srli_epi32() {
5134 assert_eq_m256i(
5135 _mm256_srli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
5136 _mm256_set1_epi32(0xFFF),
5137 );
5138 }
5139
5140 #[simd_test(enable = "avx2")]
5141 unsafe fn test_mm256_srli_epi64() {
5142 assert_eq_m256i(
5143 _mm256_srli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
5144 _mm256_set1_epi64x(0xFFFFFFF),
5145 );
5146 }
5147
5148 #[simd_test(enable = "avx2")]
5149 unsafe fn test_mm_srlv_epi32() {
5150 let a = _mm_set1_epi32(2);
5151 let count = _mm_set1_epi32(1);
5152 let r = _mm_srlv_epi32(a, count);
5153 let e = _mm_set1_epi32(1);
5154 assert_eq_m128i(r, e);
5155 }
5156
5157 #[simd_test(enable = "avx2")]
5158 unsafe fn test_mm256_srlv_epi32() {
5159 let a = _mm256_set1_epi32(2);
5160 let count = _mm256_set1_epi32(1);
5161 let r = _mm256_srlv_epi32(a, count);
5162 let e = _mm256_set1_epi32(1);
5163 assert_eq_m256i(r, e);
5164 }
5165
5166 #[simd_test(enable = "avx2")]
5167 unsafe fn test_mm_srlv_epi64() {
5168 let a = _mm_set1_epi64x(2);
5169 let count = _mm_set1_epi64x(1);
5170 let r = _mm_srlv_epi64(a, count);
5171 let e = _mm_set1_epi64x(1);
5172 assert_eq_m128i(r, e);
5173 }
5174
5175 #[simd_test(enable = "avx2")]
5176 unsafe fn test_mm256_srlv_epi64() {
5177 let a = _mm256_set1_epi64x(2);
5178 let count = _mm256_set1_epi64x(1);
5179 let r = _mm256_srlv_epi64(a, count);
5180 let e = _mm256_set1_epi64x(1);
5181 assert_eq_m256i(r, e);
5182 }
5183
5184 #[simd_test(enable = "avx2")]
5185 unsafe fn test_mm256_stream_load_si256() {
5186 let a = _mm256_set_epi64x(5, 6, 7, 8);
5187 let r = _mm256_stream_load_si256(core::ptr::addr_of!(a) as *const _);
5188 assert_eq_m256i(a, r);
5189 }
5190
5191 #[simd_test(enable = "avx2")]
5192 unsafe fn test_mm256_sub_epi16() {
5193 let a = _mm256_set1_epi16(4);
5194 let b = _mm256_set1_epi16(2);
5195 let r = _mm256_sub_epi16(a, b);
5196 assert_eq_m256i(r, b);
5197 }
5198
5199 #[simd_test(enable = "avx2")]
5200 unsafe fn test_mm256_sub_epi32() {
5201 let a = _mm256_set1_epi32(4);
5202 let b = _mm256_set1_epi32(2);
5203 let r = _mm256_sub_epi32(a, b);
5204 assert_eq_m256i(r, b);
5205 }
5206
5207 #[simd_test(enable = "avx2")]
5208 unsafe fn test_mm256_sub_epi64() {
5209 let a = _mm256_set1_epi64x(4);
5210 let b = _mm256_set1_epi64x(2);
5211 let r = _mm256_sub_epi64(a, b);
5212 assert_eq_m256i(r, b);
5213 }
5214
5215 #[simd_test(enable = "avx2")]
5216 unsafe fn test_mm256_sub_epi8() {
5217 let a = _mm256_set1_epi8(4);
5218 let b = _mm256_set1_epi8(2);
5219 let r = _mm256_sub_epi8(a, b);
5220 assert_eq_m256i(r, b);
5221 }
5222
5223 #[simd_test(enable = "avx2")]
5224 unsafe fn test_mm256_subs_epi16() {
5225 let a = _mm256_set1_epi16(4);
5226 let b = _mm256_set1_epi16(2);
5227 let r = _mm256_subs_epi16(a, b);
5228 assert_eq_m256i(r, b);
5229 }
5230
5231 #[simd_test(enable = "avx2")]
5232 unsafe fn test_mm256_subs_epi8() {
5233 let a = _mm256_set1_epi8(4);
5234 let b = _mm256_set1_epi8(2);
5235 let r = _mm256_subs_epi8(a, b);
5236 assert_eq_m256i(r, b);
5237 }
5238
5239 #[simd_test(enable = "avx2")]
5240 unsafe fn test_mm256_subs_epu16() {
5241 let a = _mm256_set1_epi16(4);
5242 let b = _mm256_set1_epi16(2);
5243 let r = _mm256_subs_epu16(a, b);
5244 assert_eq_m256i(r, b);
5245 }
5246
5247 #[simd_test(enable = "avx2")]
5248 unsafe fn test_mm256_subs_epu8() {
5249 let a = _mm256_set1_epi8(4);
5250 let b = _mm256_set1_epi8(2);
5251 let r = _mm256_subs_epu8(a, b);
5252 assert_eq_m256i(r, b);
5253 }
5254
5255 #[simd_test(enable = "avx2")]
5256 unsafe fn test_mm256_xor_si256() {
5257 let a = _mm256_set1_epi8(5);
5258 let b = _mm256_set1_epi8(3);
5259 let r = _mm256_xor_si256(a, b);
5260 assert_eq_m256i(r, _mm256_set1_epi8(6));
5261 }
5262
5263 #[simd_test(enable = "avx2")]
5264 unsafe fn test_mm256_alignr_epi8() {
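// The shift operates on each 128-bit lane separately: an offset of 32 or more yields zero, 16 returns `a`, and 0 returns `b`.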
5265 #[rustfmt::skip]
5266 let a = _mm256_setr_epi8(
5267 1, 2, 3, 4, 5, 6, 7, 8,
5268 9, 10, 11, 12, 13, 14, 15, 16,
5269 17, 18, 19, 20, 21, 22, 23, 24,
5270 25, 26, 27, 28, 29, 30, 31, 32,
5271 );
5272 #[rustfmt::skip]
5273 let b = _mm256_setr_epi8(
5274 -1, -2, -3, -4, -5, -6, -7, -8,
5275 -9, -10, -11, -12, -13, -14, -15, -16,
5276 -17, -18, -19, -20, -21, -22, -23, -24,
5277 -25, -26, -27, -28, -29, -30, -31, -32,
5278 );
5279 let r = _mm256_alignr_epi8::<33>(a, b);
5280 assert_eq_m256i(r, _mm256_set1_epi8(0));
5281
5282 let r = _mm256_alignr_epi8::<17>(a, b);
5283 #[rustfmt::skip]
5284 let expected = _mm256_setr_epi8(
5285 2, 3, 4, 5, 6, 7, 8, 9,
5286 10, 11, 12, 13, 14, 15, 16, 0,
5287 18, 19, 20, 21, 22, 23, 24, 25,
5288 26, 27, 28, 29, 30, 31, 32, 0,
5289 );
5290 assert_eq_m256i(r, expected);
5291
5292 let r = _mm256_alignr_epi8::<4>(a, b);
5293 #[rustfmt::skip]
5294 let expected = _mm256_setr_epi8(
5295 -5, -6, -7, -8, -9, -10, -11, -12,
5296 -13, -14, -15, -16, 1, 2, 3, 4,
5297 -21, -22, -23, -24, -25, -26, -27, -28,
5298 -29, -30, -31, -32, 17, 18, 19, 20,
5299 );
5300 assert_eq_m256i(r, expected);
5301
5302 let r = _mm256_alignr_epi8::<15>(a, b);
5303 #[rustfmt::skip]
5304 let expected = _mm256_setr_epi8(
5305 -16, 1, 2, 3, 4, 5, 6, 7,
5306 8, 9, 10, 11, 12, 13, 14, 15,
5307 -32, 17, 18, 19, 20, 21, 22, 23,
5308 24, 25, 26, 27, 28, 29, 30, 31,
5309 );
5310 assert_eq_m256i(r, expected);
5311
5312 let r = _mm256_alignr_epi8::<0>(a, b);
5313 assert_eq_m256i(r, b);
5314
5315 let r = _mm256_alignr_epi8::<16>(a, b);
5316 assert_eq_m256i(r, a);
5317 }
5318
5319 #[simd_test(enable = "avx2")]
5320 unsafe fn test_mm256_shuffle_epi8() {
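// Indices select bytes within the same 128-bit lane; an index with its high bit set (128) zeroes the output byte.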
5321 #[rustfmt::skip]
5322 let a = _mm256_setr_epi8(
5323 1, 2, 3, 4, 5, 6, 7, 8,
5324 9, 10, 11, 12, 13, 14, 15, 16,
5325 17, 18, 19, 20, 21, 22, 23, 24,
5326 25, 26, 27, 28, 29, 30, 31, 32,
5327 );
5328 #[rustfmt::skip]
5329 let b = _mm256_setr_epi8(
5330 4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
5331 12, 5, 5, 10, 4, 1, 8, 0,
5332 4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
5333 12, 5, 5, 10, 4, 1, 8, 0,
5334 );
5335 #[rustfmt::skip]
5336 let expected = _mm256_setr_epi8(
5337 5, 0, 5, 4, 9, 13, 7, 4,
5338 13, 6, 6, 11, 5, 2, 9, 1,
5339 21, 0, 21, 20, 25, 29, 23, 20,
5340 29, 22, 22, 27, 21, 18, 25, 17,
5341 );
5342 let r = _mm256_shuffle_epi8(a, b);
5343 assert_eq_m256i(r, expected);
5344 }
5345
5346 #[simd_test(enable = "avx2")]
5347 unsafe fn test_mm256_permutevar8x32_epi32() {
5348 let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
5349 let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
5350 let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500);
5351 let r = _mm256_permutevar8x32_epi32(a, b);
5352 assert_eq_m256i(r, expected);
5353 }
5354
5355 #[simd_test(enable = "avx2")]
5356 unsafe fn test_mm256_permute4x64_epi64() {
5357 let a = _mm256_setr_epi64x(100, 200, 300, 400);
5358 let expected = _mm256_setr_epi64x(400, 100, 200, 100);
5359 let r = _mm256_permute4x64_epi64::<0b00010011>(a);
5360 assert_eq_m256i(r, expected);
5361 }
5362
5363 #[simd_test(enable = "avx2")]
5364 unsafe fn test_mm256_permute2x128_si256() {
5365 let a = _mm256_setr_epi64x(100, 200, 500, 600);
5366 let b = _mm256_setr_epi64x(300, 400, 700, 800);
5367 let r = _mm256_permute2x128_si256::<0b00_01_00_11>(a, b);
5368 let e = _mm256_setr_epi64x(700, 800, 500, 600);
5369 assert_eq_m256i(r, e);
5370 }
5371
5372 #[simd_test(enable = "avx2")]
5373 unsafe fn test_mm256_permute4x64_pd() {
5374 let a = _mm256_setr_pd(1., 2., 3., 4.);
5375 let r = _mm256_permute4x64_pd::<0b00_01_00_11>(a);
5376 let e = _mm256_setr_pd(4., 1., 2., 1.);
5377 assert_eq_m256d(r, e);
5378 }
5379
5380 #[simd_test(enable = "avx2")]
5381 unsafe fn test_mm256_permutevar8x32_ps() {
5382 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
5383 let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
5384 let r = _mm256_permutevar8x32_ps(a, b);
5385 let e = _mm256_setr_ps(6., 1., 6., 2., 8., 7., 4., 5.);
5386 assert_eq_m256(r, e);
5387 }
5388
5389 #[simd_test(enable = "avx2")]
5390 unsafe fn test_mm_i32gather_epi32() {
5391 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
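// A multiplier of 4 is word-addressing for i32s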
5392 let r = _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5394 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
5395 }
5396
5397 #[simd_test(enable = "avx2")]
5398 unsafe fn test_mm_mask_i32gather_epi32() {
5399 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
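// A multiplier of 4 is word-addressing for i32s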
5400 let r = _mm_mask_i32gather_epi32::<4>(
5402 _mm_set1_epi32(256),
5403 arr.as_ptr(),
5404 _mm_setr_epi32(0, 16, 64, 96),
5405 _mm_setr_epi32(-1, -1, -1, 0),
5406 );
5407 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
5408 }
5409
5410 #[simd_test(enable = "avx2")]
5411 unsafe fn test_mm256_i32gather_epi32() {
5412 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
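// A multiplier of 4 is word-addressing for i32s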
5413 let r =
5415 _mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5416 assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5417 }
5418
5419 #[simd_test(enable = "avx2")]
5420 unsafe fn test_mm256_mask_i32gather_epi32() {
5421 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
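// A multiplier of 4 is word-addressing for i32s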
5422 let r = _mm256_mask_i32gather_epi32::<4>(
5424 _mm256_set1_epi32(256),
5425 arr.as_ptr(),
5426 _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
5427 _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
5428 );
5429 assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256));
5430 }
5431
5432 #[simd_test(enable = "avx2")]
5433 unsafe fn test_mm_i32gather_ps() {
5434 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
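// A multiplier of 4 is word-addressing for f32s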
5435 let r = _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5437 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
5438 }
5439
5440 #[simd_test(enable = "avx2")]
5441 unsafe fn test_mm_mask_i32gather_ps() {
5442 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
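// A multiplier of 4 is word-addressing for f32s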
5443 let r = _mm_mask_i32gather_ps::<4>(
5445 _mm_set1_ps(256.0),
5446 arr.as_ptr(),
5447 _mm_setr_epi32(0, 16, 64, 96),
5448 _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
5449 );
5450 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
5451 }
5452
5453 #[simd_test(enable = "avx2")]
5454 unsafe fn test_mm256_i32gather_ps() {
5455 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
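// A multiplier of 4 is word-addressing for f32s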
5456 let r =
5458 _mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5459 assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0));
5460 }
5461
5462 #[simd_test(enable = "avx2")]
5463 unsafe fn test_mm256_mask_i32gather_ps() {
5464 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
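// A multiplier of 4 is word-addressing for f32s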
5465 let r = _mm256_mask_i32gather_ps::<4>(
5467 _mm256_set1_ps(256.0),
5468 arr.as_ptr(),
5469 _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
5470 _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
5471 );
5472 assert_eq_m256(
5473 r,
5474 _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
5475 );
5476 }
5477
5478 #[simd_test(enable = "avx2")]
5479 unsafe fn test_mm_i32gather_epi64() {
5480 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
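// A multiplier of 8 is word-addressing for i64s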
5481 let r = _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
5483 assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
5484 }
5485
5486 #[simd_test(enable = "avx2")]
5487 unsafe fn test_mm_mask_i32gather_epi64() {
5488 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
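// A multiplier of 8 is word-addressing for i64s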
5489 let r = _mm_mask_i32gather_epi64::<8>(
5491 _mm_set1_epi64x(256),
5492 arr.as_ptr(),
5493 _mm_setr_epi32(16, 16, 16, 16),
5494 _mm_setr_epi64x(-1, 0),
5495 );
5496 assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
5497 }
5498
5499 #[simd_test(enable = "avx2")]
5500 unsafe fn test_mm256_i32gather_epi64() {
5501 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
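// A multiplier of 8 is word-addressing for i64s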
5502 let r = _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5504 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
5505 }
5506
5507 #[simd_test(enable = "avx2")]
5508 unsafe fn test_mm256_mask_i32gather_epi64() {
5509 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
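// A multiplier of 8 is word-addressing for i64s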
5510 let r = _mm256_mask_i32gather_epi64::<8>(
5512 _mm256_set1_epi64x(256),
5513 arr.as_ptr(),
5514 _mm_setr_epi32(0, 16, 64, 96),
5515 _mm256_setr_epi64x(-1, -1, -1, 0),
5516 );
5517 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
5518 }
5519
5520 #[simd_test(enable = "avx2")]
5521 unsafe fn test_mm_i32gather_pd() {
5522 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
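// A multiplier of 8 is word-addressing for f64s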
5523 let r = _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
5525 assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
5526 }
5527
5528 #[simd_test(enable = "avx2")]
5529 unsafe fn test_mm_mask_i32gather_pd() {
5530 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
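// A multiplier of 8 is word-addressing for f64s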
5531 let r = _mm_mask_i32gather_pd::<8>(
5533 _mm_set1_pd(256.0),
5534 arr.as_ptr(),
5535 _mm_setr_epi32(16, 16, 16, 16),
5536 _mm_setr_pd(-1.0, 0.0),
5537 );
5538 assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
5539 }
5540
5541 #[simd_test(enable = "avx2")]
5542 unsafe fn test_mm256_i32gather_pd() {
5543 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
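// A multiplier of 8 is word-addressing for f64s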
5544 let r = _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5546 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
5547 }
5548
5549 #[simd_test(enable = "avx2")]
5550 unsafe fn test_mm256_mask_i32gather_pd() {
5551 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
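// A multiplier of 8 is word-addressing for f64s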
5552 let r = _mm256_mask_i32gather_pd::<8>(
5554 _mm256_set1_pd(256.0),
5555 arr.as_ptr(),
5556 _mm_setr_epi32(0, 16, 64, 96),
5557 _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
5558 );
5559 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
5560 }
5561
5562 #[simd_test(enable = "avx2")]
5563 unsafe fn test_mm_i64gather_epi32() {
5564 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
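// A multiplier of 4 is word-addressing for i32s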
5565 let r = _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5567 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0));
5568 }
5569
5570 #[simd_test(enable = "avx2")]
5571 unsafe fn test_mm_mask_i64gather_epi32() {
5572 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
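// A multiplier of 4 is word-addressing for i32s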
5573 let r = _mm_mask_i64gather_epi32::<4>(
5575 _mm_set1_epi32(256),
5576 arr.as_ptr(),
5577 _mm_setr_epi64x(0, 16),
5578 _mm_setr_epi32(-1, 0, -1, 0),
5579 );
5580 assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0));
5581 }
5582
5583 #[simd_test(enable = "avx2")]
5584 unsafe fn test_mm256_i64gather_epi32() {
5585 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
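// A multiplier of 4 is word-addressing for i32s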
5586 let r = _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5588 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
5589 }
5590
5591 #[simd_test(enable = "avx2")]
5592 unsafe fn test_mm256_mask_i64gather_epi32() {
5593 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
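// A multiplier of 4 is word-addressing for i32s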
5594 let r = _mm256_mask_i64gather_epi32::<4>(
5596 _mm_set1_epi32(256),
5597 arr.as_ptr(),
5598 _mm256_setr_epi64x(0, 16, 64, 96),
5599 _mm_setr_epi32(-1, -1, -1, 0),
5600 );
5601 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
5602 }
5603
5604 #[simd_test(enable = "avx2")]
5605 unsafe fn test_mm_i64gather_ps() {
5606 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
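// A multiplier of 4 is word-addressing for f32s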
5607 let r = _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5609 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
5610 }
5611
5612 #[simd_test(enable = "avx2")]
5613 unsafe fn test_mm_mask_i64gather_ps() {
5614 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
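// A multiplier of 4 is word-addressing for f32s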
5615 let r = _mm_mask_i64gather_ps::<4>(
5617 _mm_set1_ps(256.0),
5618 arr.as_ptr(),
5619 _mm_setr_epi64x(0, 16),
5620 _mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
5621 );
5622 assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0));
5623 }
5624
5625 #[simd_test(enable = "avx2")]
5626 unsafe fn test_mm256_i64gather_ps() {
5627 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
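// A multiplier of 4 is word-addressing for f32s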
5628 let r = _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5630 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
5631 }
5632
5633 #[simd_test(enable = "avx2")]
5634 unsafe fn test_mm256_mask_i64gather_ps() {
5635 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
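// A multiplier of 4 is word-addressing for f32s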
5636 let r = _mm256_mask_i64gather_ps::<4>(
5638 _mm_set1_ps(256.0),
5639 arr.as_ptr(),
5640 _mm256_setr_epi64x(0, 16, 64, 96),
5641 _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
5642 );
5643 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
5644 }
5645
5646 #[simd_test(enable = "avx2")]
5647 unsafe fn test_mm_i64gather_epi64() {
5648 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
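// A multiplier of 8 is word-addressing for i64s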
5649 let r = _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5651 assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
5652 }
5653
5654 #[simd_test(enable = "avx2")]
5655 unsafe fn test_mm_mask_i64gather_epi64() {
5656 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
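// A multiplier of 8 is word-addressing for i64s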
5657 let r = _mm_mask_i64gather_epi64::<8>(
5659 _mm_set1_epi64x(256),
5660 arr.as_ptr(),
5661 _mm_setr_epi64x(16, 16),
5662 _mm_setr_epi64x(-1, 0),
5663 );
5664 assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
5665 }
5666
5667 #[simd_test(enable = "avx2")]
5668 unsafe fn test_mm256_i64gather_epi64() {
5669 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
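// A multiplier of 8 is word-addressing for i64s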
5670 let r = _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5672 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
5673 }
5674
5675 #[simd_test(enable = "avx2")]
5676 unsafe fn test_mm256_mask_i64gather_epi64() {
5677 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
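// A multiplier of 8 is word-addressing for i64s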
5678 let r = _mm256_mask_i64gather_epi64::<8>(
5680 _mm256_set1_epi64x(256),
5681 arr.as_ptr(),
5682 _mm256_setr_epi64x(0, 16, 64, 96),
5683 _mm256_setr_epi64x(-1, -1, -1, 0),
5684 );
5685 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
5686 }
5687
5688 #[simd_test(enable = "avx2")]
5689 unsafe fn test_mm_i64gather_pd() {
5690 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
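// A multiplier of 8 is word-addressing for f64s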
5691 let r = _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5693 assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
5694 }
5695
5696 #[simd_test(enable = "avx2")]
5697 unsafe fn test_mm_mask_i64gather_pd() {
5698 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
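// A multiplier of 8 is word-addressing for f64s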
5699 let r = _mm_mask_i64gather_pd::<8>(
5701 _mm_set1_pd(256.0),
5702 arr.as_ptr(),
5703 _mm_setr_epi64x(16, 16),
5704 _mm_setr_pd(-1.0, 0.0),
5705 );
5706 assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
5707 }
5708
5709 #[simd_test(enable = "avx2")]
5710 unsafe fn test_mm256_i64gather_pd() {
5711 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
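// A multiplier of 8 is word-addressing for f64s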
5712 let r = _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5714 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
5715 }
5716
5717 #[simd_test(enable = "avx2")]
5718 unsafe fn test_mm256_mask_i64gather_pd() {
5719 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
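// A multiplier of 8 is word-addressing for f64s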
5720 let r = _mm256_mask_i64gather_pd::<8>(
5722 _mm256_set1_pd(256.0),
5723 arr.as_ptr(),
5724 _mm256_setr_epi64x(0, 16, 64, 96),
5725 _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
5726 );
5727 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
5728 }
5729
5730 #[simd_test(enable = "avx2")]
5731 unsafe fn test_mm256_extract_epi8() {
5732 #[rustfmt::skip]
5733 let a = _mm256_setr_epi8(
5734 -1, 1, 2, 3, 4, 5, 6, 7,
5735 8, 9, 10, 11, 12, 13, 14, 15,
5736 16, 17, 18, 19, 20, 21, 22, 23,
5737 24, 25, 26, 27, 28, 29, 30, 31
5738 );
5739 let r1 = _mm256_extract_epi8::<0>(a);
5740 let r2 = _mm256_extract_epi8::<3>(a);
5741 assert_eq!(r1, 0xFF);
5742 assert_eq!(r2, 3);
5743 }
5744
5745 #[simd_test(enable = "avx2")]
5746 unsafe fn test_mm256_extract_epi16() {
5747 #[rustfmt::skip]
5748 let a = _mm256_setr_epi16(
5749 -1, 1, 2, 3, 4, 5, 6, 7,
5750 8, 9, 10, 11, 12, 13, 14, 15,
5751 );
5752 let r1 = _mm256_extract_epi16::<0>(a);
5753 let r2 = _mm256_extract_epi16::<3>(a);
5754 assert_eq!(r1, 0xFFFF);
5755 assert_eq!(r2, 3);
5756 }
5757}