1use crate::core_arch::{simd::*, x86::*};
22use crate::intrinsics::simd::*;
23
24#[cfg(test)]
25use stdarch_test::assert_instr;
26
/// Computes the absolute value of each packed 32-bit signed integer in `a` and
/// returns the unsigned results.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_abs_epi32(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        // Lane-wise: pick the negated value where the lane is negative, else the lane itself.
        let r = simd_select::<m32x8, _>(simd_lt(a, i32x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}
42
/// Computes the absolute value of each packed 16-bit signed integer in `a` and
/// returns the unsigned results.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_abs_epi16(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        // Lane-wise: pick the negated value where the lane is negative, else the lane itself.
        let r = simd_select::<m16x16, _>(simd_lt(a, i16x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}
58
/// Computes the absolute value of each packed 8-bit signed integer in `a` and
/// returns the unsigned results.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_abs_epi8(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        // Lane-wise: pick the negated value where the lane is negative, else the lane itself.
        let r = simd_select::<m8x32, _>(simd_lt(a, i8x32::ZERO), simd_neg(a), a);
        transmute(r)
    }
}
74
/// Adds packed 64-bit integers in `a` and `b` (wrapping on overflow).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i64x4(), b.as_i64x4())) }
}
86
/// Adds packed 32-bit integers in `a` and `b` (wrapping on overflow).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i32x8(), b.as_i32x8())) }
}
98
/// Adds packed 16-bit integers in `a` and `b` (wrapping on overflow).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i16x16(), b.as_i16x16())) }
}
110
/// Adds packed 8-bit integers in `a` and `b` (wrapping on overflow).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i8x32(), b.as_i8x32())) }
}
122
/// Adds packed 8-bit signed integers in `a` and `b` using signed saturation
/// (results clamp to the `i8` range instead of wrapping).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32())) }
}
134
/// Adds packed 16-bit signed integers in `a` and `b` using signed saturation
/// (results clamp to the `i16` range instead of wrapping).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16())) }
}
146
/// Adds packed 8-bit unsigned integers in `a` and `b` using unsigned saturation
/// (results clamp to the `u8` range instead of wrapping).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32())) }
}
158
/// Adds packed 16-bit unsigned integers in `a` and `b` using unsigned saturation
/// (results clamp to the `u16` range instead of wrapping).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16())) }
}
170
/// Concatenates pairs of 16-byte lanes of `a` and `b` and shifts the result
/// right by `IMM8` bytes, operating on each 128-bit lane independently
/// (per-lane byte alignment, as performed by `vpalignr`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);

    // Shifting past both 16-byte sources leaves nothing: result is all zeros.
    if IMM8 >= 32 {
        return _mm256_setzero_si256();
    }
    // For shifts in (16, 32), only bytes of `a` can remain; reduce to the
    // shift-by-(IMM8 - 16) case by feeding zeros in as the "high" operand.
    let (a, b) = if IMM8 > 16 {
        (_mm256_setzero_si256(), a)
    } else {
        (a, b)
    };
    unsafe {
        // A shift of exactly 16 bytes selects all of `a` unchanged.
        if IMM8 == 16 {
            return transmute(a);
        }
    }
    // Shuffle-index helper: for byte `i` of the result, select either byte
    // `i + shift` of `b` (low half of the concatenation) or, once the shift
    // runs off the end of the lane, the corresponding byte of `a` (offset by
    // 16 in simd_shuffle's two-operand index space).
    const fn mask(shift: u32, i: u32) -> u32 {
        let shift = shift % 16;
        let mod_i = i % 16;
        if mod_i < (16 - shift) {
            i + shift
        } else {
            i + 16 + shift
        }
    }

    unsafe {
        let r: i8x32 = simd_shuffle!(
            b.as_i8x32(),
            a.as_i8x32(),
            [
                mask(IMM8 as u32, 0),
                mask(IMM8 as u32, 1),
                mask(IMM8 as u32, 2),
                mask(IMM8 as u32, 3),
                mask(IMM8 as u32, 4),
                mask(IMM8 as u32, 5),
                mask(IMM8 as u32, 6),
                mask(IMM8 as u32, 7),
                mask(IMM8 as u32, 8),
                mask(IMM8 as u32, 9),
                mask(IMM8 as u32, 10),
                mask(IMM8 as u32, 11),
                mask(IMM8 as u32, 12),
                mask(IMM8 as u32, 13),
                mask(IMM8 as u32, 14),
                mask(IMM8 as u32, 15),
                mask(IMM8 as u32, 16),
                mask(IMM8 as u32, 17),
                mask(IMM8 as u32, 18),
                mask(IMM8 as u32, 19),
                mask(IMM8 as u32, 20),
                mask(IMM8 as u32, 21),
                mask(IMM8 as u32, 22),
                mask(IMM8 as u32, 23),
                mask(IMM8 as u32, 24),
                mask(IMM8 as u32, 25),
                mask(IMM8 as u32, 26),
                mask(IMM8 as u32, 27),
                mask(IMM8 as u32, 28),
                mask(IMM8 as u32, 29),
                mask(IMM8 as u32, 30),
                mask(IMM8 as u32, 31),
            ],
        );
        transmute(r)
    }
}
253
/// Computes the bitwise AND of the 256 bits of integer data in `a` and `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_and(a.as_i64x4(), b.as_i64x4())) }
}
266
/// Computes the bitwise NOT of the 256 bits of integer data in `a`, then ANDs
/// the result with `b` (i.e. `(!a) & b`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // NOT is implemented as XOR with an all-ones vector.
        let all_ones = _mm256_set1_epi8(-1);
        transmute(simd_and(
            simd_xor(a.as_i64x4(), all_ones.as_i64x4()),
            b.as_i64x4(),
        ))
    }
}
285
/// Averages packed 16-bit unsigned integers in `a` and `b`, rounding halves up:
/// each result lane is `(a + b + 1) >> 1`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Widen to 32 bits so the intermediate sum (plus the rounding bias)
        // cannot overflow before the shift back down.
        let a = simd_cast::<_, u32x16>(a.as_u16x16());
        let b = simd_cast::<_, u32x16>(b.as_u16x16());
        let r = simd_shr(simd_add(simd_add(a, b), u32x16::splat(1)), u32x16::splat(1));
        transmute(simd_cast::<_, u16x16>(r))
    }
}
302
/// Averages packed 8-bit unsigned integers in `a` and `b`, rounding halves up:
/// each result lane is `(a + b + 1) >> 1`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Widen to 16 bits so the intermediate sum (plus the rounding bias)
        // cannot overflow before the shift back down.
        let a = simd_cast::<_, u16x32>(a.as_u8x32());
        let b = simd_cast::<_, u16x32>(b.as_u8x32());
        let r = simd_shr(simd_add(simd_add(a, b), u16x32::splat(1)), u16x32::splat(1));
        transmute(simd_cast::<_, u8x32>(r))
    }
}
319
/// Blends packed 32-bit integers from `a` and `b` using `IMM4` as a per-lane
/// control mask: result lane `i` comes from `b` when bit `i` of `IMM4` is set,
/// otherwise from `a`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM4, 4);
    unsafe {
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        // Each table is indexed by a 2-bit slice of IMM4; entries pick either
        // the lane from `a` (indices 0..4) or from `b` (indices 4..8).
        let r: i32x4 = simd_shuffle!(
            a,
            b,
            [
                [0, 4, 0, 4][IMM4 as usize & 0b11],
                [1, 1, 5, 5][IMM4 as usize & 0b11],
                [2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11],
                [3, 3, 7, 7][(IMM4 as usize >> 2) & 0b11],
            ],
        );
        transmute(r)
    }
}
347
/// Blends packed 32-bit integers from `a` and `b` using `IMM8` as a per-lane
/// control mask: result lane `i` comes from `b` when bit `i` of `IMM8` is set,
/// otherwise from `a`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        // Each table is indexed by a 2-bit slice of IMM8; entries pick either
        // the lane from `a` (indices 0..8) or from `b` (indices 8..16).
        let r: i32x8 = simd_shuffle!(
            a,
            b,
            [
                [0, 8, 0, 8][IMM8 as usize & 0b11],
                [1, 1, 9, 9][IMM8 as usize & 0b11],
                [2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 11, 11][(IMM8 as usize >> 2) & 0b11],
                [4, 12, 4, 12][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 13, 13][(IMM8 as usize >> 4) & 0b11],
                [6, 14, 6, 14][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 15, 15][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}
379
/// Blends packed 16-bit integers from `a` and `b` using `IMM8` as a control
/// mask. Bit `i` of `IMM8` selects lane `i` from `b` (set) or `a` (clear); the
/// same 8-bit mask is applied to both 128-bit halves of the vectors.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendw, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();

        // Each table is indexed by a 2-bit slice of IMM8; entries pick either
        // the lane from `a` (indices 0..16) or from `b` (indices 16..32).
        // Lanes 8..16 reuse the same IMM8 bits as lanes 0..8 (mask repeats
        // per 128-bit half).
        let r: i16x16 = simd_shuffle!(
            a,
            b,
            [
                [0, 16, 0, 16][IMM8 as usize & 0b11],
                [1, 1, 17, 17][IMM8 as usize & 0b11],
                [2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 19, 19][(IMM8 as usize >> 2) & 0b11],
                [4, 20, 4, 20][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 21, 21][(IMM8 as usize >> 4) & 0b11],
                [6, 22, 6, 22][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 23, 23][(IMM8 as usize >> 6) & 0b11],
                [8, 24, 8, 24][IMM8 as usize & 0b11],
                [9, 9, 25, 25][IMM8 as usize & 0b11],
                [10, 26, 10, 26][(IMM8 as usize >> 2) & 0b11],
                [11, 11, 27, 27][(IMM8 as usize >> 2) & 0b11],
                [12, 28, 12, 28][(IMM8 as usize >> 4) & 0b11],
                [13, 13, 29, 29][(IMM8 as usize >> 4) & 0b11],
                [14, 30, 14, 30][(IMM8 as usize >> 6) & 0b11],
                [15, 15, 31, 31][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}
420
/// Blends packed 8-bit integers from `a` and `b` using a variable `mask`:
/// result byte `i` comes from `b` when the high bit of `mask` byte `i` is set,
/// otherwise from `a`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
    unsafe {
        // `< 0` is true exactly when the sign (high) bit of the byte is set.
        let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::ZERO);
        transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32()))
    }
}
435
/// Broadcasts the lowest 8-bit integer of `a` to all 16 lanes of the result.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
    unsafe {
        // All shuffle indices are 0, so every output lane is byte 0 of `a`.
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 16]);
        transmute::<i8x16, _>(ret)
    }
}
451
/// Broadcasts the lowest 8-bit integer of `a` to all 32 lanes of the 256-bit
/// result.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
    unsafe {
        // All shuffle indices are 0, so every output lane is byte 0 of `a`.
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 32]);
        transmute::<i8x32, _>(ret)
    }
}
467
/// Broadcasts the lowest 32-bit integer of `a` to all 4 lanes of the result.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
    unsafe {
        // All shuffle indices are 0, so every output lane is dword 0 of `a`.
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 4]);
        transmute::<i32x4, _>(ret)
    }
}
485
/// Broadcasts the lowest 32-bit integer of `a` to all 8 lanes of the 256-bit
/// result.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
    unsafe {
        // All shuffle indices are 0, so every output lane is dword 0 of `a`.
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 8]);
        transmute::<i32x8, _>(ret)
    }
}
503
/// Broadcasts the lowest 64-bit integer of `a` to both lanes of the result.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
    unsafe {
        // Both shuffle indices are 0, so both output lanes are qword 0 of `a`.
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]);
        transmute::<i64x2, _>(ret)
    }
}
521
/// Broadcasts the lowest 64-bit integer of `a` to all 4 lanes of the 256-bit
/// result.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
    unsafe {
        // All shuffle indices are 0, so every output lane is qword 0 of `a`.
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]);
        transmute::<i64x4, _>(ret)
    }
}
537
/// Broadcasts the lowest double-precision (64-bit) float of `a` to both lanes
/// of the result.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 2]) }
}
550
/// Broadcasts the lowest double-precision (64-bit) float of `a` to all 4 lanes
/// of the 256-bit result.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 4]) }
}
563
/// Broadcasts the 128-bit integer data in `a` to both 128-bit halves of the
/// 256-bit result.
#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        // Indices [0, 1, 0, 1] duplicate the two 64-bit lanes of `a` into
        // both halves of the result.
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}
578
/// Broadcasts the 128-bit integer data in `a` to both 128-bit halves of the
/// 256-bit result.
#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        // Indices [0, 1, 0, 1] duplicate the two 64-bit lanes of `a` into
        // both halves of the result.
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}
595
/// Broadcasts the lowest single-precision (32-bit) float of `a` to all 4 lanes
/// of the result.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastss_ps(a: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 4]) }
}
608
/// Broadcasts the lowest single-precision (32-bit) float of `a` to all 8 lanes
/// of the 256-bit result.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 8]) }
}
621
/// Broadcasts the lowest 16-bit integer of `a` to all 8 lanes of the result.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
    unsafe {
        // All shuffle indices are 0, so every output lane is word 0 of `a`.
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 8]);
        transmute::<i16x8, _>(ret)
    }
}
637
/// Broadcasts the lowest 16-bit integer of `a` to all 16 lanes of the 256-bit
/// result.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
    unsafe {
        // All shuffle indices are 0, so every output lane is word 0 of `a`.
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 16]);
        transmute::<i16x16, _>(ret)
    }
}
653
/// Compares packed 64-bit integers in `a` and `b` for equality; each result
/// lane is all ones when equal, all zeros otherwise.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
}
665
/// Compares packed 32-bit integers in `a` and `b` for equality; each result
/// lane is all ones when equal, all zeros otherwise.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
}
677
/// Compares packed 16-bit integers in `a` and `b` for equality; each result
/// lane is all ones when equal, all zeros otherwise.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
}
689
/// Compares packed 8-bit integers in `a` and `b` for equality; each result
/// lane is all ones when equal, all zeros otherwise.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
}
701
/// Compares packed 64-bit signed integers in `a` and `b` for greater-than;
/// each result lane is all ones when `a > b`, all zeros otherwise.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
}
713
/// Compares packed 32-bit signed integers in `a` and `b` for greater-than;
/// each result lane is all ones when `a > b`, all zeros otherwise.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
}
725
/// Compares packed 16-bit signed integers in `a` and `b` for greater-than;
/// each result lane is all ones when `a > b`, all zeros otherwise.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
}
737
/// Compares packed 8-bit signed integers in `a` and `b` for greater-than;
/// each result lane is all ones when `a > b`, all zeros otherwise.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
}
749
/// Sign-extends packed 16-bit integers in `a` to packed 32-bit integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_i16x8())) }
}
761
/// Sign-extends the low four packed 16-bit integers in `a` to packed 64-bit
/// integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i16x8();
        // Only the four low words are widened; drop the rest before the cast.
        let v64: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}
777
/// Sign-extends packed 32-bit integers in `a` to packed 64-bit integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_i32x4())) }
}
789
/// Sign-extends packed 8-bit integers in `a` to packed 16-bit integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_i8x16())) }
}
801
/// Sign-extends the low eight packed 8-bit integers in `a` to packed 32-bit
/// integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        // Only the eight low bytes are widened; drop the rest before the cast.
        let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}
817
/// Sign-extends the low four packed 8-bit integers in `a` to packed 64-bit
/// integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        // Only the four low bytes are widened; drop the rest before the cast.
        let v32: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}
833
/// Zero-extends packed 16-bit unsigned integers in `a` to packed 32-bit
/// integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_u16x8())) }
}
846
/// Zero-extends the low four packed 16-bit unsigned integers in `a` to packed
/// 64-bit integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u16x8();
        // Only the four low words are widened; drop the rest before the cast.
        let v64: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}
863
/// Zero-extends packed 32-bit unsigned integers in `a` to packed 64-bit
/// integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_u32x4())) }
}
875
/// Zero-extends packed 8-bit unsigned integers in `a` to packed 16-bit
/// integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_u8x16())) }
}
887
/// Zero-extends the low eight packed 8-bit unsigned integers in `a` to packed
/// 32-bit integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        // Only the eight low bytes are widened; drop the rest before the cast.
        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}
904
/// Zero-extends the low four packed 8-bit unsigned integers in `a` to packed
/// 64-bit integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        // Only the four low bytes are widened; drop the rest before the cast.
        let v32: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}
921
/// Extracts one 128-bit half of `a`, selected by `IMM1` (0 = low half,
/// 1 = high half).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        let b = i64x4::ZERO;
        // Indices [0, 1] pick the low 128 bits, [2, 3] the high 128 bits.
        let dst: i64x2 = simd_shuffle!(a, b, [[0, 1], [2, 3]][IMM1 as usize]);
        transmute(dst)
    }
}
940
/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`
/// (wrapping on overflow), interleaving the per-128-bit-lane results of `a`
/// and `b` as `vphaddw` does.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    unsafe {
        // Gather the even-indexed and odd-indexed members of each adjacent
        // pair (indices 16.. refer to `b`), then add element-wise.
        let even: i16x16 = simd_shuffle!(
            a,
            b,
            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
        );
        let odd: i16x16 = simd_shuffle!(
            a,
            b,
            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
        );
        simd_add(even, odd).as_m256i()
    }
}
966
/// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`
/// (wrapping on overflow), interleaving the per-128-bit-lane results of `a`
/// and `b` as `vphaddd` does.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i32x8();
    let b = b.as_i32x8();
    unsafe {
        // Gather the even-indexed and odd-indexed members of each adjacent
        // pair (indices 8.. refer to `b`), then add element-wise.
        let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
        let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
        simd_add(even, odd).as_m256i()
    }
}
984
/// Horizontally adds adjacent pairs of 16-bit signed integers in `a` and `b`
/// using signed saturation.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phaddsw(a.as_i16x16(), b.as_i16x16())) }
}
996
/// Horizontally subtracts adjacent pairs of 16-bit integers in `a` and `b`
/// (wrapping on overflow), interleaving the per-128-bit-lane results of `a`
/// and `b` as `vphsubw` does.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    unsafe {
        // Gather the even-indexed and odd-indexed members of each adjacent
        // pair (indices 16.. refer to `b`), then compute even - odd.
        let even: i16x16 = simd_shuffle!(
            a,
            b,
            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
        );
        let odd: i16x16 = simd_shuffle!(
            a,
            b,
            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
        );
        simd_sub(even, odd).as_m256i()
    }
}
1022
/// Horizontally subtracts adjacent pairs of 32-bit integers in `a` and `b`
/// (wrapping on overflow), interleaving the per-128-bit-lane results of `a`
/// and `b` as `vphsubd` does.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i32x8();
    let b = b.as_i32x8();
    unsafe {
        // Gather the even-indexed and odd-indexed members of each adjacent
        // pair (indices 8.. refer to `b`), then compute even - odd.
        let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
        let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
        simd_sub(even, odd).as_m256i()
    }
}
1040
/// Horizontally subtracts adjacent pairs of 16-bit signed integers in `a` and
/// `b` using signed saturation.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phsubsw(a.as_i16x16(), b.as_i16x16())) }
}
1052
/// Gathers four 32-bit integers from memory at `slice + offsets[i] * SCALE`
/// using 32-bit signed offsets.
///
/// # Safety
///
/// Every computed address must be valid to read as an `i32`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    // All-ones mask: gather every lane unconditionally.
    let neg_one = _mm_set1_epi32(-1).as_i32x4();
    let offsets = offsets.as_i32x4();
    // Byte-granular base pointer, as the gather intrinsic expects.
    let slice = slice as *const i8;
    let r = pgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}
1075
/// Gathers four 32-bit integers from memory at `slice + offsets[i] * SCALE`
/// using 32-bit signed offsets. Lanes whose `mask` high bit is clear are taken
/// from `src` instead of memory.
///
/// # Safety
///
/// Every address whose mask lane is enabled must be valid to read as an `i32`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i32x4();
    // Byte-granular base pointer, as the gather intrinsic expects.
    let slice = slice as *const i8;
    let r = pgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
1101
/// Gathers eight 32-bit integers from memory at `slice + offsets[i] * SCALE`
/// using 32-bit signed offsets.
///
/// # Safety
///
/// Every computed address must be valid to read as an `i32`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x8::ZERO;
    // All-ones mask: gather every lane unconditionally.
    let neg_one = _mm256_set1_epi32(-1).as_i32x8();
    let offsets = offsets.as_i32x8();
    // Byte-granular base pointer, as the gather intrinsic expects.
    let slice = slice as *const i8;
    let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}
1124
/// Gathers eight 32-bit integers from memory at `slice + offsets[i] * SCALE`
/// using 32-bit signed offsets. Lanes whose `mask` high bit is clear are taken
/// from `src` instead of memory.
///
/// # Safety
///
/// Every address whose mask lane is enabled must be valid to read as an `i32`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi32<const SCALE: i32>(
    src: __m256i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x8();
    let mask = mask.as_i32x8();
    let offsets = offsets.as_i32x8();
    // Byte-granular base pointer, as the gather intrinsic expects.
    let slice = slice as *const i8;
    let r = vpgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
1150
/// Gathers four single-precision floats from memory at
/// `slice + offsets[i] * SCALE` using 32-bit signed offsets.
///
/// # Safety
///
/// Every computed address must be valid to read as an `f32`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    // -1.0 has its sign bit set in every lane: gather every lane unconditionally.
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i32x4();
    // Byte-granular base pointer, as the gather intrinsic expects.
    let slice = slice as *const i8;
    pgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
}
1169
/// Gathers four single-precision floats from memory at
/// `slice + offsets[i] * SCALE` using 32-bit signed offsets. Lanes whose
/// `mask` sign bit is clear are taken from `src` instead of memory.
///
/// # Safety
///
/// Every address whose mask lane is enabled must be valid to read as an `f32`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    // Byte-granular base pointer, as the gather intrinsic expects.
    let slice = slice as *const i8;
    pgatherdps(src, slice, offsets, mask, SCALE as i8)
}
1192
1193#[inline]
1199#[target_feature(enable = "avx2")]
1200#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
1201#[rustc_legacy_const_generics(2)]
1202#[stable(feature = "simd_x86", since = "1.27.0")]
1203pub unsafe fn _mm256_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m256 {
1204 static_assert_imm8_scale!(SCALE);
1205 let zero = _mm256_setzero_ps();
1206 let neg_one = _mm256_set1_ps(-1.0);
1207 let offsets = offsets.as_i32x8();
1208 let slice = slice as *const i8;
1209 vpgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
1210}
1211
1212#[inline]
1219#[target_feature(enable = "avx2")]
1220#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
1221#[rustc_legacy_const_generics(4)]
1222#[stable(feature = "simd_x86", since = "1.27.0")]
1223pub unsafe fn _mm256_mask_i32gather_ps<const SCALE: i32>(
1224 src: __m256,
1225 slice: *const f32,
1226 offsets: __m256i,
1227 mask: __m256,
1228) -> __m256 {
1229 static_assert_imm8_scale!(SCALE);
1230 let offsets = offsets.as_i32x8();
1231 let slice = slice as *const i8;
1232 vpgatherdps(src, slice, offsets, mask, SCALE as i8)
1233}
1234
/// Gathers two `i64` values from `slice` at byte addresses
/// `slice + offsets[i] * SCALE` (only the low two 32-bit offsets are used);
/// `SCALE` must be 1, 2, 4 or 8.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x2::ZERO;
    // All-ones mask: every lane is loaded.
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Masked variant of `_mm_i32gather_epi64`: lanes whose `mask` element has
/// its most-significant bit clear are taken from `src` instead of loaded.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi64<const SCALE: i32>(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

/// Gathers four `i64` values from `slice` at byte addresses
/// `slice + offsets[i] * SCALE`; `SCALE` must be 1, 2, 4 or 8.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x4::ZERO;
    // All-ones mask: every lane is loaded.
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Masked variant of `_mm256_i32gather_epi64`: lanes whose `mask` element
/// has its most-significant bit clear are taken from `src` instead of loaded.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi64<const SCALE: i32>(
    src: __m256i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = vpgatherdq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
1332
/// Gathers two `f64` values from `slice` at byte addresses
/// `slice + offsets[i] * SCALE` (only the low two 32-bit offsets are used);
/// `SCALE` must be 1, 2, 4 or 8.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_pd();
    // -1.0 has all bits set, so every lane's mask MSB is set: gather all lanes.
    let neg_one = _mm_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Masked variant of `_mm_i32gather_pd`: lanes whose `mask` element has its
/// most-significant bit clear are taken from `src` instead of loaded.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_pd<const SCALE: i32>(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdpd(src, slice, offsets, mask, SCALE as i8)
}

/// Gathers four `f64` values from `slice` at byte addresses
/// `slice + offsets[i] * SCALE`; `SCALE` must be 1, 2, 4 or 8.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_pd<const SCALE: i32>(
    slice: *const f64,
    offsets: __m128i,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    // All-ones bit pattern selects every lane for loading.
    let neg_one = _mm256_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Masked variant of `_mm256_i32gather_pd`: lanes whose `mask` element has
/// its most-significant bit clear are taken from `src` instead of loaded.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_pd<const SCALE: i32>(
    src: __m256d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m256d,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(src, slice, offsets, mask, SCALE as i8)
}
1419
/// Gathers two `i32` values from `slice` at byte addresses
/// `slice + offsets[i] * SCALE`, using 64-bit offsets; `SCALE` must be
/// 1, 2, 4 or 8. The upper two lanes of the result are zeroed.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    // -1 as i64 is all ones, so reinterpreting as i32x4 still sets every
    // lane's MSB: all lanes are gathered.
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Masked variant of `_mm_i64gather_epi32`: lanes whose `mask` element has
/// its most-significant bit clear are taken from `src` instead of loaded.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

/// Gathers four `i32` values from `slice` at byte addresses
/// `slice + offsets[i] * SCALE`, using four 64-bit offsets; `SCALE` must be
/// 1, 2, 4 or 8. The result fits in a 128-bit vector.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m256i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    // All-ones mask: every lane is loaded.
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Masked variant of `_mm256_i64gather_epi32`: lanes whose `mask` element
/// has its most-significant bit clear are taken from `src` instead of loaded.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
1517
/// Gathers two `f32` values from `slice` at byte addresses
/// `slice + offsets[i] * SCALE`, using 64-bit offsets; `SCALE` must be
/// 1, 2, 4 or 8. The upper two lanes of the result are zeroed.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    // -1.0 has all bits set: every lane is gathered.
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Masked variant of `_mm_i64gather_ps`: lanes whose `mask` element has its
/// most-significant bit clear are taken from `src` instead of loaded.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(src, slice, offsets, mask, SCALE as i8)
}

/// Gathers four `f32` values from `slice` at byte addresses
/// `slice + offsets[i] * SCALE`, using four 64-bit offsets; `SCALE` must be
/// 1, 2, 4 or 8. The result fits in a 128-bit vector.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    // All-ones bit pattern selects every lane for loading.
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Masked variant of `_mm256_i64gather_ps`: lanes whose `mask` element has
/// its most-significant bit clear are taken from `src` instead of loaded.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m256i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(src, slice, offsets, mask, SCALE as i8)
}
1601
/// Gathers two `i64` values from `slice` at byte addresses
/// `slice + offsets[i] * SCALE`, using 64-bit offsets; `SCALE` must be
/// 1, 2, 4 or 8.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x2::ZERO;
    // All-ones mask: every lane is loaded.
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    let r = pgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Masked variant of `_mm_i64gather_epi64`: lanes whose `mask` element has
/// its most-significant bit clear are taken from `src` instead of loaded.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi64<const SCALE: i32>(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

/// Gathers four `i64` values from `slice` at byte addresses
/// `slice + offsets[i] * SCALE`, using four 64-bit offsets; `SCALE` must be
/// 1, 2, 4 or 8.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x4::ZERO;
    // All-ones mask: every lane is loaded.
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Masked variant of `_mm256_i64gather_epi64`: lanes whose `mask` element
/// has its most-significant bit clear are taken from `src` instead of loaded.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi64<const SCALE: i32>(
    src: __m256i,
    slice: *const i64,
    offsets: __m256i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
1699
/// Gathers two `f64` values from `slice` at byte addresses
/// `slice + offsets[i] * SCALE`, using 64-bit offsets; `SCALE` must be
/// 1, 2, 4 or 8.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_pd();
    // -1.0 has all bits set: every lane is gathered.
    let neg_one = _mm_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Masked variant of `_mm_i64gather_pd`: lanes whose `mask` element has its
/// most-significant bit clear are taken from `src` instead of loaded.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_pd<const SCALE: i32>(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(src, slice, offsets, mask, SCALE as i8)
}

/// Gathers four `f64` values from `slice` at byte addresses
/// `slice + offsets[i] * SCALE`, using four 64-bit offsets; `SCALE` must be
/// 1, 2, 4 or 8.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_pd<const SCALE: i32>(
    slice: *const f64,
    offsets: __m256i,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    // All-ones bit pattern selects every lane for loading.
    let neg_one = _mm256_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Masked variant of `_mm256_i64gather_pd`: lanes whose `mask` element has
/// its most-significant bit clear are taken from `src` instead of loaded.
///
/// Unsafe: loads through a raw pointer with unchecked offsets.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_pd<const SCALE: i32>(
    src: __m256d,
    slice: *const f64,
    offsets: __m256i,
    mask: __m256d,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(src, slice, offsets, mask, SCALE as i8)
}
1786
/// Copies `a` to the result, then overwrites either the low (IMM1 == 0) or
/// high (IMM1 == 1) 128-bit half of the result with `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        // Widen b to 256 bits so both shuffle inputs have the same length;
        // indices 4..5 select b's (low) lanes, 0..3 select a's lanes.
        let b = _mm256_castsi128_si256(b).as_i64x4();
        let dst: i64x4 = simd_shuffle!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]);
        transmute(dst)
    }
}
1806
/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate 32-bit products, and horizontally adds adjacent pairs of
/// products (`vpmaddwd`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmaddwd(a.as_i16x16(), b.as_i16x16())) }
}

/// Multiplies unsigned 8-bit integers from `a` by the corresponding signed
/// 8-bit integers from `b`, then horizontally adds adjacent pairs of the
/// 16-bit products with signed saturation (`vpmaddubsw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_i8x32())) }
}
1845
/// Loads packed 32-bit integers from `mem_addr` for lanes whose `mask`
/// element has its most-significant bit set; other lanes are zeroed.
///
/// Unsafe: dereferences a raw pointer (no alignment requirement).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
    // Arithmetic shift by 31 broadcasts each lane's sign bit, yielding an
    // all-ones/all-zeros per-lane mask.
    let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x4::ZERO).as_m128i()
}

/// 256-bit variant of `_mm_maskload_epi32`: loads eight 32-bit integers
/// for lanes whose `mask` element has its most-significant bit set.
///
/// Unsafe: dereferences a raw pointer (no alignment requirement).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
    let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x8::ZERO).as_m256i()
}

/// Loads packed 64-bit integers from `mem_addr` for lanes whose `mask`
/// element has its most-significant bit set; other lanes are zeroed.
///
/// Unsafe: dereferences a raw pointer (no alignment requirement).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
    // Shift by 63 broadcasts each 64-bit lane's sign bit.
    let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x2::ZERO).as_m128i()
}

/// 256-bit variant of `_mm_maskload_epi64`: loads four 64-bit integers
/// for lanes whose `mask` element has its most-significant bit set.
///
/// Unsafe: dereferences a raw pointer (no alignment requirement).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
    let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x4::ZERO).as_m256i()
}
1905
/// Stores packed 32-bit integers from `a` to `mem_addr` for lanes whose
/// `mask` element has its most-significant bit set; other lanes are left
/// untouched in memory.
///
/// Unsafe: writes through a raw pointer (no alignment requirement).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
    // Arithmetic shift by 31 broadcasts each lane's sign bit into a full
    // per-lane write mask.
    let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4())
}

/// 256-bit variant of `_mm_maskstore_epi32`: conditionally stores eight
/// 32-bit integers.
///
/// Unsafe: writes through a raw pointer (no alignment requirement).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
    let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8())
}

/// Stores packed 64-bit integers from `a` to `mem_addr` for lanes whose
/// `mask` element has its most-significant bit set.
///
/// Unsafe: writes through a raw pointer (no alignment requirement).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
    let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2())
}

/// 256-bit variant of `_mm_maskstore_epi64`: conditionally stores four
/// 64-bit integers.
///
/// Unsafe: writes through a raw pointer (no alignment requirement).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
    let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4())
}
1965
/// Compares packed signed 16-bit integers in `a` and `b`, returning the
/// lane-wise maximum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imax(a.as_i16x16(), b.as_i16x16()).as_m256i() }
}

/// Compares packed signed 32-bit integers in `a` and `b`, returning the
/// lane-wise maximum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imax(a.as_i32x8(), b.as_i32x8()).as_m256i() }
}

/// Compares packed signed 8-bit integers in `a` and `b`, returning the
/// lane-wise maximum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imax(a.as_i8x32(), b.as_i8x32()).as_m256i() }
}

/// Compares packed unsigned 16-bit integers in `a` and `b`, returning the
/// lane-wise maximum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imax(a.as_u16x16(), b.as_u16x16()).as_m256i() }
}

/// Compares packed unsigned 32-bit integers in `a` and `b`, returning the
/// lane-wise maximum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imax(a.as_u32x8(), b.as_u32x8()).as_m256i() }
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, returning the
/// lane-wise maximum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imax(a.as_u8x32(), b.as_u8x32()).as_m256i() }
}
2043
/// Compares packed signed 16-bit integers in `a` and `b`, returning the
/// lane-wise minimum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imin(a.as_i16x16(), b.as_i16x16()).as_m256i() }
}

/// Compares packed signed 32-bit integers in `a` and `b`, returning the
/// lane-wise minimum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imin(a.as_i32x8(), b.as_i32x8()).as_m256i() }
}

/// Compares packed signed 8-bit integers in `a` and `b`, returning the
/// lane-wise minimum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imin(a.as_i8x32(), b.as_i8x32()).as_m256i() }
}

/// Compares packed unsigned 16-bit integers in `a` and `b`, returning the
/// lane-wise minimum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imin(a.as_u16x16(), b.as_u16x16()).as_m256i() }
}

/// Compares packed unsigned 32-bit integers in `a` and `b`, returning the
/// lane-wise minimum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imin(a.as_u32x8(), b.as_u32x8()).as_m256i() }
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, returning the
/// lane-wise minimum.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imin(a.as_u8x32(), b.as_u8x32()).as_m256i() }
}
2121
/// Creates a 32-bit mask from the most significant bit of each byte of `a`:
/// bit `i` of the result is the sign bit of byte `i`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_movemask_epi8(a: __m256i) -> i32 {
    unsafe {
        let z = i8x32::ZERO;
        // A byte is negative iff its most significant bit is set.
        let m: i8x32 = simd_lt(a.as_i8x32(), z);
        simd_bitmask::<_, u32>(m) as i32
    }
}
2138
/// Computes sums of absolute differences of unsigned 8-bit integers
/// (`vmpsadbw`); `IMM8` selects the 4-byte groups used from `a` and `b`,
/// per 128-bit lane.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmpsadbw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8 as i8)) }
}
2157
/// Multiplies the low signed 32-bit integer of each 64-bit element of `a`
/// and `b`, producing signed 64-bit products (`vpmuldq`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Cast i64x4 -> i32x4 truncates each element to its low 32 bits,
        // then casting back sign-extends, matching vpmuldq semantics.
        let a = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4()));
        let b = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4()));
        transmute(simd_mul(a, b))
    }
}

/// Multiplies the low unsigned 32-bit integer of each 64-bit element of `a`
/// and `b`, producing unsigned 64-bit products (`vpmuludq`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u64x4();
        let b = b.as_u64x4();
        // Masking with 0xFFFF_FFFF keeps only the low 32 bits of each lane.
        let mask = u64x4::splat(u32::MAX as u64);
        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
    }
}
2196
/// Multiplies packed signed 16-bit integers in `a` and `b`, returning the
/// high 16 bits of each 32-bit product (`vpmulhw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Widen to 32 bits so the full product is computed, then take the
        // high half via an arithmetic shift and narrow back to 16 bits.
        let a = simd_cast::<_, i32x16>(a.as_i16x16());
        let b = simd_cast::<_, i32x16>(b.as_i16x16());
        let r = simd_shr(simd_mul(a, b), i32x16::splat(16));
        transmute(simd_cast::<i32x16, i16x16>(r))
    }
}

/// Multiplies packed unsigned 16-bit integers in `a` and `b`, returning the
/// high 16 bits of each 32-bit product (`vpmulhuw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Same widen/multiply/shift/narrow scheme, in the unsigned domain.
        let a = simd_cast::<_, u32x16>(a.as_u16x16());
        let b = simd_cast::<_, u32x16>(b.as_u16x16());
        let r = simd_shr(simd_mul(a, b), u32x16::splat(16));
        transmute(simd_cast::<u32x16, u16x16>(r))
    }
}
2234
/// Multiplies packed 16-bit integers in `a` and `b`, returning the low
/// 16 bits of each product (`vpmullw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_mul(a.as_i16x16(), b.as_i16x16())) }
}

/// Multiplies packed 32-bit integers in `a` and `b`, returning the low
/// 32 bits of each product (`vpmulld`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_mul(a.as_i32x8(), b.as_i32x8())) }
}

/// Multiplies packed signed 16-bit integers, truncating each 32-bit product
/// to its high 18 bits and rounding by adding 1 to bit 0 before taking
/// bits [16:1] (`vpmulhrsw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16())) }
}
2276
/// Computes the bitwise OR of the 256 bits in `a` and `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
    // Lane width is irrelevant for a bitwise op; i32x8 is just a convenient view.
    unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
}
2289
/// Converts packed 16-bit integers from `a` and `b` to 8-bit integers using
/// signed saturation (`vpacksswb`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpacksswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packsswb(a.as_i16x16(), b.as_i16x16())) }
}

/// Converts packed 32-bit integers from `a` and `b` to 16-bit integers using
/// signed saturation (`vpackssdw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackssdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packssdw(a.as_i32x8(), b.as_i32x8())) }
}

/// Converts packed signed 16-bit integers from `a` and `b` to 8-bit integers
/// using unsigned saturation (`vpackuswb`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackuswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packuswb(a.as_i16x16(), b.as_i16x16())) }
}

/// Converts packed signed 32-bit integers from `a` and `b` to 16-bit
/// integers using unsigned saturation (`vpackusdw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackusdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packusdw(a.as_i32x8(), b.as_i32x8())) }
}
2337
/// Permutes the eight 32-bit integers of `a` across lanes using the indices
/// in `b` (`vpermd`; the compiler may emit `vpermps` for the same operation).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(permd(a.as_u32x8(), b.as_u32x8())) }
}

/// Permutes the four 64-bit integers of `a`: each 2-bit field of `IMM8`
/// selects the source lane for the corresponding result lane.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        // Second shuffle input is unused (all indices stay in 0..=3); a zero
        // vector keeps the expression const-evaluable.
        let zero = i64x4::ZERO;
        let r: i64x4 = simd_shuffle!(
            a.as_i64x4(),
            zero,
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
            ],
        );
        transmute(r)
    }
}
2378
/// Shuffles 128-bit halves selected by `IMM8` from `a` and `b`; delegates to
/// [`_mm256_permute2f128_si256`], which implements the same operation.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_permute2f128_si256::<IMM8>(a, b)
}
2392
/// Permutes the four `f64` lanes of `a` using the immediate `IMM8`; each
/// consecutive 2-bit field of `IMM8` selects the source lane for the
/// corresponding result lane.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        // The undefined second operand is never selected: all indices are in
        // 0..=3 and therefore address only `a`.
        simd_shuffle!(
            a,
            _mm256_undefined_pd(),
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
            ],
        )
    }
}
2418
/// Permutes packed `f32` elements of `a` across the whole 256-bit vector
/// according to the indices in `idx` (`llvm.x86.avx2.permps` / `vpermps`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 {
    unsafe { permps(a, idx.as_i32x8()) }
}
2430
/// Computes the sum of absolute differences of packed unsigned 8-bit integers
/// in `a` and `b` (`llvm.x86.avx2.psad.bw` / `vpsadbw`); the intrinsic
/// produces one `u64` result per 8-byte group.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psadbw(a.as_u8x32(), b.as_u8x32())) }
}
2444
/// Shuffles bytes of `a` according to the control bytes in `b`
/// (`llvm.x86.avx2.pshuf.b` / `vpshufb`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshufb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pshufb(a.as_u8x32(), b.as_u8x32())) }
}
2482
/// Shuffles 32-bit integers within each 128-bit lane of `a` using `MASK`;
/// each consecutive 2-bit field of `MASK` selects the source element within
/// the lane, and the same pattern is applied to both lanes.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(MASK, 8);
    unsafe {
        // Indices 0..=3 pick from the low 128-bit lane; the `+ 4` variants
        // repeat the same selection in the high lane.
        let r: i32x8 = simd_shuffle!(
            a.as_i32x8(),
            a.as_i32x8(),
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                (MASK as u32 >> 4) & 0b11,
                (MASK as u32 >> 6) & 0b11,
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
            ],
        );
        transmute(r)
    }
}
2539
/// Shuffles the high four 16-bit elements of each 128-bit lane of `a` using
/// `IMM8` (2-bit selector per element); the low four elements of each lane
/// are passed through unchanged.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        // Elements 0..=3 and 8..=11 (low halves of each lane) are copied
        // verbatim; the `4 +` / `12 +` entries shuffle the high halves.
        let r: i16x16 = simd_shuffle!(
            a,
            a,
            [
                0,
                1,
                2,
                3,
                4 + (IMM8 as u32 & 0b11),
                4 + ((IMM8 as u32 >> 2) & 0b11),
                4 + ((IMM8 as u32 >> 4) & 0b11),
                4 + ((IMM8 as u32 >> 6) & 0b11),
                8,
                9,
                10,
                11,
                12 + (IMM8 as u32 & 0b11),
                12 + ((IMM8 as u32 >> 2) & 0b11),
                12 + ((IMM8 as u32 >> 4) & 0b11),
                12 + ((IMM8 as u32 >> 6) & 0b11),
            ],
        );
        transmute(r)
    }
}
2580
/// Shuffles the low four 16-bit elements of each 128-bit lane of `a` using
/// `IMM8` (2-bit selector per element); the high four elements of each lane
/// are passed through unchanged.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        // The `0 +` / `8 +` entries shuffle the low halves of each lane;
        // elements 4..=7 and 12..=15 (high halves) are copied verbatim.
        let r: i16x16 = simd_shuffle!(
            a,
            a,
            [
                0 + (IMM8 as u32 & 0b11),
                0 + ((IMM8 as u32 >> 2) & 0b11),
                0 + ((IMM8 as u32 >> 4) & 0b11),
                0 + ((IMM8 as u32 >> 6) & 0b11),
                4,
                5,
                6,
                7,
                8 + (IMM8 as u32 & 0b11),
                8 + ((IMM8 as u32 >> 2) & 0b11),
                8 + ((IMM8 as u32 >> 4) & 0b11),
                8 + ((IMM8 as u32 >> 6) & 0b11),
                12,
                13,
                14,
                15,
            ],
        );
        transmute(r)
    }
}
2621
/// Negates, zeroes, or passes through packed 16-bit integers in `a` depending
/// on the sign of the corresponding element in `b`
/// (`llvm.x86.avx2.psign.w` / `vpsignw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psignw(a.as_i16x16(), b.as_i16x16())) }
}
2634
/// Negates, zeroes, or passes through packed 32-bit integers in `a` depending
/// on the sign of the corresponding element in `b`
/// (`llvm.x86.avx2.psign.d` / `vpsignd`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psignd(a.as_i32x8(), b.as_i32x8())) }
}
2647
/// Negates, zeroes, or passes through packed 8-bit integers in `a` depending
/// on the sign of the corresponding element in `b`
/// (`llvm.x86.avx2.psign.b` / `vpsignb`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psignb(a.as_i8x32(), b.as_i8x32())) }
}
2660
/// Shifts packed 16-bit integers in `a` left by the count in `count`
/// (`llvm.x86.avx2.psll.w` / `vpsllw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psllw(a.as_i16x16(), count.as_i16x8())) }
}
2672
/// Shifts packed 32-bit integers in `a` left by the count in `count`
/// (`llvm.x86.avx2.psll.d` / `vpslld`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(pslld(a.as_i32x8(), count.as_i32x4())) }
}
2684
/// Shifts packed 64-bit integers in `a` left by the count in `count`
/// (`llvm.x86.avx2.psll.q` / `vpsllq`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psllq(a.as_i64x4(), count.as_i64x2())) }
}
2696
/// Shifts packed 16-bit integers in `a` left by `IMM8`, returning all zeros
/// when the shift amount is 16 or more (matching the hardware behavior, which
/// `simd_shl` alone would not provide — an over-wide shift is UB there).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            _mm256_setzero_si256()
        } else {
            transmute(simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
        }
    }
}
2717
2718#[inline]
2723#[target_feature(enable = "avx2")]
2724#[cfg_attr(test, assert_instr(vpslld, IMM8 = 7))]
2725#[rustc_legacy_const_generics(1)]
2726#[stable(feature = "simd_x86", since = "1.27.0")]
2727#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2728pub const fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2729 unsafe {
2730 static_assert_uimm_bits!(IMM8, 8);
2731 if IMM8 >= 32 {
2732 _mm256_setzero_si256()
2733 } else {
2734 transmute(simd_shl(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
2735 }
2736 }
2737}
2738
2739#[inline]
2744#[target_feature(enable = "avx2")]
2745#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 7))]
2746#[rustc_legacy_const_generics(1)]
2747#[stable(feature = "simd_x86", since = "1.27.0")]
2748#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2749pub const fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2750 unsafe {
2751 static_assert_uimm_bits!(IMM8, 8);
2752 if IMM8 >= 64 {
2753 _mm256_setzero_si256()
2754 } else {
2755 transmute(simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
2756 }
2757 }
2758}
2759
/// Shifts each 128-bit lane of `a` left by `IMM8` bytes, shifting in zeros;
/// alias for [`_mm256_bslli_epi128`].
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_bslli_epi128::<IMM8>(a)
}
2773
/// Shifts each 128-bit lane of `a` left by `IMM8` bytes, shifting in zeros
/// (`vpslldq`). A shift of more than 15 bytes zeroes the lane.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    // Computes the shuffle index for result byte `i`: index 0 selects a zero
    // byte (from the first shuffle operand), 32 + k selects byte `k` of `a`.
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 || i % 16 < shift {
            0
        } else {
            32 + (i - shift)
        }
    }
    unsafe {
        let a = a.as_i8x32();
        let r: i8x32 = simd_shuffle!(
            i8x32::ZERO,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
            ],
        );
        transmute(r)
    }
}
2836
/// Shifts each 32-bit integer in `a` left by the corresponding element of
/// `count`, producing zero for any element whose count is 32 or more
/// (`vpsllvd` semantics; `simd_shl` alone would be UB for such counts).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u32x4();
        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
        // Clamp oversized counts to 0 so the shift itself is always defined;
        // the final select zeroes those elements anyway.
        let count = simd_select(no_overflow, count, u32x4::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
    }
}
2855
/// Shifts each 32-bit integer in `a` left by the corresponding element of
/// `count`, producing zero for any element whose count is 32 or more
/// (`vpsllvd` semantics; `simd_shl` alone would be UB for such counts).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u32x8();
        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
        // Clamp oversized counts to 0 so the shift itself is always defined;
        // the final select zeroes those elements anyway.
        let count = simd_select(no_overflow, count, u32x8::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
    }
}
2874
/// Shifts each 64-bit integer in `a` left by the corresponding element of
/// `count`, producing zero for any element whose count is 64 or more
/// (`vpsllvq` semantics; `simd_shl` alone would be UB for such counts).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u64x2();
        let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
        // Clamp oversized counts to 0 so the shift itself is always defined;
        // the final select zeroes those elements anyway.
        let count = simd_select(no_overflow, count, u64x2::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
    }
}
2893
/// Shifts each 64-bit integer in `a` left by the corresponding element of
/// `count`, producing zero for any element whose count is 64 or more
/// (`vpsllvq` semantics; `simd_shl` alone would be UB for such counts).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u64x4();
        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
        // Clamp oversized counts to 0 so the shift itself is always defined;
        // the final select zeroes those elements anyway.
        let count = simd_select(no_overflow, count, u64x4::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
    }
}
2912
/// Shifts packed 16-bit integers in `a` right arithmetically (sign-extending)
/// by the count in `count` (`llvm.x86.avx2.psra.w` / `vpsraw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psraw(a.as_i16x16(), count.as_i16x8())) }
}
2924
/// Shifts packed 32-bit integers in `a` right arithmetically (sign-extending)
/// by the count in `count` (`llvm.x86.avx2.psra.d` / `vpsrad`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrad(a.as_i32x8(), count.as_i32x4())) }
}
2936
/// Shifts packed 16-bit integers in `a` right arithmetically by `IMM8`;
/// counts above 15 are clamped to 15 (sign fill), so the shift stays defined.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16))) }
}
2951
/// Shifts packed 32-bit integers in `a` right arithmetically by `IMM8`;
/// counts above 31 are clamped to 31 (sign fill), so the shift stays defined.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31)))) }
}
2966
/// Shifts each 32-bit integer in `a` right arithmetically by the
/// corresponding element of `count`; counts of 32 or more are replaced by 31
/// so oversized shifts yield the sign fill (`vpsravd` semantics).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u32x4();
        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
        let count = simd_select(no_overflow, transmute(count), i32x4::splat(31));
        simd_shr(a.as_i32x4(), count).as_m128i()
    }
}
2984
/// Shifts each 32-bit integer in `a` right arithmetically by the
/// corresponding element of `count`; counts of 32 or more are replaced by 31
/// so oversized shifts yield the sign fill (`vpsravd` semantics).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u32x8();
        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
        let count = simd_select(no_overflow, transmute(count), i32x8::splat(31));
        simd_shr(a.as_i32x8(), count).as_m256i()
    }
}
3002
/// Shifts each 128-bit lane of `a` right by `IMM8` bytes, shifting in zeros;
/// alias for [`_mm256_bsrli_epi128`].
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_bsrli_epi128::<IMM8>(a)
}
3016
/// Shifts each 128-bit lane of `a` right by `IMM8` bytes, shifting in zeros
/// (`vpsrldq`). A shift of more than 15 bytes zeroes the lane.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    // Computes the shuffle index for result byte `i`: index 0 selects a zero
    // byte (from the first shuffle operand), 32 + k selects byte `k` of `a`.
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 || (15 - (i % 16)) < shift {
            0
        } else {
            32 + (i + shift)
        }
    }
    unsafe {
        let a = a.as_i8x32();
        let r: i8x32 = simd_shuffle!(
            i8x32::ZERO,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
            ],
        );
        transmute(r)
    }
}
3079
/// Shifts packed 16-bit integers in `a` right logically (zero fill) by the
/// count in `count` (`llvm.x86.avx2.psrl.w` / `vpsrlw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrlw(a.as_i16x16(), count.as_i16x8())) }
}
3091
/// Shifts packed 32-bit integers in `a` right logically (zero fill) by the
/// count in `count` (`llvm.x86.avx2.psrl.d` / `vpsrld`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrld(a.as_i32x8(), count.as_i32x4())) }
}
3103
/// Shifts packed 64-bit integers in `a` right logically (zero fill) by the
/// count in `count` (`llvm.x86.avx2.psrl.q` / `vpsrlq`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrlq(a.as_i64x4(), count.as_i64x2())) }
}
3115
/// Shifts packed 16-bit integers in `a` right logically by `IMM8`, returning
/// all zeros when the shift amount is 16 or more (matching the hardware
/// behavior; `simd_shr` alone would be UB for such counts).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            _mm256_setzero_si256()
        } else {
            transmute(simd_shr(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
        }
    }
}
3136
/// Shifts packed 32-bit integers in `a` right logically by `IMM8`, returning
/// all zeros when the shift amount is 32 or more (matching the hardware
/// behavior; `simd_shr` alone would be UB for such counts).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 32 {
            _mm256_setzero_si256()
        } else {
            transmute(simd_shr(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
        }
    }
}
3157
/// Shifts packed 64-bit integers in `a` right logically by `IMM8`, returning
/// all zeros when the shift amount is 64 or more (matching the hardware
/// behavior; `simd_shr` alone would be UB for such counts).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 64 {
            _mm256_setzero_si256()
        } else {
            transmute(simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
        }
    }
}
3178
/// Shifts each 32-bit integer in `a` right logically by the corresponding
/// element of `count`, producing zero for any element whose count is 32 or
/// more (`vpsrlvd` semantics; `simd_shr` alone would be UB for such counts).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u32x4();
        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
        // Clamp oversized counts to 0 so the shift itself is always defined;
        // the final select zeroes those elements anyway.
        let count = simd_select(no_overflow, count, u32x4::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
    }
}
3196
/// Shifts each 32-bit integer in `a` right logically by the corresponding
/// element of `count`, producing zero for any element whose count is 32 or
/// more (`vpsrlvd` semantics; `simd_shr` alone would be UB for such counts).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u32x8();
        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
        // Clamp oversized counts to 0 so the shift itself is always defined;
        // the final select zeroes those elements anyway.
        let count = simd_select(no_overflow, count, u32x8::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
    }
}
3214
/// Shifts each 64-bit integer in `a` right logically by the corresponding
/// element of `count`, producing zero for any element whose count is 64 or
/// more (`vpsrlvq` semantics; `simd_shr` alone would be UB for such counts).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u64x2();
        let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
        // Clamp oversized counts to 0 so the shift itself is always defined;
        // the final select zeroes those elements anyway.
        let count = simd_select(no_overflow, count, u64x2::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
    }
}
3232
/// Shifts each 64-bit integer in `a` right logically by the corresponding
/// element of `count`, producing zero for any element whose count is 64 or
/// more (`vpsrlvq` semantics; `simd_shr` alone would be UB for such counts).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u64x4();
        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
        // Clamp oversized counts to 0 so the shift itself is always defined;
        // the final select zeroes those elements anyway.
        let count = simd_select(no_overflow, count, u64x4::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
    }
}
3250
/// Loads 256 bits from `mem_addr` using a non-temporal load (`vmovntdqa`),
/// implemented via inline assembly so the hint is not optimized away.
///
/// # Safety
///
/// `mem_addr` must be valid for a 256-bit read; the `vmovntdqa` instruction
/// requires its memory operand to be 32-byte aligned (NOTE(review): alignment
/// requirement taken from the ISA, not visible in this code — confirm against
/// the instruction reference).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovntdqa))]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm256_stream_load_si256(mem_addr: *const __m256i) -> __m256i {
    let dst: __m256i;
    crate::arch::asm!(
        vpl!("vmovntdqa {a}"),
        a = out(ymm_reg) dst,
        p = in(reg) mem_addr,
        // pure + readonly: the asm only reads *mem_addr and writes `dst`.
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
3270
/// Subtracts packed 16-bit integers in `b` from `a` with wrapping semantics
/// (`vpsubw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i16x16(), b.as_i16x16())) }
}
3282
/// Subtracts packed 32-bit integers in `b` from `a` with wrapping semantics
/// (`vpsubd`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i32x8(), b.as_i32x8())) }
}
3294
/// Subtracts packed 64-bit integers in `b` from `a` with wrapping semantics
/// (`vpsubq`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i64x4(), b.as_i64x4())) }
}
3306
/// Subtracts packed 8-bit integers in `b` from `a` with wrapping semantics
/// (`vpsubb`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i8x32(), b.as_i8x32())) }
}
3318
/// Subtracts packed signed 16-bit integers in `b` from `a` using signed
/// saturation (`vpsubsw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16())) }
}
3331
/// Subtracts packed signed 8-bit integers in `b` from `a` using signed
/// saturation (`vpsubsb`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32())) }
}
3344
/// Subtracts packed unsigned 16-bit integers in `b` from `a` using unsigned
/// saturation (`vpsubusw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16())) }
}
3357
/// Subtracts packed unsigned 8-bit integers in `b` from `a` using unsigned
/// saturation (`vpsubusb`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32())) }
}
3370
/// Interleaves the high 8 bytes of each 128-bit lane of `a` and `b`
/// (`vpunpckhbw`): within each lane the result alternates bytes of `a`
/// (indices 0..=31) and `b` (indices 32..=63).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        #[rustfmt::skip]
        let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
            8, 40, 9, 41, 10, 42, 11, 43,
            12, 44, 13, 45, 14, 46, 15, 47,
            24, 56, 25, 57, 26, 58, 27, 59,
            28, 60, 29, 61, 30, 62, 31, 63,
        ]);
        transmute(r)
    }
}
3427
/// Interleaves the low 8 bytes of each 128-bit lane of `a` and `b`
/// (`vpunpcklbw`): within each lane the result alternates bytes of `a`
/// (indices 0..=31) and `b` (indices 32..=63).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        #[rustfmt::skip]
        let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
            0, 32, 1, 33, 2, 34, 3, 35,
            4, 36, 5, 37, 6, 38, 7, 39,
            16, 48, 17, 49, 18, 50, 19, 51,
            20, 52, 21, 53, 22, 54, 23, 55,
        ]);
        transmute(r)
    }
}
3483
/// Interleaves the high four 16-bit elements of each 128-bit lane of `a` and
/// `b` (`vpunpckhwd`): indices 0..=15 select from `a`, 16..=31 from `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i16x16 = simd_shuffle!(
            a.as_i16x16(),
            b.as_i16x16(),
            [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
        );
        transmute(r)
    }
}
3533
/// Interleaves the low four 16-bit elements of each 128-bit lane of `a` and
/// `b` (`vpunpcklwd`): indices 0..=15 select from `a`, 16..=31 from `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i16x16 = simd_shuffle!(
            a.as_i16x16(),
            b.as_i16x16(),
            [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
        );
        transmute(r)
    }
}
3584
/// Interleaves the high two 32-bit elements of each 128-bit lane of `a` and
/// `b` (emitted as `vunpckhps`): indices 0..=7 select from `a`, 8..=15 from `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
        transmute(r)
    }
}
3624
/// Interleaves the low two 32-bit elements of each 128-bit lane of `a` and
/// `b` (emitted as `vunpcklps`): indices 0..=7 select from `a`, 8..=15 from `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
        transmute(r)
    }
}
3664
/// Interleaves the high 64-bit element of each 128-bit lane of `a` and `b`
/// (emitted as `vunpckhpd`): indices 0..=3 select from `a`, 4..=7 from `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
        transmute(r)
    }
}
3704
/// Interleaves the low 64-bit element of each 128-bit lane of `a` and `b`
/// (emitted as `vunpcklpd`): indices 0..=3 select from `a`, 4..=7 from `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
        transmute(r)
    }
}
3744
/// Computes the bitwise XOR of the 256 bits in `a` and `b` (emitted as
/// `vxorps`; lane width is irrelevant for a bitwise operation).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
}
3757
/// Extracts the 8-bit element of `a` at position `INDEX` (0..=31) and
/// zero-extends it to an `i32` (extracted as `u8`, then widened).
#[inline]
#[target_feature(enable = "avx2")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
    static_assert_uimm_bits!(INDEX, 5);
    unsafe { simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32 }
}
3774
/// Extracts the 16-bit element of `a` at position `INDEX` (0..=15) and
/// zero-extends it to an `i32` (extracted as `u16`, then widened).
#[inline]
#[target_feature(enable = "avx2")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
    static_assert_uimm_bits!(INDEX, 4);
    unsafe { simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 }
}
3791
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Bindings to LLVM's AVX2 intrinsics. Each `link_name` names the LLVM
    // intrinsic the public `_mm256_*` wrappers lower to; signatures mirror
    // the LLVM IR definitions, not the public Intel-style APIs.

    // Horizontal saturating add/sub and multiply-accumulate.
    #[link_name = "llvm.x86.avx2.phadd.sw"]
    fn phaddsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.phsub.sw"]
    fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.pmadd.wd"]
    fn pmaddwd(a: i16x16, b: i16x16) -> i32x8;
    #[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
    fn pmaddubsw(a: u8x32, b: i8x32) -> i16x16;
    // Multiple sum of absolute differences (immediate selects sub-blocks).
    #[link_name = "llvm.x86.avx2.mpsadbw"]
    fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
    #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
    fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
    // Saturating pack (narrowing) conversions.
    #[link_name = "llvm.x86.avx2.packsswb"]
    fn packsswb(a: i16x16, b: i16x16) -> i8x32;
    #[link_name = "llvm.x86.avx2.packssdw"]
    fn packssdw(a: i32x8, b: i32x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.packuswb"]
    fn packuswb(a: i16x16, b: i16x16) -> u8x32;
    #[link_name = "llvm.x86.avx2.packusdw"]
    fn packusdw(a: i32x8, b: i32x8) -> u16x16;
    #[link_name = "llvm.x86.avx2.psad.bw"]
    fn psadbw(a: u8x32, b: u8x32) -> u64x4;
    // Sign/negate-by-sign-of-operand operations.
    #[link_name = "llvm.x86.avx2.psign.b"]
    fn psignb(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx2.psign.w"]
    fn psignw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.psign.d"]
    fn psignd(a: i32x8, b: i32x8) -> i32x8;
    // Shifts where the count comes from the low element of a 128-bit vector.
    #[link_name = "llvm.x86.avx2.psll.w"]
    fn psllw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psll.d"]
    fn pslld(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psll.q"]
    fn psllq(a: i64x4, count: i64x2) -> i64x4;
    #[link_name = "llvm.x86.avx2.psra.w"]
    fn psraw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psra.d"]
    fn psrad(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrl.w"]
    fn psrlw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psrl.d"]
    fn psrld(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrl.q"]
    fn psrlq(a: i64x4, count: i64x2) -> i64x4;
    // Byte shuffle and cross-lane permutes.
    #[link_name = "llvm.x86.avx2.pshuf.b"]
    fn pshufb(a: u8x32, b: u8x32) -> u8x32;
    #[link_name = "llvm.x86.avx2.permd"]
    fn permd(a: u32x8, b: u32x8) -> u32x8;
    #[link_name = "llvm.x86.avx2.permps"]
    fn permps(a: __m256, b: i32x8) -> __m256;
    // Masked gather loads: `src` supplies lanes whose mask bit is clear,
    // `offsets` are per-lane indices applied to `slice` scaled by `scale`.
    #[link_name = "llvm.x86.avx2.gather.d.d"]
    fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.d.d.256"]
    fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8;
    #[link_name = "llvm.x86.avx2.gather.d.q"]
    fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2;
    #[link_name = "llvm.x86.avx2.gather.d.q.256"]
    fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.q.d"]
    fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.q.d.256"]
    fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.q.q"]
    fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2;
    #[link_name = "llvm.x86.avx2.gather.q.q.256"]
    fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.d.pd"]
    fn pgatherdpd(
        src: __m128d,
        slice: *const i8,
        offsets: i32x4,
        mask: __m128d,
        scale: i8,
    ) -> __m128d;
    #[link_name = "llvm.x86.avx2.gather.d.pd.256"]
    fn vpgatherdpd(
        src: __m256d,
        slice: *const i8,
        offsets: i32x4,
        mask: __m256d,
        scale: i8,
    ) -> __m256d;
    #[link_name = "llvm.x86.avx2.gather.q.pd"]
    fn pgatherqpd(
        src: __m128d,
        slice: *const i8,
        offsets: i64x2,
        mask: __m128d,
        scale: i8,
    ) -> __m128d;
    #[link_name = "llvm.x86.avx2.gather.q.pd.256"]
    fn vpgatherqpd(
        src: __m256d,
        slice: *const i8,
        offsets: i64x4,
        mask: __m256d,
        scale: i8,
    ) -> __m256d;
    #[link_name = "llvm.x86.avx2.gather.d.ps"]
    fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8)
    -> __m128;
    #[link_name = "llvm.x86.avx2.gather.d.ps.256"]
    fn vpgatherdps(
        src: __m256,
        slice: *const i8,
        offsets: i32x8,
        mask: __m256,
        scale: i8,
    ) -> __m256;
    #[link_name = "llvm.x86.avx2.gather.q.ps"]
    fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8)
    -> __m128;
    #[link_name = "llvm.x86.avx2.gather.q.ps.256"]
    fn vpgatherqps(
        src: __m128,
        slice: *const i8,
        offsets: i64x4,
        mask: __m128,
        scale: i8,
    ) -> __m128;
}
3915
3916#[cfg(test)]
3917mod tests {
3918 use crate::core_arch::assert_eq_const as assert_eq;
3919
3920 use stdarch_test::simd_test;
3921
3922 use crate::core_arch::x86::*;
3923
    #[simd_test(enable = "avx2")]
    const fn test_mm256_abs_epi32() {
        // Lane-wise absolute value. abs(i32::MIN) wraps back to i32::MIN,
        // spelled as i32::MAX.wrapping_add(1) in the expected vector.
        #[rustfmt::skip]
        let a = _mm256_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi32(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi32(
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }
3939
    #[simd_test(enable = "avx2")]
    const fn test_mm256_abs_epi16() {
        // Lane-wise absolute value; abs(i16::MIN) wraps back to i16::MIN.
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i16::MAX, i16::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi16(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i16::MAX, i16::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }
3955
    #[simd_test(enable = "avx2")]
    const fn test_mm256_abs_epi8() {
        // Lane-wise absolute value; abs(i8::MIN) wraps back to i8::MIN.
        // The same 16-value pattern fills both 128-bit halves.
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi8(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }
3975
3976 #[simd_test(enable = "avx2")]
3977 const fn test_mm256_add_epi64() {
3978 let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000);
3979 let b = _mm256_setr_epi64x(-1, 0, 1, 2);
3980 let r = _mm256_add_epi64(a, b);
3981 let e = _mm256_setr_epi64x(-11, 0, 101, 1_000_000_002);
3982 assert_eq_m256i(r, e);
3983 }
3984
3985 #[simd_test(enable = "avx2")]
3986 const fn test_mm256_add_epi32() {
3987 let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6);
3988 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3989 let r = _mm256_add_epi32(a, b);
3990 let e = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
3991 assert_eq_m256i(r, e);
3992 }
3993
    #[simd_test(enable = "avx2")]
    const fn test_mm256_add_epi16() {
        // Lane-wise 16-bit addition: each lane is doubled.
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_add_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 2, 4, 6, 8, 10, 12, 14,
            16, 18, 20, 22, 24, 26, 28, 30,
        );
        assert_eq_m256i(r, e);
    }
4014
    #[simd_test(enable = "avx2")]
    const fn test_mm256_add_epi8() {
        // Lane-wise 8-bit addition: each lane is doubled.
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm256_add_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            0, 2, 4, 6, 8, 10, 12, 14,
            16, 18, 20, 22, 24, 26, 28, 30,
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );
        assert_eq_m256i(r, e);
    }
4041
    #[simd_test(enable = "avx2")]
    const fn test_mm256_adds_epi8() {
        // Saturating signed 8-bit add; these sums stay in range, so the
        // result equals plain addition (lane i is i + (i + 32)).
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55,
            56, 57, 58, 59, 60, 61, 62, 63,
        );
        let r = _mm256_adds_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
            64, 66, 68, 70, 72, 74, 76, 78,
            80, 82, 84, 86, 88, 90, 92, 94,
        );
        assert_eq_m256i(r, e);
    }
4068
4069 #[simd_test(enable = "avx2")]
4070 fn test_mm256_adds_epi8_saturate_positive() {
4071 let a = _mm256_set1_epi8(0x7F);
4072 let b = _mm256_set1_epi8(1);
4073 let r = _mm256_adds_epi8(a, b);
4074 assert_eq_m256i(r, a);
4075 }
4076
4077 #[simd_test(enable = "avx2")]
4078 fn test_mm256_adds_epi8_saturate_negative() {
4079 let a = _mm256_set1_epi8(-0x80);
4080 let b = _mm256_set1_epi8(-1);
4081 let r = _mm256_adds_epi8(a, b);
4082 assert_eq_m256i(r, a);
4083 }
4084
    #[simd_test(enable = "avx2")]
    const fn test_mm256_adds_epi16() {
        // Saturating signed 16-bit add; sums stay in range, so the result
        // equals plain addition.
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
        );
        let r = _mm256_adds_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );

        assert_eq_m256i(r, e);
    }
4106
4107 #[simd_test(enable = "avx2")]
4108 fn test_mm256_adds_epi16_saturate_positive() {
4109 let a = _mm256_set1_epi16(0x7FFF);
4110 let b = _mm256_set1_epi16(1);
4111 let r = _mm256_adds_epi16(a, b);
4112 assert_eq_m256i(r, a);
4113 }
4114
4115 #[simd_test(enable = "avx2")]
4116 fn test_mm256_adds_epi16_saturate_negative() {
4117 let a = _mm256_set1_epi16(-0x8000);
4118 let b = _mm256_set1_epi16(-1);
4119 let r = _mm256_adds_epi16(a, b);
4120 assert_eq_m256i(r, a);
4121 }
4122
    #[simd_test(enable = "avx2")]
    const fn test_mm256_adds_epu8() {
        // Saturating unsigned 8-bit add; sums stay in range, so the result
        // equals plain addition.
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55,
            56, 57, 58, 59, 60, 61, 62, 63,
        );
        let r = _mm256_adds_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
            64, 66, 68, 70, 72, 74, 76, 78,
            80, 82, 84, 86, 88, 90, 92, 94,
        );
        assert_eq_m256i(r, e);
    }
4149
4150 #[simd_test(enable = "avx2")]
4151 fn test_mm256_adds_epu8_saturate() {
4152 let a = _mm256_set1_epi8(!0);
4153 let b = _mm256_set1_epi8(1);
4154 let r = _mm256_adds_epu8(a, b);
4155 assert_eq_m256i(r, a);
4156 }
4157
    #[simd_test(enable = "avx2")]
    const fn test_mm256_adds_epu16() {
        // Saturating unsigned 16-bit add; sums stay in range, so the result
        // equals plain addition.
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
        );
        let r = _mm256_adds_epu16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );

        assert_eq_m256i(r, e);
    }
4179
4180 #[simd_test(enable = "avx2")]
4181 fn test_mm256_adds_epu16_saturate() {
4182 let a = _mm256_set1_epi16(!0);
4183 let b = _mm256_set1_epi16(1);
4184 let r = _mm256_adds_epu16(a, b);
4185 assert_eq_m256i(r, a);
4186 }
4187
4188 #[simd_test(enable = "avx2")]
4189 const fn test_mm256_and_si256() {
4190 let a = _mm256_set1_epi8(5);
4191 let b = _mm256_set1_epi8(3);
4192 let got = _mm256_and_si256(a, b);
4193 assert_eq_m256i(got, _mm256_set1_epi8(1));
4194 }
4195
4196 #[simd_test(enable = "avx2")]
4197 const fn test_mm256_andnot_si256() {
4198 let a = _mm256_set1_epi8(5);
4199 let b = _mm256_set1_epi8(3);
4200 let got = _mm256_andnot_si256(a, b);
4201 assert_eq_m256i(got, _mm256_set1_epi8(2));
4202 }
4203
4204 #[simd_test(enable = "avx2")]
4205 const fn test_mm256_avg_epu8() {
4206 let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9));
4207 let r = _mm256_avg_epu8(a, b);
4208 assert_eq_m256i(r, _mm256_set1_epi8(6));
4209 }
4210
4211 #[simd_test(enable = "avx2")]
4212 const fn test_mm256_avg_epu16() {
4213 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4214 let r = _mm256_avg_epu16(a, b);
4215 assert_eq_m256i(r, _mm256_set1_epi16(6));
4216 }
4217
4218 #[simd_test(enable = "avx2")]
4219 const fn test_mm_blend_epi32() {
4220 let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
4221 let e = _mm_setr_epi32(9, 3, 3, 3);
4222 let r = _mm_blend_epi32::<0x01>(a, b);
4223 assert_eq_m128i(r, e);
4224
4225 let r = _mm_blend_epi32::<0x0E>(b, a);
4226 assert_eq_m128i(r, e);
4227 }
4228
4229 #[simd_test(enable = "avx2")]
4230 const fn test_mm256_blend_epi32() {
4231 let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9));
4232 let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3);
4233 let r = _mm256_blend_epi32::<0x01>(a, b);
4234 assert_eq_m256i(r, e);
4235
4236 let e = _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9);
4237 let r = _mm256_blend_epi32::<0x82>(a, b);
4238 assert_eq_m256i(r, e);
4239
4240 let e = _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3);
4241 let r = _mm256_blend_epi32::<0x7C>(a, b);
4242 assert_eq_m256i(r, e);
4243 }
4244
4245 #[simd_test(enable = "avx2")]
4246 const fn test_mm256_blend_epi16() {
4247 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4248 let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3);
4249 let r = _mm256_blend_epi16::<0x01>(a, b);
4250 assert_eq_m256i(r, e);
4251
4252 let r = _mm256_blend_epi16::<0xFE>(b, a);
4253 assert_eq_m256i(r, e);
4254 }
4255
4256 #[simd_test(enable = "avx2")]
4257 const fn test_mm256_blendv_epi8() {
4258 let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2));
4259 let mask = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1);
4260 let e = _mm256_insert_epi8::<2>(_mm256_set1_epi8(4), 2);
4261 let r = _mm256_blendv_epi8(a, b, mask);
4262 assert_eq_m256i(r, e);
4263 }
4264
4265 #[simd_test(enable = "avx2")]
4266 const fn test_mm_broadcastb_epi8() {
4267 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4268 let res = _mm_broadcastb_epi8(a);
4269 assert_eq_m128i(res, _mm_set1_epi8(0x2a));
4270 }
4271
4272 #[simd_test(enable = "avx2")]
4273 const fn test_mm256_broadcastb_epi8() {
4274 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4275 let res = _mm256_broadcastb_epi8(a);
4276 assert_eq_m256i(res, _mm256_set1_epi8(0x2a));
4277 }
4278
4279 #[simd_test(enable = "avx2")]
4280 const fn test_mm_broadcastd_epi32() {
4281 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4282 let res = _mm_broadcastd_epi32(a);
4283 assert_eq_m128i(res, _mm_set1_epi32(0x2a));
4284 }
4285
4286 #[simd_test(enable = "avx2")]
4287 const fn test_mm256_broadcastd_epi32() {
4288 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4289 let res = _mm256_broadcastd_epi32(a);
4290 assert_eq_m256i(res, _mm256_set1_epi32(0x2a));
4291 }
4292
4293 #[simd_test(enable = "avx2")]
4294 const fn test_mm_broadcastq_epi64() {
4295 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4296 let res = _mm_broadcastq_epi64(a);
4297 assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff));
4298 }
4299
4300 #[simd_test(enable = "avx2")]
4301 const fn test_mm256_broadcastq_epi64() {
4302 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4303 let res = _mm256_broadcastq_epi64(a);
4304 assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff));
4305 }
4306
4307 #[simd_test(enable = "avx2")]
4308 const fn test_mm_broadcastsd_pd() {
4309 let a = _mm_setr_pd(6.88, 3.44);
4310 let res = _mm_broadcastsd_pd(a);
4311 assert_eq_m128d(res, _mm_set1_pd(6.88));
4312 }
4313
4314 #[simd_test(enable = "avx2")]
4315 const fn test_mm256_broadcastsd_pd() {
4316 let a = _mm_setr_pd(6.88, 3.44);
4317 let res = _mm256_broadcastsd_pd(a);
4318 assert_eq_m256d(res, _mm256_set1_pd(6.88f64));
4319 }
4320
4321 #[simd_test(enable = "avx2")]
4322 const fn test_mm_broadcastsi128_si256() {
4323 let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
4324 let res = _mm_broadcastsi128_si256(a);
4325 let retval = _mm256_setr_epi64x(
4326 0x0987654321012334,
4327 0x5678909876543210,
4328 0x0987654321012334,
4329 0x5678909876543210,
4330 );
4331 assert_eq_m256i(res, retval);
4332 }
4333
4334 #[simd_test(enable = "avx2")]
4335 const fn test_mm256_broadcastsi128_si256() {
4336 let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
4337 let res = _mm256_broadcastsi128_si256(a);
4338 let retval = _mm256_setr_epi64x(
4339 0x0987654321012334,
4340 0x5678909876543210,
4341 0x0987654321012334,
4342 0x5678909876543210,
4343 );
4344 assert_eq_m256i(res, retval);
4345 }
4346
4347 #[simd_test(enable = "avx2")]
4348 const fn test_mm_broadcastss_ps() {
4349 let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
4350 let res = _mm_broadcastss_ps(a);
4351 assert_eq_m128(res, _mm_set1_ps(6.88));
4352 }
4353
4354 #[simd_test(enable = "avx2")]
4355 const fn test_mm256_broadcastss_ps() {
4356 let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
4357 let res = _mm256_broadcastss_ps(a);
4358 assert_eq_m256(res, _mm256_set1_ps(6.88));
4359 }
4360
4361 #[simd_test(enable = "avx2")]
4362 const fn test_mm_broadcastw_epi16() {
4363 let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4364 let res = _mm_broadcastw_epi16(a);
4365 assert_eq_m128i(res, _mm_set1_epi16(0x22b));
4366 }
4367
4368 #[simd_test(enable = "avx2")]
4369 const fn test_mm256_broadcastw_epi16() {
4370 let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4371 let res = _mm256_broadcastw_epi16(a);
4372 assert_eq_m256i(res, _mm256_set1_epi16(0x22b));
4373 }
4374
    #[simd_test(enable = "avx2")]
    const fn test_mm256_cmpeq_epi8() {
        // `a` is an ascending ramp and `b` a descending one; they agree
        // only in byte lane 2 (both 2), which becomes all-ones.
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            31, 30, 2, 28, 27, 26, 25, 24,
            23, 22, 21, 20, 19, 18, 17, 16,
            15, 14, 13, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        let r = _mm256_cmpeq_epi8(a, b);
        assert_eq_m256i(r, _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), !0));
    }
4394
    #[simd_test(enable = "avx2")]
    const fn test_mm256_cmpeq_epi16() {
        // Ascending vs. descending ramps agree only in word lane 2.
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            15, 14, 2, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        let r = _mm256_cmpeq_epi16(a, b);
        assert_eq_m256i(r, _mm256_insert_epi16::<2>(_mm256_set1_epi16(0), !0));
    }
4410
4411 #[simd_test(enable = "avx2")]
4412 const fn test_mm256_cmpeq_epi32() {
4413 let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4414 let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0);
4415 let r = _mm256_cmpeq_epi32(a, b);
4416 let e = _mm256_set1_epi32(0);
4417 let e = _mm256_insert_epi32::<2>(e, !0);
4418 assert_eq_m256i(r, e);
4419 }
4420
4421 #[simd_test(enable = "avx2")]
4422 const fn test_mm256_cmpeq_epi64() {
4423 let a = _mm256_setr_epi64x(0, 1, 2, 3);
4424 let b = _mm256_setr_epi64x(3, 2, 2, 0);
4425 let r = _mm256_cmpeq_epi64(a, b);
4426 assert_eq_m256i(r, _mm256_insert_epi64::<2>(_mm256_set1_epi64x(0), !0));
4427 }
4428
4429 #[simd_test(enable = "avx2")]
4430 const fn test_mm256_cmpgt_epi8() {
4431 let a = _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), 5);
4432 let b = _mm256_set1_epi8(0);
4433 let r = _mm256_cmpgt_epi8(a, b);
4434 assert_eq_m256i(r, _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), !0));
4435 }
4436
4437 #[simd_test(enable = "avx2")]
4438 const fn test_mm256_cmpgt_epi16() {
4439 let a = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 5);
4440 let b = _mm256_set1_epi16(0);
4441 let r = _mm256_cmpgt_epi16(a, b);
4442 assert_eq_m256i(r, _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), !0));
4443 }
4444
4445 #[simd_test(enable = "avx2")]
4446 const fn test_mm256_cmpgt_epi32() {
4447 let a = _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), 5);
4448 let b = _mm256_set1_epi32(0);
4449 let r = _mm256_cmpgt_epi32(a, b);
4450 assert_eq_m256i(r, _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), !0));
4451 }
4452
4453 #[simd_test(enable = "avx2")]
4454 const fn test_mm256_cmpgt_epi64() {
4455 let a = _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), 5);
4456 let b = _mm256_set1_epi64x(0);
4457 let r = _mm256_cmpgt_epi64(a, b);
4458 assert_eq_m256i(r, _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), !0));
4459 }
4460
    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepi8_epi16() {
        // Sign-extends all sixteen i8 lanes to i16; values are preserved.
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        #[rustfmt::skip]
        let r = _mm256_setr_epi16(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        assert_eq_m256i(r, _mm256_cvtepi8_epi16(a));
    }
4475
    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepi8_epi32() {
        // Sign-extends the low eight i8 lanes to i32; the rest are dropped.
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
        assert_eq_m256i(r, _mm256_cvtepi8_epi32(a));
    }
4486
    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepi8_epi64() {
        // Sign-extends the low four i8 lanes to i64; the rest are dropped.
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi8_epi64(a));
    }
4497
4498 #[simd_test(enable = "avx2")]
4499 const fn test_mm256_cvtepi16_epi32() {
4500 let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4501 let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
4502 assert_eq_m256i(r, _mm256_cvtepi16_epi32(a));
4503 }
4504
4505 #[simd_test(enable = "avx2")]
4506 const fn test_mm256_cvtepi16_epi64() {
4507 let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4508 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4509 assert_eq_m256i(r, _mm256_cvtepi16_epi64(a));
4510 }
4511
4512 #[simd_test(enable = "avx2")]
4513 const fn test_mm256_cvtepi32_epi64() {
4514 let a = _mm_setr_epi32(0, 0, -1, 1);
4515 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4516 assert_eq_m256i(r, _mm256_cvtepi32_epi64(a));
4517 }
4518
4519 #[simd_test(enable = "avx2")]
4520 const fn test_mm256_cvtepu16_epi32() {
4521 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4522 let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4523 assert_eq_m256i(r, _mm256_cvtepu16_epi32(a));
4524 }
4525
4526 #[simd_test(enable = "avx2")]
4527 const fn test_mm256_cvtepu16_epi64() {
4528 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4529 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4530 assert_eq_m256i(r, _mm256_cvtepu16_epi64(a));
4531 }
4532
4533 #[simd_test(enable = "avx2")]
4534 const fn test_mm256_cvtepu32_epi64() {
4535 let a = _mm_setr_epi32(0, 1, 2, 3);
4536 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4537 assert_eq_m256i(r, _mm256_cvtepu32_epi64(a));
4538 }
4539
    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepu8_epi16() {
        // Zero-extends all sixteen u8 lanes to i16.
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let r = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m256i(r, _mm256_cvtepu8_epi16(a));
    }
4554
    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepu8_epi32() {
        // Zero-extends the low eight u8 lanes to i32.
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, _mm256_cvtepu8_epi32(a));
    }
4565
    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepu8_epi64() {
        // Zero-extends the low four u8 lanes to i64.
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu8_epi64(a));
    }
4576
4577 #[simd_test(enable = "avx2")]
4578 const fn test_mm256_extracti128_si256() {
4579 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4580 let r = _mm256_extracti128_si256::<1>(a);
4581 let e = _mm_setr_epi64x(3, 4);
4582 assert_eq_m128i(r, e);
4583 }
4584
4585 #[simd_test(enable = "avx2")]
4586 const fn test_mm256_hadd_epi16() {
4587 let a = _mm256_set1_epi16(2);
4588 let b = _mm256_set1_epi16(4);
4589 let r = _mm256_hadd_epi16(a, b);
4590 let e = _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
4591 assert_eq_m256i(r, e);
4592 }
4593
4594 #[simd_test(enable = "avx2")]
4595 const fn test_mm256_hadd_epi32() {
4596 let a = _mm256_set1_epi32(2);
4597 let b = _mm256_set1_epi32(4);
4598 let r = _mm256_hadd_epi32(a, b);
4599 let e = _mm256_setr_epi32(4, 4, 8, 8, 4, 4, 8, 8);
4600 assert_eq_m256i(r, e);
4601 }
4602
    #[simd_test(enable = "avx2")]
    fn test_mm256_hadds_epi16() {
        // Saturating horizontal add: the first pair is 0x7fff + 1, which
        // saturates to 0x7FFF; every other pair sums normally.
        let a = _mm256_set1_epi16(2);
        let a = _mm256_insert_epi16::<0>(a, 0x7fff);
        let a = _mm256_insert_epi16::<1>(a, 1);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hadds_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0x7FFF, 4, 4, 4, 8, 8, 8, 8,
            4, 4, 4, 4, 8, 8, 8, 8,
        );
        assert_eq_m256i(r, e);
    }
4617
4618 #[simd_test(enable = "avx2")]
4619 const fn test_mm256_hsub_epi16() {
4620 let a = _mm256_set1_epi16(2);
4621 let b = _mm256_set1_epi16(4);
4622 let r = _mm256_hsub_epi16(a, b);
4623 let e = _mm256_set1_epi16(0);
4624 assert_eq_m256i(r, e);
4625 }
4626
4627 #[simd_test(enable = "avx2")]
4628 const fn test_mm256_hsub_epi32() {
4629 let a = _mm256_set1_epi32(2);
4630 let b = _mm256_set1_epi32(4);
4631 let r = _mm256_hsub_epi32(a, b);
4632 let e = _mm256_set1_epi32(0);
4633 assert_eq_m256i(r, e);
4634 }
4635
    #[simd_test(enable = "avx2")]
    fn test_mm256_hsubs_epi16() {
        // Saturating horizontal subtract: the first pair is
        // 0x7fff - (-1), which saturates to 0x7FFF; all other pairs
        // subtract equal values and yield 0.
        let a = _mm256_set1_epi16(2);
        let a = _mm256_insert_epi16::<0>(a, 0x7fff);
        let a = _mm256_insert_epi16::<1>(a, -1);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hsubs_epi16(a, b);
        let e = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 0x7FFF);
        assert_eq_m256i(r, e);
    }
4646
4647 #[simd_test(enable = "avx2")]
4648 fn test_mm256_madd_epi16() {
4649 let a = _mm256_set1_epi16(2);
4650 let b = _mm256_set1_epi16(4);
4651 let r = _mm256_madd_epi16(a, b);
4652 let e = _mm256_set1_epi32(16);
4653 assert_eq_m256i(r, e);
4654 }
4655
4656 #[simd_test(enable = "avx2")]
4657 const fn test_mm256_inserti128_si256() {
4658 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4659 let b = _mm_setr_epi64x(7, 8);
4660 let r = _mm256_inserti128_si256::<1>(a, b);
4661 let e = _mm256_setr_epi64x(1, 2, 7, 8);
4662 assert_eq_m256i(r, e);
4663 }
4664
4665 #[simd_test(enable = "avx2")]
4666 fn test_mm256_maddubs_epi16() {
4667 let a = _mm256_set1_epi8(2);
4668 let b = _mm256_set1_epi8(4);
4669 let r = _mm256_maddubs_epi16(a, b);
4670 let e = _mm256_set1_epi16(16);
4671 assert_eq_m256i(r, e);
4672 }
4673
4674 #[simd_test(enable = "avx2")]
4675 const fn test_mm_maskload_epi32() {
4676 let nums = [1, 2, 3, 4];
4677 let a = &nums as *const i32;
4678 let mask = _mm_setr_epi32(-1, 0, 0, -1);
4679 let r = unsafe { _mm_maskload_epi32(a, mask) };
4680 let e = _mm_setr_epi32(1, 0, 0, 4);
4681 assert_eq_m128i(r, e);
4682 }
4683
4684 #[simd_test(enable = "avx2")]
4685 const fn test_mm256_maskload_epi32() {
4686 let nums = [1, 2, 3, 4, 5, 6, 7, 8];
4687 let a = &nums as *const i32;
4688 let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
4689 let r = unsafe { _mm256_maskload_epi32(a, mask) };
4690 let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0);
4691 assert_eq_m256i(r, e);
4692 }
4693
4694 #[simd_test(enable = "avx2")]
4695 const fn test_mm_maskload_epi64() {
4696 let nums = [1_i64, 2_i64];
4697 let a = &nums as *const i64;
4698 let mask = _mm_setr_epi64x(0, -1);
4699 let r = unsafe { _mm_maskload_epi64(a, mask) };
4700 let e = _mm_setr_epi64x(0, 2);
4701 assert_eq_m128i(r, e);
4702 }
4703
4704 #[simd_test(enable = "avx2")]
4705 const fn test_mm256_maskload_epi64() {
4706 let nums = [1_i64, 2_i64, 3_i64, 4_i64];
4707 let a = &nums as *const i64;
4708 let mask = _mm256_setr_epi64x(0, -1, -1, 0);
4709 let r = unsafe { _mm256_maskload_epi64(a, mask) };
4710 let e = _mm256_setr_epi64x(0, 2, 3, 0);
4711 assert_eq_m256i(r, e);
4712 }
4713
4714 #[simd_test(enable = "avx2")]
4715 const fn test_mm_maskstore_epi32() {
4716 let a = _mm_setr_epi32(1, 2, 3, 4);
4717 let mut arr = [-1, -1, -1, -1];
4718 let mask = _mm_setr_epi32(-1, 0, 0, -1);
4719 unsafe {
4720 _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4721 }
4722 let e = [1, -1, -1, 4];
4723 assert_eq!(arr, e);
4724 }
4725
4726 #[simd_test(enable = "avx2")]
4727 const fn test_mm256_maskstore_epi32() {
4728 let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
4729 let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
4730 let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
4731 unsafe {
4732 _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4733 }
4734 let e = [1, -1, -1, 42, -1, 6, 7, -1];
4735 assert_eq!(arr, e);
4736 }
4737
4738 #[simd_test(enable = "avx2")]
4739 const fn test_mm_maskstore_epi64() {
4740 let a = _mm_setr_epi64x(1_i64, 2_i64);
4741 let mut arr = [-1_i64, -1_i64];
4742 let mask = _mm_setr_epi64x(0, -1);
4743 unsafe {
4744 _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4745 }
4746 let e = [-1, 2];
4747 assert_eq!(arr, e);
4748 }
4749
4750 #[simd_test(enable = "avx2")]
4751 const fn test_mm256_maskstore_epi64() {
4752 let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
4753 let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64];
4754 let mask = _mm256_setr_epi64x(0, -1, -1, 0);
4755 unsafe {
4756 _mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4757 }
4758 let e = [-1, 2, 3, -1];
4759 assert_eq!(arr, e);
4760 }
4761
4762 #[simd_test(enable = "avx2")]
4763 const fn test_mm256_max_epi16() {
4764 let a = _mm256_set1_epi16(2);
4765 let b = _mm256_set1_epi16(4);
4766 let r = _mm256_max_epi16(a, b);
4767 assert_eq_m256i(r, b);
4768 }
4769
4770 #[simd_test(enable = "avx2")]
4771 const fn test_mm256_max_epi32() {
4772 let a = _mm256_set1_epi32(2);
4773 let b = _mm256_set1_epi32(4);
4774 let r = _mm256_max_epi32(a, b);
4775 assert_eq_m256i(r, b);
4776 }
4777
4778 #[simd_test(enable = "avx2")]
4779 const fn test_mm256_max_epi8() {
4780 let a = _mm256_set1_epi8(2);
4781 let b = _mm256_set1_epi8(4);
4782 let r = _mm256_max_epi8(a, b);
4783 assert_eq_m256i(r, b);
4784 }
4785
4786 #[simd_test(enable = "avx2")]
4787 const fn test_mm256_max_epu16() {
4788 let a = _mm256_set1_epi16(2);
4789 let b = _mm256_set1_epi16(4);
4790 let r = _mm256_max_epu16(a, b);
4791 assert_eq_m256i(r, b);
4792 }
4793
    // Element-wise max/min comparisons: every lane holds the same constant
    // (2 vs. 4), so the whole-vector expected result is simply one of the
    // two inputs (`b` for max, `a` for min).
    #[simd_test(enable = "avx2")]
    const fn test_mm256_max_epu32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_max_epu32(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_max_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_max_epu8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_min_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_min_epi16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_min_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_min_epi32(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_min_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_min_epi8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_min_epu16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_min_epu16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_min_epu32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_min_epu32(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_min_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_min_epu8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_movemask_epi8() {
        // Every byte is negative, so all 32 sign bits are set and the
        // resulting mask is 0xFFFF_FFFF, i.e. -1 as an i32.
        let a = _mm256_set1_epi8(-1);
        let r = _mm256_movemask_epi8(a);
        let e = -1;
        assert_eq!(r, e);
    }
4865
    #[simd_test(enable = "avx2")]
    fn test_mm256_mpsadbw_epu8() {
        // With all bytes equal, each result lane is the sum of four
        // absolute differences: 4 * |2 - 4| = 8.
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_mpsadbw_epu8::<0>(a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }
4874
    // Widening (epi32 -> epi64) and truncating (`mullo`) multiplies.
    #[simd_test(enable = "avx2")]
    const fn test_mm256_mul_epi32() {
        // Only the even-indexed 32-bit lanes participate: lanes 4 and 6
        // hold 2, so the two upper 64-bit products are 2*5 = 10 and 2*7 = 14.
        let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_mul_epi32(a, b);
        let e = _mm256_setr_epi64x(0, 0, 10, 14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_mul_epu32() {
        let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_mul_epu32(a, b);
        let e = _mm256_setr_epi64x(0, 0, 10, 14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_mulhi_epi16() {
        // 6535 * 6535 = 42706225; the high 16 bits are 42706225 >> 16 = 651.
        let a = _mm256_set1_epi16(6535);
        let b = _mm256_set1_epi16(6535);
        let r = _mm256_mulhi_epi16(a, b);
        let e = _mm256_set1_epi16(651);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_mulhi_epu16() {
        let a = _mm256_set1_epi16(6535);
        let b = _mm256_set1_epi16(6535);
        let r = _mm256_mulhi_epu16(a, b);
        let e = _mm256_set1_epi16(651);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_mullo_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_mullo_epi16(a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_mullo_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_mullo_epi32(a, b);
        let e = _mm256_set1_epi32(8);
        assert_eq_m256i(r, e);
    }
4928
4929 #[simd_test(enable = "avx2")]
4930 fn test_mm256_mulhrs_epi16() {
4931 let a = _mm256_set1_epi16(2);
4932 let b = _mm256_set1_epi16(4);
4933 let r = _mm256_mullo_epi16(a, b);
4934 let e = _mm256_set1_epi16(8);
4935 assert_eq_m256i(r, e);
4936 }
4937
    // Bitwise OR and signed/unsigned pack-with-saturation. The pack
    // intrinsics operate per 128-bit half: low half of `a`, then low half
    // of `b`, then the two high halves — hence the 2...4...2...4 pattern.
    #[simd_test(enable = "avx2")]
    const fn test_mm256_or_si256() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(0);
        let r = _mm256_or_si256(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_packs_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_packs_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_packs_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_packs_epi32(a, b);
        let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_packus_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_packus_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_packus_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_packus_epi32(a, b);
        let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);

        assert_eq_m256i(r, e);
    }
4997
    #[simd_test(enable = "avx2")]
    fn test_mm256_sad_epu8() {
        // Each 64-bit lane accumulates eight absolute byte differences:
        // 8 * |2 - 4| = 16.
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_sad_epu8(a, b);
        let e = _mm256_set1_epi64x(16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_shufflehi_epi16() {
        // IMM 0b00_01_01_11 picks source words 3, 1, 1, 0 (low field first)
        // from each high 64-bit quarter; the low quarters pass through.
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 11, 22, 33, 44,
            4, 5, 6, 7, 55, 66, 77, 88,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 1, 2, 3, 44, 22, 22, 11,
            4, 5, 6, 7, 88, 66, 66, 55,
        );
        let r = _mm256_shufflehi_epi16::<0b00_01_01_11>(a);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_shufflelo_epi16() {
        // Mirror of shufflehi: the low 64-bit quarters are shuffled and
        // the high quarters pass through unchanged.
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            11, 22, 33, 44, 0, 1, 2, 3,
            55, 66, 77, 88, 4, 5, 6, 7,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            44, 22, 22, 11, 0, 1, 2, 3,
            88, 66, 66, 55, 4, 5, 6, 7,
        );
        let r = _mm256_shufflelo_epi16::<0b00_01_01_11>(a);
        assert_eq_m256i(r, e);
    }
5038
    // sign_epiN(a, b): negate lanes of `a` whose corresponding lane in `b`
    // is negative — here every lane of `b` is -1, so `a` is fully negated.
    #[simd_test(enable = "avx2")]
    fn test_mm256_sign_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(-1);
        let r = _mm256_sign_epi16(a, b);
        let e = _mm256_set1_epi16(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_sign_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_sign_epi32(a, b);
        let e = _mm256_set1_epi32(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_sign_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let r = _mm256_sign_epi8(a, b);
        let e = _mm256_set1_epi8(-2);
        assert_eq_m256i(r, e);
    }
5065
    // Left shifts: `sll` takes the count from the low element of a 128-bit
    // vector, `slli` from a const generic, `sllv` per lane, and `slli_si256`
    // shifts the whole 128-bit lanes left by N bytes.
    #[simd_test(enable = "avx2")]
    fn test_mm256_sll_epi16() {
        let a = _mm256_set1_epi16(0xFF);
        let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
        let r = _mm256_sll_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(0xFF0));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_sll_epi32() {
        let a = _mm256_set1_epi32(0xFFFF);
        let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
        let r = _mm256_sll_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(0xFFFF0));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_sll_epi64() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let b = _mm_insert_epi64::<0>(_mm_set1_epi64x(0), 4);
        let r = _mm256_sll_epi64(a, b);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF0));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_slli_epi16() {
        assert_eq_m256i(
            _mm256_slli_epi16::<4>(_mm256_set1_epi16(0xFF)),
            _mm256_set1_epi16(0xFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_slli_epi32() {
        assert_eq_m256i(
            _mm256_slli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
            _mm256_set1_epi32(0xFFFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_slli_epi64() {
        assert_eq_m256i(
            _mm256_slli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
            _mm256_set1_epi64x(0xFFFFFFFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_slli_si256() {
        // Byte-wise shift: 3 bytes = 24 bits within each 128-bit lane.
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let r = _mm256_slli_si256::<3>(a);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm_sllv_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(1);
        let r = _mm_sllv_epi32(a, b);
        let e = _mm_set1_epi32(4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_sllv_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_sllv_epi32(a, b);
        let e = _mm256_set1_epi32(4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm_sllv_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(1);
        let r = _mm_sllv_epi64(a, b);
        let e = _mm_set1_epi64x(4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_sllv_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(1);
        let r = _mm256_sllv_epi64(a, b);
        let e = _mm256_set1_epi64x(4);
        assert_eq_m256i(r, e);
    }
5156
    // Arithmetic right shifts replicate the sign bit, so -1 >> 1 == -1.
    #[simd_test(enable = "avx2")]
    fn test_mm256_sra_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm256_sra_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(-1));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_sra_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1);
        let r = _mm256_sra_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(-1));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_srai_epi16() {
        assert_eq_m256i(
            _mm256_srai_epi16::<1>(_mm256_set1_epi16(-1)),
            _mm256_set1_epi16(-1),
        );
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_srai_epi32() {
        assert_eq_m256i(
            _mm256_srai_epi32::<1>(_mm256_set1_epi32(-1)),
            _mm256_set1_epi32(-1),
        );
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm_srav_epi32() {
        let a = _mm_set1_epi32(4);
        let count = _mm_set1_epi32(1);
        let r = _mm_srav_epi32(a, count);
        let e = _mm_set1_epi32(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_srav_epi32() {
        let a = _mm256_set1_epi32(4);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_srav_epi32(a, count);
        let e = _mm256_set1_epi32(2);
        assert_eq_m256i(r, e);
    }
5206
    // Logical right shifts: `srli_si256` shifts bytes within each 128-bit
    // lane (zero-filling the top), `srl`/`srli`/`srlv` shift bits.
    #[simd_test(enable = "avx2")]
    const fn test_mm256_srli_si256() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_srli_si256::<3>(a);
        // Each 16-byte lane shifts independently; bytes 14-16 of each lane
        // are replaced with zeros.
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            4, 5, 6, 7, 8, 9, 10, 11,
            12, 13, 14, 15, 16, 0, 0, 0,
            20, 21, 22, 23, 24, 25, 26, 27,
            28, 29, 30, 31, 32, 0, 0, 0,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_srl_epi16() {
        let a = _mm256_set1_epi16(0xFF);
        let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
        let r = _mm256_srl_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(0xF));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_srl_epi32() {
        let a = _mm256_set1_epi32(0xFFFF);
        let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
        let r = _mm256_srl_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(0xFFF));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_srl_epi64() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let b = _mm_setr_epi64x(4, 0);
        let r = _mm256_srl_epi64(a, b);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFF));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_srli_epi16() {
        assert_eq_m256i(
            _mm256_srli_epi16::<4>(_mm256_set1_epi16(0xFF)),
            _mm256_set1_epi16(0xF),
        );
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_srli_epi32() {
        assert_eq_m256i(
            _mm256_srli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
            _mm256_set1_epi32(0xFFF),
        );
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_srli_epi64() {
        assert_eq_m256i(
            _mm256_srli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
            _mm256_set1_epi64x(0xFFFFFFF),
        );
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm_srlv_epi32() {
        let a = _mm_set1_epi32(2);
        let count = _mm_set1_epi32(1);
        let r = _mm_srlv_epi32(a, count);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_srlv_epi32() {
        let a = _mm256_set1_epi32(2);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_srlv_epi32(a, count);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm_srlv_epi64() {
        let a = _mm_set1_epi64x(2);
        let count = _mm_set1_epi64x(1);
        let r = _mm_srlv_epi64(a, count);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_srlv_epi64() {
        let a = _mm256_set1_epi64x(2);
        let count = _mm256_set1_epi64x(1);
        let r = _mm256_srlv_epi64(a, count);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }
5310
    #[simd_test(enable = "avx2")]
    fn test_mm256_stream_load_si256() {
        // Round-trips a value through a non-temporal (streaming) load and
        // checks that nothing is altered.
        let a = _mm256_set_epi64x(5, 6, 7, 8);
        let r = unsafe { _mm256_stream_load_si256(core::ptr::addr_of!(a) as *const _) };
        assert_eq_m256i(a, r);
    }
5317
    // Wrapping and saturating subtraction plus XOR. With 4 - 2 = 2 the
    // result equals `b`, so `b` doubles as the expected value; the inputs
    // are far from the saturation bounds, so `subs` behaves like `sub`.
    #[simd_test(enable = "avx2")]
    const fn test_mm256_sub_epi16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_sub_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_sub_epi32() {
        let a = _mm256_set1_epi32(4);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_sub_epi32(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_sub_epi64() {
        let a = _mm256_set1_epi64x(4);
        let b = _mm256_set1_epi64x(2);
        let r = _mm256_sub_epi64(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_sub_epi8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_sub_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_subs_epi16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_subs_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_subs_epi8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_subs_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_subs_epu16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_subs_epu16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_subs_epu8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_subs_epu8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_xor_si256() {
        // 5 ^ 3 == 0b101 ^ 0b011 == 0b110 == 6 in every byte lane.
        let a = _mm256_set1_epi8(5);
        let b = _mm256_set1_epi8(3);
        let r = _mm256_xor_si256(a, b);
        assert_eq_m256i(r, _mm256_set1_epi8(6));
    }
5389
    #[simd_test(enable = "avx2")]
    const fn test_mm256_alignr_epi8() {
        // alignr concatenates each 128-bit lane of `a` with the matching
        // lane of `b` and right-shifts the 32-byte intermediate by IMM
        // bytes, per lane. Covers: over-shift (>= 32 -> zero), shift into
        // `a` only (> 16), a mixed shift, and the two identity cases.
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            -1, -2, -3, -4, -5, -6, -7, -8,
            -9, -10, -11, -12, -13, -14, -15, -16,
            -17, -18, -19, -20, -21, -22, -23, -24,
            -25, -26, -27, -28, -29, -30, -31, -32,
        );
        // IMM >= 32 shifts everything out: all zeros.
        let r = _mm256_alignr_epi8::<33>(a, b);
        assert_eq_m256i(r, _mm256_set1_epi8(0));

        // IMM = 17: only bytes of `a` remain, shifted by one, zero-filled.
        let r = _mm256_alignr_epi8::<17>(a, b);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
            18, 19, 20, 21, 22, 23, 24, 25,
            26, 27, 28, 29, 30, 31, 32, 0,
        );
        assert_eq_m256i(r, expected);

        // IMM = 4: tail of `b` followed by the head of `a`, per lane.
        let r = _mm256_alignr_epi8::<4>(a, b);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            -5, -6, -7, -8, -9, -10, -11, -12,
            -13, -14, -15, -16, 1, 2, 3, 4,
            -21, -22, -23, -24, -25, -26, -27, -28,
            -29, -30, -31, -32, 17, 18, 19, 20,
        );
        assert_eq_m256i(r, expected);

        // IMM = 15: one byte of `b` followed by 15 bytes of `a`, per lane.
        let r = _mm256_alignr_epi8::<15>(a, b);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            -16, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            -32, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        assert_eq_m256i(r, expected);

        // Identity cases: IMM = 0 yields `b`, IMM = 16 yields `a`.
        let r = _mm256_alignr_epi8::<0>(a, b);
        assert_eq_m256i(r, b);

        let r = _mm256_alignr_epi8::<16>(a, b);
        assert_eq_m256i(r, a);
    }
5445
    #[simd_test(enable = "avx2")]
    fn test_mm256_shuffle_epi8() {
        // Per-lane byte shuffle: each byte of `b` selects a byte of `a`
        // within the same 128-bit lane; an index with the top bit set
        // (here 128) zeroes the output byte — see the 0s in `expected`.
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
            12, 5, 5, 10, 4, 1, 8, 0,
            4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
            12, 5, 5, 10, 4, 1, 8, 0,
        );
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            5, 0, 5, 4, 9, 13, 7, 4,
            13, 6, 6, 11, 5, 2, 9, 1,
            21, 0, 21, 20, 25, 29, 23, 20,
            29, 22, 22, 27, 21, 18, 25, 17,
        );
        let r = _mm256_shuffle_epi8(a, b);
        assert_eq_m256i(r, expected);
    }
5472
    // Cross-lane permutes: variable (index-vector) and immediate forms,
    // for both integer and floating-point views of the register.
    #[simd_test(enable = "avx2")]
    fn test_mm256_permutevar8x32_epi32() {
        let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
        let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
        let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500);
        let r = _mm256_permutevar8x32_epi32(a, b);
        assert_eq_m256i(r, expected);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_permute4x64_epi64() {
        // IMM 0b00010011 encodes source quadwords 3, 0, 1, 0 (low field first).
        let a = _mm256_setr_epi64x(100, 200, 300, 400);
        let expected = _mm256_setr_epi64x(400, 100, 200, 100);
        let r = _mm256_permute4x64_epi64::<0b00010011>(a);
        assert_eq_m256i(r, expected);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_permute2x128_si256() {
        let a = _mm256_setr_epi64x(100, 200, 500, 600);
        let b = _mm256_setr_epi64x(300, 400, 700, 800);
        let r = _mm256_permute2x128_si256::<0b00_01_00_11>(a, b);
        let e = _mm256_setr_epi64x(700, 800, 500, 600);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_permute4x64_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_permute4x64_pd::<0b00_01_00_11>(a);
        let e = _mm256_setr_pd(4., 1., 2., 1.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_permutevar8x32_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
        let r = _mm256_permutevar8x32_ps(a, b);
        let e = _mm256_setr_ps(6., 1., 6., 2., 8., 7., 4., 5.);
        assert_eq_m256(r, e);
    }
5515
    // 32-bit-index gathers into 32-bit elements. `arr[i] == i` and the
    // const SCALE matches the element size, so each gathered value equals
    // its index; masked variants keep the `src` value (256) where the
    // mask lane's sign bit is clear.
    #[simd_test(enable = "avx2")]
    fn test_mm_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = unsafe { _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm_mask_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = unsafe {
            _mm_mask_i32gather_epi32::<4>(
                _mm_set1_epi32(256),
                arr.as_ptr(),
                _mm_setr_epi32(0, 16, 64, 96),
                _mm_setr_epi32(-1, -1, -1, 0),
            )
        };
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = unsafe {
            _mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4))
        };
        assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_mask_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = unsafe {
            _mm256_mask_i32gather_epi32::<4>(
                _mm256_set1_epi32(256),
                arr.as_ptr(),
                _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
                _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
            )
        };
        assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = unsafe { _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm_mask_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = unsafe {
            _mm_mask_i32gather_ps::<4>(
                _mm_set1_ps(256.0),
                arr.as_ptr(),
                _mm_setr_epi32(0, 16, 64, 96),
                _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
            )
        };
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = unsafe {
            _mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4))
        };
        assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_mask_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = unsafe {
            _mm256_mask_i32gather_ps::<4>(
                _mm256_set1_ps(256.0),
                arr.as_ptr(),
                _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
                _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
            )
        };
        assert_eq_m256(
            r,
            _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
        );
    }
5614
    // 32-bit-index gathers into 64-bit elements (epi64/pd): only as many
    // indices as result lanes are used, so trailing index lanes are don't-cares.
    #[simd_test(enable = "avx2")]
    fn test_mm_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = unsafe { _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0)) };
        assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm_mask_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = unsafe {
            _mm_mask_i32gather_epi64::<8>(
                _mm_set1_epi64x(256),
                arr.as_ptr(),
                _mm_setr_epi32(16, 16, 16, 16),
                _mm_setr_epi64x(-1, 0),
            )
        };
        assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = unsafe { _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_mask_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = unsafe {
            _mm256_mask_i32gather_epi64::<8>(
                _mm256_set1_epi64x(256),
                arr.as_ptr(),
                _mm_setr_epi32(0, 16, 64, 96),
                _mm256_setr_epi64x(-1, -1, -1, 0),
            )
        };
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = unsafe { _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0)) };
        assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm_mask_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = unsafe {
            _mm_mask_i32gather_pd::<8>(
                _mm_set1_pd(256.0),
                arr.as_ptr(),
                _mm_setr_epi32(16, 16, 16, 16),
                _mm_setr_pd(-1.0, 0.0),
            )
        };
        assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = unsafe { _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_mask_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = unsafe {
            _mm256_mask_i32gather_pd::<8>(
                _mm256_set1_pd(256.0),
                arr.as_ptr(),
                _mm_setr_epi32(0, 16, 64, 96),
                _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
            )
        };
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
    }
5706
    // 64-bit-index gathers into 32-bit elements: fewer results than the
    // register has 32-bit lanes, so the upper result lanes are zeroed
    // (128-bit variants) or the result narrows to __m128i/__m128 (256-bit
    // variants).
    #[simd_test(enable = "avx2")]
    fn test_mm_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = unsafe { _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm_mask_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = unsafe {
            _mm_mask_i64gather_epi32::<4>(
                _mm_set1_epi32(256),
                arr.as_ptr(),
                _mm_setr_epi64x(0, 16),
                _mm_setr_epi32(-1, 0, -1, 0),
            )
        };
        assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r =
            unsafe { _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_mask_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = unsafe {
            _mm256_mask_i64gather_epi32::<4>(
                _mm_set1_epi32(256),
                arr.as_ptr(),
                _mm256_setr_epi64x(0, 16, 64, 96),
                _mm_setr_epi32(-1, -1, -1, 0),
            )
        };
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = unsafe { _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm_mask_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = unsafe {
            _mm_mask_i64gather_ps::<4>(
                _mm_set1_ps(256.0),
                arr.as_ptr(),
                _mm_setr_epi64x(0, 16),
                _mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
            )
        };
        assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r =
            unsafe { _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_mask_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = unsafe {
            _mm256_mask_i64gather_ps::<4>(
                _mm_set1_ps(256.0),
                arr.as_ptr(),
                _mm256_setr_epi64x(0, 16, 64, 96),
                _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
            )
        };
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
    }
5800
    // 64-bit-index gathers into 64-bit elements (epi64/pd).
    #[simd_test(enable = "avx2")]
    fn test_mm_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = unsafe { _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
        assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm_mask_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = unsafe {
            _mm_mask_i64gather_epi64::<8>(
                _mm_set1_epi64x(256),
                arr.as_ptr(),
                _mm_setr_epi64x(16, 16),
                _mm_setr_epi64x(-1, 0),
            )
        };
        assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r =
            unsafe { _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_mask_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = unsafe {
            _mm256_mask_i64gather_epi64::<8>(
                _mm256_set1_epi64x(256),
                arr.as_ptr(),
                _mm256_setr_epi64x(0, 16, 64, 96),
                _mm256_setr_epi64x(-1, -1, -1, 0),
            )
        };
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = unsafe { _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
        assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm_mask_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = unsafe {
            _mm_mask_i64gather_pd::<8>(
                _mm_set1_pd(256.0),
                arr.as_ptr(),
                _mm_setr_epi64x(16, 16),
                _mm_setr_pd(-1.0, 0.0),
            )
        };
        assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r =
            unsafe { _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_mask_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = unsafe {
            _mm256_mask_i64gather_pd::<8>(
                _mm256_set1_pd(256.0),
                arr.as_ptr(),
                _mm256_setr_epi64x(0, 16, 64, 96),
                _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
            )
        };
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
    }
5894
    // Element extraction: the asserts demonstrate zero-extension of the
    // extracted lane into the i32 return value (-1 reads back as 0xFF /
    // 0xFFFF rather than -1).
    #[simd_test(enable = "avx2")]
    const fn test_mm256_extract_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            -1, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31
        );
        let r1 = _mm256_extract_epi8::<0>(a);
        let r2 = _mm256_extract_epi8::<3>(a);
        assert_eq!(r1, 0xFF);
        assert_eq!(r2, 3);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_extract_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            -1, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r1 = _mm256_extract_epi16::<0>(a);
        let r2 = _mm256_extract_epi16::<3>(a);
        assert_eq!(r1, 0xFFFF);
        assert_eq!(r2, 3);
    }
5922}