// Source file: core/stdarch/crates/core_arch/src/x86/avx512f.rs

1use crate::{
2    arch::asm,
3    core_arch::{simd::*, x86::*},
4    intrinsics::simd::*,
5    intrinsics::{fmaf32, fmaf64},
6    mem, ptr,
7};
8
9use core::hint::unreachable_unchecked;
10#[cfg(test)]
11use stdarch_test::assert_instr;
12
13/// Computes the absolute values of packed 32-bit integers in `a`.
14///
15/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi32&expand=39)
16#[inline]
17#[target_feature(enable = "avx512f")]
18#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19#[cfg_attr(test, assert_instr(vpabsd))]
20#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21pub const fn _mm512_abs_epi32(a: __m512i) -> __m512i {
22    unsafe {
23        let a = a.as_i32x16();
24        let r = simd_select::<i32x16, _>(simd_lt(a, i32x16::ZERO), simd_neg(a), a);
25        transmute(r)
26    }
27}
28
29/// Computes the absolute value of packed 32-bit integers in `a`, and store the
30/// unsigned results in `dst` using writemask `k` (elements are copied from
31/// `src` when the corresponding mask bit is not set).
32///
33/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi32&expand=40)
34#[inline]
35#[target_feature(enable = "avx512f")]
36#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37#[cfg_attr(test, assert_instr(vpabsd))]
38#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39pub const fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
40    unsafe {
41        let abs = _mm512_abs_epi32(a).as_i32x16();
42        transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
43    }
44}
45
46/// Computes the absolute value of packed 32-bit integers in `a`, and store the
47/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
48/// the corresponding mask bit is not set).
49///
50/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi32&expand=41)
51#[inline]
52#[target_feature(enable = "avx512f")]
53#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
54#[cfg_attr(test, assert_instr(vpabsd))]
55#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
56pub const fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
57    unsafe {
58        let abs = _mm512_abs_epi32(a).as_i32x16();
59        transmute(simd_select_bitmask(k, abs, i32x16::ZERO))
60    }
61}
62
63/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
64///
65/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi32&expand=37)
66#[inline]
67#[target_feature(enable = "avx512f,avx512vl")]
68#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
69#[cfg_attr(test, assert_instr(vpabsd))]
70#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
71pub const fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
72    unsafe {
73        let abs = _mm256_abs_epi32(a).as_i32x8();
74        transmute(simd_select_bitmask(k, abs, src.as_i32x8()))
75    }
76}
77
78/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
79///
80/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi32&expand=38)
81#[inline]
82#[target_feature(enable = "avx512f,avx512vl")]
83#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
84#[cfg_attr(test, assert_instr(vpabsd))]
85#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
86pub const fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
87    unsafe {
88        let abs = _mm256_abs_epi32(a).as_i32x8();
89        transmute(simd_select_bitmask(k, abs, i32x8::ZERO))
90    }
91}
92
93/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
94///
95/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi32&expand=34)
96#[inline]
97#[target_feature(enable = "avx512f,avx512vl")]
98#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
99#[cfg_attr(test, assert_instr(vpabsd))]
100#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
101pub const fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
102    unsafe {
103        let abs = _mm_abs_epi32(a).as_i32x4();
104        transmute(simd_select_bitmask(k, abs, src.as_i32x4()))
105    }
106}
107
108/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
109///
110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi32&expand=35)
111#[inline]
112#[target_feature(enable = "avx512f,avx512vl")]
113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
114#[cfg_attr(test, assert_instr(vpabsd))]
115#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
116pub const fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
117    unsafe {
118        let abs = _mm_abs_epi32(a).as_i32x4();
119        transmute(simd_select_bitmask(k, abs, i32x4::ZERO))
120    }
121}
122
123/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
124///
125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi64&expand=48)
126#[inline]
127#[target_feature(enable = "avx512f")]
128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
129#[cfg_attr(test, assert_instr(vpabsq))]
130#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
131pub const fn _mm512_abs_epi64(a: __m512i) -> __m512i {
132    unsafe {
133        let a = a.as_i64x8();
134        let r = simd_select::<i64x8, _>(simd_lt(a, i64x8::ZERO), simd_neg(a), a);
135        transmute(r)
136    }
137}
138
139/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
140///
141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi64&expand=49)
142#[inline]
143#[target_feature(enable = "avx512f")]
144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
145#[cfg_attr(test, assert_instr(vpabsq))]
146#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
147pub const fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
148    unsafe {
149        let abs = _mm512_abs_epi64(a).as_i64x8();
150        transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
151    }
152}
153
154/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
155///
156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi64&expand=50)
157#[inline]
158#[target_feature(enable = "avx512f")]
159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
160#[cfg_attr(test, assert_instr(vpabsq))]
161#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
162pub const fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
163    unsafe {
164        let abs = _mm512_abs_epi64(a).as_i64x8();
165        transmute(simd_select_bitmask(k, abs, i64x8::ZERO))
166    }
167}
168
169/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
170///
171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi64&expand=45)
172#[inline]
173#[target_feature(enable = "avx512f,avx512vl")]
174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
175#[cfg_attr(test, assert_instr(vpabsq))]
176#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
177pub const fn _mm256_abs_epi64(a: __m256i) -> __m256i {
178    unsafe {
179        let a = a.as_i64x4();
180        let r = simd_select::<i64x4, _>(simd_lt(a, i64x4::ZERO), simd_neg(a), a);
181        transmute(r)
182    }
183}
184
185/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
186///
187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi64&expand=46)
188#[inline]
189#[target_feature(enable = "avx512f,avx512vl")]
190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
191#[cfg_attr(test, assert_instr(vpabsq))]
192#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
193pub const fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
194    unsafe {
195        let abs = _mm256_abs_epi64(a).as_i64x4();
196        transmute(simd_select_bitmask(k, abs, src.as_i64x4()))
197    }
198}
199
200/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
201///
202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi64)
203#[inline]
204#[target_feature(enable = "avx512f,avx512vl")]
205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
206#[cfg_attr(test, assert_instr(vpabsq))]
207#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
208pub const fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
209    unsafe {
210        let abs = _mm256_abs_epi64(a).as_i64x4();
211        transmute(simd_select_bitmask(k, abs, i64x4::ZERO))
212    }
213}
214
215/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
216///
217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi64)
218#[inline]
219#[target_feature(enable = "avx512f,avx512vl")]
220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
221#[cfg_attr(test, assert_instr(vpabsq))]
222#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
223pub const fn _mm_abs_epi64(a: __m128i) -> __m128i {
224    unsafe {
225        let a = a.as_i64x2();
226        let r = simd_select::<i64x2, _>(simd_lt(a, i64x2::ZERO), simd_neg(a), a);
227        transmute(r)
228    }
229}
230
231/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
232///
233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi64)
234#[inline]
235#[target_feature(enable = "avx512f,avx512vl")]
236#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
237#[cfg_attr(test, assert_instr(vpabsq))]
238#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
239pub const fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
240    unsafe {
241        let abs = _mm_abs_epi64(a).as_i64x2();
242        transmute(simd_select_bitmask(k, abs, src.as_i64x2()))
243    }
244}
245
246/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
247///
248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi64)
249#[inline]
250#[target_feature(enable = "avx512f,avx512vl")]
251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
252#[cfg_attr(test, assert_instr(vpabsq))]
253#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
254pub const fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i {
255    unsafe {
256        let abs = _mm_abs_epi64(a).as_i64x2();
257        transmute(simd_select_bitmask(k, abs, i64x2::ZERO))
258    }
259}
260
261/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
262///
263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_ps&expand=65)
264#[inline]
265#[target_feature(enable = "avx512f")]
266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
267#[cfg_attr(test, assert_instr(vpandd))]
268#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
269pub const fn _mm512_abs_ps(v2: __m512) -> __m512 {
270    unsafe { simd_fabs(v2) }
271}
272
273/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
274///
275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_ps&expand=66)
276#[inline]
277#[target_feature(enable = "avx512f")]
278#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
279#[cfg_attr(test, assert_instr(vpandd))]
280#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
281pub const fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
282    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
283}
284
285/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
286///
287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_pd&expand=60)
288#[inline]
289#[target_feature(enable = "avx512f")]
290#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
291#[cfg_attr(test, assert_instr(vpandq))]
292#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
293pub const fn _mm512_abs_pd(v2: __m512d) -> __m512d {
294    unsafe { simd_fabs(v2) }
295}
296
297/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
298///
299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_pd&expand=61)
300#[inline]
301#[target_feature(enable = "avx512f")]
302#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
303#[cfg_attr(test, assert_instr(vpandq))]
304#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
305pub const fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
306    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
307}
308
309/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
310///
311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi32&expand=3801)
312#[inline]
313#[target_feature(enable = "avx512f")]
314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
315#[cfg_attr(test, assert_instr(vmovdqa32))]
316#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
317pub const fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
318    unsafe {
319        let mov = a.as_i32x16();
320        transmute(simd_select_bitmask(k, mov, src.as_i32x16()))
321    }
322}
323
324/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
325///
326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi32&expand=3802)
327#[inline]
328#[target_feature(enable = "avx512f")]
329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
330#[cfg_attr(test, assert_instr(vmovdqa32))]
331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
332pub const fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
333    unsafe {
334        let mov = a.as_i32x16();
335        transmute(simd_select_bitmask(k, mov, i32x16::ZERO))
336    }
337}
338
339/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
340///
341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi32&expand=3799)
342#[inline]
343#[target_feature(enable = "avx512f,avx512vl")]
344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
345#[cfg_attr(test, assert_instr(vmovdqa32))]
346#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
347pub const fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
348    unsafe {
349        let mov = a.as_i32x8();
350        transmute(simd_select_bitmask(k, mov, src.as_i32x8()))
351    }
352}
353
354/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
355///
356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi32&expand=3800)
357#[inline]
358#[target_feature(enable = "avx512f,avx512vl")]
359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
360#[cfg_attr(test, assert_instr(vmovdqa32))]
361#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
362pub const fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i {
363    unsafe {
364        let mov = a.as_i32x8();
365        transmute(simd_select_bitmask(k, mov, i32x8::ZERO))
366    }
367}
368
369/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
370///
371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi32&expand=3797)
372#[inline]
373#[target_feature(enable = "avx512f,avx512vl")]
374#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
375#[cfg_attr(test, assert_instr(vmovdqa32))]
376#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
377pub const fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
378    unsafe {
379        let mov = a.as_i32x4();
380        transmute(simd_select_bitmask(k, mov, src.as_i32x4()))
381    }
382}
383
384/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
385///
386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi32&expand=3798)
387#[inline]
388#[target_feature(enable = "avx512f,avx512vl")]
389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
390#[cfg_attr(test, assert_instr(vmovdqa32))]
391#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
392pub const fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i {
393    unsafe {
394        let mov = a.as_i32x4();
395        transmute(simd_select_bitmask(k, mov, i32x4::ZERO))
396    }
397}
398
399/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
400///
401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi64&expand=3807)
402#[inline]
403#[target_feature(enable = "avx512f")]
404#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
405#[cfg_attr(test, assert_instr(vmovdqa64))]
406#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
407pub const fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
408    unsafe {
409        let mov = a.as_i64x8();
410        transmute(simd_select_bitmask(k, mov, src.as_i64x8()))
411    }
412}
413
414/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
415///
416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi64&expand=3808)
417#[inline]
418#[target_feature(enable = "avx512f")]
419#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
420#[cfg_attr(test, assert_instr(vmovdqa64))]
421#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
422pub const fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
423    unsafe {
424        let mov = a.as_i64x8();
425        transmute(simd_select_bitmask(k, mov, i64x8::ZERO))
426    }
427}
428
429/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
430///
431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi64&expand=3805)
432#[inline]
433#[target_feature(enable = "avx512f,avx512vl")]
434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
435#[cfg_attr(test, assert_instr(vmovdqa64))]
436#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
437pub const fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
438    unsafe {
439        let mov = a.as_i64x4();
440        transmute(simd_select_bitmask(k, mov, src.as_i64x4()))
441    }
442}
443
444/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
445///
446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi64&expand=3806)
447#[inline]
448#[target_feature(enable = "avx512f,avx512vl")]
449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
450#[cfg_attr(test, assert_instr(vmovdqa64))]
451#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
452pub const fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i {
453    unsafe {
454        let mov = a.as_i64x4();
455        transmute(simd_select_bitmask(k, mov, i64x4::ZERO))
456    }
457}
458
459/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
460///
461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi64&expand=3803)
462#[inline]
463#[target_feature(enable = "avx512f,avx512vl")]
464#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
465#[cfg_attr(test, assert_instr(vmovdqa64))]
466#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
467pub const fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
468    unsafe {
469        let mov = a.as_i64x2();
470        transmute(simd_select_bitmask(k, mov, src.as_i64x2()))
471    }
472}
473
474/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
475///
476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi64&expand=3804)
477#[inline]
478#[target_feature(enable = "avx512f,avx512vl")]
479#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
480#[cfg_attr(test, assert_instr(vmovdqa64))]
481#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
482pub const fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i {
483    unsafe {
484        let mov = a.as_i64x2();
485        transmute(simd_select_bitmask(k, mov, i64x2::ZERO))
486    }
487}
488
489/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
490///
491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_ps&expand=3825)
492#[inline]
493#[target_feature(enable = "avx512f")]
494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
495#[cfg_attr(test, assert_instr(vmovaps))]
496#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
497pub const fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
498    unsafe {
499        let mov = a.as_f32x16();
500        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
501    }
502}
503
504/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
505///
506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_ps&expand=3826)
507#[inline]
508#[target_feature(enable = "avx512f")]
509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
510#[cfg_attr(test, assert_instr(vmovaps))]
511#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
512pub const fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
513    unsafe {
514        let mov = a.as_f32x16();
515        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
516    }
517}
518
519/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
520///
521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_ps&expand=3823)
522#[inline]
523#[target_feature(enable = "avx512f,avx512vl")]
524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
525#[cfg_attr(test, assert_instr(vmovaps))]
526#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
527pub const fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
528    unsafe {
529        let mov = a.as_f32x8();
530        transmute(simd_select_bitmask(k, mov, src.as_f32x8()))
531    }
532}
533
534/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
535///
536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_ps&expand=3824)
537#[inline]
538#[target_feature(enable = "avx512f,avx512vl")]
539#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
540#[cfg_attr(test, assert_instr(vmovaps))]
541#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
542pub const fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 {
543    unsafe {
544        let mov = a.as_f32x8();
545        transmute(simd_select_bitmask(k, mov, f32x8::ZERO))
546    }
547}
548
549/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
550///
551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_ps&expand=3821)
552#[inline]
553#[target_feature(enable = "avx512f,avx512vl")]
554#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
555#[cfg_attr(test, assert_instr(vmovaps))]
556#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
557pub const fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
558    unsafe {
559        let mov = a.as_f32x4();
560        transmute(simd_select_bitmask(k, mov, src.as_f32x4()))
561    }
562}
563
564/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
565///
566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_ps&expand=3822)
567#[inline]
568#[target_feature(enable = "avx512f,avx512vl")]
569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
570#[cfg_attr(test, assert_instr(vmovaps))]
571#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
572pub const fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 {
573    unsafe {
574        let mov = a.as_f32x4();
575        transmute(simd_select_bitmask(k, mov, f32x4::ZERO))
576    }
577}
578
579/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
580///
581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_pd&expand=3819)
582#[inline]
583#[target_feature(enable = "avx512f")]
584#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
585#[cfg_attr(test, assert_instr(vmovapd))]
586#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
587pub const fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
588    unsafe {
589        let mov = a.as_f64x8();
590        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
591    }
592}
593
594/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
595///
596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_pd&expand=3820)
597#[inline]
598#[target_feature(enable = "avx512f")]
599#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
600#[cfg_attr(test, assert_instr(vmovapd))]
601#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
602pub const fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
603    unsafe {
604        let mov = a.as_f64x8();
605        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
606    }
607}
608
609/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
610///
611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_pd&expand=3817)
612#[inline]
613#[target_feature(enable = "avx512f,avx512vl")]
614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
615#[cfg_attr(test, assert_instr(vmovapd))]
616#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
617pub const fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
618    unsafe {
619        let mov = a.as_f64x4();
620        transmute(simd_select_bitmask(k, mov, src.as_f64x4()))
621    }
622}
623
624/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
625///
626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_pd&expand=3818)
627#[inline]
628#[target_feature(enable = "avx512f,avx512vl")]
629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
630#[cfg_attr(test, assert_instr(vmovapd))]
631#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
632pub const fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d {
633    unsafe {
634        let mov = a.as_f64x4();
635        transmute(simd_select_bitmask(k, mov, f64x4::ZERO))
636    }
637}
638
639/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
640///
641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_pd&expand=3815)
642#[inline]
643#[target_feature(enable = "avx512f,avx512vl")]
644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
645#[cfg_attr(test, assert_instr(vmovapd))]
646#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
647pub const fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
648    unsafe {
649        let mov = a.as_f64x2();
650        transmute(simd_select_bitmask(k, mov, src.as_f64x2()))
651    }
652}
653
654/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
655///
656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_pd&expand=3816)
657#[inline]
658#[target_feature(enable = "avx512f,avx512vl")]
659#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
660#[cfg_attr(test, assert_instr(vmovapd))]
661#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
662pub const fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d {
663    unsafe {
664        let mov = a.as_f64x2();
665        transmute(simd_select_bitmask(k, mov, f64x2::ZERO))
666    }
667}
668
669/// Add packed 32-bit integers in a and b, and store the results in dst.
670///
671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi32&expand=100)
672#[inline]
673#[target_feature(enable = "avx512f")]
674#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
675#[cfg_attr(test, assert_instr(vpaddd))]
676#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
677pub const fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
678    unsafe { transmute(simd_add(a.as_i32x16(), b.as_i32x16())) }
679}
680
681/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
682///
683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi32&expand=101)
684#[inline]
685#[target_feature(enable = "avx512f")]
686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
687#[cfg_attr(test, assert_instr(vpaddd))]
688#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
689pub const fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
690    unsafe {
691        let add = _mm512_add_epi32(a, b).as_i32x16();
692        transmute(simd_select_bitmask(k, add, src.as_i32x16()))
693    }
694}
695
696/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
697///
698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi32&expand=102)
699#[inline]
700#[target_feature(enable = "avx512f")]
701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
702#[cfg_attr(test, assert_instr(vpaddd))]
703#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
704pub const fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
705    unsafe {
706        let add = _mm512_add_epi32(a, b).as_i32x16();
707        transmute(simd_select_bitmask(k, add, i32x16::ZERO))
708    }
709}
710
711/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
712///
713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi32&expand=98)
714#[inline]
715#[target_feature(enable = "avx512f,avx512vl")]
716#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
717#[cfg_attr(test, assert_instr(vpaddd))]
718#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
719pub const fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
720    unsafe {
721        let add = _mm256_add_epi32(a, b).as_i32x8();
722        transmute(simd_select_bitmask(k, add, src.as_i32x8()))
723    }
724}
725
726/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
727///
728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi32&expand=99)
729#[inline]
730#[target_feature(enable = "avx512f,avx512vl")]
731#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
732#[cfg_attr(test, assert_instr(vpaddd))]
733#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
734pub const fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
735    unsafe {
736        let add = _mm256_add_epi32(a, b).as_i32x8();
737        transmute(simd_select_bitmask(k, add, i32x8::ZERO))
738    }
739}
740
741/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
742///
743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi32&expand=95)
744#[inline]
745#[target_feature(enable = "avx512f,avx512vl")]
746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
747#[cfg_attr(test, assert_instr(vpaddd))]
748#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
749pub const fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
750    unsafe {
751        let add = _mm_add_epi32(a, b).as_i32x4();
752        transmute(simd_select_bitmask(k, add, src.as_i32x4()))
753    }
754}
755
756/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
757///
758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi32&expand=96)
759#[inline]
760#[target_feature(enable = "avx512f,avx512vl")]
761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
762#[cfg_attr(test, assert_instr(vpaddd))]
763#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
764pub const fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
765    unsafe {
766        let add = _mm_add_epi32(a, b).as_i32x4();
767        transmute(simd_select_bitmask(k, add, i32x4::ZERO))
768    }
769}
770
771/// Add packed 64-bit integers in a and b, and store the results in dst.
772///
773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi64&expand=109)
774#[inline]
775#[target_feature(enable = "avx512f")]
776#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
777#[cfg_attr(test, assert_instr(vpaddq))]
778#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
779pub const fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
780    unsafe { transmute(simd_add(a.as_i64x8(), b.as_i64x8())) }
781}
782
783/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
784///
785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi64&expand=110)
786#[inline]
787#[target_feature(enable = "avx512f")]
788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
789#[cfg_attr(test, assert_instr(vpaddq))]
790#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
791pub const fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
792    unsafe {
793        let add = _mm512_add_epi64(a, b).as_i64x8();
794        transmute(simd_select_bitmask(k, add, src.as_i64x8()))
795    }
796}
797
798/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
799///
800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi64&expand=111)
801#[inline]
802#[target_feature(enable = "avx512f")]
803#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
804#[cfg_attr(test, assert_instr(vpaddq))]
805#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
806pub const fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
807    unsafe {
808        let add = _mm512_add_epi64(a, b).as_i64x8();
809        transmute(simd_select_bitmask(k, add, i64x8::ZERO))
810    }
811}
812
813/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
814///
815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi64&expand=107)
816#[inline]
817#[target_feature(enable = "avx512f,avx512vl")]
818#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
819#[cfg_attr(test, assert_instr(vpaddq))]
820#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
821pub const fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
822    unsafe {
823        let add = _mm256_add_epi64(a, b).as_i64x4();
824        transmute(simd_select_bitmask(k, add, src.as_i64x4()))
825    }
826}
827
828/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
829///
830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi64&expand=108)
831#[inline]
832#[target_feature(enable = "avx512f,avx512vl")]
833#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
834#[cfg_attr(test, assert_instr(vpaddq))]
835#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
836pub const fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
837    unsafe {
838        let add = _mm256_add_epi64(a, b).as_i64x4();
839        transmute(simd_select_bitmask(k, add, i64x4::ZERO))
840    }
841}
842
843/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
844///
845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi64&expand=104)
846#[inline]
847#[target_feature(enable = "avx512f,avx512vl")]
848#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
849#[cfg_attr(test, assert_instr(vpaddq))]
850#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
851pub const fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
852    unsafe {
853        let add = _mm_add_epi64(a, b).as_i64x2();
854        transmute(simd_select_bitmask(k, add, src.as_i64x2()))
855    }
856}
857
858/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
859///
860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi64&expand=105)
861#[inline]
862#[target_feature(enable = "avx512f,avx512vl")]
863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
864#[cfg_attr(test, assert_instr(vpaddq))]
865#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
866pub const fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
867    unsafe {
868        let add = _mm_add_epi64(a, b).as_i64x2();
869        transmute(simd_select_bitmask(k, add, i64x2::ZERO))
870    }
871}
872
873/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
874///
875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_ps&expand=139)
876#[inline]
877#[target_feature(enable = "avx512f")]
878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
879#[cfg_attr(test, assert_instr(vaddps))]
880#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
881pub const fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
882    unsafe { transmute(simd_add(a.as_f32x16(), b.as_f32x16())) }
883}
884
885/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
886///
887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_ps&expand=140)
888#[inline]
889#[target_feature(enable = "avx512f")]
890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
891#[cfg_attr(test, assert_instr(vaddps))]
892#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
893pub const fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
894    unsafe {
895        let add = _mm512_add_ps(a, b).as_f32x16();
896        transmute(simd_select_bitmask(k, add, src.as_f32x16()))
897    }
898}
899
900/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
901///
902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_ps&expand=141)
903#[inline]
904#[target_feature(enable = "avx512f")]
905#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
906#[cfg_attr(test, assert_instr(vaddps))]
907#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
908pub const fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
909    unsafe {
910        let add = _mm512_add_ps(a, b).as_f32x16();
911        transmute(simd_select_bitmask(k, add, f32x16::ZERO))
912    }
913}
914
915/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
916///
917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_ps&expand=137)
918#[inline]
919#[target_feature(enable = "avx512f,avx512vl")]
920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
921#[cfg_attr(test, assert_instr(vaddps))]
922#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
923pub const fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
924    unsafe {
925        let add = _mm256_add_ps(a, b).as_f32x8();
926        transmute(simd_select_bitmask(k, add, src.as_f32x8()))
927    }
928}
929
930/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
931///
932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_ps&expand=138)
933#[inline]
934#[target_feature(enable = "avx512f,avx512vl")]
935#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
936#[cfg_attr(test, assert_instr(vaddps))]
937#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
938pub const fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
939    unsafe {
940        let add = _mm256_add_ps(a, b).as_f32x8();
941        transmute(simd_select_bitmask(k, add, f32x8::ZERO))
942    }
943}
944
945/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
946///
947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_ps&expand=134)
948#[inline]
949#[target_feature(enable = "avx512f,avx512vl")]
950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
951#[cfg_attr(test, assert_instr(vaddps))]
952#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
953pub const fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
954    unsafe {
955        let add = _mm_add_ps(a, b).as_f32x4();
956        transmute(simd_select_bitmask(k, add, src.as_f32x4()))
957    }
958}
959
960/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
961///
962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_ps&expand=135)
963#[inline]
964#[target_feature(enable = "avx512f,avx512vl")]
965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
966#[cfg_attr(test, assert_instr(vaddps))]
967#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
968pub const fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
969    unsafe {
970        let add = _mm_add_ps(a, b).as_f32x4();
971        transmute(simd_select_bitmask(k, add, f32x4::ZERO))
972    }
973}
974
975/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
976///
977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_pd&expand=127)
978#[inline]
979#[target_feature(enable = "avx512f")]
980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
981#[cfg_attr(test, assert_instr(vaddpd))]
982#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
983pub const fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
984    unsafe { transmute(simd_add(a.as_f64x8(), b.as_f64x8())) }
985}
986
987/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
988///
989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_pd&expand=128)
990#[inline]
991#[target_feature(enable = "avx512f")]
992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
993#[cfg_attr(test, assert_instr(vaddpd))]
994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
995pub const fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
996    unsafe {
997        let add = _mm512_add_pd(a, b).as_f64x8();
998        transmute(simd_select_bitmask(k, add, src.as_f64x8()))
999    }
1000}
1001
1002/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1003///
1004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_pd&expand=129)
1005#[inline]
1006#[target_feature(enable = "avx512f")]
1007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1008#[cfg_attr(test, assert_instr(vaddpd))]
1009#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1010pub const fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1011    unsafe {
1012        let add = _mm512_add_pd(a, b).as_f64x8();
1013        transmute(simd_select_bitmask(k, add, f64x8::ZERO))
1014    }
1015}
1016
1017/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1018///
1019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_pd&expand=125)
1020#[inline]
1021#[target_feature(enable = "avx512f,avx512vl")]
1022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1023#[cfg_attr(test, assert_instr(vaddpd))]
1024#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1025pub const fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1026    unsafe {
1027        let add = _mm256_add_pd(a, b).as_f64x4();
1028        transmute(simd_select_bitmask(k, add, src.as_f64x4()))
1029    }
1030}
1031
1032/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1033///
1034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_pd&expand=126)
1035#[inline]
1036#[target_feature(enable = "avx512f,avx512vl")]
1037#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1038#[cfg_attr(test, assert_instr(vaddpd))]
1039#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1040pub const fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1041    unsafe {
1042        let add = _mm256_add_pd(a, b).as_f64x4();
1043        transmute(simd_select_bitmask(k, add, f64x4::ZERO))
1044    }
1045}
1046
1047/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1048///
1049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_pd&expand=122)
1050#[inline]
1051#[target_feature(enable = "avx512f,avx512vl")]
1052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1053#[cfg_attr(test, assert_instr(vaddpd))]
1054#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1055pub const fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1056    unsafe {
1057        let add = _mm_add_pd(a, b).as_f64x2();
1058        transmute(simd_select_bitmask(k, add, src.as_f64x2()))
1059    }
1060}
1061
1062/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1063///
1064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_pd&expand=123)
1065#[inline]
1066#[target_feature(enable = "avx512f,avx512vl")]
1067#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1068#[cfg_attr(test, assert_instr(vaddpd))]
1069#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1070pub const fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1071    unsafe {
1072        let add = _mm_add_pd(a, b).as_f64x2();
1073        transmute(simd_select_bitmask(k, add, f64x2::ZERO))
1074    }
1075}
1076
1077/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
1078///
1079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi32&expand=5694)
1080#[inline]
1081#[target_feature(enable = "avx512f")]
1082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1083#[cfg_attr(test, assert_instr(vpsubd))]
1084#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1085pub const fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
1086    unsafe { transmute(simd_sub(a.as_i32x16(), b.as_i32x16())) }
1087}
1088
1089/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1090///
1091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi32&expand=5692)
1092#[inline]
1093#[target_feature(enable = "avx512f")]
1094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1095#[cfg_attr(test, assert_instr(vpsubd))]
1096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1097pub const fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1098    unsafe {
1099        let sub = _mm512_sub_epi32(a, b).as_i32x16();
1100        transmute(simd_select_bitmask(k, sub, src.as_i32x16()))
1101    }
1102}
1103
1104/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1105///
1106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi32&expand=5693)
1107#[inline]
1108#[target_feature(enable = "avx512f")]
1109#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1110#[cfg_attr(test, assert_instr(vpsubd))]
1111#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1112pub const fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1113    unsafe {
1114        let sub = _mm512_sub_epi32(a, b).as_i32x16();
1115        transmute(simd_select_bitmask(k, sub, i32x16::ZERO))
1116    }
1117}
1118
1119/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1120///
1121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi32&expand=5689)
1122#[inline]
1123#[target_feature(enable = "avx512f,avx512vl")]
1124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1125#[cfg_attr(test, assert_instr(vpsubd))]
1126#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1127pub const fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1128    unsafe {
1129        let sub = _mm256_sub_epi32(a, b).as_i32x8();
1130        transmute(simd_select_bitmask(k, sub, src.as_i32x8()))
1131    }
1132}
1133
1134/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1135///
1136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi32&expand=5690)
1137#[inline]
1138#[target_feature(enable = "avx512f,avx512vl")]
1139#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1140#[cfg_attr(test, assert_instr(vpsubd))]
1141#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1142pub const fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1143    unsafe {
1144        let sub = _mm256_sub_epi32(a, b).as_i32x8();
1145        transmute(simd_select_bitmask(k, sub, i32x8::ZERO))
1146    }
1147}
1148
1149/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1150///
1151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi32&expand=5686)
1152#[inline]
1153#[target_feature(enable = "avx512f,avx512vl")]
1154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1155#[cfg_attr(test, assert_instr(vpsubd))]
1156#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1157pub const fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1158    unsafe {
1159        let sub = _mm_sub_epi32(a, b).as_i32x4();
1160        transmute(simd_select_bitmask(k, sub, src.as_i32x4()))
1161    }
1162}
1163
1164/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1165///
1166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi32&expand=5687)
1167#[inline]
1168#[target_feature(enable = "avx512f,avx512vl")]
1169#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1170#[cfg_attr(test, assert_instr(vpsubd))]
1171#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1172pub const fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1173    unsafe {
1174        let sub = _mm_sub_epi32(a, b).as_i32x4();
1175        transmute(simd_select_bitmask(k, sub, i32x4::ZERO))
1176    }
1177}
1178
1179/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
1180///
1181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi64&expand=5703)
1182#[inline]
1183#[target_feature(enable = "avx512f")]
1184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1185#[cfg_attr(test, assert_instr(vpsubq))]
1186#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1187pub const fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
1188    unsafe { transmute(simd_sub(a.as_i64x8(), b.as_i64x8())) }
1189}
1190
1191/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1192///
1193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi64&expand=5701)
1194#[inline]
1195#[target_feature(enable = "avx512f")]
1196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1197#[cfg_attr(test, assert_instr(vpsubq))]
1198#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1199pub const fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1200    unsafe {
1201        let sub = _mm512_sub_epi64(a, b).as_i64x8();
1202        transmute(simd_select_bitmask(k, sub, src.as_i64x8()))
1203    }
1204}
1205
1206/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1207///
1208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi64&expand=5702)
1209#[inline]
1210#[target_feature(enable = "avx512f")]
1211#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1212#[cfg_attr(test, assert_instr(vpsubq))]
1213#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1214pub const fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1215    unsafe {
1216        let sub = _mm512_sub_epi64(a, b).as_i64x8();
1217        transmute(simd_select_bitmask(k, sub, i64x8::ZERO))
1218    }
1219}
1220
1221/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1222///
1223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi64&expand=5698)
1224#[inline]
1225#[target_feature(enable = "avx512f,avx512vl")]
1226#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1227#[cfg_attr(test, assert_instr(vpsubq))]
1228#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1229pub const fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1230    unsafe {
1231        let sub = _mm256_sub_epi64(a, b).as_i64x4();
1232        transmute(simd_select_bitmask(k, sub, src.as_i64x4()))
1233    }
1234}
1235
1236/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1237///
1238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi64&expand=5699)
1239#[inline]
1240#[target_feature(enable = "avx512f,avx512vl")]
1241#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1242#[cfg_attr(test, assert_instr(vpsubq))]
1243#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1244pub const fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1245    unsafe {
1246        let sub = _mm256_sub_epi64(a, b).as_i64x4();
1247        transmute(simd_select_bitmask(k, sub, i64x4::ZERO))
1248    }
1249}
1250
1251/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1252///
1253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi64&expand=5695)
1254#[inline]
1255#[target_feature(enable = "avx512f,avx512vl")]
1256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1257#[cfg_attr(test, assert_instr(vpsubq))]
1258#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1259pub const fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1260    unsafe {
1261        let sub = _mm_sub_epi64(a, b).as_i64x2();
1262        transmute(simd_select_bitmask(k, sub, src.as_i64x2()))
1263    }
1264}
1265
1266/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1267///
1268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi64&expand=5696)
1269#[inline]
1270#[target_feature(enable = "avx512f,avx512vl")]
1271#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1272#[cfg_attr(test, assert_instr(vpsubq))]
1273#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1274pub const fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1275    unsafe {
1276        let sub = _mm_sub_epi64(a, b).as_i64x2();
1277        transmute(simd_select_bitmask(k, sub, i64x2::ZERO))
1278    }
1279}
1280
1281/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
1282///
1283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_ps&expand=5733)
1284#[inline]
1285#[target_feature(enable = "avx512f")]
1286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1287#[cfg_attr(test, assert_instr(vsubps))]
1288#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1289pub const fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
1290    unsafe { transmute(simd_sub(a.as_f32x16(), b.as_f32x16())) }
1291}
1292
1293/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1294///
1295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_ps&expand=5731)
1296#[inline]
1297#[target_feature(enable = "avx512f")]
1298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1299#[cfg_attr(test, assert_instr(vsubps))]
1300#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1301pub const fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1302    unsafe {
1303        let sub = _mm512_sub_ps(a, b).as_f32x16();
1304        transmute(simd_select_bitmask(k, sub, src.as_f32x16()))
1305    }
1306}
1307
1308/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1309///
1310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_ps&expand=5732)
1311#[inline]
1312#[target_feature(enable = "avx512f")]
1313#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1314#[cfg_attr(test, assert_instr(vsubps))]
1315#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1316pub const fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1317    unsafe {
1318        let sub = _mm512_sub_ps(a, b).as_f32x16();
1319        transmute(simd_select_bitmask(k, sub, f32x16::ZERO))
1320    }
1321}
1322
1323/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1324///
1325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_ps&expand=5728)
1326#[inline]
1327#[target_feature(enable = "avx512f,avx512vl")]
1328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1329#[cfg_attr(test, assert_instr(vsubps))]
1330#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1331pub const fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1332    unsafe {
1333        let sub = _mm256_sub_ps(a, b).as_f32x8();
1334        transmute(simd_select_bitmask(k, sub, src.as_f32x8()))
1335    }
1336}
1337
1338/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1339///
1340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_ps&expand=5729)
1341#[inline]
1342#[target_feature(enable = "avx512f,avx512vl")]
1343#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1344#[cfg_attr(test, assert_instr(vsubps))]
1345#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1346pub const fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1347    unsafe {
1348        let sub = _mm256_sub_ps(a, b).as_f32x8();
1349        transmute(simd_select_bitmask(k, sub, f32x8::ZERO))
1350    }
1351}
1352
1353/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1354///
1355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_ps&expand=5725)
1356#[inline]
1357#[target_feature(enable = "avx512f,avx512vl")]
1358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1359#[cfg_attr(test, assert_instr(vsubps))]
1360#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1361pub const fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1362    unsafe {
1363        let sub = _mm_sub_ps(a, b).as_f32x4();
1364        transmute(simd_select_bitmask(k, sub, src.as_f32x4()))
1365    }
1366}
1367
1368/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1369///
1370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_ps&expand=5726)
1371#[inline]
1372#[target_feature(enable = "avx512f,avx512vl")]
1373#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1374#[cfg_attr(test, assert_instr(vsubps))]
1375#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1376pub const fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1377    unsafe {
1378        let sub = _mm_sub_ps(a, b).as_f32x4();
1379        transmute(simd_select_bitmask(k, sub, f32x4::ZERO))
1380    }
1381}
1382
1383/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
1384///
1385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_pd&expand=5721)
1386#[inline]
1387#[target_feature(enable = "avx512f")]
1388#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1389#[cfg_attr(test, assert_instr(vsubpd))]
1390#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1391pub const fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
1392    unsafe { transmute(simd_sub(a.as_f64x8(), b.as_f64x8())) }
1393}
1394
1395/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1396///
1397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_pd&expand=5719)
1398#[inline]
1399#[target_feature(enable = "avx512f")]
1400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1401#[cfg_attr(test, assert_instr(vsubpd))]
1402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1403pub const fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1404    unsafe {
1405        let sub = _mm512_sub_pd(a, b).as_f64x8();
1406        transmute(simd_select_bitmask(k, sub, src.as_f64x8()))
1407    }
1408}
1409
1410/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1411///
1412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_pd&expand=5720)
1413#[inline]
1414#[target_feature(enable = "avx512f")]
1415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1416#[cfg_attr(test, assert_instr(vsubpd))]
1417#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1418pub const fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1419    unsafe {
1420        let sub = _mm512_sub_pd(a, b).as_f64x8();
1421        transmute(simd_select_bitmask(k, sub, f64x8::ZERO))
1422    }
1423}
1424
1425/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1426///
1427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_pd&expand=5716)
1428#[inline]
1429#[target_feature(enable = "avx512f,avx512vl")]
1430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1431#[cfg_attr(test, assert_instr(vsubpd))]
1432#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1433pub const fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1434    unsafe {
1435        let sub = _mm256_sub_pd(a, b).as_f64x4();
1436        transmute(simd_select_bitmask(k, sub, src.as_f64x4()))
1437    }
1438}
1439
1440/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1441///
1442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_pd&expand=5717)
1443#[inline]
1444#[target_feature(enable = "avx512f,avx512vl")]
1445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1446#[cfg_attr(test, assert_instr(vsubpd))]
1447#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1448pub const fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1449    unsafe {
1450        let sub = _mm256_sub_pd(a, b).as_f64x4();
1451        transmute(simd_select_bitmask(k, sub, f64x4::ZERO))
1452    }
1453}
1454
1455/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1456///
1457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_pd&expand=5713)
1458#[inline]
1459#[target_feature(enable = "avx512f,avx512vl")]
1460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1461#[cfg_attr(test, assert_instr(vsubpd))]
1462#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1463pub const fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1464    unsafe {
1465        let sub = _mm_sub_pd(a, b).as_f64x2();
1466        transmute(simd_select_bitmask(k, sub, src.as_f64x2()))
1467    }
1468}
1469
1470/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1471///
1472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_pd&expand=5714)
1473#[inline]
1474#[target_feature(enable = "avx512f,avx512vl")]
1475#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1476#[cfg_attr(test, assert_instr(vsubpd))]
1477#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1478pub const fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1479    unsafe {
1480        let sub = _mm_sub_pd(a, b).as_f64x2();
1481        transmute(simd_select_bitmask(k, sub, f64x2::ZERO))
1482    }
1483}
1484
1485/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
1486///
1487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epi32&expand=3907)
1488#[inline]
1489#[target_feature(enable = "avx512f")]
1490#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1491#[cfg_attr(test, assert_instr(vpmuldq))]
1492#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1493pub const fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
1494    unsafe {
1495        let a = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8()));
1496        let b = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8()));
1497        transmute(simd_mul(a, b))
1498    }
1499}
1500
1501/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1502///
1503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epi32&expand=3905)
1504#[inline]
1505#[target_feature(enable = "avx512f")]
1506#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1507#[cfg_attr(test, assert_instr(vpmuldq))]
1508#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1509pub const fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1510    unsafe {
1511        let mul = _mm512_mul_epi32(a, b).as_i64x8();
1512        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
1513    }
1514}
1515
1516/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1517///
1518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epi32&expand=3906)
1519#[inline]
1520#[target_feature(enable = "avx512f")]
1521#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1522#[cfg_attr(test, assert_instr(vpmuldq))]
1523#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1524pub const fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1525    unsafe {
1526        let mul = _mm512_mul_epi32(a, b).as_i64x8();
1527        transmute(simd_select_bitmask(k, mul, i64x8::ZERO))
1528    }
1529}
1530
1531/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1532///
1533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epi32&expand=3902)
1534#[inline]
1535#[target_feature(enable = "avx512f,avx512vl")]
1536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1537#[cfg_attr(test, assert_instr(vpmuldq))]
1538#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1539pub const fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1540    unsafe {
1541        let mul = _mm256_mul_epi32(a, b).as_i64x4();
1542        transmute(simd_select_bitmask(k, mul, src.as_i64x4()))
1543    }
1544}
1545
1546/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1547///
1548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epi32&expand=3903)
1549#[inline]
1550#[target_feature(enable = "avx512f,avx512vl")]
1551#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1552#[cfg_attr(test, assert_instr(vpmuldq))]
1553#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1554pub const fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1555    unsafe {
1556        let mul = _mm256_mul_epi32(a, b).as_i64x4();
1557        transmute(simd_select_bitmask(k, mul, i64x4::ZERO))
1558    }
1559}
1560
1561/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1562///
1563/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epi32&expand=3899)
1564#[inline]
1565#[target_feature(enable = "avx512f,avx512vl")]
1566#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1567#[cfg_attr(test, assert_instr(vpmuldq))]
1568#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1569pub const fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1570    unsafe {
1571        let mul = _mm_mul_epi32(a, b).as_i64x2();
1572        transmute(simd_select_bitmask(k, mul, src.as_i64x2()))
1573    }
1574}
1575
1576/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1577///
1578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epi32&expand=3900)
1579#[inline]
1580#[target_feature(enable = "avx512f,avx512vl")]
1581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1582#[cfg_attr(test, assert_instr(vpmuldq))]
1583#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1584pub const fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1585    unsafe {
1586        let mul = _mm_mul_epi32(a, b).as_i64x2();
1587        transmute(simd_select_bitmask(k, mul, i64x2::ZERO))
1588    }
1589}
1590
1591/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst.
1592///
1593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi32&expand=4005)
1594#[inline]
1595#[target_feature(enable = "avx512f")]
1596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1597#[cfg_attr(test, assert_instr(vpmulld))]
1598#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1599pub const fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
1600    unsafe { transmute(simd_mul(a.as_i32x16(), b.as_i32x16())) }
1601}
1602
1603/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1604///
1605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi32&expand=4003)
1606#[inline]
1607#[target_feature(enable = "avx512f")]
1608#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1609#[cfg_attr(test, assert_instr(vpmulld))]
1610#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1611pub const fn _mm512_mask_mullo_epi32(
1612    src: __m512i,
1613    k: __mmask16,
1614    a: __m512i,
1615    b: __m512i,
1616) -> __m512i {
1617    unsafe {
1618        let mul = _mm512_mullo_epi32(a, b).as_i32x16();
1619        transmute(simd_select_bitmask(k, mul, src.as_i32x16()))
1620    }
1621}
1622
1623/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1624///
1625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi32&expand=4004)
1626#[inline]
1627#[target_feature(enable = "avx512f")]
1628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1629#[cfg_attr(test, assert_instr(vpmulld))]
1630#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1631pub const fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1632    unsafe {
1633        let mul = _mm512_mullo_epi32(a, b).as_i32x16();
1634        transmute(simd_select_bitmask(k, mul, i32x16::ZERO))
1635    }
1636}
1637
1638/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1639///
1640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi32&expand=4000)
1641#[inline]
1642#[target_feature(enable = "avx512f,avx512vl")]
1643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1644#[cfg_attr(test, assert_instr(vpmulld))]
1645#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1646pub const fn _mm256_mask_mullo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1647    unsafe {
1648        let mul = _mm256_mullo_epi32(a, b).as_i32x8();
1649        transmute(simd_select_bitmask(k, mul, src.as_i32x8()))
1650    }
1651}
1652
1653/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1654///
1655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi32&expand=4001)
1656#[inline]
1657#[target_feature(enable = "avx512f,avx512vl")]
1658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1659#[cfg_attr(test, assert_instr(vpmulld))]
1660#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1661pub const fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1662    unsafe {
1663        let mul = _mm256_mullo_epi32(a, b).as_i32x8();
1664        transmute(simd_select_bitmask(k, mul, i32x8::ZERO))
1665    }
1666}
1667
1668/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1669///
1670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi32&expand=3997)
1671#[inline]
1672#[target_feature(enable = "avx512f,avx512vl")]
1673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1674#[cfg_attr(test, assert_instr(vpmulld))]
1675#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1676pub const fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1677    unsafe {
1678        let mul = _mm_mullo_epi32(a, b).as_i32x4();
1679        transmute(simd_select_bitmask(k, mul, src.as_i32x4()))
1680    }
1681}
1682
1683/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1684///
1685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi32&expand=3998)
1686#[inline]
1687#[target_feature(enable = "avx512f,avx512vl")]
1688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1689#[cfg_attr(test, assert_instr(vpmulld))]
1690#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1691pub const fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1692    unsafe {
1693        let mul = _mm_mullo_epi32(a, b).as_i32x4();
1694        transmute(simd_select_bitmask(k, mul, i32x4::ZERO))
1695    }
1696}
1697
1698/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst.
1699///
1700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullox_epi64&expand=4017)
1701///
1702/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
1703#[inline]
1704#[target_feature(enable = "avx512f")]
1705#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1706#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1707pub const fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
1708    unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) }
1709}
1710
1711/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1712///
1713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullox_epi64&expand=4016)
1714///
1715/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
1716#[inline]
1717#[target_feature(enable = "avx512f")]
1718#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1719#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1720pub const fn _mm512_mask_mullox_epi64(
1721    src: __m512i,
1722    k: __mmask8,
1723    a: __m512i,
1724    b: __m512i,
1725) -> __m512i {
1726    unsafe {
1727        let mul = _mm512_mullox_epi64(a, b).as_i64x8();
1728        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
1729    }
1730}
1731
1732/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst.
1733///
1734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epu32&expand=3916)
1735#[inline]
1736#[target_feature(enable = "avx512f")]
1737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1738#[cfg_attr(test, assert_instr(vpmuludq))]
1739#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1740pub const fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
1741    unsafe {
1742        let a = a.as_u64x8();
1743        let b = b.as_u64x8();
1744        let mask = u64x8::splat(u32::MAX as u64);
1745        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
1746    }
1747}
1748
1749/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1750///
1751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epu32&expand=3914)
1752#[inline]
1753#[target_feature(enable = "avx512f")]
1754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1755#[cfg_attr(test, assert_instr(vpmuludq))]
1756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1757pub const fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1758    unsafe {
1759        let mul = _mm512_mul_epu32(a, b).as_u64x8();
1760        transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
1761    }
1762}
1763
/// Multiply the low unsigned 32-bit integers from each packed 64-bit element
/// in `a` and `b`, and store the unsigned 64-bit results in `dst` using
/// zeromask `k` (elements are zeroed out when the corresponding mask bit is
/// not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epu32&expand=3915)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mul_epu32(a, b).as_u64x8();
        // Per-lane blend: mask bit set takes the product, clear zeroes the lane.
        transmute(simd_select_bitmask(k, mul, u64x8::ZERO))
    }
}
1778
/// Multiply the low unsigned 32-bit integers from each packed 64-bit element
/// in `a` and `b`, and store the unsigned 64-bit results in `dst` using
/// writemask `k` (elements are copied from `src` when the corresponding mask
/// bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epu32&expand=3911)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mul_epu32(a, b).as_u64x4();
        // Per-lane blend: mask bit set takes the product, clear copies from `src`.
        transmute(simd_select_bitmask(k, mul, src.as_u64x4()))
    }
}
1793
/// Multiply the low unsigned 32-bit integers from each packed 64-bit element
/// in `a` and `b`, and store the unsigned 64-bit results in `dst` using
/// zeromask `k` (elements are zeroed out when the corresponding mask bit is
/// not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epu32&expand=3912)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mul_epu32(a, b).as_u64x4();
        // Per-lane blend: mask bit set takes the product, clear zeroes the lane.
        transmute(simd_select_bitmask(k, mul, u64x4::ZERO))
    }
}
1808
/// Multiply the low unsigned 32-bit integers from each packed 64-bit element
/// in `a` and `b`, and store the unsigned 64-bit results in `dst` using
/// writemask `k` (elements are copied from `src` when the corresponding mask
/// bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epu32&expand=3908)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mul_epu32(a, b).as_u64x2();
        // Per-lane blend: mask bit set takes the product, clear copies from `src`.
        transmute(simd_select_bitmask(k, mul, src.as_u64x2()))
    }
}
1823
/// Multiply the low unsigned 32-bit integers from each packed 64-bit element
/// in `a` and `b`, and store the unsigned 64-bit results in `dst` using
/// zeromask `k` (elements are zeroed out when the corresponding mask bit is
/// not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epu32&expand=3909)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mul_epu32(a, b).as_u64x2();
        // Per-lane blend: mask bit set takes the product, clear zeroes the lane.
        transmute(simd_select_bitmask(k, mul, u64x2::ZERO))
    }
}
1838
1839/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
1840///
1841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_ps&expand=3934)
1842#[inline]
1843#[target_feature(enable = "avx512f")]
1844#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1845#[cfg_attr(test, assert_instr(vmulps))]
1846#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1847pub const fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
1848    unsafe { transmute(simd_mul(a.as_f32x16(), b.as_f32x16())) }
1849}
1850
/// Multiply packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and store the results in `dst` using writemask `k` (elements are
/// copied from `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_ps&expand=3932)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let mul = _mm512_mul_ps(a, b).as_f32x16();
        // Per-lane blend: mask bit set takes the product, clear copies from `src`.
        transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
    }
}
1865
/// Multiply packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and store the results in `dst` using zeromask `k` (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_ps&expand=3933)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let mul = _mm512_mul_ps(a, b).as_f32x16();
        // Per-lane blend: mask bit set takes the product, clear zeroes the lane.
        transmute(simd_select_bitmask(k, mul, f32x16::ZERO))
    }
}
1880
/// Multiply packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and store the results in `dst` using writemask `k` (elements are
/// copied from `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_ps&expand=3929)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let mul = _mm256_mul_ps(a, b).as_f32x8();
        // Per-lane blend: mask bit set takes the product, clear copies from `src`.
        transmute(simd_select_bitmask(k, mul, src.as_f32x8()))
    }
}
1895
/// Multiply packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and store the results in `dst` using zeromask `k` (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_ps&expand=3930)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let mul = _mm256_mul_ps(a, b).as_f32x8();
        // Per-lane blend: mask bit set takes the product, clear zeroes the lane.
        transmute(simd_select_bitmask(k, mul, f32x8::ZERO))
    }
}
1910
/// Multiply packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and store the results in `dst` using writemask `k` (elements are
/// copied from `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_ps&expand=3926)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mul = _mm_mul_ps(a, b).as_f32x4();
        // Per-lane blend: mask bit set takes the product, clear copies from `src`.
        transmute(simd_select_bitmask(k, mul, src.as_f32x4()))
    }
}
1925
/// Multiply packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and store the results in `dst` using zeromask `k` (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_ps&expand=3927)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mul = _mm_mul_ps(a, b).as_f32x4();
        // Per-lane blend: mask bit set takes the product, clear zeroes the lane.
        transmute(simd_select_bitmask(k, mul, f32x4::ZERO))
    }
}
1940
1941/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
1942///
1943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_pd&expand=3925)
1944#[inline]
1945#[target_feature(enable = "avx512f")]
1946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1947#[cfg_attr(test, assert_instr(vmulpd))]
1948#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1949pub const fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
1950    unsafe { transmute(simd_mul(a.as_f64x8(), b.as_f64x8())) }
1951}
1952
/// Multiply packed double-precision (64-bit) floating-point elements in `a`
/// and `b`, and store the results in `dst` using writemask `k` (elements are
/// copied from `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_pd&expand=3923)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let mul = _mm512_mul_pd(a, b).as_f64x8();
        // Per-lane blend: mask bit set takes the product, clear copies from `src`.
        transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
    }
}
1967
/// Multiply packed double-precision (64-bit) floating-point elements in `a`
/// and `b`, and store the results in `dst` using zeromask `k` (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_pd&expand=3924)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let mul = _mm512_mul_pd(a, b).as_f64x8();
        // Per-lane blend: mask bit set takes the product, clear zeroes the lane.
        transmute(simd_select_bitmask(k, mul, f64x8::ZERO))
    }
}
1982
/// Multiply packed double-precision (64-bit) floating-point elements in `a`
/// and `b`, and store the results in `dst` using writemask `k` (elements are
/// copied from `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_pd&expand=3920)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let mul = _mm256_mul_pd(a, b).as_f64x4();
        // Per-lane blend: mask bit set takes the product, clear copies from `src`.
        transmute(simd_select_bitmask(k, mul, src.as_f64x4()))
    }
}
1997
/// Multiply packed double-precision (64-bit) floating-point elements in `a`
/// and `b`, and store the results in `dst` using zeromask `k` (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_pd&expand=3921)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let mul = _mm256_mul_pd(a, b).as_f64x4();
        // Per-lane blend: mask bit set takes the product, clear zeroes the lane.
        transmute(simd_select_bitmask(k, mul, f64x4::ZERO))
    }
}
2012
/// Multiply packed double-precision (64-bit) floating-point elements in `a`
/// and `b`, and store the results in `dst` using writemask `k` (elements are
/// copied from `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_pd&expand=3917)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mul = _mm_mul_pd(a, b).as_f64x2();
        // Per-lane blend: mask bit set takes the product, clear copies from `src`.
        transmute(simd_select_bitmask(k, mul, src.as_f64x2()))
    }
}
2027
/// Multiply packed double-precision (64-bit) floating-point elements in `a`
/// and `b`, and store the results in `dst` using zeromask `k` (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_pd&expand=3918)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mul = _mm_mul_pd(a, b).as_f64x2();
        // Per-lane blend: mask bit set takes the product, clear zeroes the lane.
        transmute(simd_select_bitmask(k, mul, f64x2::ZERO))
    }
}
2042
2043/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
2044///
2045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_ps&expand=2162)
2046#[inline]
2047#[target_feature(enable = "avx512f")]
2048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2049#[cfg_attr(test, assert_instr(vdivps))]
2050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2051pub const fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
2052    unsafe { transmute(simd_div(a.as_f32x16(), b.as_f32x16())) }
2053}
2054
/// Divide packed single-precision (32-bit) floating-point elements in `a` by
/// packed elements in `b`, and store the results in `dst` using writemask `k`
/// (elements are copied from `src` when the corresponding mask bit is not
/// set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_ps&expand=2163)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let div = _mm512_div_ps(a, b).as_f32x16();
        // Per-lane blend: mask bit set takes the quotient, clear copies from `src`.
        transmute(simd_select_bitmask(k, div, src.as_f32x16()))
    }
}
2069
/// Divide packed single-precision (32-bit) floating-point elements in `a` by
/// packed elements in `b`, and store the results in `dst` using zeromask `k`
/// (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_ps&expand=2164)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let div = _mm512_div_ps(a, b).as_f32x16();
        // Per-lane blend: mask bit set takes the quotient, clear zeroes the lane.
        transmute(simd_select_bitmask(k, div, f32x16::ZERO))
    }
}
2084
/// Divide packed single-precision (32-bit) floating-point elements in `a` by
/// packed elements in `b`, and store the results in `dst` using writemask `k`
/// (elements are copied from `src` when the corresponding mask bit is not
/// set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_ps&expand=2160)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let div = _mm256_div_ps(a, b).as_f32x8();
        // Per-lane blend: mask bit set takes the quotient, clear copies from `src`.
        transmute(simd_select_bitmask(k, div, src.as_f32x8()))
    }
}
2099
/// Divide packed single-precision (32-bit) floating-point elements in `a` by
/// packed elements in `b`, and store the results in `dst` using zeromask `k`
/// (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_ps&expand=2161)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let div = _mm256_div_ps(a, b).as_f32x8();
        // Per-lane blend: mask bit set takes the quotient, clear zeroes the lane.
        transmute(simd_select_bitmask(k, div, f32x8::ZERO))
    }
}
2114
/// Divide packed single-precision (32-bit) floating-point elements in `a` by
/// packed elements in `b`, and store the results in `dst` using writemask `k`
/// (elements are copied from `src` when the corresponding mask bit is not
/// set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_ps&expand=2157)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let div = _mm_div_ps(a, b).as_f32x4();
        // Per-lane blend: mask bit set takes the quotient, clear copies from `src`.
        transmute(simd_select_bitmask(k, div, src.as_f32x4()))
    }
}
2129
/// Divide packed single-precision (32-bit) floating-point elements in `a` by
/// packed elements in `b`, and store the results in `dst` using zeromask `k`
/// (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_ps&expand=2158)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let div = _mm_div_ps(a, b).as_f32x4();
        // Per-lane blend: mask bit set takes the quotient, clear zeroes the lane.
        transmute(simd_select_bitmask(k, div, f32x4::ZERO))
    }
}
2144
2145/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
2146///
2147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_pd&expand=2153)
2148#[inline]
2149#[target_feature(enable = "avx512f")]
2150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2151#[cfg_attr(test, assert_instr(vdivpd))]
2152#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2153pub const fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
2154    unsafe { transmute(simd_div(a.as_f64x8(), b.as_f64x8())) }
2155}
2156
/// Divide packed double-precision (64-bit) floating-point elements in `a` by
/// packed elements in `b`, and store the results in `dst` using writemask `k`
/// (elements are copied from `src` when the corresponding mask bit is not
/// set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_pd&expand=2154)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let div = _mm512_div_pd(a, b).as_f64x8();
        // Per-lane blend: mask bit set takes the quotient, clear copies from `src`.
        transmute(simd_select_bitmask(k, div, src.as_f64x8()))
    }
}
2171
/// Divide packed double-precision (64-bit) floating-point elements in `a` by
/// packed elements in `b`, and store the results in `dst` using zeromask `k`
/// (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_pd&expand=2155)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let div = _mm512_div_pd(a, b).as_f64x8();
        // Per-lane blend: mask bit set takes the quotient, clear zeroes the lane.
        transmute(simd_select_bitmask(k, div, f64x8::ZERO))
    }
}
2186
/// Divide packed double-precision (64-bit) floating-point elements in `a` by
/// packed elements in `b`, and store the results in `dst` using writemask `k`
/// (elements are copied from `src` when the corresponding mask bit is not
/// set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_pd&expand=2151)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let div = _mm256_div_pd(a, b).as_f64x4();
        // Per-lane blend: mask bit set takes the quotient, clear copies from `src`.
        transmute(simd_select_bitmask(k, div, src.as_f64x4()))
    }
}
2201
/// Divide packed double-precision (64-bit) floating-point elements in `a` by
/// packed elements in `b`, and store the results in `dst` using zeromask `k`
/// (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_pd&expand=2152)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let div = _mm256_div_pd(a, b).as_f64x4();
        // Per-lane blend: mask bit set takes the quotient, clear zeroes the lane.
        transmute(simd_select_bitmask(k, div, f64x4::ZERO))
    }
}
2216
/// Divide packed double-precision (64-bit) floating-point elements in `a` by
/// packed elements in `b`, and store the results in `dst` using writemask `k`
/// (elements are copied from `src` when the corresponding mask bit is not
/// set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_pd&expand=2148)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let div = _mm_div_pd(a, b).as_f64x2();
        // Per-lane blend: mask bit set takes the quotient, clear copies from `src`.
        transmute(simd_select_bitmask(k, div, src.as_f64x2()))
    }
}
2231
/// Divide packed double-precision (64-bit) floating-point elements in `a` by
/// packed elements in `b`, and store the results in `dst` using zeromask `k`
/// (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_pd&expand=2149)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let div = _mm_div_pd(a, b).as_f64x2();
        // Per-lane blend: mask bit set takes the quotient, clear zeroes the lane.
        transmute(simd_select_bitmask(k, div, f64x2::ZERO))
    }
}
2246
2247/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
2248///
2249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi32&expand=3582)
2250#[inline]
2251#[target_feature(enable = "avx512f")]
2252#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2253#[cfg_attr(test, assert_instr(vpmaxsd))]
2254#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2255pub const fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
2256    unsafe { simd_imax(a.as_i32x16(), b.as_i32x16()).as_m512i() }
2257}
2258
/// Compare packed signed 32-bit integers in `a` and `b`, and store packed
/// maximum values in `dst` using writemask `k` (elements are copied from
/// `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi32&expand=3580)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi32(a, b).as_i32x16();
        // Per-lane blend: mask bit set takes the maximum, clear copies from `src`.
        transmute(simd_select_bitmask(k, max, src.as_i32x16()))
    }
}
2273
/// Compare packed signed 32-bit integers in `a` and `b`, and store packed
/// maximum values in `dst` using zeromask `k` (elements are zeroed out when
/// the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi32&expand=3581)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi32(a, b).as_i32x16();
        // Per-lane blend: mask bit set takes the maximum, clear zeroes the lane.
        transmute(simd_select_bitmask(k, max, i32x16::ZERO))
    }
}
2288
/// Compare packed signed 32-bit integers in `a` and `b`, and store packed
/// maximum values in `dst` using writemask `k` (elements are copied from
/// `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi32&expand=3577)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi32(a, b).as_i32x8();
        // Per-lane blend: mask bit set takes the maximum, clear copies from `src`.
        transmute(simd_select_bitmask(k, max, src.as_i32x8()))
    }
}
2303
/// Compare packed signed 32-bit integers in `a` and `b`, and store packed
/// maximum values in `dst` using zeromask `k` (elements are zeroed out when
/// the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi32&expand=3578)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi32(a, b).as_i32x8();
        // Per-lane blend: mask bit set takes the maximum, clear zeroes the lane.
        transmute(simd_select_bitmask(k, max, i32x8::ZERO))
    }
}
2318
/// Compare packed signed 32-bit integers in `a` and `b`, and store packed
/// maximum values in `dst` using writemask `k` (elements are copied from
/// `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi32&expand=3574)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi32(a, b).as_i32x4();
        // Per-lane blend: mask bit set takes the maximum, clear copies from `src`.
        transmute(simd_select_bitmask(k, max, src.as_i32x4()))
    }
}
2333
2334/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2335///
2336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi32&expand=3575)
2337#[inline]
2338#[target_feature(enable = "avx512f,avx512vl")]
2339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2340#[cfg_attr(test, assert_instr(vpmaxsd))]
2341#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2342pub const fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2343    unsafe {
2344        let max = _mm_max_epi32(a, b).as_i32x4();
2345        transmute(simd_select_bitmask(k, max, i32x4::ZERO))
2346    }
2347}
2348
2349/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2350///
2351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi64&expand=3591)
2352#[inline]
2353#[target_feature(enable = "avx512f")]
2354#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2355#[cfg_attr(test, assert_instr(vpmaxsq))]
2356#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2357pub const fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
2358    unsafe { simd_imax(a.as_i64x8(), b.as_i64x8()).as_m512i() }
2359}
2360
2361/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2362///
2363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi64&expand=3589)
2364#[inline]
2365#[target_feature(enable = "avx512f")]
2366#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2367#[cfg_attr(test, assert_instr(vpmaxsq))]
2368#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2369pub const fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2370    unsafe {
2371        let max = _mm512_max_epi64(a, b).as_i64x8();
2372        transmute(simd_select_bitmask(k, max, src.as_i64x8()))
2373    }
2374}
2375
2376/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2377///
2378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi64&expand=3590)
2379#[inline]
2380#[target_feature(enable = "avx512f")]
2381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2382#[cfg_attr(test, assert_instr(vpmaxsq))]
2383#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2384pub const fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2385    unsafe {
2386        let max = _mm512_max_epi64(a, b).as_i64x8();
2387        transmute(simd_select_bitmask(k, max, i64x8::ZERO))
2388    }
2389}
2390
2391/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2392///
2393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi64&expand=3588)
2394#[inline]
2395#[target_feature(enable = "avx512f,avx512vl")]
2396#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2397#[cfg_attr(test, assert_instr(vpmaxsq))]
2398#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2399pub const fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
2400    unsafe { simd_imax(a.as_i64x4(), b.as_i64x4()).as_m256i() }
2401}
2402
2403/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2404///
2405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi64&expand=3586)
2406#[inline]
2407#[target_feature(enable = "avx512f,avx512vl")]
2408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2409#[cfg_attr(test, assert_instr(vpmaxsq))]
2410#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2411pub const fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2412    unsafe {
2413        let max = _mm256_max_epi64(a, b).as_i64x4();
2414        transmute(simd_select_bitmask(k, max, src.as_i64x4()))
2415    }
2416}
2417
2418/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2419///
2420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi64&expand=3587)
2421#[inline]
2422#[target_feature(enable = "avx512f,avx512vl")]
2423#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2424#[cfg_attr(test, assert_instr(vpmaxsq))]
2425#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2426pub const fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2427    unsafe {
2428        let max = _mm256_max_epi64(a, b).as_i64x4();
2429        transmute(simd_select_bitmask(k, max, i64x4::ZERO))
2430    }
2431}
2432
2433/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2434///
2435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi64&expand=3585)
2436#[inline]
2437#[target_feature(enable = "avx512f,avx512vl")]
2438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2439#[cfg_attr(test, assert_instr(vpmaxsq))]
2440#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2441pub const fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
2442    unsafe { simd_imax(a.as_i64x2(), b.as_i64x2()).as_m128i() }
2443}
2444
2445/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2446///
2447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi64&expand=3583)
2448#[inline]
2449#[target_feature(enable = "avx512f,avx512vl")]
2450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2451#[cfg_attr(test, assert_instr(vpmaxsq))]
2452#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2453pub const fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2454    unsafe {
2455        let max = _mm_max_epi64(a, b).as_i64x2();
2456        transmute(simd_select_bitmask(k, max, src.as_i64x2()))
2457    }
2458}
2459
2460/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2461///
2462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi64&expand=3584)
2463#[inline]
2464#[target_feature(enable = "avx512f,avx512vl")]
2465#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2466#[cfg_attr(test, assert_instr(vpmaxsq))]
2467#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2468pub const fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2469    unsafe {
2470        let max = _mm_max_epi64(a, b).as_i64x2();
2471        transmute(simd_select_bitmask(k, max, i64x2::ZERO))
2472    }
2473}
2474
2475/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
2476///
2477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_ps&expand=3655)
2478#[inline]
2479#[target_feature(enable = "avx512f")]
2480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2481#[cfg_attr(test, assert_instr(vmaxps))]
2482pub fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
2483    unsafe {
2484        transmute(vmaxps(
2485            a.as_f32x16(),
2486            b.as_f32x16(),
2487            _MM_FROUND_CUR_DIRECTION,
2488        ))
2489    }
2490}
2491
2492/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2493///
2494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_ps&expand=3653)
2495#[inline]
2496#[target_feature(enable = "avx512f")]
2497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2498#[cfg_attr(test, assert_instr(vmaxps))]
2499pub fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2500    unsafe {
2501        let max = _mm512_max_ps(a, b).as_f32x16();
2502        transmute(simd_select_bitmask(k, max, src.as_f32x16()))
2503    }
2504}
2505
2506/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2507///
2508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_ps&expand=3654)
2509#[inline]
2510#[target_feature(enable = "avx512f")]
2511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2512#[cfg_attr(test, assert_instr(vmaxps))]
2513pub fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
2514    unsafe {
2515        let max = _mm512_max_ps(a, b).as_f32x16();
2516        transmute(simd_select_bitmask(k, max, f32x16::ZERO))
2517    }
2518}
2519
2520/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2521///
2522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_ps&expand=3650)
2523#[inline]
2524#[target_feature(enable = "avx512f,avx512vl")]
2525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2526#[cfg_attr(test, assert_instr(vmaxps))]
2527pub fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
2528    unsafe {
2529        let max = _mm256_max_ps(a, b).as_f32x8();
2530        transmute(simd_select_bitmask(k, max, src.as_f32x8()))
2531    }
2532}
2533
2534/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2535///
2536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_ps&expand=3651)
2537#[inline]
2538#[target_feature(enable = "avx512f,avx512vl")]
2539#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2540#[cfg_attr(test, assert_instr(vmaxps))]
2541pub fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
2542    unsafe {
2543        let max = _mm256_max_ps(a, b).as_f32x8();
2544        transmute(simd_select_bitmask(k, max, f32x8::ZERO))
2545    }
2546}
2547
2548/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2549///
2550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_ps&expand=3647)
2551#[inline]
2552#[target_feature(enable = "avx512f,avx512vl")]
2553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2554#[cfg_attr(test, assert_instr(vmaxps))]
2555pub fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
2556    unsafe {
2557        let max = _mm_max_ps(a, b).as_f32x4();
2558        transmute(simd_select_bitmask(k, max, src.as_f32x4()))
2559    }
2560}
2561
2562/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2563///
2564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_ps&expand=3648)
2565#[inline]
2566#[target_feature(enable = "avx512f,avx512vl")]
2567#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2568#[cfg_attr(test, assert_instr(vmaxps))]
2569pub fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
2570    unsafe {
2571        let max = _mm_max_ps(a, b).as_f32x4();
2572        transmute(simd_select_bitmask(k, max, f32x4::ZERO))
2573    }
2574}
2575
2576/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
2577///
2578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_pd&expand=3645)
2579#[inline]
2580#[target_feature(enable = "avx512f")]
2581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2582#[cfg_attr(test, assert_instr(vmaxpd))]
2583pub fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
2584    unsafe { transmute(vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
2585}
2586
2587/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2588///
2589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_pd&expand=3643)
2590#[inline]
2591#[target_feature(enable = "avx512f")]
2592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2593#[cfg_attr(test, assert_instr(vmaxpd))]
2594pub fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2595    unsafe {
2596        let max = _mm512_max_pd(a, b).as_f64x8();
2597        transmute(simd_select_bitmask(k, max, src.as_f64x8()))
2598    }
2599}
2600
2601/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2602///
2603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_pd&expand=3644)
2604#[inline]
2605#[target_feature(enable = "avx512f")]
2606#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2607#[cfg_attr(test, assert_instr(vmaxpd))]
2608pub fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2609    unsafe {
2610        let max = _mm512_max_pd(a, b).as_f64x8();
2611        transmute(simd_select_bitmask(k, max, f64x8::ZERO))
2612    }
2613}
2614
2615/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2616///
2617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_pd&expand=3640)
2618#[inline]
2619#[target_feature(enable = "avx512f,avx512vl")]
2620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2621#[cfg_attr(test, assert_instr(vmaxpd))]
2622pub fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2623    unsafe {
2624        let max = _mm256_max_pd(a, b).as_f64x4();
2625        transmute(simd_select_bitmask(k, max, src.as_f64x4()))
2626    }
2627}
2628
2629/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2630///
2631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_pd&expand=3641)
2632#[inline]
2633#[target_feature(enable = "avx512f,avx512vl")]
2634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2635#[cfg_attr(test, assert_instr(vmaxpd))]
2636pub fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2637    unsafe {
2638        let max = _mm256_max_pd(a, b).as_f64x4();
2639        transmute(simd_select_bitmask(k, max, f64x4::ZERO))
2640    }
2641}
2642
2643/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2644///
2645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_pd&expand=3637)
2646#[inline]
2647#[target_feature(enable = "avx512f,avx512vl")]
2648#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2649#[cfg_attr(test, assert_instr(vmaxpd))]
2650pub fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2651    unsafe {
2652        let max = _mm_max_pd(a, b).as_f64x2();
2653        transmute(simd_select_bitmask(k, max, src.as_f64x2()))
2654    }
2655}
2656
2657/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2658///
2659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_pd&expand=3638)
2660#[inline]
2661#[target_feature(enable = "avx512f,avx512vl")]
2662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2663#[cfg_attr(test, assert_instr(vmaxpd))]
2664pub fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2665    unsafe {
2666        let max = _mm_max_pd(a, b).as_f64x2();
2667        transmute(simd_select_bitmask(k, max, f64x2::ZERO))
2668    }
2669}
2670
2671/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
2672///
2673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu32&expand=3618)
2674#[inline]
2675#[target_feature(enable = "avx512f")]
2676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2677#[cfg_attr(test, assert_instr(vpmaxud))]
2678#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2679pub const fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
2680    unsafe { simd_imax(a.as_u32x16(), b.as_u32x16()).as_m512i() }
2681}
2682
2683/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2684///
2685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu32&expand=3616)
2686#[inline]
2687#[target_feature(enable = "avx512f")]
2688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2689#[cfg_attr(test, assert_instr(vpmaxud))]
2690#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2691pub const fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2692    unsafe {
2693        let max = _mm512_max_epu32(a, b).as_u32x16();
2694        transmute(simd_select_bitmask(k, max, src.as_u32x16()))
2695    }
2696}
2697
2698/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2699///
2700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu32&expand=3617)
2701#[inline]
2702#[target_feature(enable = "avx512f")]
2703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2704#[cfg_attr(test, assert_instr(vpmaxud))]
2705#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2706pub const fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2707    unsafe {
2708        let max = _mm512_max_epu32(a, b).as_u32x16();
2709        transmute(simd_select_bitmask(k, max, u32x16::ZERO))
2710    }
2711}
2712
2713/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2714///
2715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu32&expand=3613)
2716#[inline]
2717#[target_feature(enable = "avx512f,avx512vl")]
2718#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2719#[cfg_attr(test, assert_instr(vpmaxud))]
2720#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2721pub const fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2722    unsafe {
2723        let max = _mm256_max_epu32(a, b).as_u32x8();
2724        transmute(simd_select_bitmask(k, max, src.as_u32x8()))
2725    }
2726}
2727
2728/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2729///
2730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu32&expand=3614)
2731#[inline]
2732#[target_feature(enable = "avx512f,avx512vl")]
2733#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2734#[cfg_attr(test, assert_instr(vpmaxud))]
2735#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2736pub const fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2737    unsafe {
2738        let max = _mm256_max_epu32(a, b).as_u32x8();
2739        transmute(simd_select_bitmask(k, max, u32x8::ZERO))
2740    }
2741}
2742
2743/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2744///
2745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu32&expand=3610)
2746#[inline]
2747#[target_feature(enable = "avx512f,avx512vl")]
2748#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2749#[cfg_attr(test, assert_instr(vpmaxud))]
2750#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2751pub const fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2752    unsafe {
2753        let max = _mm_max_epu32(a, b).as_u32x4();
2754        transmute(simd_select_bitmask(k, max, src.as_u32x4()))
2755    }
2756}
2757
2758/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2759///
2760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu32&expand=3611)
2761#[inline]
2762#[target_feature(enable = "avx512f,avx512vl")]
2763#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2764#[cfg_attr(test, assert_instr(vpmaxud))]
2765#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2766pub const fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2767    unsafe {
2768        let max = _mm_max_epu32(a, b).as_u32x4();
2769        transmute(simd_select_bitmask(k, max, u32x4::ZERO))
2770    }
2771}
2772
2773/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2774///
2775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu64&expand=3627)
2776#[inline]
2777#[target_feature(enable = "avx512f")]
2778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2779#[cfg_attr(test, assert_instr(vpmaxuq))]
2780#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2781pub const fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
2782    unsafe { simd_imax(a.as_u64x8(), b.as_u64x8()).as_m512i() }
2783}
2784
2785/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2786///
2787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu64&expand=3625)
2788#[inline]
2789#[target_feature(enable = "avx512f")]
2790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2791#[cfg_attr(test, assert_instr(vpmaxuq))]
2792#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2793pub const fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2794    unsafe {
2795        let max = _mm512_max_epu64(a, b).as_u64x8();
2796        transmute(simd_select_bitmask(k, max, src.as_u64x8()))
2797    }
2798}
2799
2800/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2801///
2802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu64&expand=3626)
2803#[inline]
2804#[target_feature(enable = "avx512f")]
2805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2806#[cfg_attr(test, assert_instr(vpmaxuq))]
2807#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2808pub const fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2809    unsafe {
2810        let max = _mm512_max_epu64(a, b).as_u64x8();
2811        transmute(simd_select_bitmask(k, max, u64x8::ZERO))
2812    }
2813}
2814
2815/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2816///
2817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu64&expand=3624)
2818#[inline]
2819#[target_feature(enable = "avx512f,avx512vl")]
2820#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2821#[cfg_attr(test, assert_instr(vpmaxuq))]
2822#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2823pub const fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
2824    unsafe { simd_imax(a.as_u64x4(), b.as_u64x4()).as_m256i() }
2825}
2826
2827/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2828///
2829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu64&expand=3622)
2830#[inline]
2831#[target_feature(enable = "avx512f,avx512vl")]
2832#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2833#[cfg_attr(test, assert_instr(vpmaxuq))]
2834#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2835pub const fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2836    unsafe {
2837        let max = _mm256_max_epu64(a, b).as_u64x4();
2838        transmute(simd_select_bitmask(k, max, src.as_u64x4()))
2839    }
2840}
2841
2842/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2843///
2844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu64&expand=3623)
2845#[inline]
2846#[target_feature(enable = "avx512f,avx512vl")]
2847#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2848#[cfg_attr(test, assert_instr(vpmaxuq))]
2849#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2850pub const fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2851    unsafe {
2852        let max = _mm256_max_epu64(a, b).as_u64x4();
2853        transmute(simd_select_bitmask(k, max, u64x4::ZERO))
2854    }
2855}
2856
2857/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2858///
2859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu64&expand=3621)
2860#[inline]
2861#[target_feature(enable = "avx512f,avx512vl")]
2862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2863#[cfg_attr(test, assert_instr(vpmaxuq))]
2864#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2865pub const fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
2866    unsafe { simd_imax(a.as_u64x2(), b.as_u64x2()).as_m128i() }
2867}
2868
2869/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2870///
2871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu64&expand=3619)
2872#[inline]
2873#[target_feature(enable = "avx512f,avx512vl")]
2874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2875#[cfg_attr(test, assert_instr(vpmaxuq))]
2876#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2877pub const fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2878    unsafe {
2879        let max = _mm_max_epu64(a, b).as_u64x2();
2880        transmute(simd_select_bitmask(k, max, src.as_u64x2()))
2881    }
2882}
2883
2884/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2885///
2886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu64&expand=3620)
2887#[inline]
2888#[target_feature(enable = "avx512f,avx512vl")]
2889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2890#[cfg_attr(test, assert_instr(vpmaxuq))]
2891#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2892pub const fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2893    unsafe {
2894        let max = _mm_max_epu64(a, b).as_u64x2();
2895        transmute(simd_select_bitmask(k, max, u64x2::ZERO))
2896    }
2897}
2898
2899/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
2900///
2901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi32&expand=3696)
2902#[inline]
2903#[target_feature(enable = "avx512f")]
2904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2905#[cfg_attr(test, assert_instr(vpminsd))]
2906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2907pub const fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
2908    unsafe { simd_imin(a.as_i32x16(), b.as_i32x16()).as_m512i() }
2909}
2910
2911/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2912///
2913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi32&expand=3694)
2914#[inline]
2915#[target_feature(enable = "avx512f")]
2916#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2917#[cfg_attr(test, assert_instr(vpminsd))]
2918#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2919pub const fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2920    unsafe {
2921        let min = _mm512_min_epi32(a, b).as_i32x16();
2922        transmute(simd_select_bitmask(k, min, src.as_i32x16()))
2923    }
2924}
2925
2926/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2927///
2928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi32&expand=3695)
2929#[inline]
2930#[target_feature(enable = "avx512f")]
2931#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2932#[cfg_attr(test, assert_instr(vpminsd))]
2933#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2934pub const fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2935    unsafe {
2936        let min = _mm512_min_epi32(a, b).as_i32x16();
2937        transmute(simd_select_bitmask(k, min, i32x16::ZERO))
2938    }
2939}
2940
2941/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2942///
2943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi32&expand=3691)
2944#[inline]
2945#[target_feature(enable = "avx512f,avx512vl")]
2946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2947#[cfg_attr(test, assert_instr(vpminsd))]
2948#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2949pub const fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2950    unsafe {
2951        let min = _mm256_min_epi32(a, b).as_i32x8();
2952        transmute(simd_select_bitmask(k, min, src.as_i32x8()))
2953    }
2954}
2955
2956/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2957///
2958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi32&expand=3692)
2959#[inline]
2960#[target_feature(enable = "avx512f,avx512vl")]
2961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2962#[cfg_attr(test, assert_instr(vpminsd))]
2963#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2964pub const fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2965    unsafe {
2966        let min = _mm256_min_epi32(a, b).as_i32x8();
2967        transmute(simd_select_bitmask(k, min, i32x8::ZERO))
2968    }
2969}
2970
2971/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2972///
2973/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi32&expand=3688)
2974#[inline]
2975#[target_feature(enable = "avx512f,avx512vl")]
2976#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2977#[cfg_attr(test, assert_instr(vpminsd))]
2978#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2979pub const fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2980    unsafe {
2981        let min = _mm_min_epi32(a, b).as_i32x4();
2982        transmute(simd_select_bitmask(k, min, src.as_i32x4()))
2983    }
2984}
2985
2986/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2987///
2988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi32&expand=3689)
2989#[inline]
2990#[target_feature(enable = "avx512f,avx512vl")]
2991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2992#[cfg_attr(test, assert_instr(vpminsd))]
2993#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2994pub const fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2995    unsafe {
2996        let min = _mm_min_epi32(a, b).as_i32x4();
2997        transmute(simd_select_bitmask(k, min, i32x4::ZERO))
2998    }
2999}
3000
3001/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
3002///
3003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi64&expand=3705)
3004#[inline]
3005#[target_feature(enable = "avx512f")]
3006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3007#[cfg_attr(test, assert_instr(vpminsq))]
3008#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3009pub const fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
3010    unsafe { simd_imin(a.as_i64x8(), b.as_i64x8()).as_m512i() }
3011}
3012
3013/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3014///
3015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi64&expand=3703)
3016#[inline]
3017#[target_feature(enable = "avx512f")]
3018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3019#[cfg_attr(test, assert_instr(vpminsq))]
3020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3021pub const fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3022    unsafe {
3023        let min = _mm512_min_epi64(a, b).as_i64x8();
3024        transmute(simd_select_bitmask(k, min, src.as_i64x8()))
3025    }
3026}
3027
3028/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3029///
3030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi64&expand=3704)
3031#[inline]
3032#[target_feature(enable = "avx512f")]
3033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3034#[cfg_attr(test, assert_instr(vpminsq))]
3035#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3036pub const fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3037    unsafe {
3038        let min = _mm512_min_epi64(a, b).as_i64x8();
3039        transmute(simd_select_bitmask(k, min, i64x8::ZERO))
3040    }
3041}
3042
3043/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
3044///
3045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi64&expand=3702)
3046#[inline]
3047#[target_feature(enable = "avx512f,avx512vl")]
3048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3049#[cfg_attr(test, assert_instr(vpminsq))]
3050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3051pub const fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
3052    unsafe { simd_imin(a.as_i64x4(), b.as_i64x4()).as_m256i() }
3053}
3054
3055/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3056///
3057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi64&expand=3700)
3058#[inline]
3059#[target_feature(enable = "avx512f,avx512vl")]
3060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3061#[cfg_attr(test, assert_instr(vpminsq))]
3062#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3063pub const fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3064    unsafe {
3065        let min = _mm256_min_epi64(a, b).as_i64x4();
3066        transmute(simd_select_bitmask(k, min, src.as_i64x4()))
3067    }
3068}
3069
3070/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3071///
3072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi64&expand=3701)
3073#[inline]
3074#[target_feature(enable = "avx512f,avx512vl")]
3075#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3076#[cfg_attr(test, assert_instr(vpminsq))]
3077#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3078pub const fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3079    unsafe {
3080        let min = _mm256_min_epi64(a, b).as_i64x4();
3081        transmute(simd_select_bitmask(k, min, i64x4::ZERO))
3082    }
3083}
3084
3085/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
3086///
3087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi64)
3088#[inline]
3089#[target_feature(enable = "avx512f,avx512vl")]
3090#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3091#[cfg_attr(test, assert_instr(vpminsq))]
3092#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3093pub const fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
3094    unsafe { simd_imin(a.as_i64x2(), b.as_i64x2()).as_m128i() }
3095}
3096
3097/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3098///
3099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi64)
3100#[inline]
3101#[target_feature(enable = "avx512f,avx512vl")]
3102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3103#[cfg_attr(test, assert_instr(vpminsq))]
3104#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3105pub const fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3106    unsafe {
3107        let min = _mm_min_epi64(a, b).as_i64x2();
3108        transmute(simd_select_bitmask(k, min, src.as_i64x2()))
3109    }
3110}
3111
3112/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3113///
3114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi64)
3115#[inline]
3116#[target_feature(enable = "avx512f,avx512vl")]
3117#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3118#[cfg_attr(test, assert_instr(vpminsq))]
3119#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3120pub const fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3121    unsafe {
3122        let min = _mm_min_epi64(a, b).as_i64x2();
3123        transmute(simd_select_bitmask(k, min, i64x2::ZERO))
3124    }
3125}
3126
3127/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
3128///
3129/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_ps&expand=3769)
3130#[inline]
3131#[target_feature(enable = "avx512f")]
3132#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3133#[cfg_attr(test, assert_instr(vminps))]
3134pub fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
3135    unsafe {
3136        transmute(vminps(
3137            a.as_f32x16(),
3138            b.as_f32x16(),
3139            _MM_FROUND_CUR_DIRECTION,
3140        ))
3141    }
3142}
3143
3144/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3145///
3146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_ps&expand=3767)
3147#[inline]
3148#[target_feature(enable = "avx512f")]
3149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3150#[cfg_attr(test, assert_instr(vminps))]
3151pub fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
3152    unsafe {
3153        let min = _mm512_min_ps(a, b).as_f32x16();
3154        transmute(simd_select_bitmask(k, min, src.as_f32x16()))
3155    }
3156}
3157
3158/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3159///
3160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_ps&expand=3768)
3161#[inline]
3162#[target_feature(enable = "avx512f")]
3163#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3164#[cfg_attr(test, assert_instr(vminps))]
3165pub fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
3166    unsafe {
3167        let min = _mm512_min_ps(a, b).as_f32x16();
3168        transmute(simd_select_bitmask(k, min, f32x16::ZERO))
3169    }
3170}
3171
3172/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3173///
3174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_ps&expand=3764)
3175#[inline]
3176#[target_feature(enable = "avx512f,avx512vl")]
3177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3178#[cfg_attr(test, assert_instr(vminps))]
3179pub fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
3180    unsafe {
3181        let min = _mm256_min_ps(a, b).as_f32x8();
3182        transmute(simd_select_bitmask(k, min, src.as_f32x8()))
3183    }
3184}
3185
3186/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3187///
3188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_ps&expand=3765)
3189#[inline]
3190#[target_feature(enable = "avx512f,avx512vl")]
3191#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3192#[cfg_attr(test, assert_instr(vminps))]
3193pub fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
3194    unsafe {
3195        let min = _mm256_min_ps(a, b).as_f32x8();
3196        transmute(simd_select_bitmask(k, min, f32x8::ZERO))
3197    }
3198}
3199
3200/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3201///
3202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_ps&expand=3761)
3203#[inline]
3204#[target_feature(enable = "avx512f,avx512vl")]
3205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3206#[cfg_attr(test, assert_instr(vminps))]
3207pub fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
3208    unsafe {
3209        let min = _mm_min_ps(a, b).as_f32x4();
3210        transmute(simd_select_bitmask(k, min, src.as_f32x4()))
3211    }
3212}
3213
3214/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3215///
3216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_ps&expand=3762)
3217#[inline]
3218#[target_feature(enable = "avx512f,avx512vl")]
3219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3220#[cfg_attr(test, assert_instr(vminps))]
3221pub fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
3222    unsafe {
3223        let min = _mm_min_ps(a, b).as_f32x4();
3224        transmute(simd_select_bitmask(k, min, f32x4::ZERO))
3225    }
3226}
3227
3228/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
3229///
3230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_pd&expand=3759)
3231#[inline]
3232#[target_feature(enable = "avx512f")]
3233#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3234#[cfg_attr(test, assert_instr(vminpd))]
3235pub fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
3236    unsafe { transmute(vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
3237}
3238
3239/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3240///
3241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_pd&expand=3757)
3242#[inline]
3243#[target_feature(enable = "avx512f")]
3244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3245#[cfg_attr(test, assert_instr(vminpd))]
3246pub fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3247    unsafe {
3248        let min = _mm512_min_pd(a, b).as_f64x8();
3249        transmute(simd_select_bitmask(k, min, src.as_f64x8()))
3250    }
3251}
3252
3253/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3254///
3255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_pd&expand=3758)
3256#[inline]
3257#[target_feature(enable = "avx512f")]
3258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3259#[cfg_attr(test, assert_instr(vminpd))]
3260pub fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3261    unsafe {
3262        let min = _mm512_min_pd(a, b).as_f64x8();
3263        transmute(simd_select_bitmask(k, min, f64x8::ZERO))
3264    }
3265}
3266
3267/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3268///
3269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_pd&expand=3754)
3270#[inline]
3271#[target_feature(enable = "avx512f,avx512vl")]
3272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3273#[cfg_attr(test, assert_instr(vminpd))]
3274pub fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3275    unsafe {
3276        let min = _mm256_min_pd(a, b).as_f64x4();
3277        transmute(simd_select_bitmask(k, min, src.as_f64x4()))
3278    }
3279}
3280
3281/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3282///
3283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_pd&expand=3755)
3284#[inline]
3285#[target_feature(enable = "avx512f,avx512vl")]
3286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3287#[cfg_attr(test, assert_instr(vminpd))]
3288pub fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3289    unsafe {
3290        let min = _mm256_min_pd(a, b).as_f64x4();
3291        transmute(simd_select_bitmask(k, min, f64x4::ZERO))
3292    }
3293}
3294
3295/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3296///
3297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_pd&expand=3751)
3298#[inline]
3299#[target_feature(enable = "avx512f,avx512vl")]
3300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3301#[cfg_attr(test, assert_instr(vminpd))]
3302pub fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3303    unsafe {
3304        let min = _mm_min_pd(a, b).as_f64x2();
3305        transmute(simd_select_bitmask(k, min, src.as_f64x2()))
3306    }
3307}
3308
3309/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3310///
3311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_pd&expand=3752)
3312#[inline]
3313#[target_feature(enable = "avx512f,avx512vl")]
3314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3315#[cfg_attr(test, assert_instr(vminpd))]
3316pub fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3317    unsafe {
3318        let min = _mm_min_pd(a, b).as_f64x2();
3319        transmute(simd_select_bitmask(k, min, f64x2::ZERO))
3320    }
3321}
3322
3323/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst.
3324///
3325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu32&expand=3732)
3326#[inline]
3327#[target_feature(enable = "avx512f")]
3328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3329#[cfg_attr(test, assert_instr(vpminud))]
3330#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3331pub const fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
3332    unsafe { simd_imin(a.as_u32x16(), b.as_u32x16()).as_m512i() }
3333}
3334
3335/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3336///
3337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu32&expand=3730)
3338#[inline]
3339#[target_feature(enable = "avx512f")]
3340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3341#[cfg_attr(test, assert_instr(vpminud))]
3342#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3343pub const fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3344    unsafe {
3345        let min = _mm512_min_epu32(a, b).as_u32x16();
3346        transmute(simd_select_bitmask(k, min, src.as_u32x16()))
3347    }
3348}
3349
3350/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3351///
3352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu32&expand=3731)
3353#[inline]
3354#[target_feature(enable = "avx512f")]
3355#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3356#[cfg_attr(test, assert_instr(vpminud))]
3357#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3358pub const fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3359    unsafe {
3360        let min = _mm512_min_epu32(a, b).as_u32x16();
3361        transmute(simd_select_bitmask(k, min, u32x16::ZERO))
3362    }
3363}
3364
3365/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3366///
3367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu32&expand=3727)
3368#[inline]
3369#[target_feature(enable = "avx512f,avx512vl")]
3370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3371#[cfg_attr(test, assert_instr(vpminud))]
3372#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3373pub const fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3374    unsafe {
3375        let min = _mm256_min_epu32(a, b).as_u32x8();
3376        transmute(simd_select_bitmask(k, min, src.as_u32x8()))
3377    }
3378}
3379
3380/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3381///
3382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu32&expand=3728)
3383#[inline]
3384#[target_feature(enable = "avx512f,avx512vl")]
3385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3386#[cfg_attr(test, assert_instr(vpminud))]
3387#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3388pub const fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3389    unsafe {
3390        let min = _mm256_min_epu32(a, b).as_u32x8();
3391        transmute(simd_select_bitmask(k, min, u32x8::ZERO))
3392    }
3393}
3394
3395/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3396///
3397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu32&expand=3724)
3398#[inline]
3399#[target_feature(enable = "avx512f,avx512vl")]
3400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3401#[cfg_attr(test, assert_instr(vpminud))]
3402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3403pub const fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3404    unsafe {
3405        let min = _mm_min_epu32(a, b).as_u32x4();
3406        transmute(simd_select_bitmask(k, min, src.as_u32x4()))
3407    }
3408}
3409
3410/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3411///
3412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu32&expand=3725)
3413#[inline]
3414#[target_feature(enable = "avx512f,avx512vl")]
3415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3416#[cfg_attr(test, assert_instr(vpminud))]
3417#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3418pub const fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3419    unsafe {
3420        let min = _mm_min_epu32(a, b).as_u32x4();
3421        transmute(simd_select_bitmask(k, min, u32x4::ZERO))
3422    }
3423}
3424
3425/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3426///
3427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu64&expand=3741)
3428#[inline]
3429#[target_feature(enable = "avx512f")]
3430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3431#[cfg_attr(test, assert_instr(vpminuq))]
3432#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3433pub const fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
3434    unsafe { simd_imin(a.as_u64x8(), b.as_u64x8()).as_m512i() }
3435}
3436
3437/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3438///
3439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu64&expand=3739)
3440#[inline]
3441#[target_feature(enable = "avx512f")]
3442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3443#[cfg_attr(test, assert_instr(vpminuq))]
3444#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3445pub const fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3446    unsafe {
3447        let min = _mm512_min_epu64(a, b).as_u64x8();
3448        transmute(simd_select_bitmask(k, min, src.as_u64x8()))
3449    }
3450}
3451
3452/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3453///
3454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu64&expand=3740)
3455#[inline]
3456#[target_feature(enable = "avx512f")]
3457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3458#[cfg_attr(test, assert_instr(vpminuq))]
3459#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3460pub const fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3461    unsafe {
3462        let min = _mm512_min_epu64(a, b).as_u64x8();
3463        transmute(simd_select_bitmask(k, min, u64x8::ZERO))
3464    }
3465}
3466
3467/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3468///
3469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu64&expand=3738)
3470#[inline]
3471#[target_feature(enable = "avx512f,avx512vl")]
3472#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3473#[cfg_attr(test, assert_instr(vpminuq))]
3474#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3475pub const fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
3476    unsafe { simd_imin(a.as_u64x4(), b.as_u64x4()).as_m256i() }
3477}
3478
3479/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3480///
3481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu64&expand=3736)
3482#[inline]
3483#[target_feature(enable = "avx512f,avx512vl")]
3484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3485#[cfg_attr(test, assert_instr(vpminuq))]
3486#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3487pub const fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3488    unsafe {
3489        let min = _mm256_min_epu64(a, b).as_u64x4();
3490        transmute(simd_select_bitmask(k, min, src.as_u64x4()))
3491    }
3492}
3493
3494/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3495///
3496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu64&expand=3737)
3497#[inline]
3498#[target_feature(enable = "avx512f,avx512vl")]
3499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3500#[cfg_attr(test, assert_instr(vpminuq))]
3501#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3502pub const fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3503    unsafe {
3504        let min = _mm256_min_epu64(a, b).as_u64x4();
3505        transmute(simd_select_bitmask(k, min, u64x4::ZERO))
3506    }
3507}
3508
3509/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3510///
3511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu64&expand=3735)
3512#[inline]
3513#[target_feature(enable = "avx512f,avx512vl")]
3514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3515#[cfg_attr(test, assert_instr(vpminuq))]
3516#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3517pub const fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
3518    unsafe { simd_imin(a.as_u64x2(), b.as_u64x2()).as_m128i() }
3519}
3520
3521/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3522///
3523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu64&expand=3733)
3524#[inline]
3525#[target_feature(enable = "avx512f,avx512vl")]
3526#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3527#[cfg_attr(test, assert_instr(vpminuq))]
3528#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3529pub const fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3530    unsafe {
3531        let min = _mm_min_epu64(a, b).as_u64x2();
3532        transmute(simd_select_bitmask(k, min, src.as_u64x2()))
3533    }
3534}
3535
3536/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3537///
3538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu64&expand=3734)
3539#[inline]
3540#[target_feature(enable = "avx512f,avx512vl")]
3541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3542#[cfg_attr(test, assert_instr(vpminuq))]
3543#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3544pub const fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3545    unsafe {
3546        let min = _mm_min_epu64(a, b).as_u64x2();
3547        transmute(simd_select_bitmask(k, min, u64x2::ZERO))
3548    }
3549}
3550
/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_ps&expand=5371)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub fn _mm512_sqrt_ps(a: __m512) -> __m512 {
    // Lanewise square root over all 16 f32 lanes; no mask applied.
    unsafe { simd_fsqrt(a) }
}
3561
3562/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3563///
3564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_ps&expand=5369)
3565#[inline]
3566#[target_feature(enable = "avx512f")]
3567#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3568#[cfg_attr(test, assert_instr(vsqrtps))]
3569pub fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
3570    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3571}
3572
3573/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3574///
3575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_ps&expand=5370)
3576#[inline]
3577#[target_feature(enable = "avx512f")]
3578#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3579#[cfg_attr(test, assert_instr(vsqrtps))]
3580pub fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
3581    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_ps()) }
3582}
3583
3584/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3585///
3586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_ps&expand=5366)
3587#[inline]
3588#[target_feature(enable = "avx512f,avx512vl")]
3589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3590#[cfg_attr(test, assert_instr(vsqrtps))]
3591pub fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
3592    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3593}
3594
3595/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3596///
3597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_ps&expand=5367)
3598#[inline]
3599#[target_feature(enable = "avx512f,avx512vl")]
3600#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3601#[cfg_attr(test, assert_instr(vsqrtps))]
3602pub fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
3603    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_ps()) }
3604}
3605
3606/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3607///
3608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_ps&expand=5363)
3609#[inline]
3610#[target_feature(enable = "avx512f,avx512vl")]
3611#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3612#[cfg_attr(test, assert_instr(vsqrtps))]
3613pub fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
3614    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3615}
3616
3617/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3618///
3619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_ps&expand=5364)
3620#[inline]
3621#[target_feature(enable = "avx512f,avx512vl")]
3622#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3623#[cfg_attr(test, assert_instr(vsqrtps))]
3624pub fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
3625    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_ps()) }
3626}
3627
/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_pd&expand=5362)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
    // Lanewise square root over all 8 f64 lanes; no mask applied.
    unsafe { simd_fsqrt(a) }
}
3638
3639/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3640///
3641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_pd&expand=5360)
3642#[inline]
3643#[target_feature(enable = "avx512f")]
3644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3645#[cfg_attr(test, assert_instr(vsqrtpd))]
3646pub fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
3647    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3648}
3649
3650/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3651///
3652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_pd&expand=5361)
3653#[inline]
3654#[target_feature(enable = "avx512f")]
3655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3656#[cfg_attr(test, assert_instr(vsqrtpd))]
3657pub fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
3658    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_pd()) }
3659}
3660
3661/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3662///
3663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_pd&expand=5357)
3664#[inline]
3665#[target_feature(enable = "avx512f,avx512vl")]
3666#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3667#[cfg_attr(test, assert_instr(vsqrtpd))]
3668pub fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
3669    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3670}
3671
3672/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3673///
3674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_pd&expand=5358)
3675#[inline]
3676#[target_feature(enable = "avx512f,avx512vl")]
3677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3678#[cfg_attr(test, assert_instr(vsqrtpd))]
3679pub fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
3680    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_pd()) }
3681}
3682
3683/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3684///
3685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_pd&expand=5354)
3686#[inline]
3687#[target_feature(enable = "avx512f,avx512vl")]
3688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3689#[cfg_attr(test, assert_instr(vsqrtpd))]
3690pub fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
3691    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3692}
3693
3694/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3695///
3696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_pd&expand=5355)
3697#[inline]
3698#[target_feature(enable = "avx512f,avx512vl")]
3699#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3700#[cfg_attr(test, assert_instr(vsqrtpd))]
3701pub fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
3702    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_pd()) }
3703}
3704
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_ps&expand=2557)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    // Fused multiply-add per lane: (a * b) + c with a single rounding.
    unsafe { simd_fma(a, b, c) }
}
3716
3717/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3718///
3719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_ps&expand=2558)
3720#[inline]
3721#[target_feature(enable = "avx512f")]
3722#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3723#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3724#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3725pub const fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3726    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), a) }
3727}
3728
3729/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3730///
3731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_ps&expand=2560)
3732#[inline]
3733#[target_feature(enable = "avx512f")]
3734#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3735#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3736#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3737pub const fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3738    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), _mm512_setzero_ps()) }
3739}
3740
3741/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3742///
3743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_ps&expand=2559)
3744#[inline]
3745#[target_feature(enable = "avx512f")]
3746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3747#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3748#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3749pub const fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3750    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), c) }
3751}
3752
3753/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3754///
3755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_ps&expand=2554)
3756#[inline]
3757#[target_feature(enable = "avx512f,avx512vl")]
3758#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3759#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3760#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3761pub const fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
3762    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), a) }
3763}
3764
3765/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3766///
3767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_ps&expand=2556)
3768#[inline]
3769#[target_feature(enable = "avx512f,avx512vl")]
3770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3771#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3772#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3773pub const fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
3774    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), _mm256_setzero_ps()) }
3775}
3776
3777/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3778///
3779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_ps&expand=2555)
3780#[inline]
3781#[target_feature(enable = "avx512f,avx512vl")]
3782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3783#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3784#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3785pub const fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
3786    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), c) }
3787}
3788
3789/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3790///
3791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_ps&expand=2550)
3792#[inline]
3793#[target_feature(enable = "avx512f,avx512vl")]
3794#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3795#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3796#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3797pub const fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
3798    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), a) }
3799}
3800
3801/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3802///
3803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_ps&expand=2552)
3804#[inline]
3805#[target_feature(enable = "avx512f,avx512vl")]
3806#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3807#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3808#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3809pub const fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
3810    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), _mm_setzero_ps()) }
3811}
3812
3813/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3814///
3815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_ps&expand=2551)
3816#[inline]
3817#[target_feature(enable = "avx512f,avx512vl")]
3818#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3819#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3820#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3821pub const fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
3822    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), c) }
3823}
3824
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_pd&expand=2545)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    // Fused multiply-add per lane: (a * b) + c with a single rounding.
    unsafe { simd_fma(a, b, c) }
}
3836
3837/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3838///
3839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_pd&expand=2546)
3840#[inline]
3841#[target_feature(enable = "avx512f")]
3842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3843#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3844#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3845pub const fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
3846    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), a) }
3847}
3848
3849/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3850///
3851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_pd&expand=2548)
3852#[inline]
3853#[target_feature(enable = "avx512f")]
3854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3855#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3856#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3857pub const fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3858    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), _mm512_setzero_pd()) }
3859}
3860
3861/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3862///
3863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_pd&expand=2547)
3864#[inline]
3865#[target_feature(enable = "avx512f")]
3866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3867#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3868#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3869pub const fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
3870    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), c) }
3871}
3872
3873/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3874///
3875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_pd&expand=2542)
3876#[inline]
3877#[target_feature(enable = "avx512f,avx512vl")]
3878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3879#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3880#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3881pub const fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
3882    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), a) }
3883}
3884
3885/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3886///
3887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_pd&expand=2544)
3888#[inline]
3889#[target_feature(enable = "avx512f,avx512vl")]
3890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3891#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3892#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3893pub const fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
3894    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), _mm256_setzero_pd()) }
3895}
3896
3897/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3898///
3899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_pd&expand=2543)
3900#[inline]
3901#[target_feature(enable = "avx512f,avx512vl")]
3902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3903#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3904#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3905pub const fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
3906    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), c) }
3907}
3908
3909/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3910///
3911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_pd&expand=2538)
3912#[inline]
3913#[target_feature(enable = "avx512f,avx512vl")]
3914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3915#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3916#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3917pub const fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
3918    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), a) }
3919}
3920
3921/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3922///
3923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_pd&expand=2540)
3924#[inline]
3925#[target_feature(enable = "avx512f,avx512vl")]
3926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3927#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3928#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3929pub const fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
3930    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), _mm_setzero_pd()) }
3931}
3932
3933/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3934///
3935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_pd&expand=2539)
3936#[inline]
3937#[target_feature(enable = "avx512f,avx512vl")]
3938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3939#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3940#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3941pub const fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
3942    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), c) }
3943}
3944
3945/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3946///
3947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_ps&expand=2643)
3948#[inline]
3949#[target_feature(enable = "avx512f")]
3950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3951#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
3952#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3953pub const fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3954    unsafe { simd_fma(a, b, simd_neg(c)) }
3955}
3956
3957/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3958///
3959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_ps&expand=2644)
3960#[inline]
3961#[target_feature(enable = "avx512f")]
3962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3963#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
3964#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3965pub const fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3966    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), a) }
3967}
3968
3969/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3970///
3971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_ps&expand=2646)
3972#[inline]
3973#[target_feature(enable = "avx512f")]
3974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3975#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
3976#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3977pub const fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3978    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), _mm512_setzero_ps()) }
3979}
3980
3981/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3982///
3983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_ps&expand=2645)
3984#[inline]
3985#[target_feature(enable = "avx512f")]
3986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3987#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
3988#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3989pub const fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3990    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), c) }
3991}
3992
3993/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3994///
3995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_ps&expand=2640)
3996#[inline]
3997#[target_feature(enable = "avx512f,avx512vl")]
3998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3999#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4000#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4001pub const fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4002    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), a) }
4003}
4004
4005/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4006///
4007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_ps&expand=2642)
4008#[inline]
4009#[target_feature(enable = "avx512f,avx512vl")]
4010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4011#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4012#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4013pub const fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4014    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), _mm256_setzero_ps()) }
4015}
4016
4017/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4018///
4019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_ps&expand=2641)
4020#[inline]
4021#[target_feature(enable = "avx512f,avx512vl")]
4022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4023#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4024#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4025pub const fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4026    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), c) }
4027}
4028
4029/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4030///
4031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_ps&expand=2636)
4032#[inline]
4033#[target_feature(enable = "avx512f,avx512vl")]
4034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4035#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4036#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4037pub const fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4038    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), a) }
4039}
4040
4041/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4042///
4043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_ps&expand=2638)
4044#[inline]
4045#[target_feature(enable = "avx512f,avx512vl")]
4046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4047#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4048#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4049pub const fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4050    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), _mm_setzero_ps()) }
4051}
4052
4053/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4054///
4055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_ps&expand=2637)
4056#[inline]
4057#[target_feature(enable = "avx512f,avx512vl")]
4058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4059#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4060#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4061pub const fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4062    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), c) }
4063}
4064
4065/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
4066///
4067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_pd&expand=2631)
4068#[inline]
4069#[target_feature(enable = "avx512f")]
4070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4071#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4072#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4073pub const fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4074    unsafe { simd_fma(a, b, simd_neg(c)) }
4075}
4076
4077/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4078///
4079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_pd&expand=2632)
4080#[inline]
4081#[target_feature(enable = "avx512f")]
4082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4083#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4084#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4085pub const fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4086    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), a) }
4087}
4088
4089/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4090///
4091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_pd&expand=2634)
4092#[inline]
4093#[target_feature(enable = "avx512f")]
4094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4095#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4097pub const fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4098    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), _mm512_setzero_pd()) }
4099}
4100
4101/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4102///
4103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_pd&expand=2633)
4104#[inline]
4105#[target_feature(enable = "avx512f")]
4106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4107#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4108#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4109pub const fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4110    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), c) }
4111}
4112
4113/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4114///
4115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_pd&expand=2628)
4116#[inline]
4117#[target_feature(enable = "avx512f,avx512vl")]
4118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4119#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4120#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4121pub const fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4122    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), a) }
4123}
4124
4125/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4126///
4127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_pd&expand=2630)
4128#[inline]
4129#[target_feature(enable = "avx512f,avx512vl")]
4130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4131#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4132#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4133pub const fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4134    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), _mm256_setzero_pd()) }
4135}
4136
4137/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4138///
4139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_pd&expand=2629)
4140#[inline]
4141#[target_feature(enable = "avx512f,avx512vl")]
4142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4143#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4144#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4145pub const fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4146    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), c) }
4147}
4148
4149/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4150///
4151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_pd&expand=2624)
4152#[inline]
4153#[target_feature(enable = "avx512f,avx512vl")]
4154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4155#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4156#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4157pub const fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4158    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), a) }
4159}
4160
4161/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4162///
4163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_pd&expand=2626)
4164#[inline]
4165#[target_feature(enable = "avx512f,avx512vl")]
4166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4167#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4168#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4169pub const fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4170    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), _mm_setzero_pd()) }
4171}
4172
4173/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4174///
4175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_pd&expand=2625)
4176#[inline]
4177#[target_feature(enable = "avx512f,avx512vl")]
4178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4179#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4180#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4181pub const fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4182    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), c) }
4183}
4184
4185/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
4186///
4187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_ps&expand=2611)
4188#[inline]
4189#[target_feature(enable = "avx512f")]
4190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4191#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4192#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4193pub const fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4194    unsafe {
4195        let add = simd_fma(a, b, c);
4196        let sub = simd_fma(a, b, simd_neg(c));
4197        simd_shuffle!(
4198            add,
4199            sub,
4200            [16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11, 28, 13, 30, 15]
4201        )
4202    }
4203}
4204
4205/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4206///
4207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_ps&expand=2612)
4208#[inline]
4209#[target_feature(enable = "avx512f")]
4210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4211#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4212#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4213pub const fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4214    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), a) }
4215}
4216
4217/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4218///
4219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_ps&expand=2614)
4220#[inline]
4221#[target_feature(enable = "avx512f")]
4222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4223#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4224#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4225pub const fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4226    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), _mm512_setzero_ps()) }
4227}
4228
4229/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4230///
4231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_ps&expand=2613)
4232#[inline]
4233#[target_feature(enable = "avx512f")]
4234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4235#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4236#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4237pub const fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4238    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), c) }
4239}
4240
4241/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4242///
4243/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_ps&expand=2608)
4244#[inline]
4245#[target_feature(enable = "avx512f,avx512vl")]
4246#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4247#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4248#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4249pub const fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4250    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), a) }
4251}
4252
4253/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4254///
4255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_ps&expand=2610)
4256#[inline]
4257#[target_feature(enable = "avx512f,avx512vl")]
4258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4259#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4260#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4261pub const fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4262    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), _mm256_setzero_ps()) }
4263}
4264
4265/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4266///
4267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_ps&expand=2609)
4268#[inline]
4269#[target_feature(enable = "avx512f,avx512vl")]
4270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4271#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4272#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4273pub const fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4274    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), c) }
4275}
4276
4277/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4278///
4279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_ps&expand=2604)
4280#[inline]
4281#[target_feature(enable = "avx512f,avx512vl")]
4282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4283#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4284#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4285pub const fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4286    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), a) }
4287}
4288
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_ps&expand=2606)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), _mm_setzero_ps()) }
}
4300
4301/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4302///
4303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_ps&expand=2605)
4304#[inline]
4305#[target_feature(enable = "avx512f,avx512vl")]
4306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4307#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4308#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4309pub const fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4310    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), c) }
4311}
4312
4313/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
4314///
4315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_pd&expand=2599)
4316#[inline]
4317#[target_feature(enable = "avx512f")]
4318#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4319#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4320#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4321pub const fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4322    unsafe {
4323        let add = simd_fma(a, b, c);
4324        let sub = simd_fma(a, b, simd_neg(c));
4325        simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
4326    }
4327}
4328
4329/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4330///
4331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_pd&expand=2600)
4332#[inline]
4333#[target_feature(enable = "avx512f")]
4334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4335#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4336#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4337pub const fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4338    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), a) }
4339}
4340
4341/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4342///
4343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_pd&expand=2602)
4344#[inline]
4345#[target_feature(enable = "avx512f")]
4346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4347#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4348#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4349pub const fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4350    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), _mm512_setzero_pd()) }
4351}
4352
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_pd&expand=2613)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), c) }
}
4364
4365/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4366///
4367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_pd&expand=2596)
4368#[inline]
4369#[target_feature(enable = "avx512f,avx512vl")]
4370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4371#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4372#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4373pub const fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4374    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), a) }
4375}
4376
4377/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4378///
4379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_pd&expand=2598)
4380#[inline]
4381#[target_feature(enable = "avx512f,avx512vl")]
4382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4383#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4384#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4385pub const fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4386    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), _mm256_setzero_pd()) }
4387}
4388
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_pd&expand=2597)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), c) }
}
4400
4401/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4402///
4403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_pd&expand=2592)
4404#[inline]
4405#[target_feature(enable = "avx512f,avx512vl")]
4406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4407#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4408#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4409pub const fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4410    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), a) }
4411}
4412
4413/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4414///
4415/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_pd&expand=2594)
4416#[inline]
4417#[target_feature(enable = "avx512f,avx512vl")]
4418#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4419#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4420#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4421pub const fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4422    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), _mm_setzero_pd()) }
4423}
4424
4425/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4426///
4427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_pd&expand=2593)
4428#[inline]
4429#[target_feature(enable = "avx512f,avx512vl")]
4430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4431#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4432#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4433pub const fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4434    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), c) }
4435}
4436
4437/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4438///
4439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_ps&expand=2691)
4440#[inline]
4441#[target_feature(enable = "avx512f")]
4442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4443#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4444#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4445pub const fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4446    unsafe {
4447        let add = simd_fma(a, b, c);
4448        let sub = simd_fma(a, b, simd_neg(c));
4449        simd_shuffle!(
4450            add,
4451            sub,
4452            [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31]
4453        )
4454    }
4455}
4456
4457/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4458///
4459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_ps&expand=2692)
4460#[inline]
4461#[target_feature(enable = "avx512f")]
4462#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4463#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4464#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4465pub const fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4466    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), a) }
4467}
4468
4469/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4470///
4471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_ps&expand=2694)
4472#[inline]
4473#[target_feature(enable = "avx512f")]
4474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4475#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4476#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4477pub const fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4478    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), _mm512_setzero_ps()) }
4479}
4480
4481/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4482///
4483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_ps&expand=2693)
4484#[inline]
4485#[target_feature(enable = "avx512f")]
4486#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4487#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4488#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4489pub const fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4490    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), c) }
4491}
4492
4493/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4494///
4495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_ps&expand=2688)
4496#[inline]
4497#[target_feature(enable = "avx512f,avx512vl")]
4498#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4499#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4500#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4501pub const fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4502    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), a) }
4503}
4504
4505/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4506///
4507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_ps&expand=2690)
4508#[inline]
4509#[target_feature(enable = "avx512f,avx512vl")]
4510#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4511#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4512#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4513pub const fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4514    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), _mm256_setzero_ps()) }
4515}
4516
4517/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4518///
4519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_ps&expand=2689)
4520#[inline]
4521#[target_feature(enable = "avx512f,avx512vl")]
4522#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4523#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4524#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4525pub const fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4526    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), c) }
4527}
4528
4529/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4530///
4531/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_ps&expand=2684)
4532#[inline]
4533#[target_feature(enable = "avx512f,avx512vl")]
4534#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4535#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4536#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4537pub const fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4538    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), a) }
4539}
4540
4541/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4542///
4543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_ps&expand=2686)
4544#[inline]
4545#[target_feature(enable = "avx512f,avx512vl")]
4546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4547#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4549pub const fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4550    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), _mm_setzero_ps()) }
4551}
4552
4553/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4554///
4555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_ps&expand=2685)
4556#[inline]
4557#[target_feature(enable = "avx512f,avx512vl")]
4558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4559#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4560#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4561pub const fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4562    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), c) }
4563}
4564
4565/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4566///
4567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_pd&expand=2679)
4568#[inline]
4569#[target_feature(enable = "avx512f")]
4570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4571#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4572#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4573pub const fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4574    unsafe {
4575        let add = simd_fma(a, b, c);
4576        let sub = simd_fma(a, b, simd_neg(c));
4577        simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
4578    }
4579}
4580
4581/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4582///
4583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_pd&expand=2680)
4584#[inline]
4585#[target_feature(enable = "avx512f")]
4586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4587#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4588#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4589pub const fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4590    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), a) }
4591}
4592
4593/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4594///
4595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_pd&expand=2682)
4596#[inline]
4597#[target_feature(enable = "avx512f")]
4598#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4599#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4600#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4601pub const fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4602    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), _mm512_setzero_pd()) }
4603}
4604
4605/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4606///
4607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_pd&expand=2681)
4608#[inline]
4609#[target_feature(enable = "avx512f")]
4610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4611#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4612#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4613pub const fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4614    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), c) }
4615}
4616
4617/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4618///
4619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_pd&expand=2676)
4620#[inline]
4621#[target_feature(enable = "avx512f,avx512vl")]
4622#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4623#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4624#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4625pub const fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4626    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), a) }
4627}
4628
4629/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4630///
4631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_pd&expand=2678)
4632#[inline]
4633#[target_feature(enable = "avx512f,avx512vl")]
4634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4635#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4636#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4637pub const fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4638    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), _mm256_setzero_pd()) }
4639}
4640
4641/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4642///
4643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_pd&expand=2677)
4644#[inline]
4645#[target_feature(enable = "avx512f,avx512vl")]
4646#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4647#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4648#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4649pub const fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4650    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), c) }
4651}
4652
4653/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4654///
4655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_pd&expand=2672)
4656#[inline]
4657#[target_feature(enable = "avx512f,avx512vl")]
4658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4659#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4660#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4661pub const fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4662    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), a) }
4663}
4664
4665/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4666///
4667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_pd&expand=2674)
4668#[inline]
4669#[target_feature(enable = "avx512f,avx512vl")]
4670#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4671#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4672#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4673pub const fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4674    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), _mm_setzero_pd()) }
4675}
4676
4677/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4678///
4679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_pd&expand=2673)
4680#[inline]
4681#[target_feature(enable = "avx512f,avx512vl")]
4682#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4683#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4684#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4685pub const fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4686    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), c) }
4687}
4688
4689/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4690///
4691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_ps&expand=2723)
4692#[inline]
4693#[target_feature(enable = "avx512f")]
4694#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4695#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4696#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4697pub const fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4698    unsafe { simd_fma(simd_neg(a), b, c) }
4699}
4700
4701/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4702///
4703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_ps&expand=2724)
4704#[inline]
4705#[target_feature(enable = "avx512f")]
4706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4707#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4708#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4709pub const fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4710    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), a) }
4711}
4712
4713/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4714///
4715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_ps&expand=2726)
4716#[inline]
4717#[target_feature(enable = "avx512f")]
4718#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4719#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4720#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4721pub const fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4722    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), _mm512_setzero_ps()) }
4723}
4724
4725/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4726///
4727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_ps&expand=2725)
4728#[inline]
4729#[target_feature(enable = "avx512f")]
4730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4731#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4732#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4733pub const fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4734    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), c) }
4735}
4736
4737/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4738///
4739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_ps&expand=2720)
4740#[inline]
4741#[target_feature(enable = "avx512f,avx512vl")]
4742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4743#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4744#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4745pub const fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4746    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), a) }
4747}
4748
4749/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4750///
4751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_ps&expand=2722)
4752#[inline]
4753#[target_feature(enable = "avx512f,avx512vl")]
4754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4755#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4757pub const fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4758    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), _mm256_setzero_ps()) }
4759}
4760
4761/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4762///
4763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_ps&expand=2721)
4764#[inline]
4765#[target_feature(enable = "avx512f,avx512vl")]
4766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4767#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4768#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4769pub const fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4770    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), c) }
4771}
4772
4773/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4774///
4775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_ps&expand=2716)
4776#[inline]
4777#[target_feature(enable = "avx512f,avx512vl")]
4778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4779#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4780#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4781pub const fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4782    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), a) }
4783}
4784
4785/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4786///
4787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_ps&expand=2718)
4788#[inline]
4789#[target_feature(enable = "avx512f,avx512vl")]
4790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4791#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4792#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4793pub const fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4794    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), _mm_setzero_ps()) }
4795}
4796
4797/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4798///
4799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_ps&expand=2717)
4800#[inline]
4801#[target_feature(enable = "avx512f,avx512vl")]
4802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4803#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4804#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4805pub const fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4806    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), c) }
4807}
4808
4809/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4810///
4811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_pd&expand=2711)
4812#[inline]
4813#[target_feature(enable = "avx512f")]
4814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4815#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4817pub const fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4818    unsafe { simd_fma(simd_neg(a), b, c) }
4819}
4820
4821/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4822///
4823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_pd&expand=2712)
4824#[inline]
4825#[target_feature(enable = "avx512f")]
4826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4827#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4828#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4829pub const fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4830    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), a) }
4831}
4832
4833/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4834///
4835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_pd&expand=2714)
4836#[inline]
4837#[target_feature(enable = "avx512f")]
4838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4839#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4840#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4841pub const fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4842    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), _mm512_setzero_pd()) }
4843}
4844
4845/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4846///
4847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_pd&expand=2713)
4848#[inline]
4849#[target_feature(enable = "avx512f")]
4850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4851#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4852#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4853pub const fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4854    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), c) }
4855}
4856
4857/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4858///
4859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_pd&expand=2708)
4860#[inline]
4861#[target_feature(enable = "avx512f,avx512vl")]
4862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4863#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4864#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4865pub const fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4866    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), a) }
4867}
4868
4869/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4870///
4871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_pd&expand=2710)
4872#[inline]
4873#[target_feature(enable = "avx512f,avx512vl")]
4874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4875#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4876#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4877pub const fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4878    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), _mm256_setzero_pd()) }
4879}
4880
4881/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4882///
4883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_pd&expand=2709)
4884#[inline]
4885#[target_feature(enable = "avx512f,avx512vl")]
4886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4887#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4888#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4889pub const fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4890    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), c) }
4891}
4892
4893/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4894///
4895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_pd&expand=2704)
4896#[inline]
4897#[target_feature(enable = "avx512f,avx512vl")]
4898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4899#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4900#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4901pub const fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4902    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), a) }
4903}
4904
4905/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4906///
4907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_pd&expand=2706)
4908#[inline]
4909#[target_feature(enable = "avx512f,avx512vl")]
4910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4911#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4912#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4913pub const fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4914    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), _mm_setzero_pd()) }
4915}
4916
4917/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4918///
4919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_pd&expand=2705)
4920#[inline]
4921#[target_feature(enable = "avx512f,avx512vl")]
4922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4923#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4924#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4925pub const fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4926    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), c) }
4927}
4928
4929/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4930///
4931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_ps&expand=2771)
4932#[inline]
4933#[target_feature(enable = "avx512f")]
4934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4935#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4936#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4937pub const fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4938    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
4939}
4940
4941/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4942///
4943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_ps&expand=2772)
4944#[inline]
4945#[target_feature(enable = "avx512f")]
4946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4947#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4948#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4949pub const fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4950    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), a) }
4951}
4952
4953/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4954///
4955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_ps&expand=2774)
4956#[inline]
4957#[target_feature(enable = "avx512f")]
4958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4959#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4960#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4961pub const fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4962    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), _mm512_setzero_ps()) }
4963}
4964
4965/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4966///
4967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_ps&expand=2773)
4968#[inline]
4969#[target_feature(enable = "avx512f")]
4970#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4971#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4972#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4973pub const fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4974    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), c) }
4975}
4976
4977/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4978///
4979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_ps&expand=2768)
4980#[inline]
4981#[target_feature(enable = "avx512f,avx512vl")]
4982#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4983#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4984#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4985pub const fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4986    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), a) }
4987}
4988
4989/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4990///
4991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_ps&expand=2770)
4992#[inline]
4993#[target_feature(enable = "avx512f,avx512vl")]
4994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4995#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4996#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4997pub const fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4998    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), _mm256_setzero_ps()) }
4999}
5000
5001/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
5002///
5003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_ps&expand=2769)
5004#[inline]
5005#[target_feature(enable = "avx512f,avx512vl")]
5006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5007#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
5008#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5009pub const fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
5010    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), c) }
5011}
5012
5013/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
5014///
5015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_ps&expand=2764)
5016#[inline]
5017#[target_feature(enable = "avx512f,avx512vl")]
5018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5019#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
5020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5021pub const fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
5022    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), a) }
5023}
5024
5025/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5026///
5027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_ps&expand=2766)
5028#[inline]
5029#[target_feature(enable = "avx512f,avx512vl")]
5030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5031#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
5032#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5033pub const fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
5034    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), _mm_setzero_ps()) }
5035}
5036
5037/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
5038///
5039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_ps&expand=2765)
5040#[inline]
5041#[target_feature(enable = "avx512f,avx512vl")]
5042#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5043#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
5044#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5045pub const fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
5046    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), c) }
5047}
5048
5049/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
5050///
5051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_pd&expand=2759)
5052#[inline]
5053#[target_feature(enable = "avx512f")]
5054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5055#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5056#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5057pub const fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
5058    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
5059}
5060
5061/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
5062///
5063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_pd&expand=2760)
5064#[inline]
5065#[target_feature(enable = "avx512f")]
5066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5067#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5068#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5069pub const fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
5070    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), a) }
5071}
5072
5073/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5074///
5075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_pd&expand=2762)
5076#[inline]
5077#[target_feature(enable = "avx512f")]
5078#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5079#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5080#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5081pub const fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
5082    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), _mm512_setzero_pd()) }
5083}
5084
5085/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
5086///
5087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_pd&expand=2761)
5088#[inline]
5089#[target_feature(enable = "avx512f")]
5090#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5091#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5092#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5093pub const fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
5094    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), c) }
5095}
5096
5097/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
5098///
5099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_pd&expand=2756)
5100#[inline]
5101#[target_feature(enable = "avx512f,avx512vl")]
5102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5103#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5104#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5105pub const fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
5106    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), a) }
5107}
5108
5109/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5110///
5111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_pd&expand=2758)
5112#[inline]
5113#[target_feature(enable = "avx512f,avx512vl")]
5114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5115#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5116#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5117pub const fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
5118    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), _mm256_setzero_pd()) }
5119}
5120
5121/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
5122///
5123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_pd&expand=2757)
5124#[inline]
5125#[target_feature(enable = "avx512f,avx512vl")]
5126#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5127#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5128#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5129pub const fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
5130    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), c) }
5131}
5132
5133/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
5134///
5135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_pd&expand=2752)
5136#[inline]
5137#[target_feature(enable = "avx512f,avx512vl")]
5138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5139#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5140#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5141pub const fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
5142    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), a) }
5143}
5144
5145/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5146///
5147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_pd&expand=2754)
5148#[inline]
5149#[target_feature(enable = "avx512f,avx512vl")]
5150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5151#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5152#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5153pub const fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
5154    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), _mm_setzero_pd()) }
5155}
5156
5157/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
5158///
5159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_pd&expand=2753)
5160#[inline]
5161#[target_feature(enable = "avx512f,avx512vl")]
5162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5163#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5164#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5165pub const fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
5166    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), c) }
5167}
5168
/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_ps&expand=4502)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm512_rcp14_ps(a: __m512) -> __m512 {
    // All 16 mask bits set: every lane is computed, so the zero `src`
    // fallback operand is never observed in the result.
    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
}
5179
/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_ps&expand=4500)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    // Lanes with a clear bit in `k` take the corresponding lane of `src`.
    unsafe { transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k)) }
}
5190
/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_ps&expand=4501)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
    // Zero `src` operand: lanes with a clear bit in `k` come out as 0.0.
    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, k)) }
}
5201
/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_ps&expand=4499)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm256_rcp14_ps(a: __m256) -> __m256 {
    // All 8 mask bits set: every lane is computed, so the zero `src`
    // fallback operand is never observed in the result.
    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
}
5212
/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_ps&expand=4497)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    // Lanes with a clear bit in `k` take the corresponding lane of `src`.
    unsafe { transmute(vrcp14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
}
5223
/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_ps&expand=4498)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 {
    // Zero `src` operand: lanes with a clear bit in `k` come out as 0.0.
    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
}
5234
/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_ps&expand=4496)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm_rcp14_ps(a: __m128) -> __m128 {
    // Low 4 mask bits set (one per f32 lane): every lane is computed, so
    // the zero `src` fallback operand is never observed in the result.
    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
}
5245
/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_ps&expand=4494)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    // Lanes with a clear bit in `k` take the corresponding lane of `src`.
    unsafe { transmute(vrcp14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
}
5256
/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_ps&expand=4495)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 {
    // Zero `src` operand: lanes with a clear bit in `k` come out as 0.0.
    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
}
5267
/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_pd&expand=4493)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
    // All 8 mask bits set (one per f64 lane): every lane is computed, so
    // the zero `src` fallback operand is never observed in the result.
    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
}
5278
/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_pd&expand=4491)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    // Lanes with a clear bit in `k` take the corresponding lane of `src`.
    unsafe { transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k)) }
}
5289
/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_pd&expand=4492)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
    // Zero-masking: merge source is the zero vector, so masked-off lanes are 0.0.
    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, k)) }
}
5300
/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_pd&expand=4490)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm256_rcp14_pd(a: __m256d) -> __m256d {
    // Unmasked form: the low 4 mask bits cover all 4 lanes of a 256-bit
    // double vector; the zero merge source is never observable.
    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
}
5311
/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_pd&expand=4488)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    // Merge-masking: lanes whose bit in `k` is clear pass `src` through.
    unsafe { transmute(vrcp14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
}
5322
/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_pd&expand=4489)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d {
    // Zero-masking: merge source is the zero vector, so masked-off lanes are 0.0.
    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
}
5333
/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_pd&expand=4487)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm_rcp14_pd(a: __m128d) -> __m128d {
    // Unmasked form: the low 2 mask bits cover both lanes of a 128-bit
    // double vector; the zero merge source is never observable.
    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
}
5344
/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_pd&expand=4485)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    // Merge-masking: lanes whose bit in `k` is clear pass `src` through.
    unsafe { transmute(vrcp14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
}
5355
/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_pd&expand=4486)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d {
    // Zero-masking: merge source is the zero vector, so masked-off lanes are 0.0.
    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
}
5366
/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_ps&expand=4819)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
    // Unmasked form: all 16 mask bits set, so every lane is computed and the
    // zero vector passed as the merge source is never observable.
    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
}
5377
/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_ps&expand=4817)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    // Merge-masking: lanes whose bit in `k` is clear pass `src` through.
    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k)) }
}
5388
/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_ps&expand=4818)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
    // Zero-masking: merge source is the zero vector, so masked-off lanes are 0.0.
    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, k)) }
}
5399
/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm256_rsqrt14_ps(a: __m256) -> __m256 {
    // Unmasked form: all 8 mask bits set, so every lane is computed and the
    // zero vector passed as the merge source is never observable.
    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
}
5410
/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_ps&expand=4815)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    // Merge-masking: lanes whose bit in `k` is clear pass `src` through.
    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
}
5421
/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_ps&expand=4816)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 {
    // Zero-masking: merge source is the zero vector, so masked-off lanes are 0.0.
    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
}
5432
/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm_rsqrt14_ps(a: __m128) -> __m128 {
    // Unmasked form: the low 4 mask bits cover all 4 lanes of a 128-bit
    // float vector; the zero merge source is never observable.
    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
}
5443
/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_ps&expand=4813)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    // Merge-masking: lanes whose bit in `k` is clear pass `src` through.
    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
}
5454
/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_ps&expand=4814)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 {
    // Zero-masking: merge source is the zero vector, so masked-off lanes are 0.0.
    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
}
5465
/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_pd&expand=4812)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
    // Unmasked form: all 8 mask bits set, so every lane is computed and the
    // zero vector passed as the merge source is never observable.
    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
}
5476
/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_pd&expand=4810)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    // Merge-masking: lanes whose bit in `k` is clear pass `src` through.
    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k)) }
}
5487
/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_pd&expand=4811)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
    // Zero-masking: merge source is the zero vector, so masked-off lanes are 0.0.
    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, k)) }
}
5498
/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d {
    // Unmasked form: the low 4 mask bits cover all 4 lanes of a 256-bit
    // double vector; the zero merge source is never observable.
    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
}
5509
/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_pd&expand=4808)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    // Merge-masking: lanes whose bit in `k` is clear pass `src` through.
    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
}
5520
/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_pd&expand=4809)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d {
    // Zero-masking: merge source is the zero vector, so masked-off lanes are 0.0.
    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
}
5531
/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm_rsqrt14_pd(a: __m128d) -> __m128d {
    // Unmasked form: the low 2 mask bits cover both lanes of a 128-bit
    // double vector; the zero merge source is never observable.
    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
}
5542
/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_pd&expand=4806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    // Merge-masking: lanes whose bit in `k` is clear pass `src` through.
    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
}
5553
/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_pd&expand=4807)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d {
    // Zero-masking: merge source is the zero vector, so masked-off lanes are 0.0.
    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
}
5564
/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_ps&expand=2844)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm512_getexp_ps(a: __m512) -> __m512 {
    unsafe {
        // Unmasked form: all 16 mask bits set; the zero merge source is never
        // observable. _MM_FROUND_CUR_DIRECTION selects the current MXCSR
        // rounding mode (no SAE override).
        transmute(vgetexpps(
            a.as_f32x16(),
            f32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
5582
/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_ps&expand=2845)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        // Merge-masking: lanes whose bit in `k` is clear pass `src` through.
        // _MM_FROUND_CUR_DIRECTION selects the current MXCSR rounding mode.
        transmute(vgetexpps(
            a.as_f32x16(),
            src.as_f32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
5600
/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_ps&expand=2846)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        // Zero-masking: merge source is the zero vector, so masked-off lanes
        // are 0.0. _MM_FROUND_CUR_DIRECTION selects the current MXCSR mode.
        transmute(vgetexpps(
            a.as_f32x16(),
            f32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
5618
/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_ps&expand=2841)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm256_getexp_ps(a: __m256) -> __m256 {
    // Unmasked form: all 8 mask bits set; the zero merge source is never
    // observable. (The 256-bit intrinsic takes no rounding argument.)
    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
}
5629
/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_ps&expand=2842)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    // Merge-masking: lanes whose bit in `k` is clear pass `src` through.
    unsafe { transmute(vgetexpps256(a.as_f32x8(), src.as_f32x8(), k)) }
}
5640
/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_ps&expand=2843)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 {
    // Zero-masking: merge source is the zero vector, so masked-off lanes are 0.0.
    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, k)) }
}
5651
/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_ps&expand=2838)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm_getexp_ps(a: __m128) -> __m128 {
    // Unmasked form: the low 4 mask bits cover all 4 lanes of a 128-bit
    // float vector; the zero merge source is never observable.
    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
}
5662
/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_ps&expand=2839)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    // Merge-masking: lanes whose bit in `k` is clear pass `src` through.
    unsafe { transmute(vgetexpps128(a.as_f32x4(), src.as_f32x4(), k)) }
}
5673
/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_ps&expand=2840)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 {
    // Zero-masking: merge source is the zero vector, so masked-off lanes are 0.0.
    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, k)) }
}
5684
/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_pd&expand=2835)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm512_getexp_pd(a: __m512d) -> __m512d {
    unsafe {
        // Unmasked form: all 8 mask bits set; the zero merge source is never
        // observable. _MM_FROUND_CUR_DIRECTION selects the current MXCSR
        // rounding mode (no SAE override).
        transmute(vgetexppd(
            a.as_f64x8(),
            f64x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
5702
/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_pd&expand=2836)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        // Merge-masking: lanes whose bit in `k` is clear pass `src` through.
        // _MM_FROUND_CUR_DIRECTION selects the current MXCSR rounding mode.
        transmute(vgetexppd(
            a.as_f64x8(),
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
5720
/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_pd&expand=2837)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        // Zero-masking: merge source is the zero vector, so masked-off lanes
        // are 0.0. _MM_FROUND_CUR_DIRECTION selects the current MXCSR mode.
        transmute(vgetexppd(
            a.as_f64x8(),
            f64x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
5738
/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_pd&expand=2832)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm256_getexp_pd(a: __m256d) -> __m256d {
    // Unmasked form: the low 4 mask bits cover all 4 lanes of a 256-bit
    // double vector; the zero merge source is never observable.
    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
}
5749
/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_pd&expand=2833)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    // Merge-masking: lanes whose bit in `k` is clear pass `src` through.
    unsafe { transmute(vgetexppd256(a.as_f64x4(), src.as_f64x4(), k)) }
}
5760
/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_pd&expand=2834)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d {
    // Zero-masking: merge source is the zero vector, so masked-off lanes are 0.0.
    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, k)) }
}
5771
5772/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5773///
5774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_pd&expand=2829)
5775#[inline]
5776#[target_feature(enable = "avx512f,avx512vl")]
5777#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5778#[cfg_attr(test, assert_instr(vgetexppd))]
5779pub fn _mm_getexp_pd(a: __m128d) -> __m128d {
5780    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5781}
5782
5783/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5784///
5785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_pd&expand=2830)
5786#[inline]
5787#[target_feature(enable = "avx512f,avx512vl")]
5788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5789#[cfg_attr(test, assert_instr(vgetexppd))]
5790pub fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5791    unsafe { transmute(vgetexppd128(a.as_f64x2(), src.as_f64x2(), k)) }
5792}
5793
5794/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5795///
5796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_pd&expand=2831)
5797#[inline]
5798#[target_feature(enable = "avx512f,avx512vl")]
5799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5800#[cfg_attr(test, assert_instr(vgetexppd))]
5801pub fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d {
5802    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, k)) }
5803}
5804
5805/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5806/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5807/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5808/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5809/// * [`_MM_FROUND_TO_POS_INF`] : round up
5810/// * [`_MM_FROUND_TO_ZERO`] : truncate
5811/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5812///
5813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_ps&expand=4784)
5814#[inline]
5815#[target_feature(enable = "avx512f")]
5816#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5817#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5818#[rustc_legacy_const_generics(1)]
5819pub fn _mm512_roundscale_ps<const IMM8: i32>(a: __m512) -> __m512 {
5820    unsafe {
5821        static_assert_uimm_bits!(IMM8, 8);
5822        let a = a.as_f32x16();
5823        let r = vrndscaleps(
5824            a,
5825            IMM8,
5826            f32x16::ZERO,
5827            0b11111111_11111111,
5828            _MM_FROUND_CUR_DIRECTION,
5829        );
5830        transmute(r)
5831    }
5832}
5833
5834/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5835/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5836/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5837/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5838/// * [`_MM_FROUND_TO_POS_INF`] : round up
5839/// * [`_MM_FROUND_TO_ZERO`] : truncate
5840/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5841///
5842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_ps&expand=4782)
5843#[inline]
5844#[target_feature(enable = "avx512f")]
5845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5846#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5847#[rustc_legacy_const_generics(3)]
5848pub fn _mm512_mask_roundscale_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5849    unsafe {
5850        static_assert_uimm_bits!(IMM8, 8);
5851        let a = a.as_f32x16();
5852        let src = src.as_f32x16();
5853        let r = vrndscaleps(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
5854        transmute(r)
5855    }
5856}
5857
5858/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5859/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5860/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5861/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5862/// * [`_MM_FROUND_TO_POS_INF`] : round up
5863/// * [`_MM_FROUND_TO_ZERO`] : truncate
5864/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5865///
5866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_ps&expand=4783)
5867#[inline]
5868#[target_feature(enable = "avx512f")]
5869#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5870#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5871#[rustc_legacy_const_generics(2)]
5872pub fn _mm512_maskz_roundscale_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
5873    unsafe {
5874        static_assert_uimm_bits!(IMM8, 8);
5875        let a = a.as_f32x16();
5876        let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION);
5877        transmute(r)
5878    }
5879}
5880
5881/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5882/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5883/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5884/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5885/// * [`_MM_FROUND_TO_POS_INF`] : round up
5886/// * [`_MM_FROUND_TO_ZERO`] : truncate
5887/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5888///
5889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_ps&expand=4781)
5890#[inline]
5891#[target_feature(enable = "avx512f,avx512vl")]
5892#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5893#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5894#[rustc_legacy_const_generics(1)]
5895pub fn _mm256_roundscale_ps<const IMM8: i32>(a: __m256) -> __m256 {
5896    unsafe {
5897        static_assert_uimm_bits!(IMM8, 8);
5898        let a = a.as_f32x8();
5899        let r = vrndscaleps256(a, IMM8, f32x8::ZERO, 0b11111111);
5900        transmute(r)
5901    }
5902}
5903
5904/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5905/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5906/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5907/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5908/// * [`_MM_FROUND_TO_POS_INF`] : round up
5909/// * [`_MM_FROUND_TO_ZERO`] : truncate
5910/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5911///
5912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_ps&expand=4779)
5913#[inline]
5914#[target_feature(enable = "avx512f,avx512vl")]
5915#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5916#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5917#[rustc_legacy_const_generics(3)]
5918pub fn _mm256_mask_roundscale_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5919    unsafe {
5920        static_assert_uimm_bits!(IMM8, 8);
5921        let a = a.as_f32x8();
5922        let src = src.as_f32x8();
5923        let r = vrndscaleps256(a, IMM8, src, k);
5924        transmute(r)
5925    }
5926}
5927
5928/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5929/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5930/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5931/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5932/// * [`_MM_FROUND_TO_POS_INF`] : round up
5933/// * [`_MM_FROUND_TO_ZERO`] : truncate
5934/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5935///
5936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_ps&expand=4780)
5937#[inline]
5938#[target_feature(enable = "avx512f,avx512vl")]
5939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5940#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5941#[rustc_legacy_const_generics(2)]
5942pub fn _mm256_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
5943    unsafe {
5944        static_assert_uimm_bits!(IMM8, 8);
5945        let a = a.as_f32x8();
5946        let r = vrndscaleps256(a, IMM8, f32x8::ZERO, k);
5947        transmute(r)
5948    }
5949}
5950
5951/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5952/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5953/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5954/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5955/// * [`_MM_FROUND_TO_POS_INF`] : round up
5956/// * [`_MM_FROUND_TO_ZERO`] : truncate
5957/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5958///
5959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_ps&expand=4778)
5960#[inline]
5961#[target_feature(enable = "avx512f,avx512vl")]
5962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5963#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5964#[rustc_legacy_const_generics(1)]
5965pub fn _mm_roundscale_ps<const IMM8: i32>(a: __m128) -> __m128 {
5966    unsafe {
5967        static_assert_uimm_bits!(IMM8, 8);
5968        let a = a.as_f32x4();
5969        let r = vrndscaleps128(a, IMM8, f32x4::ZERO, 0b00001111);
5970        transmute(r)
5971    }
5972}
5973
5974/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5975/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5976/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5977/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5978/// * [`_MM_FROUND_TO_POS_INF`] : round up
5979/// * [`_MM_FROUND_TO_ZERO`] : truncate
5980/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5981///
5982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_ps&expand=4776)
5983#[inline]
5984#[target_feature(enable = "avx512f,avx512vl")]
5985#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5986#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5987#[rustc_legacy_const_generics(3)]
5988pub fn _mm_mask_roundscale_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5989    unsafe {
5990        static_assert_uimm_bits!(IMM8, 8);
5991        let a = a.as_f32x4();
5992        let src = src.as_f32x4();
5993        let r = vrndscaleps128(a, IMM8, src, k);
5994        transmute(r)
5995    }
5996}
5997
5998/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5999/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6000/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6001/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6002/// * [`_MM_FROUND_TO_POS_INF`] : round up
6003/// * [`_MM_FROUND_TO_ZERO`] : truncate
6004/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6005///
6006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_ps&expand=4777)
6007#[inline]
6008#[target_feature(enable = "avx512f,avx512vl")]
6009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6010#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
6011#[rustc_legacy_const_generics(2)]
6012pub fn _mm_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
6013    unsafe {
6014        static_assert_uimm_bits!(IMM8, 8);
6015        let a = a.as_f32x4();
6016        let r = vrndscaleps128(a, IMM8, f32x4::ZERO, k);
6017        transmute(r)
6018    }
6019}
6020
6021/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
6022/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6023/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6024/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6025/// * [`_MM_FROUND_TO_POS_INF`] : round up
6026/// * [`_MM_FROUND_TO_ZERO`] : truncate
6027/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6028///
6029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_pd&expand=4775)
6030#[inline]
6031#[target_feature(enable = "avx512f")]
6032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6033#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6034#[rustc_legacy_const_generics(1)]
6035pub fn _mm512_roundscale_pd<const IMM8: i32>(a: __m512d) -> __m512d {
6036    unsafe {
6037        static_assert_uimm_bits!(IMM8, 8);
6038        let a = a.as_f64x8();
6039        let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION);
6040        transmute(r)
6041    }
6042}
6043
6044/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
6045/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6046/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6047/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6048/// * [`_MM_FROUND_TO_POS_INF`] : round up
6049/// * [`_MM_FROUND_TO_ZERO`] : truncate
6050/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6051///
6052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_pd&expand=4773)
6053#[inline]
6054#[target_feature(enable = "avx512f")]
6055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6056#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6057#[rustc_legacy_const_generics(3)]
6058pub fn _mm512_mask_roundscale_pd<const IMM8: i32>(
6059    src: __m512d,
6060    k: __mmask8,
6061    a: __m512d,
6062) -> __m512d {
6063    unsafe {
6064        static_assert_uimm_bits!(IMM8, 8);
6065        let a = a.as_f64x8();
6066        let src = src.as_f64x8();
6067        let r = vrndscalepd(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
6068        transmute(r)
6069    }
6070}
6071
6072/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
6073/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6074/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6075/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6076/// * [`_MM_FROUND_TO_POS_INF`] : round up
6077/// * [`_MM_FROUND_TO_ZERO`] : truncate
6078/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6079///
6080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_pd&expand=4774)
6081#[inline]
6082#[target_feature(enable = "avx512f")]
6083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6084#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6085#[rustc_legacy_const_generics(2)]
6086pub fn _mm512_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
6087    unsafe {
6088        static_assert_uimm_bits!(IMM8, 8);
6089        let a = a.as_f64x8();
6090        let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, _MM_FROUND_CUR_DIRECTION);
6091        transmute(r)
6092    }
6093}
6094
6095/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
6096/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6097/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6098/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6099/// * [`_MM_FROUND_TO_POS_INF`] : round up
6100/// * [`_MM_FROUND_TO_ZERO`] : truncate
6101/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6102///
6103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_pd&expand=4772)
6104#[inline]
6105#[target_feature(enable = "avx512f,avx512vl")]
6106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6107#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 16))]
6108#[rustc_legacy_const_generics(1)]
6109pub fn _mm256_roundscale_pd<const IMM8: i32>(a: __m256d) -> __m256d {
6110    unsafe {
6111        static_assert_uimm_bits!(IMM8, 8);
6112        let a = a.as_f64x4();
6113        let r = vrndscalepd256(a, IMM8, f64x4::ZERO, 0b00001111);
6114        transmute(r)
6115    }
6116}
6117
6118/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
6119/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6120/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6121/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6122/// * [`_MM_FROUND_TO_POS_INF`] : round up
6123/// * [`_MM_FROUND_TO_ZERO`] : truncate
6124/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6125///
6126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_pd&expand=4770)
6127#[inline]
6128#[target_feature(enable = "avx512f,avx512vl")]
6129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6130#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6131#[rustc_legacy_const_generics(3)]
6132pub fn _mm256_mask_roundscale_pd<const IMM8: i32>(
6133    src: __m256d,
6134    k: __mmask8,
6135    a: __m256d,
6136) -> __m256d {
6137    unsafe {
6138        static_assert_uimm_bits!(IMM8, 8);
6139        let a = a.as_f64x4();
6140        let src = src.as_f64x4();
6141        let r = vrndscalepd256(a, IMM8, src, k);
6142        transmute(r)
6143    }
6144}
6145
6146/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
6147/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6148/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6149/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6150/// * [`_MM_FROUND_TO_POS_INF`] : round up
6151/// * [`_MM_FROUND_TO_ZERO`] : truncate
6152/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6153///
6154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_pd&expand=4771)
6155#[inline]
6156#[target_feature(enable = "avx512f,avx512vl")]
6157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6158#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6159#[rustc_legacy_const_generics(2)]
6160pub fn _mm256_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
6161    unsafe {
6162        static_assert_uimm_bits!(IMM8, 8);
6163        let a = a.as_f64x4();
6164        let r = vrndscalepd256(a, IMM8, f64x4::ZERO, k);
6165        transmute(r)
6166    }
6167}
6168
6169/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
6170/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6171/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6172/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6173/// * [`_MM_FROUND_TO_POS_INF`] : round up
6174/// * [`_MM_FROUND_TO_ZERO`] : truncate
6175/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6176///
6177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_pd&expand=4769)
6178#[inline]
6179#[target_feature(enable = "avx512f,avx512vl")]
6180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6181#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 16))]
6182#[rustc_legacy_const_generics(1)]
6183pub fn _mm_roundscale_pd<const IMM8: i32>(a: __m128d) -> __m128d {
6184    unsafe {
6185        static_assert_uimm_bits!(IMM8, 8);
6186        let a = a.as_f64x2();
6187        let r = vrndscalepd128(a, IMM8, f64x2::ZERO, 0b00000011);
6188        transmute(r)
6189    }
6190}
6191
6192/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
6193/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6194/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6195/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6196/// * [`_MM_FROUND_TO_POS_INF`] : round up
6197/// * [`_MM_FROUND_TO_ZERO`] : truncate
6198/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6199///
6200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_pd&expand=4767)
6201#[inline]
6202#[target_feature(enable = "avx512f,avx512vl")]
6203#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6204#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6205#[rustc_legacy_const_generics(3)]
6206pub fn _mm_mask_roundscale_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
6207    unsafe {
6208        static_assert_uimm_bits!(IMM8, 8);
6209        let a = a.as_f64x2();
6210        let src = src.as_f64x2();
6211        let r = vrndscalepd128(a, IMM8, src, k);
6212        transmute(r)
6213    }
6214}
6215
6216/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
6217/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6218/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6219/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6220/// * [`_MM_FROUND_TO_POS_INF`] : round up
6221/// * [`_MM_FROUND_TO_ZERO`] : truncate
6222/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6223///
6224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_pd&expand=4768)
6225#[inline]
6226#[target_feature(enable = "avx512f,avx512vl")]
6227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6228#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6229#[rustc_legacy_const_generics(2)]
6230pub fn _mm_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
6231    unsafe {
6232        static_assert_uimm_bits!(IMM8, 8);
6233        let a = a.as_f64x2();
6234        let r = vrndscalepd128(a, IMM8, f64x2::ZERO, k);
6235        transmute(r)
6236    }
6237}
6238
6239/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6240///
6241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_ps&expand=4883)
6242#[inline]
6243#[target_feature(enable = "avx512f")]
6244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6245#[cfg_attr(test, assert_instr(vscalefps))]
6246pub fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 {
6247    unsafe {
6248        transmute(vscalefps(
6249            a.as_f32x16(),
6250            b.as_f32x16(),
6251            f32x16::ZERO,
6252            0b11111111_11111111,
6253            _MM_FROUND_CUR_DIRECTION,
6254        ))
6255    }
6256}
6257
6258/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6259///
6260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_ps&expand=4881)
6261#[inline]
6262#[target_feature(enable = "avx512f")]
6263#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6264#[cfg_attr(test, assert_instr(vscalefps))]
6265pub fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
6266    unsafe {
6267        transmute(vscalefps(
6268            a.as_f32x16(),
6269            b.as_f32x16(),
6270            src.as_f32x16(),
6271            k,
6272            _MM_FROUND_CUR_DIRECTION,
6273        ))
6274    }
6275}
6276
6277/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6278///
6279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_ps&expand=4882)
6280#[inline]
6281#[target_feature(enable = "avx512f")]
6282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6283#[cfg_attr(test, assert_instr(vscalefps))]
6284pub fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
6285    unsafe {
6286        transmute(vscalefps(
6287            a.as_f32x16(),
6288            b.as_f32x16(),
6289            f32x16::ZERO,
6290            k,
6291            _MM_FROUND_CUR_DIRECTION,
6292        ))
6293    }
6294}
6295
6296/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6297///
6298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_ps&expand=4880)
6299#[inline]
6300#[target_feature(enable = "avx512f,avx512vl")]
6301#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6302#[cfg_attr(test, assert_instr(vscalefps))]
6303pub fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 {
6304    unsafe {
6305        transmute(vscalefps256(
6306            a.as_f32x8(),
6307            b.as_f32x8(),
6308            f32x8::ZERO,
6309            0b11111111,
6310        ))
6311    }
6312}
6313
6314/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6315///
6316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_ps&expand=4878)
6317#[inline]
6318#[target_feature(enable = "avx512f,avx512vl")]
6319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6320#[cfg_attr(test, assert_instr(vscalefps))]
6321pub fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
6322    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), src.as_f32x8(), k)) }
6323}
6324
6325/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6326///
6327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_ps&expand=4879)
6328#[inline]
6329#[target_feature(enable = "avx512f,avx512vl")]
6330#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6331#[cfg_attr(test, assert_instr(vscalefps))]
6332pub fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
6333    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), f32x8::ZERO, k)) }
6334}
6335
6336/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6337///
6338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_ps&expand=4877)
6339#[inline]
6340#[target_feature(enable = "avx512f,avx512vl")]
6341#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6342#[cfg_attr(test, assert_instr(vscalefps))]
6343pub fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 {
6344    unsafe {
6345        transmute(vscalefps128(
6346            a.as_f32x4(),
6347            b.as_f32x4(),
6348            f32x4::ZERO,
6349            0b00001111,
6350        ))
6351    }
6352}
6353
6354/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6355///
6356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_ps&expand=4875)
6357#[inline]
6358#[target_feature(enable = "avx512f,avx512vl")]
6359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6360#[cfg_attr(test, assert_instr(vscalefps))]
6361pub fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
6362    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
6363}
6364
6365/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6366///
6367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_ps&expand=4876)
6368#[inline]
6369#[target_feature(enable = "avx512f,avx512vl")]
6370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6371#[cfg_attr(test, assert_instr(vscalefps))]
6372pub fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
6373    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
6374}
6375
6376/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6377///
6378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_pd&expand=4874)
6379#[inline]
6380#[target_feature(enable = "avx512f")]
6381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6382#[cfg_attr(test, assert_instr(vscalefpd))]
6383pub fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d {
6384    unsafe {
6385        transmute(vscalefpd(
6386            a.as_f64x8(),
6387            b.as_f64x8(),
6388            f64x8::ZERO,
6389            0b11111111,
6390            _MM_FROUND_CUR_DIRECTION,
6391        ))
6392    }
6393}
6394
6395/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6396///
6397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_pd&expand=4872)
6398#[inline]
6399#[target_feature(enable = "avx512f")]
6400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6401#[cfg_attr(test, assert_instr(vscalefpd))]
6402pub fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6403    unsafe {
6404        transmute(vscalefpd(
6405            a.as_f64x8(),
6406            b.as_f64x8(),
6407            src.as_f64x8(),
6408            k,
6409            _MM_FROUND_CUR_DIRECTION,
6410        ))
6411    }
6412}
6413
6414/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6415///
6416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_pd&expand=4873)
6417#[inline]
6418#[target_feature(enable = "avx512f")]
6419#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6420#[cfg_attr(test, assert_instr(vscalefpd))]
6421pub fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6422    unsafe {
6423        transmute(vscalefpd(
6424            a.as_f64x8(),
6425            b.as_f64x8(),
6426            f64x8::ZERO,
6427            k,
6428            _MM_FROUND_CUR_DIRECTION,
6429        ))
6430    }
6431}
6432
6433/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6434///
6435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_pd&expand=4871)
6436#[inline]
6437#[target_feature(enable = "avx512f,avx512vl")]
6438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6439#[cfg_attr(test, assert_instr(vscalefpd))]
6440pub fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d {
6441    unsafe {
6442        transmute(vscalefpd256(
6443            a.as_f64x4(),
6444            b.as_f64x4(),
6445            f64x4::ZERO,
6446            0b00001111,
6447        ))
6448    }
6449}
6450
6451/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6452///
6453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_pd&expand=4869)
6454#[inline]
6455#[target_feature(enable = "avx512f,avx512vl")]
6456#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6457#[cfg_attr(test, assert_instr(vscalefpd))]
6458pub fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6459    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), src.as_f64x4(), k)) }
6460}
6461
6462/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6463///
6464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_pd&expand=4870)
6465#[inline]
6466#[target_feature(enable = "avx512f,avx512vl")]
6467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6468#[cfg_attr(test, assert_instr(vscalefpd))]
6469pub fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6470    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), f64x4::ZERO, k)) }
6471}
6472
6473/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6474///
6475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_pd&expand=4868)
6476#[inline]
6477#[target_feature(enable = "avx512f,avx512vl")]
6478#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6479#[cfg_attr(test, assert_instr(vscalefpd))]
6480pub fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d {
6481    unsafe {
6482        transmute(vscalefpd128(
6483            a.as_f64x2(),
6484            b.as_f64x2(),
6485            f64x2::ZERO,
6486            0b00000011,
6487        ))
6488    }
6489}
6490
6491/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6492///
6493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_pd&expand=4866)
6494#[inline]
6495#[target_feature(enable = "avx512f,avx512vl")]
6496#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6497#[cfg_attr(test, assert_instr(vscalefpd))]
6498pub fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6499    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
6500}
6501
6502/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6503///
6504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_pd&expand=4867)
6505#[inline]
6506#[target_feature(enable = "avx512f,avx512vl")]
6507#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6508#[cfg_attr(test, assert_instr(vscalefpd))]
6509pub fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6510    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
6511}
6512
6513/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6514///
6515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_ps&expand=2499)
6516#[inline]
6517#[target_feature(enable = "avx512f")]
6518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6519#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6520#[rustc_legacy_const_generics(3)]
6521pub fn _mm512_fixupimm_ps<const IMM8: i32>(a: __m512, b: __m512, c: __m512i) -> __m512 {
6522    unsafe {
6523        static_assert_uimm_bits!(IMM8, 8);
6524        let a = a.as_f32x16();
6525        let b = b.as_f32x16();
6526        let c = c.as_i32x16();
6527        let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION);
6528        transmute(r)
6529    }
6530}
6531
6532/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6533///
6534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_ps&expand=2500)
6535#[inline]
6536#[target_feature(enable = "avx512f")]
6537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6538#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6539#[rustc_legacy_const_generics(4)]
6540pub fn _mm512_mask_fixupimm_ps<const IMM8: i32>(
6541    a: __m512,
6542    k: __mmask16,
6543    b: __m512,
6544    c: __m512i,
6545) -> __m512 {
6546    unsafe {
6547        static_assert_uimm_bits!(IMM8, 8);
6548        let a = a.as_f32x16();
6549        let b = b.as_f32x16();
6550        let c = c.as_i32x16();
6551        let r = vfixupimmps(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6552        transmute(r)
6553    }
6554}
6555
6556/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6557///
6558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_ps&expand=2501)
6559#[inline]
6560#[target_feature(enable = "avx512f")]
6561#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6562#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6563#[rustc_legacy_const_generics(4)]
6564pub fn _mm512_maskz_fixupimm_ps<const IMM8: i32>(
6565    k: __mmask16,
6566    a: __m512,
6567    b: __m512,
6568    c: __m512i,
6569) -> __m512 {
6570    unsafe {
6571        static_assert_uimm_bits!(IMM8, 8);
6572        let a = a.as_f32x16();
6573        let b = b.as_f32x16();
6574        let c = c.as_i32x16();
6575        let r = vfixupimmpsz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6576        transmute(r)
6577    }
6578}
6579
6580/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6581///
6582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_ps&expand=2496)
6583#[inline]
6584#[target_feature(enable = "avx512f,avx512vl")]
6585#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6586#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6587#[rustc_legacy_const_generics(3)]
6588pub fn _mm256_fixupimm_ps<const IMM8: i32>(a: __m256, b: __m256, c: __m256i) -> __m256 {
6589    unsafe {
6590        static_assert_uimm_bits!(IMM8, 8);
6591        let a = a.as_f32x8();
6592        let b = b.as_f32x8();
6593        let c = c.as_i32x8();
6594        let r = vfixupimmps256(a, b, c, IMM8, 0b11111111);
6595        transmute(r)
6596    }
6597}
6598
6599/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6600///
6601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_ps&expand=2497)
6602#[inline]
6603#[target_feature(enable = "avx512f,avx512vl")]
6604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6605#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6606#[rustc_legacy_const_generics(4)]
6607pub fn _mm256_mask_fixupimm_ps<const IMM8: i32>(
6608    a: __m256,
6609    k: __mmask8,
6610    b: __m256,
6611    c: __m256i,
6612) -> __m256 {
6613    unsafe {
6614        static_assert_uimm_bits!(IMM8, 8);
6615        let a = a.as_f32x8();
6616        let b = b.as_f32x8();
6617        let c = c.as_i32x8();
6618        let r = vfixupimmps256(a, b, c, IMM8, k);
6619        transmute(r)
6620    }
6621}
6622
6623/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6624///
6625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_ps&expand=2498)
6626#[inline]
6627#[target_feature(enable = "avx512f,avx512vl")]
6628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6629#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6630#[rustc_legacy_const_generics(4)]
6631pub fn _mm256_maskz_fixupimm_ps<const IMM8: i32>(
6632    k: __mmask8,
6633    a: __m256,
6634    b: __m256,
6635    c: __m256i,
6636) -> __m256 {
6637    unsafe {
6638        static_assert_uimm_bits!(IMM8, 8);
6639        let a = a.as_f32x8();
6640        let b = b.as_f32x8();
6641        let c = c.as_i32x8();
6642        let r = vfixupimmpsz256(a, b, c, IMM8, k);
6643        transmute(r)
6644    }
6645}
6646
6647/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6648///
6649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_ps&expand=2493)
6650#[inline]
6651#[target_feature(enable = "avx512f,avx512vl")]
6652#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6653#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6654#[rustc_legacy_const_generics(3)]
6655pub fn _mm_fixupimm_ps<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
6656    unsafe {
6657        static_assert_uimm_bits!(IMM8, 8);
6658        let a = a.as_f32x4();
6659        let b = b.as_f32x4();
6660        let c = c.as_i32x4();
6661        let r = vfixupimmps128(a, b, c, IMM8, 0b00001111);
6662        transmute(r)
6663    }
6664}
6665
6666/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6667///
6668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_ps&expand=2494)
6669#[inline]
6670#[target_feature(enable = "avx512f,avx512vl")]
6671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6672#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6673#[rustc_legacy_const_generics(4)]
6674pub fn _mm_mask_fixupimm_ps<const IMM8: i32>(
6675    a: __m128,
6676    k: __mmask8,
6677    b: __m128,
6678    c: __m128i,
6679) -> __m128 {
6680    unsafe {
6681        static_assert_uimm_bits!(IMM8, 8);
6682        let a = a.as_f32x4();
6683        let b = b.as_f32x4();
6684        let c = c.as_i32x4();
6685        let r = vfixupimmps128(a, b, c, IMM8, k);
6686        transmute(r)
6687    }
6688}
6689
6690/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6691///
6692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_ps&expand=2495)
6693#[inline]
6694#[target_feature(enable = "avx512f,avx512vl")]
6695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6696#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6697#[rustc_legacy_const_generics(4)]
6698pub fn _mm_maskz_fixupimm_ps<const IMM8: i32>(
6699    k: __mmask8,
6700    a: __m128,
6701    b: __m128,
6702    c: __m128i,
6703) -> __m128 {
6704    unsafe {
6705        static_assert_uimm_bits!(IMM8, 8);
6706        let a = a.as_f32x4();
6707        let b = b.as_f32x4();
6708        let c = c.as_i32x4();
6709        let r = vfixupimmpsz128(a, b, c, IMM8, k);
6710        transmute(r)
6711    }
6712}
6713
6714/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6715///
6716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_pd&expand=2490)
6717#[inline]
6718#[target_feature(enable = "avx512f")]
6719#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6720#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6721#[rustc_legacy_const_generics(3)]
6722pub fn _mm512_fixupimm_pd<const IMM8: i32>(a: __m512d, b: __m512d, c: __m512i) -> __m512d {
6723    unsafe {
6724        static_assert_uimm_bits!(IMM8, 8);
6725        let a = a.as_f64x8();
6726        let b = b.as_f64x8();
6727        let c = c.as_i64x8();
6728        let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
6729        transmute(r)
6730    }
6731}
6732
6733/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6734///
6735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_pd&expand=2491)
6736#[inline]
6737#[target_feature(enable = "avx512f")]
6738#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6739#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6740#[rustc_legacy_const_generics(4)]
6741pub fn _mm512_mask_fixupimm_pd<const IMM8: i32>(
6742    a: __m512d,
6743    k: __mmask8,
6744    b: __m512d,
6745    c: __m512i,
6746) -> __m512d {
6747    unsafe {
6748        static_assert_uimm_bits!(IMM8, 8);
6749        let a = a.as_f64x8();
6750        let b = b.as_f64x8();
6751        let c = c.as_i64x8();
6752        let r = vfixupimmpd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6753        transmute(r)
6754    }
6755}
6756
6757/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6758///
6759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_pd&expand=2492)
6760#[inline]
6761#[target_feature(enable = "avx512f")]
6762#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6763#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6764#[rustc_legacy_const_generics(4)]
6765pub fn _mm512_maskz_fixupimm_pd<const IMM8: i32>(
6766    k: __mmask8,
6767    a: __m512d,
6768    b: __m512d,
6769    c: __m512i,
6770) -> __m512d {
6771    unsafe {
6772        static_assert_uimm_bits!(IMM8, 8);
6773        let a = a.as_f64x8();
6774        let b = b.as_f64x8();
6775        let c = c.as_i64x8();
6776        let r = vfixupimmpdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6777        transmute(r)
6778    }
6779}
6780
6781/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6782///
6783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_pd&expand=2487)
6784#[inline]
6785#[target_feature(enable = "avx512f,avx512vl")]
6786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6787#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6788#[rustc_legacy_const_generics(3)]
6789pub fn _mm256_fixupimm_pd<const IMM8: i32>(a: __m256d, b: __m256d, c: __m256i) -> __m256d {
6790    unsafe {
6791        static_assert_uimm_bits!(IMM8, 8);
6792        let a = a.as_f64x4();
6793        let b = b.as_f64x4();
6794        let c = c.as_i64x4();
6795        let r = vfixupimmpd256(a, b, c, IMM8, 0b00001111);
6796        transmute(r)
6797    }
6798}
6799
6800/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6801///
6802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_pd&expand=2488)
6803#[inline]
6804#[target_feature(enable = "avx512f,avx512vl")]
6805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6806#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6807#[rustc_legacy_const_generics(4)]
6808pub fn _mm256_mask_fixupimm_pd<const IMM8: i32>(
6809    a: __m256d,
6810    k: __mmask8,
6811    b: __m256d,
6812    c: __m256i,
6813) -> __m256d {
6814    unsafe {
6815        static_assert_uimm_bits!(IMM8, 8);
6816        let a = a.as_f64x4();
6817        let b = b.as_f64x4();
6818        let c = c.as_i64x4();
6819        let r = vfixupimmpd256(a, b, c, IMM8, k);
6820        transmute(r)
6821    }
6822}
6823
6824/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6825///
6826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_pd&expand=2489)
6827#[inline]
6828#[target_feature(enable = "avx512f,avx512vl")]
6829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6830#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6831#[rustc_legacy_const_generics(4)]
6832pub fn _mm256_maskz_fixupimm_pd<const IMM8: i32>(
6833    k: __mmask8,
6834    a: __m256d,
6835    b: __m256d,
6836    c: __m256i,
6837) -> __m256d {
6838    unsafe {
6839        static_assert_uimm_bits!(IMM8, 8);
6840        let a = a.as_f64x4();
6841        let b = b.as_f64x4();
6842        let c = c.as_i64x4();
6843        let r = vfixupimmpdz256(a, b, c, IMM8, k);
6844        transmute(r)
6845    }
6846}
6847
6848/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6849///
6850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_pd&expand=2484)
6851#[inline]
6852#[target_feature(enable = "avx512f,avx512vl")]
6853#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6854#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6855#[rustc_legacy_const_generics(3)]
6856pub fn _mm_fixupimm_pd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
6857    unsafe {
6858        static_assert_uimm_bits!(IMM8, 8);
6859        let a = a.as_f64x2();
6860        let b = b.as_f64x2();
6861        let c = c.as_i64x2();
6862        let r = vfixupimmpd128(a, b, c, IMM8, 0b00000011);
6863        transmute(r)
6864    }
6865}
6866
6867/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6868///
6869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_pd&expand=2485)
6870#[inline]
6871#[target_feature(enable = "avx512f,avx512vl")]
6872#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6873#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6874#[rustc_legacy_const_generics(4)]
6875pub fn _mm_mask_fixupimm_pd<const IMM8: i32>(
6876    a: __m128d,
6877    k: __mmask8,
6878    b: __m128d,
6879    c: __m128i,
6880) -> __m128d {
6881    unsafe {
6882        static_assert_uimm_bits!(IMM8, 8);
6883        let a = a.as_f64x2();
6884        let b = b.as_f64x2();
6885        let c = c.as_i64x2();
6886        let r = vfixupimmpd128(a, b, c, IMM8, k);
6887        transmute(r)
6888    }
6889}
6890
6891/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6892///
6893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_pd&expand=2486)
6894#[inline]
6895#[target_feature(enable = "avx512f,avx512vl")]
6896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6897#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6898#[rustc_legacy_const_generics(4)]
6899pub fn _mm_maskz_fixupimm_pd<const IMM8: i32>(
6900    k: __mmask8,
6901    a: __m128d,
6902    b: __m128d,
6903    c: __m128i,
6904) -> __m128d {
6905    unsafe {
6906        static_assert_uimm_bits!(IMM8, 8);
6907        let a = a.as_f64x2();
6908        let b = b.as_f64x2();
6909        let c = c.as_i64x2();
6910        let r = vfixupimmpdz128(a, b, c, IMM8, k);
6911        transmute(r)
6912    }
6913}
6914
6915/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6916///
6917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi32&expand=5867)
6918#[inline]
6919#[target_feature(enable = "avx512f")]
6920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6921#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6922#[rustc_legacy_const_generics(3)]
6923pub fn _mm512_ternarylogic_epi32<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6924    unsafe {
6925        static_assert_uimm_bits!(IMM8, 8);
6926        let a = a.as_i32x16();
6927        let b = b.as_i32x16();
6928        let c = c.as_i32x16();
6929        let r = vpternlogd(a, b, c, IMM8);
6930        transmute(r)
6931    }
6932}
6933
6934/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6935///
6936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi32&expand=5865)
6937#[inline]
6938#[target_feature(enable = "avx512f")]
6939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6940#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6941#[rustc_legacy_const_generics(4)]
6942pub fn _mm512_mask_ternarylogic_epi32<const IMM8: i32>(
6943    src: __m512i,
6944    k: __mmask16,
6945    a: __m512i,
6946    b: __m512i,
6947) -> __m512i {
6948    unsafe {
6949        static_assert_uimm_bits!(IMM8, 8);
6950        let src = src.as_i32x16();
6951        let a = a.as_i32x16();
6952        let b = b.as_i32x16();
6953        let r = vpternlogd(src, a, b, IMM8);
6954        transmute(simd_select_bitmask(k, r, src))
6955    }
6956}
6957
6958/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6959///
6960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi32&expand=5866)
6961#[inline]
6962#[target_feature(enable = "avx512f")]
6963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6964#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6965#[rustc_legacy_const_generics(4)]
6966pub fn _mm512_maskz_ternarylogic_epi32<const IMM8: i32>(
6967    k: __mmask16,
6968    a: __m512i,
6969    b: __m512i,
6970    c: __m512i,
6971) -> __m512i {
6972    unsafe {
6973        static_assert_uimm_bits!(IMM8, 8);
6974        let a = a.as_i32x16();
6975        let b = b.as_i32x16();
6976        let c = c.as_i32x16();
6977        let r = vpternlogd(a, b, c, IMM8);
6978        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
6979    }
6980}
6981
6982/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6983///
6984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi32&expand=5864)
6985#[inline]
6986#[target_feature(enable = "avx512f,avx512vl")]
6987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6988#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6989#[rustc_legacy_const_generics(3)]
6990pub fn _mm256_ternarylogic_epi32<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6991    unsafe {
6992        static_assert_uimm_bits!(IMM8, 8);
6993        let a = a.as_i32x8();
6994        let b = b.as_i32x8();
6995        let c = c.as_i32x8();
6996        let r = vpternlogd256(a, b, c, IMM8);
6997        transmute(r)
6998    }
6999}
7000
7001/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
7002///
7003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi32&expand=5862)
7004#[inline]
7005#[target_feature(enable = "avx512f,avx512vl")]
7006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7007#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
7008#[rustc_legacy_const_generics(4)]
7009pub fn _mm256_mask_ternarylogic_epi32<const IMM8: i32>(
7010    src: __m256i,
7011    k: __mmask8,
7012    a: __m256i,
7013    b: __m256i,
7014) -> __m256i {
7015    unsafe {
7016        static_assert_uimm_bits!(IMM8, 8);
7017        let src = src.as_i32x8();
7018        let a = a.as_i32x8();
7019        let b = b.as_i32x8();
7020        let r = vpternlogd256(src, a, b, IMM8);
7021        transmute(simd_select_bitmask(k, r, src))
7022    }
7023}
7024
7025/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
7026///
7027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi32&expand=5863)
7028#[inline]
7029#[target_feature(enable = "avx512f,avx512vl")]
7030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7031#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
7032#[rustc_legacy_const_generics(4)]
7033pub fn _mm256_maskz_ternarylogic_epi32<const IMM8: i32>(
7034    k: __mmask8,
7035    a: __m256i,
7036    b: __m256i,
7037    c: __m256i,
7038) -> __m256i {
7039    unsafe {
7040        static_assert_uimm_bits!(IMM8, 8);
7041        let a = a.as_i32x8();
7042        let b = b.as_i32x8();
7043        let c = c.as_i32x8();
7044        let r = vpternlogd256(a, b, c, IMM8);
7045        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
7046    }
7047}
7048
7049/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
7050///
7051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi32&expand=5861)
7052#[inline]
7053#[target_feature(enable = "avx512f,avx512vl")]
7054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7055#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
7056#[rustc_legacy_const_generics(3)]
7057pub fn _mm_ternarylogic_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
7058    unsafe {
7059        static_assert_uimm_bits!(IMM8, 8);
7060        let a = a.as_i32x4();
7061        let b = b.as_i32x4();
7062        let c = c.as_i32x4();
7063        let r = vpternlogd128(a, b, c, IMM8);
7064        transmute(r)
7065    }
7066}
7067
7068/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
7069///
7070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi32&expand=5859)
7071#[inline]
7072#[target_feature(enable = "avx512f,avx512vl")]
7073#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7074#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
7075#[rustc_legacy_const_generics(4)]
7076pub fn _mm_mask_ternarylogic_epi32<const IMM8: i32>(
7077    src: __m128i,
7078    k: __mmask8,
7079    a: __m128i,
7080    b: __m128i,
7081) -> __m128i {
7082    unsafe {
7083        static_assert_uimm_bits!(IMM8, 8);
7084        let src = src.as_i32x4();
7085        let a = a.as_i32x4();
7086        let b = b.as_i32x4();
7087        let r = vpternlogd128(src, a, b, IMM8);
7088        transmute(simd_select_bitmask(k, r, src))
7089    }
7090}
7091
7092/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
7093///
7094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi32&expand=5860)
7095#[inline]
7096#[target_feature(enable = "avx512f,avx512vl")]
7097#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7098#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
7099#[rustc_legacy_const_generics(4)]
7100pub fn _mm_maskz_ternarylogic_epi32<const IMM8: i32>(
7101    k: __mmask8,
7102    a: __m128i,
7103    b: __m128i,
7104    c: __m128i,
7105) -> __m128i {
7106    unsafe {
7107        static_assert_uimm_bits!(IMM8, 8);
7108        let a = a.as_i32x4();
7109        let b = b.as_i32x4();
7110        let c = c.as_i32x4();
7111        let r = vpternlogd128(a, b, c, IMM8);
7112        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
7113    }
7114}
7115
7116/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
7117///
7118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi64&expand=5876)
7119#[inline]
7120#[target_feature(enable = "avx512f")]
7121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7122#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7123#[rustc_legacy_const_generics(3)]
7124pub fn _mm512_ternarylogic_epi64<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
7125    unsafe {
7126        static_assert_uimm_bits!(IMM8, 8);
7127        let a = a.as_i64x8();
7128        let b = b.as_i64x8();
7129        let c = c.as_i64x8();
7130        let r = vpternlogq(a, b, c, IMM8);
7131        transmute(r)
7132    }
7133}
7134
7135/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
7136///
7137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi64&expand=5874)
7138#[inline]
7139#[target_feature(enable = "avx512f")]
7140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7141#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7142#[rustc_legacy_const_generics(4)]
7143pub fn _mm512_mask_ternarylogic_epi64<const IMM8: i32>(
7144    src: __m512i,
7145    k: __mmask8,
7146    a: __m512i,
7147    b: __m512i,
7148) -> __m512i {
7149    unsafe {
7150        static_assert_uimm_bits!(IMM8, 8);
7151        let src = src.as_i64x8();
7152        let a = a.as_i64x8();
7153        let b = b.as_i64x8();
7154        let r = vpternlogq(src, a, b, IMM8);
7155        transmute(simd_select_bitmask(k, r, src))
7156    }
7157}
7158
7159/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
7160///
7161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi64&expand=5875)
7162#[inline]
7163#[target_feature(enable = "avx512f")]
7164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7165#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7166#[rustc_legacy_const_generics(4)]
7167pub fn _mm512_maskz_ternarylogic_epi64<const IMM8: i32>(
7168    k: __mmask8,
7169    a: __m512i,
7170    b: __m512i,
7171    c: __m512i,
7172) -> __m512i {
7173    unsafe {
7174        static_assert_uimm_bits!(IMM8, 8);
7175        let a = a.as_i64x8();
7176        let b = b.as_i64x8();
7177        let c = c.as_i64x8();
7178        let r = vpternlogq(a, b, c, IMM8);
7179        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
7180    }
7181}
7182
7183/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
7184///
7185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi64&expand=5873)
7186#[inline]
7187#[target_feature(enable = "avx512f,avx512vl")]
7188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7189#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7190#[rustc_legacy_const_generics(3)]
7191pub fn _mm256_ternarylogic_epi64<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
7192    unsafe {
7193        static_assert_uimm_bits!(IMM8, 8);
7194        let a = a.as_i64x4();
7195        let b = b.as_i64x4();
7196        let c = c.as_i64x4();
7197        let r = vpternlogq256(a, b, c, IMM8);
7198        transmute(r)
7199    }
7200}
7201
7202/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
7203///
7204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi64&expand=5871)
7205#[inline]
7206#[target_feature(enable = "avx512f,avx512vl")]
7207#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7208#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7209#[rustc_legacy_const_generics(4)]
7210pub fn _mm256_mask_ternarylogic_epi64<const IMM8: i32>(
7211    src: __m256i,
7212    k: __mmask8,
7213    a: __m256i,
7214    b: __m256i,
7215) -> __m256i {
7216    unsafe {
7217        static_assert_uimm_bits!(IMM8, 8);
7218        let src = src.as_i64x4();
7219        let a = a.as_i64x4();
7220        let b = b.as_i64x4();
7221        let r = vpternlogq256(src, a, b, IMM8);
7222        transmute(simd_select_bitmask(k, r, src))
7223    }
7224}
7225
7226/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
7227///
7228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi64&expand=5872)
7229#[inline]
7230#[target_feature(enable = "avx512f,avx512vl")]
7231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7232#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7233#[rustc_legacy_const_generics(4)]
7234pub fn _mm256_maskz_ternarylogic_epi64<const IMM8: i32>(
7235    k: __mmask8,
7236    a: __m256i,
7237    b: __m256i,
7238    c: __m256i,
7239) -> __m256i {
7240    unsafe {
7241        static_assert_uimm_bits!(IMM8, 8);
7242        let a = a.as_i64x4();
7243        let b = b.as_i64x4();
7244        let c = c.as_i64x4();
7245        let r = vpternlogq256(a, b, c, IMM8);
7246        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
7247    }
7248}
7249
7250/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
7251///
7252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi64&expand=5870)
7253#[inline]
7254#[target_feature(enable = "avx512f,avx512vl")]
7255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7256#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7257#[rustc_legacy_const_generics(3)]
7258pub fn _mm_ternarylogic_epi64<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
7259    unsafe {
7260        static_assert_uimm_bits!(IMM8, 8);
7261        let a = a.as_i64x2();
7262        let b = b.as_i64x2();
7263        let c = c.as_i64x2();
7264        let r = vpternlogq128(a, b, c, IMM8);
7265        transmute(r)
7266    }
7267}
7268
7269/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
7270///
7271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi64&expand=5868)
7272#[inline]
7273#[target_feature(enable = "avx512f,avx512vl")]
7274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7275#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7276#[rustc_legacy_const_generics(4)]
7277pub fn _mm_mask_ternarylogic_epi64<const IMM8: i32>(
7278    src: __m128i,
7279    k: __mmask8,
7280    a: __m128i,
7281    b: __m128i,
7282) -> __m128i {
7283    unsafe {
7284        static_assert_uimm_bits!(IMM8, 8);
7285        let src = src.as_i64x2();
7286        let a = a.as_i64x2();
7287        let b = b.as_i64x2();
7288        let r = vpternlogq128(src, a, b, IMM8);
7289        transmute(simd_select_bitmask(k, r, src))
7290    }
7291}
7292
7293/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
7294///
7295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi64&expand=5869)
7296#[inline]
7297#[target_feature(enable = "avx512f,avx512vl")]
7298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7299#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7300#[rustc_legacy_const_generics(4)]
7301pub fn _mm_maskz_ternarylogic_epi64<const IMM8: i32>(
7302    k: __mmask8,
7303    a: __m128i,
7304    b: __m128i,
7305    c: __m128i,
7306) -> __m128i {
7307    unsafe {
7308        static_assert_uimm_bits!(IMM8, 8);
7309        let a = a.as_i64x2();
7310        let b = b.as_i64x2();
7311        let c = c.as_i64x2();
7312        let r = vpternlogq128(a, b, c, IMM8);
7313        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
7314    }
7315}
7316
7317/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
7318/// The mantissa is normalized to the interval specified by interv, which can take the following values:
7319///    _MM_MANT_NORM_1_2     // interval [1, 2)
7320///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)
7321///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)
7322///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
7323/// The sign is determined by sc which can take the following values:
7324///    _MM_MANT_SIGN_src     // sign = sign(src)
7325///    _MM_MANT_SIGN_zero    // sign = 0
7326///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7327///
7328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_ps&expand=2880)
7329#[inline]
7330#[target_feature(enable = "avx512f")]
7331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7332#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7333#[rustc_legacy_const_generics(1, 2)]
7334pub fn _mm512_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7335    a: __m512,
7336) -> __m512 {
7337    unsafe {
7338        static_assert_uimm_bits!(NORM, 4);
7339        static_assert_uimm_bits!(SIGN, 2);
7340        let a = a.as_f32x16();
7341        let zero = f32x16::ZERO;
7342        let r = vgetmantps(
7343            a,
7344            SIGN << 2 | NORM,
7345            zero,
7346            0b11111111_11111111,
7347            _MM_FROUND_CUR_DIRECTION,
7348        );
7349        transmute(r)
7350    }
7351}
7352
7353/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7354/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7355///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7356///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7357///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7358///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7359/// The sign is determined by sc which can take the following values:\
7360///    _MM_MANT_SIGN_src     // sign = sign(src)\
7361///    _MM_MANT_SIGN_zero    // sign = 0\
7362///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7363///
7364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_ps&expand=2881)
7365#[inline]
7366#[target_feature(enable = "avx512f")]
7367#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7368#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7369#[rustc_legacy_const_generics(3, 4)]
7370pub fn _mm512_mask_getmant_ps<
7371    const NORM: _MM_MANTISSA_NORM_ENUM,
7372    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7373>(
7374    src: __m512,
7375    k: __mmask16,
7376    a: __m512,
7377) -> __m512 {
7378    unsafe {
7379        static_assert_uimm_bits!(NORM, 4);
7380        static_assert_uimm_bits!(SIGN, 2);
7381        let a = a.as_f32x16();
7382        let src = src.as_f32x16();
7383        let r = vgetmantps(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
7384        transmute(r)
7385    }
7386}
7387
7388/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7389/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7390///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7391///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7392///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7393///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7394/// The sign is determined by sc which can take the following values:\
7395///    _MM_MANT_SIGN_src     // sign = sign(src)\
7396///    _MM_MANT_SIGN_zero    // sign = 0\
7397///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7398///
7399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_ps&expand=2882)
7400#[inline]
7401#[target_feature(enable = "avx512f")]
7402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7403#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7404#[rustc_legacy_const_generics(2, 3)]
7405pub fn _mm512_maskz_getmant_ps<
7406    const NORM: _MM_MANTISSA_NORM_ENUM,
7407    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7408>(
7409    k: __mmask16,
7410    a: __m512,
7411) -> __m512 {
7412    unsafe {
7413        static_assert_uimm_bits!(NORM, 4);
7414        static_assert_uimm_bits!(SIGN, 2);
7415        let a = a.as_f32x16();
7416        let r = vgetmantps(
7417            a,
7418            SIGN << 2 | NORM,
7419            f32x16::ZERO,
7420            k,
7421            _MM_FROUND_CUR_DIRECTION,
7422        );
7423        transmute(r)
7424    }
7425}
7426
7427/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
7428/// The mantissa is normalized to the interval specified by interv, which can take the following values:
7429///    _MM_MANT_NORM_1_2     // interval [1, 2)
7430///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)
7431///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)
7432///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
7433/// The sign is determined by sc which can take the following values:
7434///    _MM_MANT_SIGN_src     // sign = sign(src)
7435///    _MM_MANT_SIGN_zero    // sign = 0
7436///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7437///
7438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_ps&expand=2877)
7439#[inline]
7440#[target_feature(enable = "avx512f,avx512vl")]
7441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7442#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7443#[rustc_legacy_const_generics(1, 2)]
7444pub fn _mm256_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7445    a: __m256,
7446) -> __m256 {
7447    unsafe {
7448        static_assert_uimm_bits!(NORM, 4);
7449        static_assert_uimm_bits!(SIGN, 2);
7450        let a = a.as_f32x8();
7451        let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, 0b11111111);
7452        transmute(r)
7453    }
7454}
7455
7456/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7457/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7458///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7459///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7460///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7461///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7462/// The sign is determined by sc which can take the following values:\
7463///    _MM_MANT_SIGN_src     // sign = sign(src)\
7464///    _MM_MANT_SIGN_zero    // sign = 0\
7465///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7466///
7467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_ps&expand=2878)
7468#[inline]
7469#[target_feature(enable = "avx512f,avx512vl")]
7470#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7471#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7472#[rustc_legacy_const_generics(3, 4)]
7473pub fn _mm256_mask_getmant_ps<
7474    const NORM: _MM_MANTISSA_NORM_ENUM,
7475    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7476>(
7477    src: __m256,
7478    k: __mmask8,
7479    a: __m256,
7480) -> __m256 {
7481    unsafe {
7482        static_assert_uimm_bits!(NORM, 4);
7483        static_assert_uimm_bits!(SIGN, 2);
7484        let a = a.as_f32x8();
7485        let src = src.as_f32x8();
7486        let r = vgetmantps256(a, SIGN << 2 | NORM, src, k);
7487        transmute(r)
7488    }
7489}
7490
7491/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7492/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7493///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7494///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7495///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7496///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7497/// The sign is determined by sc which can take the following values:\
7498///    _MM_MANT_SIGN_src     // sign = sign(src)\
7499///    _MM_MANT_SIGN_zero    // sign = 0\
7500///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7501///
7502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_ps&expand=2879)
7503#[inline]
7504#[target_feature(enable = "avx512f,avx512vl")]
7505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7506#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7507#[rustc_legacy_const_generics(2, 3)]
7508pub fn _mm256_maskz_getmant_ps<
7509    const NORM: _MM_MANTISSA_NORM_ENUM,
7510    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7511>(
7512    k: __mmask8,
7513    a: __m256,
7514) -> __m256 {
7515    unsafe {
7516        static_assert_uimm_bits!(NORM, 4);
7517        static_assert_uimm_bits!(SIGN, 2);
7518        let a = a.as_f32x8();
7519        let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, k);
7520        transmute(r)
7521    }
7522}
7523
7524/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
7525/// The mantissa is normalized to the interval specified by interv, which can take the following values:
7526///    _MM_MANT_NORM_1_2     // interval [1, 2)
7527///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)
7528///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)
7529///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
7530/// The sign is determined by sc which can take the following values:
7531///    _MM_MANT_SIGN_src     // sign = sign(src)
7532///    _MM_MANT_SIGN_zero    // sign = 0
7533///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7534///
7535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_ps&expand=2874)
7536#[inline]
7537#[target_feature(enable = "avx512f,avx512vl")]
7538#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7539#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7540#[rustc_legacy_const_generics(1, 2)]
7541pub fn _mm_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7542    a: __m128,
7543) -> __m128 {
7544    unsafe {
7545        static_assert_uimm_bits!(NORM, 4);
7546        static_assert_uimm_bits!(SIGN, 2);
7547        let a = a.as_f32x4();
7548        let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, 0b00001111);
7549        transmute(r)
7550    }
7551}
7552
7553/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7554/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7555///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7556///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7557///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7558///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7559/// The sign is determined by sc which can take the following values:\
7560///    _MM_MANT_SIGN_src     // sign = sign(src)\
7561///    _MM_MANT_SIGN_zero    // sign = 0\
7562///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7563///
7564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_ps&expand=2875)
7565#[inline]
7566#[target_feature(enable = "avx512f,avx512vl")]
7567#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7568#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7569#[rustc_legacy_const_generics(3, 4)]
7570pub fn _mm_mask_getmant_ps<
7571    const NORM: _MM_MANTISSA_NORM_ENUM,
7572    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7573>(
7574    src: __m128,
7575    k: __mmask8,
7576    a: __m128,
7577) -> __m128 {
7578    unsafe {
7579        static_assert_uimm_bits!(NORM, 4);
7580        static_assert_uimm_bits!(SIGN, 2);
7581        let a = a.as_f32x4();
7582        let src = src.as_f32x4();
7583        let r = vgetmantps128(a, SIGN << 2 | NORM, src, k);
7584        transmute(r)
7585    }
7586}
7587
7588/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7589/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7590///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7591///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7592///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7593///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7594/// The sign is determined by sc which can take the following values:\
7595///    _MM_MANT_SIGN_src     // sign = sign(src)\
7596///    _MM_MANT_SIGN_zero    // sign = 0\
7597///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7598///
7599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_ps&expand=2876)
7600#[inline]
7601#[target_feature(enable = "avx512f,avx512vl")]
7602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7603#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7604#[rustc_legacy_const_generics(2, 3)]
7605pub fn _mm_maskz_getmant_ps<
7606    const NORM: _MM_MANTISSA_NORM_ENUM,
7607    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7608>(
7609    k: __mmask8,
7610    a: __m128,
7611) -> __m128 {
7612    unsafe {
7613        static_assert_uimm_bits!(NORM, 4);
7614        static_assert_uimm_bits!(SIGN, 2);
7615        let a = a.as_f32x4();
7616        let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, k);
7617        transmute(r)
7618    }
7619}
7620
7621/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7622/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7623///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7624///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7625///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7626///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7627/// The sign is determined by sc which can take the following values:\
7628///    _MM_MANT_SIGN_src     // sign = sign(src)\
7629///    _MM_MANT_SIGN_zero    // sign = 0\
7630///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7631///
7632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_pd&expand=2871)
7633#[inline]
7634#[target_feature(enable = "avx512f")]
7635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7636#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7637#[rustc_legacy_const_generics(1, 2)]
7638pub fn _mm512_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7639    a: __m512d,
7640) -> __m512d {
7641    unsafe {
7642        static_assert_uimm_bits!(NORM, 4);
7643        static_assert_uimm_bits!(SIGN, 2);
7644        let a = a.as_f64x8();
7645        let zero = f64x8::ZERO;
7646        let r = vgetmantpd(
7647            a,
7648            SIGN << 2 | NORM,
7649            zero,
7650            0b11111111,
7651            _MM_FROUND_CUR_DIRECTION,
7652        );
7653        transmute(r)
7654    }
7655}
7656
7657/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7658/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7659///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7660///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7661///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7662///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7663/// The sign is determined by sc which can take the following values:\
7664///    _MM_MANT_SIGN_src     // sign = sign(src)\
7665///    _MM_MANT_SIGN_zero    // sign = 0\
7666///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7667///
7668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_pd&expand=2872)
7669#[inline]
7670#[target_feature(enable = "avx512f")]
7671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7672#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7673#[rustc_legacy_const_generics(3, 4)]
7674pub fn _mm512_mask_getmant_pd<
7675    const NORM: _MM_MANTISSA_NORM_ENUM,
7676    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7677>(
7678    src: __m512d,
7679    k: __mmask8,
7680    a: __m512d,
7681) -> __m512d {
7682    unsafe {
7683        static_assert_uimm_bits!(NORM, 4);
7684        static_assert_uimm_bits!(SIGN, 2);
7685        let a = a.as_f64x8();
7686        let src = src.as_f64x8();
7687        let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
7688        transmute(r)
7689    }
7690}
7691
7692/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7693/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7694///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7695///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7696///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7697///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7698/// The sign is determined by sc which can take the following values:\
7699///    _MM_MANT_SIGN_src     // sign = sign(src)\
7700///    _MM_MANT_SIGN_zero    // sign = 0\
7701///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7702///
7703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_pd&expand=2873)
7704#[inline]
7705#[target_feature(enable = "avx512f")]
7706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7707#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7708#[rustc_legacy_const_generics(2, 3)]
7709pub fn _mm512_maskz_getmant_pd<
7710    const NORM: _MM_MANTISSA_NORM_ENUM,
7711    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7712>(
7713    k: __mmask8,
7714    a: __m512d,
7715) -> __m512d {
7716    unsafe {
7717        static_assert_uimm_bits!(NORM, 4);
7718        static_assert_uimm_bits!(SIGN, 2);
7719        let a = a.as_f64x8();
7720        let r = vgetmantpd(
7721            a,
7722            SIGN << 2 | NORM,
7723            f64x8::ZERO,
7724            k,
7725            _MM_FROUND_CUR_DIRECTION,
7726        );
7727        transmute(r)
7728    }
7729}
7730
7731/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7732/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7733///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7734///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7735///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7736///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7737/// The sign is determined by sc which can take the following values:\
7738///    _MM_MANT_SIGN_src     // sign = sign(src)\
7739///    _MM_MANT_SIGN_zero    // sign = 0\
7740///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7741///
7742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_pd&expand=2868)
7743#[inline]
7744#[target_feature(enable = "avx512f,avx512vl")]
7745#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7746#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7747#[rustc_legacy_const_generics(1, 2)]
7748pub fn _mm256_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7749    a: __m256d,
7750) -> __m256d {
7751    unsafe {
7752        static_assert_uimm_bits!(NORM, 4);
7753        static_assert_uimm_bits!(SIGN, 2);
7754        let a = a.as_f64x4();
7755        let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, 0b00001111);
7756        transmute(r)
7757    }
7758}
7759
7760/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7761/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7762///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7763///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7764///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7765///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7766/// The sign is determined by sc which can take the following values:\
7767///    _MM_MANT_SIGN_src     // sign = sign(src)\
7768///    _MM_MANT_SIGN_zero    // sign = 0\
7769///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7770///
7771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_pd&expand=2869)
7772#[inline]
7773#[target_feature(enable = "avx512f,avx512vl")]
7774#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7775#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7776#[rustc_legacy_const_generics(3, 4)]
7777pub fn _mm256_mask_getmant_pd<
7778    const NORM: _MM_MANTISSA_NORM_ENUM,
7779    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7780>(
7781    src: __m256d,
7782    k: __mmask8,
7783    a: __m256d,
7784) -> __m256d {
7785    unsafe {
7786        static_assert_uimm_bits!(NORM, 4);
7787        static_assert_uimm_bits!(SIGN, 2);
7788        let a = a.as_f64x4();
7789        let src = src.as_f64x4();
7790        let r = vgetmantpd256(a, SIGN << 2 | NORM, src, k);
7791        transmute(r)
7792    }
7793}
7794
7795/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7796/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7797///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7798///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7799///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7800///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7801/// The sign is determined by sc which can take the following values:\
7802///    _MM_MANT_SIGN_src     // sign = sign(src)\
7803///    _MM_MANT_SIGN_zero    // sign = 0\
7804///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7805///
7806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_pd&expand=2870)
7807#[inline]
7808#[target_feature(enable = "avx512f,avx512vl")]
7809#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7810#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7811#[rustc_legacy_const_generics(2, 3)]
7812pub fn _mm256_maskz_getmant_pd<
7813    const NORM: _MM_MANTISSA_NORM_ENUM,
7814    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7815>(
7816    k: __mmask8,
7817    a: __m256d,
7818) -> __m256d {
7819    unsafe {
7820        static_assert_uimm_bits!(NORM, 4);
7821        static_assert_uimm_bits!(SIGN, 2);
7822        let a = a.as_f64x4();
7823        let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, k);
7824        transmute(r)
7825    }
7826}
7827
7828/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7829/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7830///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7831///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7832///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7833///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7834/// The sign is determined by sc which can take the following values:\
7835///    _MM_MANT_SIGN_src     // sign = sign(src)\
7836///    _MM_MANT_SIGN_zero    // sign = 0\
7837///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7838///
7839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_pd&expand=2865)
7840#[inline]
7841#[target_feature(enable = "avx512f,avx512vl")]
7842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7843#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7844#[rustc_legacy_const_generics(1, 2)]
7845pub fn _mm_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7846    a: __m128d,
7847) -> __m128d {
7848    unsafe {
7849        static_assert_uimm_bits!(NORM, 4);
7850        static_assert_uimm_bits!(SIGN, 2);
7851        let a = a.as_f64x2();
7852        let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, 0b00000011);
7853        transmute(r)
7854    }
7855}
7856
7857/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7858/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7859///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7860///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7861///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7862///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7863/// The sign is determined by sc which can take the following values:\
7864///    _MM_MANT_SIGN_src     // sign = sign(src)\
7865///    _MM_MANT_SIGN_zero    // sign = 0\
7866///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7867///
7868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_pd&expand=2866)
7869#[inline]
7870#[target_feature(enable = "avx512f,avx512vl")]
7871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7872#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7873#[rustc_legacy_const_generics(3, 4)]
7874pub fn _mm_mask_getmant_pd<
7875    const NORM: _MM_MANTISSA_NORM_ENUM,
7876    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7877>(
7878    src: __m128d,
7879    k: __mmask8,
7880    a: __m128d,
7881) -> __m128d {
7882    unsafe {
7883        static_assert_uimm_bits!(NORM, 4);
7884        static_assert_uimm_bits!(SIGN, 2);
7885        let a = a.as_f64x2();
7886        let src = src.as_f64x2();
7887        let r = vgetmantpd128(a, SIGN << 2 | NORM, src, k);
7888        transmute(r)
7889    }
7890}
7891
7892/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7893/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7894///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7895///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7896///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7897///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7898/// The sign is determined by sc which can take the following values:\
7899///    _MM_MANT_SIGN_src     // sign = sign(src)\
7900///    _MM_MANT_SIGN_zero    // sign = 0\
7901///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7902///
7903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_pd&expand=2867)
7904#[inline]
7905#[target_feature(enable = "avx512f,avx512vl")]
7906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7907#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7908#[rustc_legacy_const_generics(2, 3)]
7909pub fn _mm_maskz_getmant_pd<
7910    const NORM: _MM_MANTISSA_NORM_ENUM,
7911    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7912>(
7913    k: __mmask8,
7914    a: __m128d,
7915) -> __m128d {
7916    unsafe {
7917        static_assert_uimm_bits!(NORM, 4);
7918        static_assert_uimm_bits!(SIGN, 2);
7919        let a = a.as_f64x2();
7920        let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, k);
7921        transmute(r)
7922    }
7923}
7924
7925/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7926///
7927/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7928/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7929/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7930/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7931/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7932/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7933///
7934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_ps&expand=145)
7935#[inline]
7936#[target_feature(enable = "avx512f")]
7937#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7938#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7939#[rustc_legacy_const_generics(2)]
7940pub fn _mm512_add_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7941    unsafe {
7942        static_assert_rounding!(ROUNDING);
7943        let a = a.as_f32x16();
7944        let b = b.as_f32x16();
7945        let r = vaddps(a, b, ROUNDING);
7946        transmute(r)
7947    }
7948}
7949
7950/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7951///
7952/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7953/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7954/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7955/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7956/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7957/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7958///
7959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_ps&expand=146)
7960#[inline]
7961#[target_feature(enable = "avx512f")]
7962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7963#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7964#[rustc_legacy_const_generics(4)]
7965pub fn _mm512_mask_add_round_ps<const ROUNDING: i32>(
7966    src: __m512,
7967    k: __mmask16,
7968    a: __m512,
7969    b: __m512,
7970) -> __m512 {
7971    unsafe {
7972        static_assert_rounding!(ROUNDING);
7973        let a = a.as_f32x16();
7974        let b = b.as_f32x16();
7975        let r = vaddps(a, b, ROUNDING);
7976        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7977    }
7978}
7979
7980/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7981///
7982/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7983/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7984/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7985/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7986/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7987/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7988///
7989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_ps&expand=147)
7990#[inline]
7991#[target_feature(enable = "avx512f")]
7992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7993#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7994#[rustc_legacy_const_generics(3)]
7995pub fn _mm512_maskz_add_round_ps<const ROUNDING: i32>(
7996    k: __mmask16,
7997    a: __m512,
7998    b: __m512,
7999) -> __m512 {
8000    unsafe {
8001        static_assert_rounding!(ROUNDING);
8002        let a = a.as_f32x16();
8003        let b = b.as_f32x16();
8004        let r = vaddps(a, b, ROUNDING);
8005        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8006    }
8007}
8008
8009/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
8010///
8011/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8012/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8013/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8014/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8015/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8016/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8017///
8018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_pd&expand=142)
8019#[inline]
8020#[target_feature(enable = "avx512f")]
8021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8022#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
8023#[rustc_legacy_const_generics(2)]
8024pub fn _mm512_add_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8025    unsafe {
8026        static_assert_rounding!(ROUNDING);
8027        let a = a.as_f64x8();
8028        let b = b.as_f64x8();
8029        let r = vaddpd(a, b, ROUNDING);
8030        transmute(r)
8031    }
8032}
8033
8034/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8035///
8036/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8037/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8038/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8039/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8040/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8041/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8042///
8043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_pd&expand=143)
8044#[inline]
8045#[target_feature(enable = "avx512f")]
8046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8047#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
8048#[rustc_legacy_const_generics(4)]
8049pub fn _mm512_mask_add_round_pd<const ROUNDING: i32>(
8050    src: __m512d,
8051    k: __mmask8,
8052    a: __m512d,
8053    b: __m512d,
8054) -> __m512d {
8055    unsafe {
8056        static_assert_rounding!(ROUNDING);
8057        let a = a.as_f64x8();
8058        let b = b.as_f64x8();
8059        let r = vaddpd(a, b, ROUNDING);
8060        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8061    }
8062}
8063
8064/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8065///
8066/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8067/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8068/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8069/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8070/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8071/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8072///
8073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_pd&expand=144)
8074#[inline]
8075#[target_feature(enable = "avx512f")]
8076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8077#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
8078#[rustc_legacy_const_generics(3)]
8079pub fn _mm512_maskz_add_round_pd<const ROUNDING: i32>(
8080    k: __mmask8,
8081    a: __m512d,
8082    b: __m512d,
8083) -> __m512d {
8084    unsafe {
8085        static_assert_rounding!(ROUNDING);
8086        let a = a.as_f64x8();
8087        let b = b.as_f64x8();
8088        let r = vaddpd(a, b, ROUNDING);
8089        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8090    }
8091}
8092
8093/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
8094///
8095/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8096/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8097/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8098/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8099/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8100/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8101///
8102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_ps&expand=5739)
8103#[inline]
8104#[target_feature(enable = "avx512f")]
8105#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8106#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
8107#[rustc_legacy_const_generics(2)]
8108pub fn _mm512_sub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8109    unsafe {
8110        static_assert_rounding!(ROUNDING);
8111        let a = a.as_f32x16();
8112        let b = b.as_f32x16();
8113        let r = vsubps(a, b, ROUNDING);
8114        transmute(r)
8115    }
8116}
8117
8118/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8119///
8120/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8121/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8122/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8123/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8124/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8125/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8126///
8127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_ps&expand=5737)
8128#[inline]
8129#[target_feature(enable = "avx512f")]
8130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8131#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
8132#[rustc_legacy_const_generics(4)]
8133pub fn _mm512_mask_sub_round_ps<const ROUNDING: i32>(
8134    src: __m512,
8135    k: __mmask16,
8136    a: __m512,
8137    b: __m512,
8138) -> __m512 {
8139    unsafe {
8140        static_assert_rounding!(ROUNDING);
8141        let a = a.as_f32x16();
8142        let b = b.as_f32x16();
8143        let r = vsubps(a, b, ROUNDING);
8144        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8145    }
8146}
8147
8148/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8149///
8150/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8151/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8152/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8153/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8154/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8155/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8156///
8157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_ps&expand=5738)
8158#[inline]
8159#[target_feature(enable = "avx512f")]
8160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8161#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
8162#[rustc_legacy_const_generics(3)]
8163pub fn _mm512_maskz_sub_round_ps<const ROUNDING: i32>(
8164    k: __mmask16,
8165    a: __m512,
8166    b: __m512,
8167) -> __m512 {
8168    unsafe {
8169        static_assert_rounding!(ROUNDING);
8170        let a = a.as_f32x16();
8171        let b = b.as_f32x16();
8172        let r = vsubps(a, b, ROUNDING);
8173        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8174    }
8175}
8176
8177/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
8178///
8179/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8180/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8181/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8182/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8183/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8184/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8185///
8186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_pd&expand=5736)
8187#[inline]
8188#[target_feature(enable = "avx512f")]
8189#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8190#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
8191#[rustc_legacy_const_generics(2)]
8192pub fn _mm512_sub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8193    unsafe {
8194        static_assert_rounding!(ROUNDING);
8195        let a = a.as_f64x8();
8196        let b = b.as_f64x8();
8197        let r = vsubpd(a, b, ROUNDING);
8198        transmute(r)
8199    }
8200}
8201
8202/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8203///
8204/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8205/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8206/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8207/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8208/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8209/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8210///
8211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_pd&expand=5734)
8212#[inline]
8213#[target_feature(enable = "avx512f")]
8214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8215#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
8216#[rustc_legacy_const_generics(4)]
8217pub fn _mm512_mask_sub_round_pd<const ROUNDING: i32>(
8218    src: __m512d,
8219    k: __mmask8,
8220    a: __m512d,
8221    b: __m512d,
8222) -> __m512d {
8223    unsafe {
8224        static_assert_rounding!(ROUNDING);
8225        let a = a.as_f64x8();
8226        let b = b.as_f64x8();
8227        let r = vsubpd(a, b, ROUNDING);
8228        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8229    }
8230}
8231
8232/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8233///
8234/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8235/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8236/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8237/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8238/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8239/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8240///
8241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_pd&expand=5735)
8242#[inline]
8243#[target_feature(enable = "avx512f")]
8244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8245#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
8246#[rustc_legacy_const_generics(3)]
8247pub fn _mm512_maskz_sub_round_pd<const ROUNDING: i32>(
8248    k: __mmask8,
8249    a: __m512d,
8250    b: __m512d,
8251) -> __m512d {
8252    unsafe {
8253        static_assert_rounding!(ROUNDING);
8254        let a = a.as_f64x8();
8255        let b = b.as_f64x8();
8256        let r = vsubpd(a, b, ROUNDING);
8257        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8258    }
8259}
8260
8261/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
8262///
8263/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8264/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8265/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8266/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8267/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8268/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8269///
8270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_ps&expand=3940)
8271#[inline]
8272#[target_feature(enable = "avx512f")]
8273#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8274#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8275#[rustc_legacy_const_generics(2)]
8276pub fn _mm512_mul_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8277    unsafe {
8278        static_assert_rounding!(ROUNDING);
8279        let a = a.as_f32x16();
8280        let b = b.as_f32x16();
8281        let r = vmulps(a, b, ROUNDING);
8282        transmute(r)
8283    }
8284}
8285
8286/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8287///
8288/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8289/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8290/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8291/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8292/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8293/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8294///
8295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_ps&expand=3938)
8296#[inline]
8297#[target_feature(enable = "avx512f")]
8298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8299#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8300#[rustc_legacy_const_generics(4)]
8301pub fn _mm512_mask_mul_round_ps<const ROUNDING: i32>(
8302    src: __m512,
8303    k: __mmask16,
8304    a: __m512,
8305    b: __m512,
8306) -> __m512 {
8307    unsafe {
8308        static_assert_rounding!(ROUNDING);
8309        let a = a.as_f32x16();
8310        let b = b.as_f32x16();
8311        let r = vmulps(a, b, ROUNDING);
8312        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8313    }
8314}
8315
8316/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8317///
8318/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8319/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8320/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8321/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8322/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8323/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8324///
8325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_ps&expand=3939)
8326#[inline]
8327#[target_feature(enable = "avx512f")]
8328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8329#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8330#[rustc_legacy_const_generics(3)]
8331pub fn _mm512_maskz_mul_round_ps<const ROUNDING: i32>(
8332    k: __mmask16,
8333    a: __m512,
8334    b: __m512,
8335) -> __m512 {
8336    unsafe {
8337        static_assert_rounding!(ROUNDING);
8338        let a = a.as_f32x16();
8339        let b = b.as_f32x16();
8340        let r = vmulps(a, b, ROUNDING);
8341        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8342    }
8343}
8344
8345/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
8346///
8347/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8348/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8349/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8350/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8351/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8352/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8353///
8354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_pd&expand=3937)
8355#[inline]
8356#[target_feature(enable = "avx512f")]
8357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8358#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8359#[rustc_legacy_const_generics(2)]
8360pub fn _mm512_mul_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8361    unsafe {
8362        static_assert_rounding!(ROUNDING);
8363        let a = a.as_f64x8();
8364        let b = b.as_f64x8();
8365        let r = vmulpd(a, b, ROUNDING);
8366        transmute(r)
8367    }
8368}
8369
8370/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8371///
8372/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8373/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8374/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8375/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8376/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8377/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8378///
8379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_pd&expand=3935)
8380#[inline]
8381#[target_feature(enable = "avx512f")]
8382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8383#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8384#[rustc_legacy_const_generics(4)]
8385pub fn _mm512_mask_mul_round_pd<const ROUNDING: i32>(
8386    src: __m512d,
8387    k: __mmask8,
8388    a: __m512d,
8389    b: __m512d,
8390) -> __m512d {
8391    unsafe {
8392        static_assert_rounding!(ROUNDING);
8393        let a = a.as_f64x8();
8394        let b = b.as_f64x8();
8395        let r = vmulpd(a, b, ROUNDING);
8396        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8397    }
8398}
8399
8400/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8401///
8402/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8403/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8404/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8405/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8406/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8407/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8408///
8409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_pd&expand=3939)
8410#[inline]
8411#[target_feature(enable = "avx512f")]
8412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8413#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8414#[rustc_legacy_const_generics(3)]
8415pub fn _mm512_maskz_mul_round_pd<const ROUNDING: i32>(
8416    k: __mmask8,
8417    a: __m512d,
8418    b: __m512d,
8419) -> __m512d {
8420    unsafe {
8421        static_assert_rounding!(ROUNDING);
8422        let a = a.as_f64x8();
8423        let b = b.as_f64x8();
8424        let r = vmulpd(a, b, ROUNDING);
8425        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8426    }
8427}
8428
8429/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8430///
8431/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8432/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8433/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8434/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8435/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8436/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8437///
8438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_ps&expand=2168)
8439#[inline]
8440#[target_feature(enable = "avx512f")]
8441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8442#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8443#[rustc_legacy_const_generics(2)]
8444pub fn _mm512_div_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8445    unsafe {
8446        static_assert_rounding!(ROUNDING);
8447        let a = a.as_f32x16();
8448        let b = b.as_f32x16();
8449        let r = vdivps(a, b, ROUNDING);
8450        transmute(r)
8451    }
8452}
8453
8454/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8455///
8456/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8457/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8458/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8459/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8460/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8461/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8462///
8463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_ps&expand=2169)
8464#[inline]
8465#[target_feature(enable = "avx512f")]
8466#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8467#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8468#[rustc_legacy_const_generics(4)]
8469pub fn _mm512_mask_div_round_ps<const ROUNDING: i32>(
8470    src: __m512,
8471    k: __mmask16,
8472    a: __m512,
8473    b: __m512,
8474) -> __m512 {
8475    unsafe {
8476        static_assert_rounding!(ROUNDING);
8477        let a = a.as_f32x16();
8478        let b = b.as_f32x16();
8479        let r = vdivps(a, b, ROUNDING);
8480        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8481    }
8482}
8483
8484/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8485///
8486/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8487/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8488/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8489/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8490/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8491/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8492///
8493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_ps&expand=2170)
8494#[inline]
8495#[target_feature(enable = "avx512f")]
8496#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8497#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8498#[rustc_legacy_const_generics(3)]
8499pub fn _mm512_maskz_div_round_ps<const ROUNDING: i32>(
8500    k: __mmask16,
8501    a: __m512,
8502    b: __m512,
8503) -> __m512 {
8504    unsafe {
8505        static_assert_rounding!(ROUNDING);
8506        let a = a.as_f32x16();
8507        let b = b.as_f32x16();
8508        let r = vdivps(a, b, ROUNDING);
8509        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8510    }
8511}
8512
8513/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, =and store the results in dst.\
8514///
8515/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8516/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8517/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8518/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8519/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8520/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8521///
8522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_pd&expand=2165)
8523#[inline]
8524#[target_feature(enable = "avx512f")]
8525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8526#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8527#[rustc_legacy_const_generics(2)]
8528pub fn _mm512_div_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8529    unsafe {
8530        static_assert_rounding!(ROUNDING);
8531        let a = a.as_f64x8();
8532        let b = b.as_f64x8();
8533        let r = vdivpd(a, b, ROUNDING);
8534        transmute(r)
8535    }
8536}
8537
8538/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8539///
8540/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8541/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8542/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8543/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8544/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8545/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8546///
8547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_pd&expand=2166)
8548#[inline]
8549#[target_feature(enable = "avx512f")]
8550#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8551#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8552#[rustc_legacy_const_generics(4)]
8553pub fn _mm512_mask_div_round_pd<const ROUNDING: i32>(
8554    src: __m512d,
8555    k: __mmask8,
8556    a: __m512d,
8557    b: __m512d,
8558) -> __m512d {
8559    unsafe {
8560        static_assert_rounding!(ROUNDING);
8561        let a = a.as_f64x8();
8562        let b = b.as_f64x8();
8563        let r = vdivpd(a, b, ROUNDING);
8564        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8565    }
8566}
8567
8568/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8569///
8570/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8571/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8572/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8573/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8574/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8575/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8576///
8577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_pd&expand=2167)
8578#[inline]
8579#[target_feature(enable = "avx512f")]
8580#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8581#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8582#[rustc_legacy_const_generics(3)]
8583pub fn _mm512_maskz_div_round_pd<const ROUNDING: i32>(
8584    k: __mmask8,
8585    a: __m512d,
8586    b: __m512d,
8587) -> __m512d {
8588    unsafe {
8589        static_assert_rounding!(ROUNDING);
8590        let a = a.as_f64x8();
8591        let b = b.as_f64x8();
8592        let r = vdivpd(a, b, ROUNDING);
8593        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8594    }
8595}
8596
8597/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
8598///
8599/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8600/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8601/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8602/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8603/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8604/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8605///
8606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_ps&expand=5377)
8607#[inline]
8608#[target_feature(enable = "avx512f")]
8609#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8610#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8611#[rustc_legacy_const_generics(1)]
8612pub fn _mm512_sqrt_round_ps<const ROUNDING: i32>(a: __m512) -> __m512 {
8613    unsafe {
8614        static_assert_rounding!(ROUNDING);
8615        let a = a.as_f32x16();
8616        let r = vsqrtps(a, ROUNDING);
8617        transmute(r)
8618    }
8619}
8620
8621/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8622///
8623/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8624/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8625/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8626/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8627/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8628/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8629///
8630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_ps&expand=5375)
8631#[inline]
8632#[target_feature(enable = "avx512f")]
8633#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8634#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8635#[rustc_legacy_const_generics(3)]
8636pub fn _mm512_mask_sqrt_round_ps<const ROUNDING: i32>(
8637    src: __m512,
8638    k: __mmask16,
8639    a: __m512,
8640) -> __m512 {
8641    unsafe {
8642        static_assert_rounding!(ROUNDING);
8643        let a = a.as_f32x16();
8644        let r = vsqrtps(a, ROUNDING);
8645        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8646    }
8647}
8648
8649/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8650///
8651/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8652/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8653/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8654/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8655/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8656/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8657///
8658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_ps&expand=5376)
8659#[inline]
8660#[target_feature(enable = "avx512f")]
8661#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8662#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8663#[rustc_legacy_const_generics(2)]
8664pub fn _mm512_maskz_sqrt_round_ps<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512 {
8665    unsafe {
8666        static_assert_rounding!(ROUNDING);
8667        let a = a.as_f32x16();
8668        let r = vsqrtps(a, ROUNDING);
8669        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8670    }
8671}
8672
8673/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
8674///
8675/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8676/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8677/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8678/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8679/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8680/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8681///
8682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_pd&expand=5374)
8683#[inline]
8684#[target_feature(enable = "avx512f")]
8685#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8686#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8687#[rustc_legacy_const_generics(1)]
8688pub fn _mm512_sqrt_round_pd<const ROUNDING: i32>(a: __m512d) -> __m512d {
8689    unsafe {
8690        static_assert_rounding!(ROUNDING);
8691        let a = a.as_f64x8();
8692        let r = vsqrtpd(a, ROUNDING);
8693        transmute(r)
8694    }
8695}
8696
8697/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8698///
8699/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8700/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8701/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8702/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8703/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8704/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8705///
8706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_pd&expand=5372)
8707#[inline]
8708#[target_feature(enable = "avx512f")]
8709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8710#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8711#[rustc_legacy_const_generics(3)]
8712pub fn _mm512_mask_sqrt_round_pd<const ROUNDING: i32>(
8713    src: __m512d,
8714    k: __mmask8,
8715    a: __m512d,
8716) -> __m512d {
8717    unsafe {
8718        static_assert_rounding!(ROUNDING);
8719        let a = a.as_f64x8();
8720        let r = vsqrtpd(a, ROUNDING);
8721        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8722    }
8723}
8724
8725/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8726///
8727/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8728/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8729/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8730/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8731/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8732/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8733///
8734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_pd&expand=5373)
8735#[inline]
8736#[target_feature(enable = "avx512f")]
8737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8738#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8739#[rustc_legacy_const_generics(2)]
8740pub fn _mm512_maskz_sqrt_round_pd<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512d {
8741    unsafe {
8742        static_assert_rounding!(ROUNDING);
8743        let a = a.as_f64x8();
8744        let r = vsqrtpd(a, ROUNDING);
8745        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8746    }
8747}
8748
8749/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8750///
8751/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8752/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8753/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8754/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8755/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8756/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8757///
8758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_ps&expand=2565)
8759#[inline]
8760#[target_feature(enable = "avx512f")]
8761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8762#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8763#[rustc_legacy_const_generics(3)]
8764pub fn _mm512_fmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8765    unsafe {
8766        static_assert_rounding!(ROUNDING);
8767        vfmadd132psround(a, b, c, ROUNDING)
8768    }
8769}
8770
8771/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8772///
8773/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8774/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8775/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8776/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8777/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8778/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8779///
8780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_ps&expand=2566)
8781#[inline]
8782#[target_feature(enable = "avx512f")]
8783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8784#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8785#[rustc_legacy_const_generics(4)]
8786pub fn _mm512_mask_fmadd_round_ps<const ROUNDING: i32>(
8787    a: __m512,
8788    k: __mmask16,
8789    b: __m512,
8790    c: __m512,
8791) -> __m512 {
8792    unsafe {
8793        static_assert_rounding!(ROUNDING);
8794        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), a)
8795    }
8796}
8797
8798/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in a using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8799///
8800/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8801/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8802/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8803/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8804/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8805/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8806///
8807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_ps&expand=2568)
8808#[inline]
8809#[target_feature(enable = "avx512f")]
8810#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8811#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8812#[rustc_legacy_const_generics(4)]
8813pub fn _mm512_maskz_fmadd_round_ps<const ROUNDING: i32>(
8814    k: __mmask16,
8815    a: __m512,
8816    b: __m512,
8817    c: __m512,
8818) -> __m512 {
8819    unsafe {
8820        static_assert_rounding!(ROUNDING);
8821        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), _mm512_setzero_ps())
8822    }
8823}
8824
8825/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8826///
8827/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8828/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8829/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8830/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8831/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8832/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8833///
8834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_ps&expand=2567)
8835#[inline]
8836#[target_feature(enable = "avx512f")]
8837#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8838#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8839#[rustc_legacy_const_generics(4)]
8840pub fn _mm512_mask3_fmadd_round_ps<const ROUNDING: i32>(
8841    a: __m512,
8842    b: __m512,
8843    c: __m512,
8844    k: __mmask16,
8845) -> __m512 {
8846    unsafe {
8847        static_assert_rounding!(ROUNDING);
8848        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), c)
8849    }
8850}
8851
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_pd&expand=2561)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        // Reject any ROUNDING value outside the documented encodings at compile time.
        static_assert_rounding!(ROUNDING);
        // Forward to the fused multiply-add intrinsic with the explicit rounding
        // mode; lowers to a vfmadd instruction (checked by assert_instr above).
        vfmadd132pdround(a, b, c, ROUNDING)
    }
}
8873
8874/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8875///
8876/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8877/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8878/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8879/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8880/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8881/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8882///
8883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_pd&expand=2562)
8884#[inline]
8885#[target_feature(enable = "avx512f")]
8886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8887#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8888#[rustc_legacy_const_generics(4)]
8889pub fn _mm512_mask_fmadd_round_pd<const ROUNDING: i32>(
8890    a: __m512d,
8891    k: __mmask8,
8892    b: __m512d,
8893    c: __m512d,
8894) -> __m512d {
8895    unsafe {
8896        static_assert_rounding!(ROUNDING);
8897        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), a)
8898    }
8899}
8900
8901/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8902///
8903/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8904/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8905/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8906/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8907/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8908/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8909///
8910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_pd&expand=2564)
8911#[inline]
8912#[target_feature(enable = "avx512f")]
8913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8914#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8915#[rustc_legacy_const_generics(4)]
8916pub fn _mm512_maskz_fmadd_round_pd<const ROUNDING: i32>(
8917    k: __mmask8,
8918    a: __m512d,
8919    b: __m512d,
8920    c: __m512d,
8921) -> __m512d {
8922    unsafe {
8923        static_assert_rounding!(ROUNDING);
8924        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), _mm512_setzero_pd())
8925    }
8926}
8927
8928/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8929///
8930/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8931/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8932/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8933/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8934/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8935/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8936///
8937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_pd&expand=2563)
8938#[inline]
8939#[target_feature(enable = "avx512f")]
8940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8941#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8942#[rustc_legacy_const_generics(4)]
8943pub fn _mm512_mask3_fmadd_round_pd<const ROUNDING: i32>(
8944    a: __m512d,
8945    b: __m512d,
8946    c: __m512d,
8947    k: __mmask8,
8948) -> __m512d {
8949    unsafe {
8950        static_assert_rounding!(ROUNDING);
8951        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), c)
8952    }
8953}
8954
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_ps&expand=2651)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        // a * b - c is expressed as a * b + (-c): negating `c` lets the single
        // fused multiply-add intrinsic serve both the fmadd and fmsub forms.
        vfmadd132psround(a, b, simd_neg(c), ROUNDING)
    }
}
8976
8977/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8978///
8979/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8980/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8981/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8982/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8983/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8984/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8985///
8986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_ps&expand=2652)
8987#[inline]
8988#[target_feature(enable = "avx512f")]
8989#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8990#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8991#[rustc_legacy_const_generics(4)]
8992pub fn _mm512_mask_fmsub_round_ps<const ROUNDING: i32>(
8993    a: __m512,
8994    k: __mmask16,
8995    b: __m512,
8996    c: __m512,
8997) -> __m512 {
8998    unsafe {
8999        static_assert_rounding!(ROUNDING);
9000        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
9001        simd_select_bitmask(k, r, a)
9002    }
9003}
9004
9005/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9006///
9007/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9008/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9009/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9010/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9011/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9012/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9013///
9014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_ps&expand=2654)
9015#[inline]
9016#[target_feature(enable = "avx512f")]
9017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9018#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
9019#[rustc_legacy_const_generics(4)]
9020pub fn _mm512_maskz_fmsub_round_ps<const ROUNDING: i32>(
9021    k: __mmask16,
9022    a: __m512,
9023    b: __m512,
9024    c: __m512,
9025) -> __m512 {
9026    unsafe {
9027        static_assert_rounding!(ROUNDING);
9028        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
9029        simd_select_bitmask(k, r, _mm512_setzero_ps())
9030    }
9031}
9032
9033/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9034///
9035/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9036/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9037/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9038/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9039/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9040/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9041///
9042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_ps&expand=2653)
9043#[inline]
9044#[target_feature(enable = "avx512f")]
9045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9046#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
9047#[rustc_legacy_const_generics(4)]
9048pub fn _mm512_mask3_fmsub_round_ps<const ROUNDING: i32>(
9049    a: __m512,
9050    b: __m512,
9051    c: __m512,
9052    k: __mmask16,
9053) -> __m512 {
9054    unsafe {
9055        static_assert_rounding!(ROUNDING);
9056        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
9057        simd_select_bitmask(k, r, c)
9058    }
9059}
9060
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_pd&expand=2647)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        // a * b - c is expressed as a * b + (-c): negating `c` lets the single
        // fused multiply-add intrinsic serve both the fmadd and fmsub forms.
        vfmadd132pdround(a, b, simd_neg(c), ROUNDING)
    }
}
9082
9083/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9084///
9085/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9086/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9087/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9088/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9089/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9090/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9091///
9092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_pd&expand=2648)
9093#[inline]
9094#[target_feature(enable = "avx512f")]
9095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9096#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
9097#[rustc_legacy_const_generics(4)]
9098pub fn _mm512_mask_fmsub_round_pd<const ROUNDING: i32>(
9099    a: __m512d,
9100    k: __mmask8,
9101    b: __m512d,
9102    c: __m512d,
9103) -> __m512d {
9104    unsafe {
9105        static_assert_rounding!(ROUNDING);
9106        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
9107        simd_select_bitmask(k, r, a)
9108    }
9109}
9110
9111/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9112///
9113/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9114/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9115/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9116/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9117/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9118/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9119///
9120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_pd&expand=2650)
9121#[inline]
9122#[target_feature(enable = "avx512f")]
9123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9124#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
9125#[rustc_legacy_const_generics(4)]
9126pub fn _mm512_maskz_fmsub_round_pd<const ROUNDING: i32>(
9127    k: __mmask8,
9128    a: __m512d,
9129    b: __m512d,
9130    c: __m512d,
9131) -> __m512d {
9132    unsafe {
9133        static_assert_rounding!(ROUNDING);
9134        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
9135        simd_select_bitmask(k, r, _mm512_setzero_pd())
9136    }
9137}
9138
9139/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9140///
9141/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9142/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9143/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9144/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9145/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9146/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9147///
9148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_pd&expand=2649)
9149#[inline]
9150#[target_feature(enable = "avx512f")]
9151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9152#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
9153#[rustc_legacy_const_generics(4)]
9154pub fn _mm512_mask3_fmsub_round_pd<const ROUNDING: i32>(
9155    a: __m512d,
9156    b: __m512d,
9157    c: __m512d,
9158    k: __mmask8,
9159) -> __m512d {
9160    unsafe {
9161        static_assert_rounding!(ROUNDING);
9162        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
9163        simd_select_bitmask(k, r, c)
9164    }
9165}
9166
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_ps&expand=2619)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmaddsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        // Reject any ROUNDING value outside the documented encodings at compile time.
        static_assert_rounding!(ROUNDING);
        // The per-lane alternation between add and subtract is handled entirely
        // by the vfmaddsub intrinsic (see assert_instr above).
        vfmaddsubpsround(a, b, c, ROUNDING)
    }
}
9188
9189/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9190///
9191/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9192/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9193/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9194/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9195/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9196/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9197///
9198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_ps&expand=2620)
9199#[inline]
9200#[target_feature(enable = "avx512f")]
9201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9202#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
9203#[rustc_legacy_const_generics(4)]
9204pub fn _mm512_mask_fmaddsub_round_ps<const ROUNDING: i32>(
9205    a: __m512,
9206    k: __mmask16,
9207    b: __m512,
9208    c: __m512,
9209) -> __m512 {
9210    unsafe {
9211        static_assert_rounding!(ROUNDING);
9212        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), a)
9213    }
9214}
9215
9216/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9217///
9218/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9219/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9220/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9221/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9222/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9223/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9224///
9225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_ps&expand=2622)
9226#[inline]
9227#[target_feature(enable = "avx512f")]
9228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9229#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
9230#[rustc_legacy_const_generics(4)]
9231pub fn _mm512_maskz_fmaddsub_round_ps<const ROUNDING: i32>(
9232    k: __mmask16,
9233    a: __m512,
9234    b: __m512,
9235    c: __m512,
9236) -> __m512 {
9237    unsafe {
9238        static_assert_rounding!(ROUNDING);
9239        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), _mm512_setzero_ps())
9240    }
9241}
9242
9243/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9244///
9245/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9246/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9247/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9248/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9249/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9250/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9251///
9252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_ps&expand=2621)
9253#[inline]
9254#[target_feature(enable = "avx512f")]
9255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9256#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
9257#[rustc_legacy_const_generics(4)]
9258pub fn _mm512_mask3_fmaddsub_round_ps<const ROUNDING: i32>(
9259    a: __m512,
9260    b: __m512,
9261    c: __m512,
9262    k: __mmask16,
9263) -> __m512 {
9264    unsafe {
9265        static_assert_rounding!(ROUNDING);
9266        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), c)
9267    }
9268}
9269
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_pd&expand=2615)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmaddsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        // Reject any ROUNDING value outside the documented encodings at compile time.
        static_assert_rounding!(ROUNDING);
        // The per-lane alternation between add and subtract is handled entirely
        // by the vfmaddsub intrinsic (see assert_instr above).
        vfmaddsubpdround(a, b, c, ROUNDING)
    }
}
9295
9296/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9297///
9298/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9299/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9300/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9301/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9302/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9303/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9304///
9305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_pd&expand=2616)
9306#[inline]
9307#[target_feature(enable = "avx512f")]
9308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9309#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9310#[rustc_legacy_const_generics(4)]
9311pub fn _mm512_mask_fmaddsub_round_pd<const ROUNDING: i32>(
9312    a: __m512d,
9313    k: __mmask8,
9314    b: __m512d,
9315    c: __m512d,
9316) -> __m512d {
9317    unsafe {
9318        static_assert_rounding!(ROUNDING);
9319        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), a)
9320    }
9321}
9322
9323/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9324///
9325/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9326/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9327/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9328/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9329/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9330/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9331///
9332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_pd&expand=2618)
9333#[inline]
9334#[target_feature(enable = "avx512f")]
9335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9336#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9337#[rustc_legacy_const_generics(4)]
9338pub fn _mm512_maskz_fmaddsub_round_pd<const ROUNDING: i32>(
9339    k: __mmask8,
9340    a: __m512d,
9341    b: __m512d,
9342    c: __m512d,
9343) -> __m512d {
9344    unsafe {
9345        static_assert_rounding!(ROUNDING);
9346        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), _mm512_setzero_pd())
9347    }
9348}
9349
9350/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9351///
9352/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9353/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9354/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9355/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9356/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9357/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9358///
9359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_pd&expand=2617)
9360#[inline]
9361#[target_feature(enable = "avx512f")]
9362#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9363#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9364#[rustc_legacy_const_generics(4)]
9365pub fn _mm512_mask3_fmaddsub_round_pd<const ROUNDING: i32>(
9366    a: __m512d,
9367    b: __m512d,
9368    c: __m512d,
9369    k: __mmask8,
9370) -> __m512d {
9371    unsafe {
9372        static_assert_rounding!(ROUNDING);
9373        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), c)
9374    }
9375}
9376
9377/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9378///
9379/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9380/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9381/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9382/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9383/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9384/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9385///
9386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_ps&expand=2699)
9387#[inline]
9388#[target_feature(enable = "avx512f")]
9389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9390#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9391#[rustc_legacy_const_generics(3)]
9392pub fn _mm512_fmsubadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9393    unsafe {
9394        static_assert_rounding!(ROUNDING);
9395        vfmaddsubpsround(a, b, simd_neg(c), ROUNDING)
9396    }
9397}
9398
9399/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9400///
9401/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9402/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9403/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9404/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9405/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9406/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9407///
9408/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_ps&expand=2700)
9409#[inline]
9410#[target_feature(enable = "avx512f")]
9411#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9412#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9413#[rustc_legacy_const_generics(4)]
9414pub fn _mm512_mask_fmsubadd_round_ps<const ROUNDING: i32>(
9415    a: __m512,
9416    k: __mmask16,
9417    b: __m512,
9418    c: __m512,
9419) -> __m512 {
9420    unsafe {
9421        static_assert_rounding!(ROUNDING);
9422        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
9423        simd_select_bitmask(k, r, a)
9424    }
9425}
9426
9427/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9428///
9429/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9430/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9431/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9432/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9433/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9434/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9435///
9436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_ps&expand=2702)
9437#[inline]
9438#[target_feature(enable = "avx512f")]
9439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9440#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9441#[rustc_legacy_const_generics(4)]
9442pub fn _mm512_maskz_fmsubadd_round_ps<const ROUNDING: i32>(
9443    k: __mmask16,
9444    a: __m512,
9445    b: __m512,
9446    c: __m512,
9447) -> __m512 {
9448    unsafe {
9449        static_assert_rounding!(ROUNDING);
9450        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
9451        simd_select_bitmask(k, r, _mm512_setzero_ps())
9452    }
9453}
9454
9455/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9456///
9457/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9458/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9459/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9460/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9461/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9462/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9463///
9464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_ps&expand=2701)
9465#[inline]
9466#[target_feature(enable = "avx512f")]
9467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9468#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9469#[rustc_legacy_const_generics(4)]
9470pub fn _mm512_mask3_fmsubadd_round_ps<const ROUNDING: i32>(
9471    a: __m512,
9472    b: __m512,
9473    c: __m512,
9474    k: __mmask16,
9475) -> __m512 {
9476    unsafe {
9477        static_assert_rounding!(ROUNDING);
9478        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
9479        simd_select_bitmask(k, r, c)
9480    }
9481}
9482
9483/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9484///
9485/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9486/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9487/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9488/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9489/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9490/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9491///
9492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_pd&expand=2695)
9493#[inline]
9494#[target_feature(enable = "avx512f")]
9495#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9496#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9497#[rustc_legacy_const_generics(3)]
9498pub fn _mm512_fmsubadd_round_pd<const ROUNDING: i32>(
9499    a: __m512d,
9500    b: __m512d,
9501    c: __m512d,
9502) -> __m512d {
9503    unsafe {
9504        static_assert_rounding!(ROUNDING);
9505        vfmaddsubpdround(a, b, simd_neg(c), ROUNDING)
9506    }
9507}
9508
9509/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9510///
9511/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9512/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9513/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9514/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9515/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9516/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9517///
9518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_pd&expand=2696)
9519#[inline]
9520#[target_feature(enable = "avx512f")]
9521#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9522#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9523#[rustc_legacy_const_generics(4)]
9524pub fn _mm512_mask_fmsubadd_round_pd<const ROUNDING: i32>(
9525    a: __m512d,
9526    k: __mmask8,
9527    b: __m512d,
9528    c: __m512d,
9529) -> __m512d {
9530    unsafe {
9531        static_assert_rounding!(ROUNDING);
9532        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
9533        simd_select_bitmask(k, r, a)
9534    }
9535}
9536
9537/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9538///
9539/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9540/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9541/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9542/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9543/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9544/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9545///
9546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_pd&expand=2698)
9547#[inline]
9548#[target_feature(enable = "avx512f")]
9549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9550#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9551#[rustc_legacy_const_generics(4)]
9552pub fn _mm512_maskz_fmsubadd_round_pd<const ROUNDING: i32>(
9553    k: __mmask8,
9554    a: __m512d,
9555    b: __m512d,
9556    c: __m512d,
9557) -> __m512d {
9558    unsafe {
9559        static_assert_rounding!(ROUNDING);
9560        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
9561        simd_select_bitmask(k, r, _mm512_setzero_pd())
9562    }
9563}
9564
9565/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9566///
9567/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9568/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9569/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9570/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9571/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9572/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9573///
9574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_pd&expand=2697)
9575#[inline]
9576#[target_feature(enable = "avx512f")]
9577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9578#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9579#[rustc_legacy_const_generics(4)]
9580pub fn _mm512_mask3_fmsubadd_round_pd<const ROUNDING: i32>(
9581    a: __m512d,
9582    b: __m512d,
9583    c: __m512d,
9584    k: __mmask8,
9585) -> __m512d {
9586    unsafe {
9587        static_assert_rounding!(ROUNDING);
9588        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
9589        simd_select_bitmask(k, r, c)
9590    }
9591}
9592
9593/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9594///
9595/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9596/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9597/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9598/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9599/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9600/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9601///
9602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_ps&expand=2731)
9603#[inline]
9604#[target_feature(enable = "avx512f")]
9605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9606#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9607#[rustc_legacy_const_generics(3)]
9608pub fn _mm512_fnmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9609    unsafe {
9610        static_assert_rounding!(ROUNDING);
9611        vfmadd132psround(simd_neg(a), b, c, ROUNDING)
9612    }
9613}
9614
9615/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9616///
9617/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9618/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9619/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9620/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9621/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9622/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9623///
9624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_ps&expand=2732)
9625#[inline]
9626#[target_feature(enable = "avx512f")]
9627#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9628#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9629#[rustc_legacy_const_generics(4)]
9630pub fn _mm512_mask_fnmadd_round_ps<const ROUNDING: i32>(
9631    a: __m512,
9632    k: __mmask16,
9633    b: __m512,
9634    c: __m512,
9635) -> __m512 {
9636    unsafe {
9637        static_assert_rounding!(ROUNDING);
9638        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9639        simd_select_bitmask(k, r, a)
9640    }
9641}
9642
9643/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9644///
9645/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9646/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9647/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9648/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9649/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9650/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9651///
9652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_ps&expand=2734)
9653#[inline]
9654#[target_feature(enable = "avx512f")]
9655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9656#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9657#[rustc_legacy_const_generics(4)]
9658pub fn _mm512_maskz_fnmadd_round_ps<const ROUNDING: i32>(
9659    k: __mmask16,
9660    a: __m512,
9661    b: __m512,
9662    c: __m512,
9663) -> __m512 {
9664    unsafe {
9665        static_assert_rounding!(ROUNDING);
9666        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9667        simd_select_bitmask(k, r, _mm512_setzero_ps())
9668    }
9669}
9670
9671/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9672///
9673/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9674/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9675/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9676/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9677/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9678/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9679///
9680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_ps&expand=2733)
9681#[inline]
9682#[target_feature(enable = "avx512f")]
9683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9684#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9685#[rustc_legacy_const_generics(4)]
9686pub fn _mm512_mask3_fnmadd_round_ps<const ROUNDING: i32>(
9687    a: __m512,
9688    b: __m512,
9689    c: __m512,
9690    k: __mmask16,
9691) -> __m512 {
9692    unsafe {
9693        static_assert_rounding!(ROUNDING);
9694        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9695        simd_select_bitmask(k, r, c)
9696    }
9697}
9698
9699/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9700///
9701/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9702/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9703/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9704/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9705/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9706/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9707///
9708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_pd&expand=2711)
9709#[inline]
9710#[target_feature(enable = "avx512f")]
9711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9712#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9713#[rustc_legacy_const_generics(3)]
9714pub fn _mm512_fnmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9715    unsafe {
9716        static_assert_rounding!(ROUNDING);
9717        vfmadd132pdround(simd_neg(a), b, c, ROUNDING)
9718    }
9719}
9720
9721/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9722///
9723/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9724/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9725/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9726/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9727/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9728/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9729///
9730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_pd&expand=2728)
9731#[inline]
9732#[target_feature(enable = "avx512f")]
9733#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9734#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9735#[rustc_legacy_const_generics(4)]
9736pub fn _mm512_mask_fnmadd_round_pd<const ROUNDING: i32>(
9737    a: __m512d,
9738    k: __mmask8,
9739    b: __m512d,
9740    c: __m512d,
9741) -> __m512d {
9742    unsafe {
9743        static_assert_rounding!(ROUNDING);
9744        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
9745        simd_select_bitmask(k, r, a)
9746    }
9747}
9748
9749/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9750///
9751/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9752/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9753/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9754/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9755/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9756/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9757///
9758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_pd&expand=2730)
9759#[inline]
9760#[target_feature(enable = "avx512f")]
9761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9762#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9763#[rustc_legacy_const_generics(4)]
9764pub fn _mm512_maskz_fnmadd_round_pd<const ROUNDING: i32>(
9765    k: __mmask8,
9766    a: __m512d,
9767    b: __m512d,
9768    c: __m512d,
9769) -> __m512d {
9770    unsafe {
9771        static_assert_rounding!(ROUNDING);
9772        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
9773        simd_select_bitmask(k, r, _mm512_setzero_pd())
9774    }
9775}
9776
9777/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9778///
9779/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9780/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9781/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9782/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9783/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9784/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9785///
9786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_pd&expand=2729)
9787#[inline]
9788#[target_feature(enable = "avx512f")]
9789#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9790#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9791#[rustc_legacy_const_generics(4)]
9792pub fn _mm512_mask3_fnmadd_round_pd<const ROUNDING: i32>(
9793    a: __m512d,
9794    b: __m512d,
9795    c: __m512d,
9796    k: __mmask8,
9797) -> __m512d {
9798    unsafe {
9799        static_assert_rounding!(ROUNDING);
9800        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
9801        simd_select_bitmask(k, r, c)
9802    }
9803}
9804
9805/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9806///
9807/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9808/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9809/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9810/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9811/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9812/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9813///
9814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_ps&expand=2779)
9815#[inline]
9816#[target_feature(enable = "avx512f")]
9817#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9818#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9819#[rustc_legacy_const_generics(3)]
9820pub fn _mm512_fnmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9821    unsafe {
9822        static_assert_rounding!(ROUNDING);
9823        vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING)
9824    }
9825}
9826
9827/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9828///
9829/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9830/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9831/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9832/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9833/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9834/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9835///
9836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_ps&expand=2780)
9837#[inline]
9838#[target_feature(enable = "avx512f")]
9839#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9840#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9841#[rustc_legacy_const_generics(4)]
9842pub fn _mm512_mask_fnmsub_round_ps<const ROUNDING: i32>(
9843    a: __m512,
9844    k: __mmask16,
9845    b: __m512,
9846    c: __m512,
9847) -> __m512 {
9848    unsafe {
9849        static_assert_rounding!(ROUNDING);
9850        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
9851        simd_select_bitmask(k, r, a)
9852    }
9853}
9854
9855/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9856///
9857/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9858/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9859/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9860/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9861/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9862/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9863///
9864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_ps&expand=2782)
9865#[inline]
9866#[target_feature(enable = "avx512f")]
9867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9868#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9869#[rustc_legacy_const_generics(4)]
9870pub fn _mm512_maskz_fnmsub_round_ps<const ROUNDING: i32>(
9871    k: __mmask16,
9872    a: __m512,
9873    b: __m512,
9874    c: __m512,
9875) -> __m512 {
9876    unsafe {
9877        static_assert_rounding!(ROUNDING);
9878        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
9879        simd_select_bitmask(k, r, _mm512_setzero_ps())
9880    }
9881}
9882
9883/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9884///
9885/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9886/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9887/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9888/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9889/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9890/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9891///
9892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_ps&expand=2781)
9893#[inline]
9894#[target_feature(enable = "avx512f")]
9895#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9896#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9897#[rustc_legacy_const_generics(4)]
9898pub fn _mm512_mask3_fnmsub_round_ps<const ROUNDING: i32>(
9899    a: __m512,
9900    b: __m512,
9901    c: __m512,
9902    k: __mmask16,
9903) -> __m512 {
9904    unsafe {
9905        static_assert_rounding!(ROUNDING);
9906        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
9907        simd_select_bitmask(k, r, c)
9908    }
9909}
9910
9911/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9912///
9913/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9914/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9915/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9916/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9917/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9918/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9919///
9920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_pd&expand=2775)
9921#[inline]
9922#[target_feature(enable = "avx512f")]
9923#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9924#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9925#[rustc_legacy_const_generics(3)]
9926pub fn _mm512_fnmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9927    unsafe {
9928        static_assert_rounding!(ROUNDING);
9929        vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING)
9930    }
9931}
9932
9933/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9934///
9935/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9936/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9937/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9938/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9939/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9940/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9941///
9942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_pd&expand=2776)
9943#[inline]
9944#[target_feature(enable = "avx512f")]
9945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9946#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9947#[rustc_legacy_const_generics(4)]
9948pub fn _mm512_mask_fnmsub_round_pd<const ROUNDING: i32>(
9949    a: __m512d,
9950    k: __mmask8,
9951    b: __m512d,
9952    c: __m512d,
9953) -> __m512d {
9954    unsafe {
9955        static_assert_rounding!(ROUNDING);
9956        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
9957        simd_select_bitmask(k, r, a)
9958    }
9959}
9960
9961/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9962///
9963/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9964/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9965/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9966/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9967/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9968/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9969///
9970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_pd&expand=2778)
9971#[inline]
9972#[target_feature(enable = "avx512f")]
9973#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9974#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9975#[rustc_legacy_const_generics(4)]
9976pub fn _mm512_maskz_fnmsub_round_pd<const ROUNDING: i32>(
9977    k: __mmask8,
9978    a: __m512d,
9979    b: __m512d,
9980    c: __m512d,
9981) -> __m512d {
9982    unsafe {
9983        static_assert_rounding!(ROUNDING);
9984        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
9985        simd_select_bitmask(k, r, _mm512_setzero_pd())
9986    }
9987}
9988
9989/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9990///
9991/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9992/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9993/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9994/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9995/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9996/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9997///
9998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_pd&expand=2777)
9999#[inline]
10000#[target_feature(enable = "avx512f")]
10001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10002#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
10003#[rustc_legacy_const_generics(4)]
10004pub fn _mm512_mask3_fnmsub_round_pd<const ROUNDING: i32>(
10005    a: __m512d,
10006    b: __m512d,
10007    c: __m512d,
10008    k: __mmask8,
10009) -> __m512d {
10010    unsafe {
10011        static_assert_rounding!(ROUNDING);
10012        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
10013        simd_select_bitmask(k, r, c)
10014    }
10015}
10016
10017/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\
10018/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10019///
10020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_ps&expand=3662)
10021#[inline]
10022#[target_feature(enable = "avx512f")]
10023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10024#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
10025#[rustc_legacy_const_generics(2)]
10026pub fn _mm512_max_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
10027    unsafe {
10028        static_assert_sae!(SAE);
10029        let a = a.as_f32x16();
10030        let b = b.as_f32x16();
10031        let r = vmaxps(a, b, SAE);
10032        transmute(r)
10033    }
10034}
10035
10036/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10037/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10038///
10039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_ps&expand=3660)
10040#[inline]
10041#[target_feature(enable = "avx512f")]
10042#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10043#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
10044#[rustc_legacy_const_generics(4)]
10045pub fn _mm512_mask_max_round_ps<const SAE: i32>(
10046    src: __m512,
10047    k: __mmask16,
10048    a: __m512,
10049    b: __m512,
10050) -> __m512 {
10051    unsafe {
10052        static_assert_sae!(SAE);
10053        let a = a.as_f32x16();
10054        let b = b.as_f32x16();
10055        let r = vmaxps(a, b, SAE);
10056        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
10057    }
10058}
10059
10060/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10061/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10062///
10063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_ps&expand=3661)
10064#[inline]
10065#[target_feature(enable = "avx512f")]
10066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10067#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
10068#[rustc_legacy_const_generics(3)]
10069pub fn _mm512_maskz_max_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
10070    unsafe {
10071        static_assert_sae!(SAE);
10072        let a = a.as_f32x16();
10073        let b = b.as_f32x16();
10074        let r = vmaxps(a, b, SAE);
10075        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
10076    }
10077}
10078
10079/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\
10080/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10081///
10082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_pd&expand=3659)
10083#[inline]
10084#[target_feature(enable = "avx512f")]
10085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10086#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
10087#[rustc_legacy_const_generics(2)]
10088pub fn _mm512_max_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
10089    unsafe {
10090        static_assert_sae!(SAE);
10091        let a = a.as_f64x8();
10092        let b = b.as_f64x8();
10093        let r = vmaxpd(a, b, SAE);
10094        transmute(r)
10095    }
10096}
10097
10098/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10099/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10100///
10101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_pd&expand=3657)
10102#[inline]
10103#[target_feature(enable = "avx512f")]
10104#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10105#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
10106#[rustc_legacy_const_generics(4)]
10107pub fn _mm512_mask_max_round_pd<const SAE: i32>(
10108    src: __m512d,
10109    k: __mmask8,
10110    a: __m512d,
10111    b: __m512d,
10112) -> __m512d {
10113    unsafe {
10114        static_assert_sae!(SAE);
10115        let a = a.as_f64x8();
10116        let b = b.as_f64x8();
10117        let r = vmaxpd(a, b, SAE);
10118        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
10119    }
10120}
10121
10122/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10123/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10124///
10125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_pd&expand=3658)
10126#[inline]
10127#[target_feature(enable = "avx512f")]
10128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10129#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
10130#[rustc_legacy_const_generics(3)]
10131pub fn _mm512_maskz_max_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
10132    unsafe {
10133        static_assert_sae!(SAE);
10134        let a = a.as_f64x8();
10135        let b = b.as_f64x8();
10136        let r = vmaxpd(a, b, SAE);
10137        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
10138    }
10139}
10140
10141/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\
10142/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10143///
10144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_ps&expand=3776)
10145#[inline]
10146#[target_feature(enable = "avx512f")]
10147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10148#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
10149#[rustc_legacy_const_generics(2)]
10150pub fn _mm512_min_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
10151    unsafe {
10152        static_assert_sae!(SAE);
10153        let a = a.as_f32x16();
10154        let b = b.as_f32x16();
10155        let r = vminps(a, b, SAE);
10156        transmute(r)
10157    }
10158}
10159
10160/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10161/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10162///
10163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_ps&expand=3774)
10164#[inline]
10165#[target_feature(enable = "avx512f")]
10166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10167#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
10168#[rustc_legacy_const_generics(4)]
10169pub fn _mm512_mask_min_round_ps<const SAE: i32>(
10170    src: __m512,
10171    k: __mmask16,
10172    a: __m512,
10173    b: __m512,
10174) -> __m512 {
10175    unsafe {
10176        static_assert_sae!(SAE);
10177        let a = a.as_f32x16();
10178        let b = b.as_f32x16();
10179        let r = vminps(a, b, SAE);
10180        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
10181    }
10182}
10183
10184/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10185/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10186///
10187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_ps&expand=3775)
10188#[inline]
10189#[target_feature(enable = "avx512f")]
10190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10191#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
10192#[rustc_legacy_const_generics(3)]
10193pub fn _mm512_maskz_min_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
10194    unsafe {
10195        static_assert_sae!(SAE);
10196        let a = a.as_f32x16();
10197        let b = b.as_f32x16();
10198        let r = vminps(a, b, SAE);
10199        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
10200    }
10201}
10202
10203/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\
10204/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10205///
10206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_pd&expand=3773)
10207#[inline]
10208#[target_feature(enable = "avx512f")]
10209#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10210#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
10211#[rustc_legacy_const_generics(2)]
10212pub fn _mm512_min_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
10213    unsafe {
10214        static_assert_sae!(SAE);
10215        let a = a.as_f64x8();
10216        let b = b.as_f64x8();
10217        let r = vminpd(a, b, SAE);
10218        transmute(r)
10219    }
10220}
10221
10222/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10223/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10224///
10225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_pd&expand=3771)
10226#[inline]
10227#[target_feature(enable = "avx512f")]
10228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10229#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
10230#[rustc_legacy_const_generics(4)]
10231pub fn _mm512_mask_min_round_pd<const SAE: i32>(
10232    src: __m512d,
10233    k: __mmask8,
10234    a: __m512d,
10235    b: __m512d,
10236) -> __m512d {
10237    unsafe {
10238        static_assert_sae!(SAE);
10239        let a = a.as_f64x8();
10240        let b = b.as_f64x8();
10241        let r = vminpd(a, b, SAE);
10242        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
10243    }
10244}
10245
10246/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10247/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10248///
10249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_pd&expand=3772)
10250#[inline]
10251#[target_feature(enable = "avx512f")]
10252#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10253#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
10254#[rustc_legacy_const_generics(3)]
10255pub fn _mm512_maskz_min_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
10256    unsafe {
10257        static_assert_sae!(SAE);
10258        let a = a.as_f64x8();
10259        let b = b.as_f64x8();
10260        let r = vminpd(a, b, SAE);
10261        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
10262    }
10263}
10264
10265/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
10266/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10267///
10268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_ps&expand=2850)
10269#[inline]
10270#[target_feature(enable = "avx512f")]
10271#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10272#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10273#[rustc_legacy_const_generics(1)]
10274pub fn _mm512_getexp_round_ps<const SAE: i32>(a: __m512) -> __m512 {
10275    unsafe {
10276        static_assert_sae!(SAE);
10277        let a = a.as_f32x16();
10278        let r = vgetexpps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
10279        transmute(r)
10280    }
10281}
10282
10283/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10284/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10285///
10286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_ps&expand=2851)
10287#[inline]
10288#[target_feature(enable = "avx512f")]
10289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10290#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10291#[rustc_legacy_const_generics(3)]
10292pub fn _mm512_mask_getexp_round_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
10293    unsafe {
10294        static_assert_sae!(SAE);
10295        let a = a.as_f32x16();
10296        let src = src.as_f32x16();
10297        let r = vgetexpps(a, src, k, SAE);
10298        transmute(r)
10299    }
10300}
10301
10302/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10303/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10304///
10305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_ps&expand=2852)
10306#[inline]
10307#[target_feature(enable = "avx512f")]
10308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10309#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10310#[rustc_legacy_const_generics(2)]
10311pub fn _mm512_maskz_getexp_round_ps<const SAE: i32>(k: __mmask16, a: __m512) -> __m512 {
10312    unsafe {
10313        static_assert_sae!(SAE);
10314        let a = a.as_f32x16();
10315        let r = vgetexpps(a, f32x16::ZERO, k, SAE);
10316        transmute(r)
10317    }
10318}
10319
10320/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
10321/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10322///
10323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_pd&expand=2847)
10324#[inline]
10325#[target_feature(enable = "avx512f")]
10326#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10327#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10328#[rustc_legacy_const_generics(1)]
10329pub fn _mm512_getexp_round_pd<const SAE: i32>(a: __m512d) -> __m512d {
10330    unsafe {
10331        static_assert_sae!(SAE);
10332        let a = a.as_f64x8();
10333        let r = vgetexppd(a, f64x8::ZERO, 0b11111111, SAE);
10334        transmute(r)
10335    }
10336}
10337
10338/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10339/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10340///
10341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_pd&expand=2848)
10342#[inline]
10343#[target_feature(enable = "avx512f")]
10344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10345#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10346#[rustc_legacy_const_generics(3)]
10347pub fn _mm512_mask_getexp_round_pd<const SAE: i32>(
10348    src: __m512d,
10349    k: __mmask8,
10350    a: __m512d,
10351) -> __m512d {
10352    unsafe {
10353        static_assert_sae!(SAE);
10354        let a = a.as_f64x8();
10355        let src = src.as_f64x8();
10356        let r = vgetexppd(a, src, k, SAE);
10357        transmute(r)
10358    }
10359}
10360
10361/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10362/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10363///
10364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_pd&expand=2849)
10365#[inline]
10366#[target_feature(enable = "avx512f")]
10367#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10368#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10369#[rustc_legacy_const_generics(2)]
10370pub fn _mm512_maskz_getexp_round_pd<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512d {
10371    unsafe {
10372        static_assert_sae!(SAE);
10373        let a = a.as_f64x8();
10374        let r = vgetexppd(a, f64x8::ZERO, k, SAE);
10375        transmute(r)
10376    }
10377}
10378
10379/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10380/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10381/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10382/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10383/// * [`_MM_FROUND_TO_POS_INF`] : round up
10384/// * [`_MM_FROUND_TO_ZERO`] : truncate
10385/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10386///
10387/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_ps&expand=4790)
10389#[inline]
10390#[target_feature(enable = "avx512f")]
10391#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10392#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10393#[rustc_legacy_const_generics(1, 2)]
10394pub fn _mm512_roundscale_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
10395    unsafe {
10396        static_assert_uimm_bits!(IMM8, 8);
10397        static_assert_mantissas_sae!(SAE);
10398        let a = a.as_f32x16();
10399        let r = vrndscaleps(a, IMM8, f32x16::ZERO, 0b11111111_11111111, SAE);
10400        transmute(r)
10401    }
10402}
10403
/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_ps&expand=4788)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm512_mask_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let src = src.as_f32x16();
        // Merge-masking is done by the intrinsic itself: lanes whose mask bit
        // is clear are taken from `src`.
        let r = vrndscaleps(a, IMM8, src, k, SAE);
        transmute(r)
    }
}
10433
/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_ps&expand=4789)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm512_maskz_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        // Zero-masking: a zero vector is supplied as the pass-through, so
        // lanes whose mask bit is clear become 0.0.
        let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, SAE);
        transmute(r)
    }
}
10461
/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_pd&expand=4787)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(1, 2)]
pub fn _mm512_roundscale_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        // Unmasked form: an all-ones writemask selects every lane, so the
        // zeroed pass-through vector is never observed.
        let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}
10486
/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_pd&expand=4785)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm512_mask_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let src = src.as_f64x8();
        // Merge-masking is done by the intrinsic itself: lanes whose mask bit
        // is clear are taken from `src`.
        let r = vrndscalepd(a, IMM8, src, k, SAE);
        transmute(r)
    }
}
10516
/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_pd&expand=4786)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm512_maskz_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        // Zero-masking: a zero vector is supplied as the pass-through, so
        // lanes whose mask bit is clear become 0.0.
        let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, SAE);
        transmute(r)
    }
}
10544
/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_ps&expand=4889)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_scalef_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        // Unmasked form: an all-ones writemask selects every lane, so the
        // zeroed pass-through vector is never observed.
        let r = vscalefps(a, b, f32x16::ZERO, 0b11111111_11111111, ROUNDING);
        transmute(r)
    }
}
10569
/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_ps&expand=4887)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_scalef_round_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let src = src.as_f32x16();
        // Merge-masking is done by the intrinsic itself: lanes whose mask bit
        // is clear are taken from `src`.
        let r = vscalefps(a, b, src, k, ROUNDING);
        transmute(r)
    }
}
10600
/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_ps&expand=4888)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_scalef_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        // Zero-masking: a zero vector is supplied as the pass-through, so
        // lanes whose mask bit is clear become 0.0.
        let r = vscalefps(a, b, f32x16::ZERO, k, ROUNDING);
        transmute(r)
    }
}
10629
/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_pd&expand=4886)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_scalef_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        // Unmasked form: an all-ones writemask selects every lane, so the
        // zeroed pass-through vector is never observed.
        let r = vscalefpd(a, b, f64x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
    }
}
10654
/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_pd&expand=4884)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_scalef_round_pd<const ROUNDING: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let src = src.as_f64x8();
        // Merge-masking is done by the intrinsic itself: lanes whose mask bit
        // is clear are taken from `src`.
        let r = vscalefpd(a, b, src, k, ROUNDING);
        transmute(r)
    }
}
10685
/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_pd&expand=4885)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_scalef_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        // Zero-masking: a zero vector is supplied as the pass-through, so
        // lanes whose mask bit is clear become 0.0.
        let r = vscalefpd(a, b, f64x8::ZERO, k, ROUNDING);
        transmute(r)
    }
}
10714
/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_ps&expand=2505)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm512_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
    a: __m512,
    b: __m512,
    c: __m512i,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        // `c` holds the per-lane fixup lookup-table entries as 32-bit ints.
        let c = c.as_i32x16();
        // Unmasked form: all-ones writemask selects every lane.
        let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, SAE);
        transmute(r)
    }
}
10739
/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_ps&expand=2506)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm512_mask_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512i,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let c = c.as_i32x16();
        // Merge-masking variant: the intrinsic keeps `a`'s lanes where the
        // mask bit is clear (note: `a` doubles as the pass-through operand).
        let r = vfixupimmps(a, b, c, IMM8, k, SAE);
        transmute(r)
    }
}
10765
/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_ps&expand=2507)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm512_maskz_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512i,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let c = c.as_i32x16();
        // Dedicated zero-masking intrinsic (`...z` suffix): lanes with a clear
        // mask bit are zeroed rather than copied from `a`.
        let r = vfixupimmpsz(a, b, c, IMM8, k, SAE);
        transmute(r)
    }
}
10791
/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_pd&expand=2502)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm512_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512i,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        // `c` holds the per-lane fixup lookup-table entries as 64-bit ints.
        let c = c.as_i64x8();
        // Unmasked form: all-ones writemask selects every lane.
        let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, SAE);
        transmute(r)
    }
}
10816
/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_pd&expand=2503)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm512_mask_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512i,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let c = c.as_i64x8();
        // Merge-masking variant: the intrinsic keeps `a`'s lanes where the
        // mask bit is clear (note: `a` doubles as the pass-through operand).
        let r = vfixupimmpd(a, b, c, IMM8, k, SAE);
        transmute(r)
    }
}
10842
/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_pd&expand=2504)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm512_maskz_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512i,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let c = c.as_i64x8();
        // Dedicated zero-masking intrinsic (`...z` suffix): lanes with a clear
        // mask bit are zeroed rather than copied from `a`.
        let r = vfixupimmpdz(a, b, c, IMM8, k, SAE);
        transmute(r)
    }
}
10868
/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_ps&expand=2886)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(1, 2, 3)]
pub fn _mm512_getmant_round_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        // Pack the instruction's imm8: sign control in the bits above the
        // interval selector. Unmasked: all-ones writemask, zeroed pass-through.
        let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
    }
}
10903
/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_ps&expand=2887)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(3, 4, 5)]
pub fn _mm512_mask_getmant_round_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    src: __m512,
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let src = src.as_f32x16();
        // Pack the instruction's imm8 (sign control above interval selector);
        // the intrinsic merges `src` lanes where the mask bit is clear.
        let r = vgetmantps(a, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
    }
}
10941
/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_ps&expand=2888)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(2, 3, 4)]
pub fn _mm512_maskz_getmant_round_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        // Pack the instruction's imm8 (sign control above interval selector);
        // zero-masking via a zero pass-through vector.
        let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, k, SAE);
        transmute(r)
    }
}
10977
/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_pd&expand=2883)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(1, 2, 3)]
pub fn _mm512_getmant_round_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        // Pack the instruction's imm8: sign control in the bits above the
        // interval selector. Unmasked: all-ones writemask, zeroed pass-through.
        let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}
11012
11013/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
11014/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
11015///    _MM_MANT_NORM_1_2     // interval [1, 2)\
11016///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
11017///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
11018///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
11019/// The sign is determined by sc which can take the following values:\
11020///    _MM_MANT_SIGN_src     // sign = sign(src)\
11021///    _MM_MANT_SIGN_zero    // sign = 0\
11022///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
11023/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
11024///
11025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_pd&expand=2884)
11026#[inline]
11027#[target_feature(enable = "avx512f")]
11028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11029#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
11030#[rustc_legacy_const_generics(3, 4, 5)]
pub fn _mm512_mask_getmant_round_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        // Compile-time validation of the const generics (4-bit NORM, 2-bit
        // SIGN, valid SAE value).
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let src = src.as_f64x8();
        // imm8: sign control in bits [3:2], normalization interval in bits
        // [1:0]. `src` and `k` are forwarded so the instruction performs the
        // merge-masking itself.
        let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
    }
}
11050
11051/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
11052/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
11053///    _MM_MANT_NORM_1_2     // interval [1, 2)\
11054///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
11055///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
11056///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
11057/// The sign is determined by sc which can take the following values:\
11058///    _MM_MANT_SIGN_src     // sign = sign(src)\
11059///    _MM_MANT_SIGN_zero    // sign = 0\
11060///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
11061/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
11062///
11063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_pd&expand=2885)
11064#[inline]
11065#[target_feature(enable = "avx512f")]
11066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11067#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
11068#[rustc_legacy_const_generics(2, 3, 4)]
pub fn _mm512_maskz_getmant_round_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        // Compile-time validation of the const generics (4-bit NORM, 2-bit
        // SIGN, valid SAE value).
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        // imm8: sign control in bits [3:2], normalization interval in bits
        // [1:0]. A zeroed merge source plus the caller's mask implements
        // zero-masking: lanes with a clear mask bit become 0.0.
        let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, k, SAE);
        transmute(r)
    }
}
11086
11087/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
11088///
11089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi32&expand=1737)
11090#[inline]
11091#[target_feature(enable = "avx512f")]
11092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11093#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
    unsafe {
        // All-ones 16-bit writemask with a zeroed merge source gives the
        // unmasked form of VCVTPS2DQ, rounding per the current MXCSR mode.
        transmute(vcvtps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
11104
11105/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11106///
11107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi32&expand=1738)
11108#[inline]
11109#[target_feature(enable = "avx512f")]
11110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11111#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        // `src` and `k` are forwarded so the instruction performs the
        // merge-masking itself; rounding follows the current MXCSR mode.
        transmute(vcvtps2dq(
            a.as_f32x16(),
            src.as_i32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
11122
11123/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11124///
11125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi32&expand=1739)
11126#[inline]
11127#[target_feature(enable = "avx512f")]
11128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11129#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        // Zeroed merge source + caller's mask implements zero-masking: lanes
        // with a clear mask bit become 0.
        transmute(vcvtps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
11140
11141/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11142///
11143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi32&expand=1735)
11144#[inline]
11145#[target_feature(enable = "avx512f,avx512vl")]
11146#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11147#[cfg_attr(test, assert_instr(vcvtps2dq))]
11148pub fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
11149    unsafe {
11150        let convert = _mm256_cvtps_epi32(a);
11151        transmute(simd_select_bitmask(k, convert.as_i32x8(), src.as_i32x8()))
11152    }
11153}
11154
11155/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11156///
11157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi32&expand=1736)
11158#[inline]
11159#[target_feature(enable = "avx512f,avx512vl")]
11160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11161#[cfg_attr(test, assert_instr(vcvtps2dq))]
11162pub fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i {
11163    unsafe {
11164        let convert = _mm256_cvtps_epi32(a);
11165        transmute(simd_select_bitmask(k, convert.as_i32x8(), i32x8::ZERO))
11166    }
11167}
11168
11169/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11170///
11171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi32&expand=1732)
11172#[inline]
11173#[target_feature(enable = "avx512f,avx512vl")]
11174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11175#[cfg_attr(test, assert_instr(vcvtps2dq))]
11176pub fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
11177    unsafe {
11178        let convert = _mm_cvtps_epi32(a);
11179        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
11180    }
11181}
11182
11183/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11184///
11185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi32&expand=1733)
11186#[inline]
11187#[target_feature(enable = "avx512f,avx512vl")]
11188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11189#[cfg_attr(test, assert_instr(vcvtps2dq))]
11190pub fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i {
11191    unsafe {
11192        let convert = _mm_cvtps_epi32(a);
11193        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
11194    }
11195}
11196
11197/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11198///
11199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu32&expand=1755)
11200#[inline]
11201#[target_feature(enable = "avx512f")]
11202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11203#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
    unsafe {
        // All-ones 16-bit writemask with a zeroed merge source gives the
        // unmasked form of VCVTPS2UDQ, rounding per the current MXCSR mode.
        transmute(vcvtps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
11214
11215/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11216///
11217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu32&expand=1756)
11218#[inline]
11219#[target_feature(enable = "avx512f")]
11220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11221#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        // `src` and `k` are forwarded so the instruction performs the
        // merge-masking itself; rounding follows the current MXCSR mode.
        transmute(vcvtps2udq(
            a.as_f32x16(),
            src.as_u32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
11232
11233/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11234///
11235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu32&expand=1343)
11236#[inline]
11237#[target_feature(enable = "avx512f")]
11238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11239#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        // Zeroed merge source + caller's mask implements zero-masking: lanes
        // with a clear mask bit become 0.
        transmute(vcvtps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
11250
11251/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11252///
11253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu32&expand=1752)
11254#[inline]
11255#[target_feature(enable = "avx512f,avx512vl")]
11256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11257#[cfg_attr(test, assert_instr(vcvtps2udq))]
11258pub fn _mm256_cvtps_epu32(a: __m256) -> __m256i {
11259    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
11260}
11261
11262/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11263///
11264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu32&expand=1753)
11265#[inline]
11266#[target_feature(enable = "avx512f,avx512vl")]
11267#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11268#[cfg_attr(test, assert_instr(vcvtps2udq))]
11269pub fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
11270    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
11271}
11272
11273/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11274///
11275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu32&expand=1754)
11276#[inline]
11277#[target_feature(enable = "avx512f,avx512vl")]
11278#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11279#[cfg_attr(test, assert_instr(vcvtps2udq))]
11280pub fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i {
11281    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
11282}
11283
11284/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11285///
11286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu32&expand=1749)
11287#[inline]
11288#[target_feature(enable = "avx512f,avx512vl")]
11289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11290#[cfg_attr(test, assert_instr(vcvtps2udq))]
11291pub fn _mm_cvtps_epu32(a: __m128) -> __m128i {
11292    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
11293}
11294
11295/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11296///
11297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu32&expand=1750)
11298#[inline]
11299#[target_feature(enable = "avx512f,avx512vl")]
11300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11301#[cfg_attr(test, assert_instr(vcvtps2udq))]
11302pub fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
11303    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
11304}
11305
11306/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11307///
11308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu32&expand=1751)
11309#[inline]
11310#[target_feature(enable = "avx512f,avx512vl")]
11311#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11312#[cfg_attr(test, assert_instr(vcvtps2udq))]
11313pub fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i {
11314    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
11315}
11316
11317/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
11318///
11319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_pd&expand=1769)
11320#[inline]
11321#[target_feature(enable = "avx512f")]
11322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11323#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub fn _mm512_cvtps_pd(a: __m256) -> __m512d {
    unsafe {
        // Widening f32 -> f64 is exact, so the rounding argument is moot;
        // an all-ones mask with a zeroed merge source gives the unmasked op.
        transmute(vcvtps2pd(
            a.as_f32x8(),
            f64x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
11334
11335/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11336///
11337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_pd&expand=1770)
11338#[inline]
11339#[target_feature(enable = "avx512f")]
11340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11341#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
    unsafe {
        // `src` and `k` are forwarded so the instruction performs the
        // merge-masking itself.
        transmute(vcvtps2pd(
            a.as_f32x8(),
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
11352
11353/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11354///
11355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_pd&expand=1771)
11356#[inline]
11357#[target_feature(enable = "avx512f")]
11358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11359#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
    unsafe {
        // Zeroed merge source + caller's mask implements zero-masking: lanes
        // with a clear mask bit become 0.0.
        transmute(vcvtps2pd(
            a.as_f32x8(),
            f64x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
11370
11371/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
11372///
11373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpslo_pd&expand=1784)
11374#[inline]
11375#[target_feature(enable = "avx512f")]
11376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11377#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d {
    unsafe {
        // Take the lower 8 f32 lanes of the 512-bit input, then widen them to
        // 8 f64 lanes with the unmasked conversion (all-ones mask, zero src).
        transmute(vcvtps2pd(
            _mm512_castps512_ps256(v2).as_f32x8(),
            f64x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
11388
11389/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11390///
11391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpslo_pd&expand=1785)
11392#[inline]
11393#[target_feature(enable = "avx512f")]
11394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11395#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d {
    unsafe {
        // Take the lower 8 f32 lanes of the 512-bit input and widen them;
        // `src` and `k` are forwarded so the instruction merge-masks itself.
        transmute(vcvtps2pd(
            _mm512_castps512_ps256(v2).as_f32x8(),
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
11406
11407/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
11408///
11409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_ps&expand=1712)
11410#[inline]
11411#[target_feature(enable = "avx512f")]
11412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11413#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm512_cvtpd_ps(a: __m512d) -> __m256 {
    unsafe {
        // Narrow 8 f64 lanes to 8 f32 lanes, rounding per the current MXCSR
        // mode; all-ones mask with a zeroed merge source = unmasked op.
        transmute(vcvtpd2ps(
            a.as_f64x8(),
            f32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
11424
11425/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11426///
11427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_ps&expand=1713)
11428#[inline]
11429#[target_feature(enable = "avx512f")]
11430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11431#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 {
    unsafe {
        // `src` and `k` are forwarded so the instruction performs the
        // merge-masking itself; rounding follows the current MXCSR mode.
        transmute(vcvtpd2ps(
            a.as_f64x8(),
            src.as_f32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
11442
11443/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11444///
11445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_ps&expand=1714)
11446#[inline]
11447#[target_feature(enable = "avx512f")]
11448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11449#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
    unsafe {
        // Zeroed merge source + caller's mask implements zero-masking: lanes
        // with a clear mask bit become 0.0.
        transmute(vcvtpd2ps(
            a.as_f64x8(),
            f32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
11460
11461/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11462///
11463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_ps&expand=1710)
11464#[inline]
11465#[target_feature(enable = "avx512f,avx512vl")]
11466#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11467#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11468pub fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 {
11469    unsafe {
11470        let convert = _mm256_cvtpd_ps(a);
11471        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
11472    }
11473}
11474
11475/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11476///
11477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_ps&expand=1711)
11478#[inline]
11479#[target_feature(enable = "avx512f,avx512vl")]
11480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11481#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11482pub fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 {
11483    unsafe {
11484        let convert = _mm256_cvtpd_ps(a);
11485        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
11486    }
11487}
11488
11489/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11490///
11491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_ps&expand=1707)
11492#[inline]
11493#[target_feature(enable = "avx512f,avx512vl")]
11494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11495#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11496pub fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 {
11497    unsafe { vcvtpd2ps128(a.as_f64x2(), src.as_f32x4(), k).as_m128() }
11498}
11499
11500/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11501///
11502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_ps&expand=1708)
11503#[inline]
11504#[target_feature(enable = "avx512f,avx512vl")]
11505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11506#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11507pub fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 {
11508    unsafe {
11509        let convert = _mm_cvtpd_ps(a);
11510        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
11511    }
11512}
11513
11514/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
11515///
11516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi32&expand=1675)
11517#[inline]
11518#[target_feature(enable = "avx512f")]
11519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11520#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i {
    unsafe {
        // Convert 8 f64 lanes to 8 i32 lanes with the current MXCSR rounding
        // mode; all-ones mask with a zeroed merge source = unmasked op.
        transmute(vcvtpd2dq(
            a.as_f64x8(),
            i32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
11531
11532/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11533///
11534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi32&expand=1676)
11535#[inline]
11536#[target_feature(enable = "avx512f")]
11537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11538#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        // `src` and `k` are forwarded so the instruction performs the
        // merge-masking itself; rounding follows the current MXCSR mode.
        transmute(vcvtpd2dq(
            a.as_f64x8(),
            src.as_i32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
11549
11550/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11551///
11552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi32&expand=1677)
11553#[inline]
11554#[target_feature(enable = "avx512f")]
11555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11556#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        // Zeroed merge source + caller's mask implements zero-masking: lanes
        // with a clear mask bit become 0.
        transmute(vcvtpd2dq(
            a.as_f64x8(),
            i32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
11567
11568/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11569///
11570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi32&expand=1673)
11571#[inline]
11572#[target_feature(enable = "avx512f,avx512vl")]
11573#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11574#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11575pub fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11576    unsafe {
11577        let convert = _mm256_cvtpd_epi32(a);
11578        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
11579    }
11580}
11581
11582/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11583///
11584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi32&expand=1674)
11585#[inline]
11586#[target_feature(enable = "avx512f,avx512vl")]
11587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11588#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11589pub fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
11590    unsafe {
11591        let convert = _mm256_cvtpd_epi32(a);
11592        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
11593    }
11594}
11595
11596/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11597///
11598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi32&expand=1670)
11599#[inline]
11600#[target_feature(enable = "avx512f,avx512vl")]
11601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11602#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11603pub fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11604    unsafe { vcvtpd2dq128(a.as_f64x2(), src.as_i32x4(), k).as_m128i() }
11605}
11606
11607/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11608///
11609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi32&expand=1671)
11610#[inline]
11611#[target_feature(enable = "avx512f,avx512vl")]
11612#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11613#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11614pub fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
11615    unsafe {
11616        let convert = _mm_cvtpd_epi32(a);
11617        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
11618    }
11619}
11620
11621/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11622///
11623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu32&expand=1693)
11624#[inline]
11625#[target_feature(enable = "avx512f")]
11626#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11627#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i {
    unsafe {
        // Convert 8 f64 lanes to 8 u32 lanes with the current MXCSR rounding
        // mode; all-ones mask with a zeroed merge source = unmasked op.
        transmute(vcvtpd2udq(
            a.as_f64x8(),
            u32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
11638
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu32&expand=1694)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        // Masked lanes (bit clear in `k`) take the corresponding lane of `src`.
        transmute(vcvtpd2udq(
            a.as_f64x8(),
            src.as_u32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
11656
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu32&expand=1695)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        // Zero src vector + mask `k`: lanes with a clear mask bit come out zero.
        transmute(vcvtpd2udq(
            a.as_f64x8(),
            u32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
11674
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu32&expand=1690)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i {
    // All-ones mask 0b11111111: convert every lane (zero src is never selected).
    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, 0b11111111)) }
}
11685
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu32&expand=1691)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    // Masked lanes (bit clear in `k`) take the corresponding lane of `src`.
    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), k)) }
}
11696
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu32&expand=1692)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
    // Zero src vector + mask `k`: lanes with a clear mask bit come out zero.
    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, k)) }
}
11707
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu32&expand=1687)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm_cvtpd_epu32(a: __m128d) -> __m128i {
    // All-ones mask 0b11111111: convert every lane (zero src is never selected).
    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, 0b11111111)) }
}
11718
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu32&expand=1688)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    // Masked lanes (bit clear in `k`) take the corresponding lane of `src`.
    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), k)) }
}
11729
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu32&expand=1689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
    // Zero src vector + mask `k`: lanes with a clear mask bit come out zero.
    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, k)) }
}
11740
/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_pslo&expand=1715)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
    unsafe {
        // Convert all 8 f64 lanes to f32 (all-ones mask 0b11111111).
        let r: f32x8 = vcvtpd2ps(
            v2.as_f64x8(),
            f32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        );
        // Widen to 16 lanes: indices 0-7 select the converted values; index 8
        // is the first lane of the second operand (f32x8::ZERO), zero-filling
        // the upper half.
        simd_shuffle!(
            r,
            f32x8::ZERO,
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
        )
    }
}
11763
/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_pslo&expand=1716)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
    unsafe {
        // Convert with writemask: masked-off lanes take the corresponding lane
        // of the lower half of `src`.
        let r: f32x8 = vcvtpd2ps(
            v2.as_f64x8(),
            _mm512_castps512_ps256(src).as_f32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
        // Widen to 16 lanes; index 8 (first lane of f32x8::ZERO) zero-fills
        // the upper half.
        simd_shuffle!(
            r,
            f32x8::ZERO,
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
        )
    }
}
11786
/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi32&expand=1535)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_i8x16();
        // simd_cast on i8x16 -> i32x16 sign-extends each lane; the turbofish
        // pins the intermediate vector type.
        transmute::<i32x16, _>(simd_cast(a))
    }
}
11801
/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi32&expand=1536)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        // Sign-extend all lanes, then blend: set mask bit -> converted lane,
        // clear -> corresponding lane of `src`.
        let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
    }
}
11816
/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi32&expand=1537)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        // Sign-extend all lanes, then zero those with a clear mask bit.
        let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
    }
}
11831
/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi32&expand=1533)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        // Sign-extend all lanes, then blend: set mask bit -> converted lane,
        // clear -> corresponding lane of `src`.
        let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
    }
}
11846
/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi32&expand=1534)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        // Sign-extend all lanes, then zero those with a clear mask bit.
        let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
    }
}
11861
/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi32&expand=1530)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        // Sign-extend all lanes, then blend: set mask bit -> converted lane,
        // clear -> corresponding lane of `src`.
        let convert = _mm_cvtepi8_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
    }
}
11876
/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi32&expand=1531)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        // Sign-extend all lanes, then zero those with a clear mask bit.
        let convert = _mm_cvtepi8_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
    }
}
11891
/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi64&expand=1544)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_i8x16();
        // Take only the low 8 bytes, then sign-extend each to i64.
        let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i64x8, _>(simd_cast(v64))
    }
}
11907
/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi64&expand=1545)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        // Sign-extend all lanes, then blend: set mask bit -> converted lane,
        // clear -> corresponding lane of `src`.
        let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
    }
}
11922
/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi64&expand=1546)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        // Sign-extend all lanes, then zero those with a clear mask bit.
        let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}
11937
/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi64&expand=1542)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        // Sign-extend all lanes, then blend: set mask bit -> converted lane,
        // clear -> corresponding lane of `src`.
        let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}
11952
/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi64&expand=1543)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        // Sign-extend all lanes, then zero those with a clear mask bit.
        let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}
11967
/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi64&expand=1539)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        // Sign-extend all lanes, then blend: set mask bit -> converted lane,
        // clear -> corresponding lane of `src`.
        let convert = _mm_cvtepi8_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}
11982
/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi64&expand=1540)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        // Sign-extend all lanes, then zero those with a clear mask bit.
        let convert = _mm_cvtepi8_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}
11997
/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi32&expand=1621)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
    unsafe {
        // Reinterpret as unsigned so simd_cast zero-extends each lane to 32 bits.
        let a = a.as_u8x16();
        transmute::<i32x16, _>(simd_cast(a))
    }
}
12012
/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi32&expand=1622)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        // Zero-extend all lanes, then blend: set mask bit -> converted lane,
        // clear -> corresponding lane of `src`.
        let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
    }
}
12027
/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi32&expand=1623)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        // Zero-extend all lanes, then zero those with a clear mask bit.
        let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
    }
}
12042
/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi32&expand=1619)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        // Zero-extend all lanes, then blend: set mask bit -> converted lane,
        // clear -> corresponding lane of `src`.
        let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
    }
}
12057
/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi32&expand=1620)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        // Zero-extend all lanes, then zero those with a clear mask bit.
        let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
    }
}
12072
/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi32&expand=1616)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        // Zero-extend all lanes, then blend: set mask bit -> converted lane,
        // clear -> corresponding lane of `src`.
        let convert = _mm_cvtepu8_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
    }
}
12087
/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi32&expand=1617)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        // Zero-extend all lanes, then zero those with a clear mask bit.
        let convert = _mm_cvtepu8_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
    }
}
12102
/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi64&expand=1630)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_u8x16();
        // Take only the low 8 bytes, then zero-extend each to 64 bits.
        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i64x8, _>(simd_cast(v64))
    }
}
12118
/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi64&expand=1631)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        // Zero-extend all lanes, then blend: set mask bit -> converted lane,
        // clear -> corresponding lane of `src`.
        let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
    }
}
12133
/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi64&expand=1632)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        // Zero-extend all lanes, then zero those with a clear mask bit.
        let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}
12148
/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi64&expand=1628)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        // Zero-extend all lanes, then blend: set mask bit -> converted lane,
        // clear -> corresponding lane of `src`.
        let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}
12163
/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi64&expand=1629)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        // Zero-extend all lanes, then zero those with a clear mask bit.
        let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}
12178
/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi64&expand=1625)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        // Zero-extend all lanes, then blend: set mask bit -> converted lane,
        // clear -> corresponding lane of `src`.
        let convert = _mm_cvtepu8_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}
12193
/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi64&expand=1626)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        // Zero-extend all lanes, then zero those with a clear mask bit.
        let convert = _mm_cvtepu8_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}
12208
12209/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst.
12210///
12211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi32&expand=1389)
12212#[inline]
12213#[target_feature(enable = "avx512f")]
12214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12215#[cfg_attr(test, assert_instr(vpmovsxwd))]
12216#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12217pub const fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
12218    unsafe {
12219        let a = a.as_i16x16();
12220        transmute::<i32x16, _>(simd_cast(a))
12221    }
12222}
12223
12224/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12225///
12226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi32&expand=1390)
12227#[inline]
12228#[target_feature(enable = "avx512f")]
12229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12230#[cfg_attr(test, assert_instr(vpmovsxwd))]
12231#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12232pub const fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
12233    unsafe {
12234        let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
12235        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
12236    }
12237}
12238
12239/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12240///
12241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi32&expand=1391)
12242#[inline]
12243#[target_feature(enable = "avx512f")]
12244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12245#[cfg_attr(test, assert_instr(vpmovsxwd))]
12246#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12247pub const fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
12248    unsafe {
12249        let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
12250        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
12251    }
12252}
12253
12254/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12255///
12256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi32&expand=1387)
12257#[inline]
12258#[target_feature(enable = "avx512f,avx512vl")]
12259#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12260#[cfg_attr(test, assert_instr(vpmovsxwd))]
12261#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12262pub const fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12263    unsafe {
12264        let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
12265        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
12266    }
12267}
12268
12269/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12270///
12271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi32&expand=1388)
12272#[inline]
12273#[target_feature(enable = "avx512f,avx512vl")]
12274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12275#[cfg_attr(test, assert_instr(vpmovsxwd))]
12276#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12277pub const fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i {
12278    unsafe {
12279        let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
12280        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
12281    }
12282}
12283
12284/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12285///
12286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi32&expand=1384)
12287#[inline]
12288#[target_feature(enable = "avx512f,avx512vl")]
12289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12290#[cfg_attr(test, assert_instr(vpmovsxwd))]
12291#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12292pub const fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12293    unsafe {
12294        let convert = _mm_cvtepi16_epi32(a).as_i32x4();
12295        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
12296    }
12297}
12298
12299/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12300///
12301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi32&expand=1385)
12302#[inline]
12303#[target_feature(enable = "avx512f,avx512vl")]
12304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12305#[cfg_attr(test, assert_instr(vpmovsxwd))]
12306#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12307pub const fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i {
12308    unsafe {
12309        let convert = _mm_cvtepi16_epi32(a).as_i32x4();
12310        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
12311    }
12312}
12313
12314/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12315///
12316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi64&expand=1398)
12317#[inline]
12318#[target_feature(enable = "avx512f")]
12319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12320#[cfg_attr(test, assert_instr(vpmovsxwq))]
12321#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12322pub const fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
12323    unsafe {
12324        let a = a.as_i16x8();
12325        transmute::<i64x8, _>(simd_cast(a))
12326    }
12327}
12328
12329/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12330///
12331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi64&expand=1399)
12332#[inline]
12333#[target_feature(enable = "avx512f")]
12334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12335#[cfg_attr(test, assert_instr(vpmovsxwq))]
12336#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12337pub const fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12338    unsafe {
12339        let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
12340        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12341    }
12342}
12343
12344/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12345///
12346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi64&expand=1400)
12347#[inline]
12348#[target_feature(enable = "avx512f")]
12349#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12350#[cfg_attr(test, assert_instr(vpmovsxwq))]
12351#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12352pub const fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12353    unsafe {
12354        let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
12355        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12356    }
12357}
12358
12359/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12360///
12361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi64&expand=1396)
12362#[inline]
12363#[target_feature(enable = "avx512f,avx512vl")]
12364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12365#[cfg_attr(test, assert_instr(vpmovsxwq))]
12366#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12367pub const fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12368    unsafe {
12369        let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
12370        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12371    }
12372}
12373
12374/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12375///
12376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi64&expand=1397)
12377#[inline]
12378#[target_feature(enable = "avx512f,avx512vl")]
12379#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12380#[cfg_attr(test, assert_instr(vpmovsxwq))]
12381#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12382pub const fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12383    unsafe {
12384        let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
12385        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12386    }
12387}
12388
12389/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12390///
12391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi64&expand=1393)
12392#[inline]
12393#[target_feature(enable = "avx512f,avx512vl")]
12394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12395#[cfg_attr(test, assert_instr(vpmovsxwq))]
12396#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12397pub const fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12398    unsafe {
12399        let convert = _mm_cvtepi16_epi64(a).as_i64x2();
12400        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12401    }
12402}
12403
12404/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12405///
12406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi64&expand=1394)
12407#[inline]
12408#[target_feature(enable = "avx512f,avx512vl")]
12409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12410#[cfg_attr(test, assert_instr(vpmovsxwq))]
12411#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12412pub const fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12413    unsafe {
12414        let convert = _mm_cvtepi16_epi64(a).as_i64x2();
12415        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12416    }
12417}
12418
12419/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst.
12420///
12421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi32&expand=1553)
12422#[inline]
12423#[target_feature(enable = "avx512f")]
12424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12425#[cfg_attr(test, assert_instr(vpmovzxwd))]
12426#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12427pub const fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
12428    unsafe {
12429        let a = a.as_u16x16();
12430        transmute::<i32x16, _>(simd_cast(a))
12431    }
12432}
12433
12434/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12435///
12436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi32&expand=1554)
12437#[inline]
12438#[target_feature(enable = "avx512f")]
12439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12440#[cfg_attr(test, assert_instr(vpmovzxwd))]
12441#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12442pub const fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
12443    unsafe {
12444        let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
12445        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
12446    }
12447}
12448
12449/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12450///
12451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi32&expand=1555)
12452#[inline]
12453#[target_feature(enable = "avx512f")]
12454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12455#[cfg_attr(test, assert_instr(vpmovzxwd))]
12456#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12457pub const fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
12458    unsafe {
12459        let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
12460        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
12461    }
12462}
12463
12464/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12465///
12466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi32&expand=1551)
12467#[inline]
12468#[target_feature(enable = "avx512f,avx512vl")]
12469#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12470#[cfg_attr(test, assert_instr(vpmovzxwd))]
12471#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12472pub const fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12473    unsafe {
12474        let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
12475        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
12476    }
12477}
12478
12479/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12480///
12481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi32&expand=1552)
12482#[inline]
12483#[target_feature(enable = "avx512f,avx512vl")]
12484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12485#[cfg_attr(test, assert_instr(vpmovzxwd))]
12486#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12487pub const fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
12488    unsafe {
12489        let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
12490        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
12491    }
12492}
12493
12494/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12495///
12496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi32&expand=1548)
12497#[inline]
12498#[target_feature(enable = "avx512f,avx512vl")]
12499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12500#[cfg_attr(test, assert_instr(vpmovzxwd))]
12501#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12502pub const fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12503    unsafe {
12504        let convert = _mm_cvtepu16_epi32(a).as_i32x4();
12505        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
12506    }
12507}
12508
12509/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12510///
12511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi32&expand=1549)
12512#[inline]
12513#[target_feature(enable = "avx512f,avx512vl")]
12514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12515#[cfg_attr(test, assert_instr(vpmovzxwd))]
12516#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12517pub const fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
12518    unsafe {
12519        let convert = _mm_cvtepu16_epi32(a).as_i32x4();
12520        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
12521    }
12522}
12523
12524/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12525///
12526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi64&expand=1562)
12527#[inline]
12528#[target_feature(enable = "avx512f")]
12529#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12530#[cfg_attr(test, assert_instr(vpmovzxwq))]
12531#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12532pub const fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
12533    unsafe {
12534        let a = a.as_u16x8();
12535        transmute::<i64x8, _>(simd_cast(a))
12536    }
12537}
12538
12539/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12540///
12541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi64&expand=1563)
12542#[inline]
12543#[target_feature(enable = "avx512f")]
12544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12545#[cfg_attr(test, assert_instr(vpmovzxwq))]
12546#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12547pub const fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12548    unsafe {
12549        let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
12550        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12551    }
12552}
12553
12554/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12555///
12556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi64&expand=1564)
12557#[inline]
12558#[target_feature(enable = "avx512f")]
12559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12560#[cfg_attr(test, assert_instr(vpmovzxwq))]
12561#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12562pub const fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12563    unsafe {
12564        let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
12565        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12566    }
12567}
12568
12569/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12570///
12571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi64&expand=1560)
12572#[inline]
12573#[target_feature(enable = "avx512f,avx512vl")]
12574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12575#[cfg_attr(test, assert_instr(vpmovzxwq))]
12576#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12577pub const fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12578    unsafe {
12579        let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
12580        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12581    }
12582}
12583
12584/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12585///
12586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi64&expand=1561)
12587#[inline]
12588#[target_feature(enable = "avx512f,avx512vl")]
12589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12590#[cfg_attr(test, assert_instr(vpmovzxwq))]
12591#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12592pub const fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12593    unsafe {
12594        let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
12595        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12596    }
12597}
12598
12599/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12600///
12601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi64&expand=1557)
12602#[inline]
12603#[target_feature(enable = "avx512f,avx512vl")]
12604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12605#[cfg_attr(test, assert_instr(vpmovzxwq))]
12606#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12607pub const fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12608    unsafe {
12609        let convert = _mm_cvtepu16_epi64(a).as_i64x2();
12610        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12611    }
12612}
12613
12614/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12615///
12616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi64&expand=1558)
12617#[inline]
12618#[target_feature(enable = "avx512f,avx512vl")]
12619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12620#[cfg_attr(test, assert_instr(vpmovzxwq))]
12621#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12622pub const fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12623    unsafe {
12624        let convert = _mm_cvtepu16_epi64(a).as_i64x2();
12625        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12626    }
12627}
12628
12629/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12630///
12631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi64&expand=1428)
12632#[inline]
12633#[target_feature(enable = "avx512f")]
12634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12635#[cfg_attr(test, assert_instr(vpmovsxdq))]
12636#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12637pub const fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
12638    unsafe {
12639        let a = a.as_i32x8();
12640        transmute::<i64x8, _>(simd_cast(a))
12641    }
12642}
12643
12644/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12645///
12646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi64&expand=1429)
12647#[inline]
12648#[target_feature(enable = "avx512f")]
12649#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12650#[cfg_attr(test, assert_instr(vpmovsxdq))]
12651#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12652pub const fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12653    unsafe {
12654        let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
12655        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12656    }
12657}
12658
12659/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12660///
12661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi64&expand=1430)
12662#[inline]
12663#[target_feature(enable = "avx512f")]
12664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12665#[cfg_attr(test, assert_instr(vpmovsxdq))]
12666#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12667pub const fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12668    unsafe {
12669        let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
12670        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12671    }
12672}
12673
12674/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12675///
12676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi64&expand=1426)
12677#[inline]
12678#[target_feature(enable = "avx512f,avx512vl")]
12679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12680#[cfg_attr(test, assert_instr(vpmovsxdq))]
12681#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12682pub const fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12683    unsafe {
12684        let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
12685        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12686    }
12687}
12688
12689/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12690///
12691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi64&expand=1427)
12692#[inline]
12693#[target_feature(enable = "avx512f,avx512vl")]
12694#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12695#[cfg_attr(test, assert_instr(vpmovsxdq))]
12696#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12697pub const fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12698    unsafe {
12699        let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
12700        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12701    }
12702}
12703
12704/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12705///
12706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi64&expand=1423)
12707#[inline]
12708#[target_feature(enable = "avx512f,avx512vl")]
12709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12710#[cfg_attr(test, assert_instr(vpmovsxdq))]
12711#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12712pub const fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12713    unsafe {
12714        let convert = _mm_cvtepi32_epi64(a).as_i64x2();
12715        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12716    }
12717}
12718
12719/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12720///
12721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi64&expand=1424)
12722#[inline]
12723#[target_feature(enable = "avx512f,avx512vl")]
12724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12725#[cfg_attr(test, assert_instr(vpmovsxdq))]
12726#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12727pub const fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12728    unsafe {
12729        let convert = _mm_cvtepi32_epi64(a).as_i64x2();
12730        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12731    }
12732}
12733
12734/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12735///
12736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_epi64&expand=1571)
12737#[inline]
12738#[target_feature(enable = "avx512f")]
12739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12740#[cfg_attr(test, assert_instr(vpmovzxdq))]
12741#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12742pub const fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
12743    unsafe {
12744        let a = a.as_u32x8();
12745        transmute::<i64x8, _>(simd_cast(a))
12746    }
12747}
12748
12749/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12750///
12751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_epi64&expand=1572)
12752#[inline]
12753#[target_feature(enable = "avx512f")]
12754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12755#[cfg_attr(test, assert_instr(vpmovzxdq))]
12756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12757pub const fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12758    unsafe {
12759        let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
12760        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12761    }
12762}
12763
12764/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12765///
12766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_epi64&expand=1573)
12767#[inline]
12768#[target_feature(enable = "avx512f")]
12769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12770#[cfg_attr(test, assert_instr(vpmovzxdq))]
12771#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12772pub const fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12773    unsafe {
12774        let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
12775        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12776    }
12777}
12778
12779/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12780///
12781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_epi64&expand=1569)
12782#[inline]
12783#[target_feature(enable = "avx512f,avx512vl")]
12784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12785#[cfg_attr(test, assert_instr(vpmovzxdq))]
12786#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12787pub const fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12788    unsafe {
12789        let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
12790        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12791    }
12792}
12793
12794/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12795///
12796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_epi64&expand=1570)
12797#[inline]
12798#[target_feature(enable = "avx512f,avx512vl")]
12799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12800#[cfg_attr(test, assert_instr(vpmovzxdq))]
12801#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12802pub const fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12803    unsafe {
12804        let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
12805        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12806    }
12807}
12808
12809/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12810///
12811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_epi64&expand=1566)
12812#[inline]
12813#[target_feature(enable = "avx512f,avx512vl")]
12814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12815#[cfg_attr(test, assert_instr(vpmovzxdq))]
12816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12817pub const fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12818    unsafe {
12819        let convert = _mm_cvtepu32_epi64(a).as_i64x2();
12820        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12821    }
12822}
12823
12824/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12825///
12826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_epi64&expand=1567)
12827#[inline]
12828#[target_feature(enable = "avx512f,avx512vl")]
12829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12830#[cfg_attr(test, assert_instr(vpmovzxdq))]
12831#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12832pub const fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12833    unsafe {
12834        let convert = _mm_cvtepu32_epi64(a).as_i64x2();
12835        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12836    }
12837}
12838
12839/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
12840///
12841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_ps&expand=1455)
12842#[inline]
12843#[target_feature(enable = "avx512f")]
12844#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12845#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12846#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12847pub const fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
12848    unsafe {
12849        let a = a.as_i32x16();
12850        transmute::<f32x16, _>(simd_cast(a))
12851    }
12852}
12853
12854/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12855///
12856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_ps&expand=1456)
12857#[inline]
12858#[target_feature(enable = "avx512f")]
12859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12860#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12861#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12862pub const fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
12863    unsafe {
12864        let convert = _mm512_cvtepi32_ps(a).as_f32x16();
12865        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
12866    }
12867}
12868
12869/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12870///
12871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_ps&expand=1457)
12872#[inline]
12873#[target_feature(enable = "avx512f")]
12874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12875#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12876#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12877pub const fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
12878    unsafe {
12879        let convert = _mm512_cvtepi32_ps(a).as_f32x16();
12880        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
12881    }
12882}
12883
12884/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12885///
12886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_ps&expand=1453)
12887#[inline]
12888#[target_feature(enable = "avx512f,avx512vl")]
12889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12890#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12891#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12892pub const fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 {
12893    unsafe {
12894        let convert = _mm256_cvtepi32_ps(a).as_f32x8();
12895        transmute(simd_select_bitmask(k, convert, src.as_f32x8()))
12896    }
12897}
12898
12899/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12900///
12901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_ps&expand=1454)
12902#[inline]
12903#[target_feature(enable = "avx512f,avx512vl")]
12904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12905#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12907pub const fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 {
12908    unsafe {
12909        let convert = _mm256_cvtepi32_ps(a).as_f32x8();
12910        transmute(simd_select_bitmask(k, convert, f32x8::ZERO))
12911    }
12912}
12913
12914/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12915///
12916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_ps&expand=1450)
12917#[inline]
12918#[target_feature(enable = "avx512f,avx512vl")]
12919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12920#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12921#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12922pub const fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
12923    unsafe {
12924        let convert = _mm_cvtepi32_ps(a).as_f32x4();
12925        transmute(simd_select_bitmask(k, convert, src.as_f32x4()))
12926    }
12927}
12928
12929/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12930///
12931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_ps&expand=1451)
12932#[inline]
12933#[target_feature(enable = "avx512f,avx512vl")]
12934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12935#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12936#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12937pub const fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 {
12938    unsafe {
12939        let convert = _mm_cvtepi32_ps(a).as_f32x4();
12940        transmute(simd_select_bitmask(k, convert, f32x4::ZERO))
12941    }
12942}
12943
12944/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12945///
12946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_pd&expand=1446)
12947#[inline]
12948#[target_feature(enable = "avx512f")]
12949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12950#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12951#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12952pub const fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
12953    unsafe {
12954        let a = a.as_i32x8();
12955        transmute::<f64x8, _>(simd_cast(a))
12956    }
12957}
12958
12959/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12960///
12961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_pd&expand=1447)
12962#[inline]
12963#[target_feature(enable = "avx512f")]
12964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12965#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12966#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12967pub const fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
12968    unsafe {
12969        let convert = _mm512_cvtepi32_pd(a).as_f64x8();
12970        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12971    }
12972}
12973
12974/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12975///
12976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_pd&expand=1448)
12977#[inline]
12978#[target_feature(enable = "avx512f")]
12979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12980#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12981#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12982pub const fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
12983    unsafe {
12984        let convert = _mm512_cvtepi32_pd(a).as_f64x8();
12985        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
12986    }
12987}
12988
12989/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12990///
12991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_pd&expand=1444)
12992#[inline]
12993#[target_feature(enable = "avx512f,avx512vl")]
12994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12995#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12996#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12997pub const fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
12998    unsafe {
12999        let convert = _mm256_cvtepi32_pd(a).as_f64x4();
13000        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
13001    }
13002}
13003
13004/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13005///
13006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_pd&expand=1445)
13007#[inline]
13008#[target_feature(enable = "avx512f,avx512vl")]
13009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13010#[cfg_attr(test, assert_instr(vcvtdq2pd))]
13011#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13012pub const fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d {
13013    unsafe {
13014        let convert = _mm256_cvtepi32_pd(a).as_f64x4();
13015        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
13016    }
13017}
13018
13019/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13020///
13021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_pd&expand=1441)
13022#[inline]
13023#[target_feature(enable = "avx512f,avx512vl")]
13024#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13025#[cfg_attr(test, assert_instr(vcvtdq2pd))]
13026#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13027pub const fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
13028    unsafe {
13029        let convert = _mm_cvtepi32_pd(a).as_f64x2();
13030        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
13031    }
13032}
13033
13034/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13035///
13036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_pd&expand=1442)
13037#[inline]
13038#[target_feature(enable = "avx512f,avx512vl")]
13039#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13040#[cfg_attr(test, assert_instr(vcvtdq2pd))]
13041#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13042pub const fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d {
13043    unsafe {
13044        let convert = _mm_cvtepi32_pd(a).as_f64x2();
13045        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
13046    }
13047}
13048
13049/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
13050///
13051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_ps&expand=1583)
13052#[inline]
13053#[target_feature(enable = "avx512f")]
13054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13055#[cfg_attr(test, assert_instr(vcvtudq2ps))]
13056#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13057pub const fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
13058    unsafe {
13059        let a = a.as_u32x16();
13060        transmute::<f32x16, _>(simd_cast(a))
13061    }
13062}
13063
13064/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13065///
13066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_ps&expand=1584)
13067#[inline]
13068#[target_feature(enable = "avx512f")]
13069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13070#[cfg_attr(test, assert_instr(vcvtudq2ps))]
13071#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13072pub const fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
13073    unsafe {
13074        let convert = _mm512_cvtepu32_ps(a).as_f32x16();
13075        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
13076    }
13077}
13078
13079/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13080///
13081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_ps&expand=1585)
13082#[inline]
13083#[target_feature(enable = "avx512f")]
13084#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13085#[cfg_attr(test, assert_instr(vcvtudq2ps))]
13086#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13087pub const fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
13088    unsafe {
13089        let convert = _mm512_cvtepu32_ps(a).as_f32x16();
13090        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
13091    }
13092}
13093
13094/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
13095///
13096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_pd&expand=1580)
13097#[inline]
13098#[target_feature(enable = "avx512f")]
13099#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13100#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13101#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13102pub const fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
13103    unsafe {
13104        let a = a.as_u32x8();
13105        transmute::<f64x8, _>(simd_cast(a))
13106    }
13107}
13108
13109/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13110///
13111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_pd&expand=1581)
13112#[inline]
13113#[target_feature(enable = "avx512f")]
13114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13115#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13116#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13117pub const fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
13118    unsafe {
13119        let convert = _mm512_cvtepu32_pd(a).as_f64x8();
13120        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
13121    }
13122}
13123
13124/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13125///
13126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_pd&expand=1582)
13127#[inline]
13128#[target_feature(enable = "avx512f")]
13129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13130#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13131#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13132pub const fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
13133    unsafe {
13134        let convert = _mm512_cvtepu32_pd(a).as_f64x8();
13135        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
13136    }
13137}
13138
13139/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
13140///
13141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_pd&expand=1577)
13142#[inline]
13143#[target_feature(enable = "avx512f,avx512vl")]
13144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13145#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13146#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13147pub const fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
13148    unsafe {
13149        let a = a.as_u32x4();
13150        transmute::<f64x4, _>(simd_cast(a))
13151    }
13152}
13153
13154/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13155///
13156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_pd&expand=1578)
13157#[inline]
13158#[target_feature(enable = "avx512f,avx512vl")]
13159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13160#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13161#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13162pub const fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
13163    unsafe {
13164        let convert = _mm256_cvtepu32_pd(a).as_f64x4();
13165        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
13166    }
13167}
13168
13169/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13170///
13171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_pd&expand=1579)
13172#[inline]
13173#[target_feature(enable = "avx512f,avx512vl")]
13174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13175#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13176#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13177pub const fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
13178    unsafe {
13179        let convert = _mm256_cvtepu32_pd(a).as_f64x4();
13180        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
13181    }
13182}
13183
13184/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
13185///
13186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_pd&expand=1574)
13187#[inline]
13188#[target_feature(enable = "avx512f,avx512vl")]
13189#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13190#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13191#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13192pub const fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
13193    unsafe {
13194        let a = a.as_u32x4();
13195        let u64: u32x2 = simd_shuffle!(a, a, [0, 1]);
13196        transmute::<f64x2, _>(simd_cast(u64))
13197    }
13198}
13199
13200/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13201///
13202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_pd&expand=1575)
13203#[inline]
13204#[target_feature(enable = "avx512f,avx512vl")]
13205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13206#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13207#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13208pub const fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
13209    unsafe {
13210        let convert = _mm_cvtepu32_pd(a).as_f64x2();
13211        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
13212    }
13213}
13214
13215/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13216///
13217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_pd&expand=1576)
13218#[inline]
13219#[target_feature(enable = "avx512f,avx512vl")]
13220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13221#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13222#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13223pub const fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
13224    unsafe {
13225        let convert = _mm_cvtepu32_pd(a).as_f64x2();
13226        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
13227    }
13228}
13229
13230/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
13231///
13232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32lo_pd&expand=1464)
13233#[inline]
13234#[target_feature(enable = "avx512f")]
13235#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13236#[cfg_attr(test, assert_instr(vcvtdq2pd))]
13237#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13238pub const fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
13239    unsafe {
13240        let v2 = v2.as_i32x16();
13241        let v256: i32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
13242        transmute::<f64x8, _>(simd_cast(v256))
13243    }
13244}
13245
13246/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13247///
13248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32lo_pd&expand=1465)
13249#[inline]
13250#[target_feature(enable = "avx512f")]
13251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13252#[cfg_attr(test, assert_instr(vcvtdq2pd))]
13253#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13254pub const fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
13255    unsafe {
13256        let convert = _mm512_cvtepi32lo_pd(v2).as_f64x8();
13257        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
13258    }
13259}
13260
13261/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
13262///
13263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32lo_pd&expand=1586)
13264#[inline]
13265#[target_feature(enable = "avx512f")]
13266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13267#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13268#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13269pub const fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
13270    unsafe {
13271        let v2 = v2.as_u32x16();
13272        let v256: u32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
13273        transmute::<f64x8, _>(simd_cast(v256))
13274    }
13275}
13276
13277/// Performs element-by-element conversion of the lower half of 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13278///
13279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32lo_pd&expand=1587)
13280#[inline]
13281#[target_feature(enable = "avx512f")]
13282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13283#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13284#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13285pub const fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
13286    unsafe {
13287        let convert = _mm512_cvtepu32lo_pd(v2).as_f64x8();
13288        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
13289    }
13290}
13291
13292/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13293///
13294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi16&expand=1419)
13295#[inline]
13296#[target_feature(enable = "avx512f")]
13297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13298#[cfg_attr(test, assert_instr(vpmovdw))]
13299#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13300pub const fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
13301    unsafe {
13302        let a = a.as_i32x16();
13303        transmute::<i16x16, _>(simd_cast(a))
13304    }
13305}
13306
13307/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13308///
13309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi16&expand=1420)
13310#[inline]
13311#[target_feature(enable = "avx512f")]
13312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13313#[cfg_attr(test, assert_instr(vpmovdw))]
13314#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13315pub const fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
13316    unsafe {
13317        let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
13318        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
13319    }
13320}
13321
13322/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13323///
13324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi16&expand=1421)
13325#[inline]
13326#[target_feature(enable = "avx512f")]
13327#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13328#[cfg_attr(test, assert_instr(vpmovdw))]
13329#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13330pub const fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
13331    unsafe {
13332        let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
13333        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
13334    }
13335}
13336
13337/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13338///
13339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi16&expand=1416)
13340#[inline]
13341#[target_feature(enable = "avx512f,avx512vl")]
13342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13343#[cfg_attr(test, assert_instr(vpmovdw))]
13344#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13345pub const fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
13346    unsafe {
13347        let a = a.as_i32x8();
13348        transmute::<i16x8, _>(simd_cast(a))
13349    }
13350}
13351
13352/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13353///
13354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi16&expand=1417)
13355#[inline]
13356#[target_feature(enable = "avx512f,avx512vl")]
13357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13358#[cfg_attr(test, assert_instr(vpmovdw))]
13359#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13360pub const fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13361    unsafe {
13362        let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
13363        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
13364    }
13365}
13366
13367/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13368///
13369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi16&expand=1418)
13370#[inline]
13371#[target_feature(enable = "avx512f,avx512vl")]
13372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13373#[cfg_attr(test, assert_instr(vpmovdw))]
13374#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13375pub const fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
13376    unsafe {
13377        let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
13378        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
13379    }
13380}
13381
13382/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13383///
13384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi16&expand=1413)
13385#[inline]
13386#[target_feature(enable = "avx512f,avx512vl")]
13387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13388#[cfg_attr(test, assert_instr(vpmovdw))]
13389pub fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i {
13390    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
13391}
13392
13393/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13394///
13395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi16&expand=1414)
13396#[inline]
13397#[target_feature(enable = "avx512f,avx512vl")]
13398#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13399#[cfg_attr(test, assert_instr(vpmovdw))]
13400pub fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13401    unsafe { transmute(vpmovdw128(a.as_i32x4(), src.as_i16x8(), k)) }
13402}
13403
13404/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13405///
13406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi16&expand=1415)
13407#[inline]
13408#[target_feature(enable = "avx512f,avx512vl")]
13409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13410#[cfg_attr(test, assert_instr(vpmovdw))]
13411pub fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
13412    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, k)) }
13413}
13414
13415/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13416///
13417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi8&expand=1437)
13418#[inline]
13419#[target_feature(enable = "avx512f")]
13420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13421#[cfg_attr(test, assert_instr(vpmovdb))]
13422#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13423pub const fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
13424    unsafe {
13425        let a = a.as_i32x16();
13426        transmute::<i8x16, _>(simd_cast(a))
13427    }
13428}
13429
13430/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13431///
13432/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi8&expand=1438)
13433#[inline]
13434#[target_feature(enable = "avx512f")]
13435#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13436#[cfg_attr(test, assert_instr(vpmovdb))]
13437#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13438pub const fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13439    unsafe {
13440        let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
13441        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
13442    }
13443}
13444
13445/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13446///
13447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi8&expand=1439)
13448#[inline]
13449#[target_feature(enable = "avx512f")]
13450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13451#[cfg_attr(test, assert_instr(vpmovdb))]
13452#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13453pub const fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13454    unsafe {
13455        let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
13456        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
13457    }
13458}
13459
13460/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13461///
13462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi8&expand=1434)
13463#[inline]
13464#[target_feature(enable = "avx512f,avx512vl")]
13465#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13466#[cfg_attr(test, assert_instr(vpmovdb))]
13467pub fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i {
13468    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
13469}
13470
13471/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13472///
13473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi8&expand=1435)
13474#[inline]
13475#[target_feature(enable = "avx512f,avx512vl")]
13476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13477#[cfg_attr(test, assert_instr(vpmovdb))]
13478pub fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13479    unsafe { transmute(vpmovdb256(a.as_i32x8(), src.as_i8x16(), k)) }
13480}
13481
13482/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13483///
13484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi8&expand=1436)
13485#[inline]
13486#[target_feature(enable = "avx512f,avx512vl")]
13487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13488#[cfg_attr(test, assert_instr(vpmovdb))]
13489pub fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
13490    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, k)) }
13491}
13492
13493/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13494///
13495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi8&expand=1431)
13496#[inline]
13497#[target_feature(enable = "avx512f,avx512vl")]
13498#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13499#[cfg_attr(test, assert_instr(vpmovdb))]
13500pub fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i {
13501    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
13502}
13503
13504/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13505///
13506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi8&expand=1432)
13507#[inline]
13508#[target_feature(enable = "avx512f,avx512vl")]
13509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13510#[cfg_attr(test, assert_instr(vpmovdb))]
13511pub fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13512    unsafe { transmute(vpmovdb128(a.as_i32x4(), src.as_i8x16(), k)) }
13513}
13514
13515/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13516///
13517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi8&expand=1433)
13518#[inline]
13519#[target_feature(enable = "avx512f,avx512vl")]
13520#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13521#[cfg_attr(test, assert_instr(vpmovdb))]
13522pub fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
13523    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, k)) }
13524}
13525
13526/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13527///
13528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi32&expand=1481)
13529#[inline]
13530#[target_feature(enable = "avx512f")]
13531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13532#[cfg_attr(test, assert_instr(vpmovqd))]
13533#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13534pub const fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
13535    unsafe {
13536        let a = a.as_i64x8();
13537        transmute::<i32x8, _>(simd_cast(a))
13538    }
13539}
13540
13541/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13542///
13543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi32&expand=1482)
13544#[inline]
13545#[target_feature(enable = "avx512f")]
13546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13547#[cfg_attr(test, assert_instr(vpmovqd))]
13548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13549pub const fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
13550    unsafe {
13551        let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
13552        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
13553    }
13554}
13555
13556/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13557///
13558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi32&expand=1483)
13559#[inline]
13560#[target_feature(enable = "avx512f")]
13561#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13562#[cfg_attr(test, assert_instr(vpmovqd))]
13563#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13564pub const fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
13565    unsafe {
13566        let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
13567        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
13568    }
13569}
13570
13571/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13572///
13573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi32&expand=1478)
13574#[inline]
13575#[target_feature(enable = "avx512f,avx512vl")]
13576#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13577#[cfg_attr(test, assert_instr(vpmovqd))]
13578#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13579pub const fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
13580    unsafe {
13581        let a = a.as_i64x4();
13582        transmute::<i32x4, _>(simd_cast(a))
13583    }
13584}
13585
13586/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13587///
13588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi32&expand=1479)
13589#[inline]
13590#[target_feature(enable = "avx512f,avx512vl")]
13591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13592#[cfg_attr(test, assert_instr(vpmovqd))]
13593#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13594pub const fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13595    unsafe {
13596        let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
13597        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
13598    }
13599}
13600
13601/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13602///
13603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi32&expand=1480)
13604#[inline]
13605#[target_feature(enable = "avx512f,avx512vl")]
13606#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13607#[cfg_attr(test, assert_instr(vpmovqd))]
13608#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13609pub const fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
13610    unsafe {
13611        let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
13612        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
13613    }
13614}
13615
13616/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13617///
13618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi32&expand=1475)
13619#[inline]
13620#[target_feature(enable = "avx512f,avx512vl")]
13621#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13622#[cfg_attr(test, assert_instr(vpmovqd))]
13623pub fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i {
13624    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
13625}
13626
13627/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13628///
13629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi32&expand=1476)
13630#[inline]
13631#[target_feature(enable = "avx512f,avx512vl")]
13632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13633#[cfg_attr(test, assert_instr(vpmovqd))]
13634pub fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13635    unsafe { transmute(vpmovqd128(a.as_i64x2(), src.as_i32x4(), k)) }
13636}
13637
13638/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13639///
13640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi32&expand=1477)
13641#[inline]
13642#[target_feature(enable = "avx512f,avx512vl")]
13643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13644#[cfg_attr(test, assert_instr(vpmovqd))]
13645pub fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
13646    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, k)) }
13647}
13648
13649/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13650///
13651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi16&expand=1472)
13652#[inline]
13653#[target_feature(enable = "avx512f")]
13654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13655#[cfg_attr(test, assert_instr(vpmovqw))]
13656#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13657pub const fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
13658    unsafe {
13659        let a = a.as_i64x8();
13660        transmute::<i16x8, _>(simd_cast(a))
13661    }
13662}
13663
13664/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13665///
13666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi16&expand=1473)
13667#[inline]
13668#[target_feature(enable = "avx512f")]
13669#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13670#[cfg_attr(test, assert_instr(vpmovqw))]
13671#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13672pub const fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13673    unsafe {
13674        let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
13675        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
13676    }
13677}
13678
13679/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13680///
13681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi16&expand=1474)
13682#[inline]
13683#[target_feature(enable = "avx512f")]
13684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13685#[cfg_attr(test, assert_instr(vpmovqw))]
13686#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13687pub const fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
13688    unsafe {
13689        let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
13690        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
13691    }
13692}
13693
13694/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13695///
13696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi16&expand=1469)
13697#[inline]
13698#[target_feature(enable = "avx512f,avx512vl")]
13699#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13700#[cfg_attr(test, assert_instr(vpmovqw))]
13701pub fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i {
13702    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
13703}
13704
13705/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13706///
13707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi16&expand=1470)
13708#[inline]
13709#[target_feature(enable = "avx512f,avx512vl")]
13710#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13711#[cfg_attr(test, assert_instr(vpmovqw))]
13712pub fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13713    unsafe { transmute(vpmovqw256(a.as_i64x4(), src.as_i16x8(), k)) }
13714}
13715
13716/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13717///
13718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi16&expand=1471)
13719#[inline]
13720#[target_feature(enable = "avx512f,avx512vl")]
13721#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13722#[cfg_attr(test, assert_instr(vpmovqw))]
13723pub fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
13724    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, k)) }
13725}
13726
13727/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13728///
13729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi16&expand=1466)
13730#[inline]
13731#[target_feature(enable = "avx512f,avx512vl")]
13732#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13733#[cfg_attr(test, assert_instr(vpmovqw))]
13734pub fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i {
13735    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
13736}
13737
13738/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13739///
13740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi16&expand=1467)
13741#[inline]
13742#[target_feature(enable = "avx512f,avx512vl")]
13743#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13744#[cfg_attr(test, assert_instr(vpmovqw))]
13745pub fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13746    unsafe { transmute(vpmovqw128(a.as_i64x2(), src.as_i16x8(), k)) }
13747}
13748
13749/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13750///
13751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi16&expand=1468)
13752#[inline]
13753#[target_feature(enable = "avx512f,avx512vl")]
13754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13755#[cfg_attr(test, assert_instr(vpmovqw))]
13756pub fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
13757    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, k)) }
13758}
13759
13760/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13761///
13762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi8&expand=1490)
13763#[inline]
13764#[target_feature(enable = "avx512f")]
13765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13766#[cfg_attr(test, assert_instr(vpmovqb))]
13767pub fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i {
13768    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
13769}
13770
13771/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13772///
13773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi8&expand=1491)
13774#[inline]
13775#[target_feature(enable = "avx512f")]
13776#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13777#[cfg_attr(test, assert_instr(vpmovqb))]
13778pub fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13779    unsafe { transmute(vpmovqb(a.as_i64x8(), src.as_i8x16(), k)) }
13780}
13781
13782/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13783///
13784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi8&expand=1492)
13785#[inline]
13786#[target_feature(enable = "avx512f")]
13787#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13788#[cfg_attr(test, assert_instr(vpmovqb))]
13789pub fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
13790    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, k)) }
13791}
13792
13793/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13794///
13795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi8&expand=1487)
13796#[inline]
13797#[target_feature(enable = "avx512f,avx512vl")]
13798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13799#[cfg_attr(test, assert_instr(vpmovqb))]
13800pub fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i {
13801    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
13802}
13803
13804/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13805///
13806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi8&expand=1488)
13807#[inline]
13808#[target_feature(enable = "avx512f,avx512vl")]
13809#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13810#[cfg_attr(test, assert_instr(vpmovqb))]
13811pub fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13812    unsafe { transmute(vpmovqb256(a.as_i64x4(), src.as_i8x16(), k)) }
13813}
13814
13815/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13816///
13817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi8&expand=1489)
13818#[inline]
13819#[target_feature(enable = "avx512f,avx512vl")]
13820#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13821#[cfg_attr(test, assert_instr(vpmovqb))]
13822pub fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
13823    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, k)) }
13824}
13825
13826/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13827///
13828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi8&expand=1484)
13829#[inline]
13830#[target_feature(enable = "avx512f,avx512vl")]
13831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13832#[cfg_attr(test, assert_instr(vpmovqb))]
13833pub fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i {
13834    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
13835}
13836
13837/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13838///
13839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi8&expand=1485)
13840#[inline]
13841#[target_feature(enable = "avx512f,avx512vl")]
13842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13843#[cfg_attr(test, assert_instr(vpmovqb))]
13844pub fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13845    unsafe { transmute(vpmovqb128(a.as_i64x2(), src.as_i8x16(), k)) }
13846}
13847
13848/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13849///
13850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi8&expand=1486)
13851#[inline]
13852#[target_feature(enable = "avx512f,avx512vl")]
13853#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13854#[cfg_attr(test, assert_instr(vpmovqb))]
13855pub fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
13856    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, k)) }
13857}
13858
13859/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13860///
13861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi16&expand=1819)
13862#[inline]
13863#[target_feature(enable = "avx512f")]
13864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13865#[cfg_attr(test, assert_instr(vpmovsdw))]
13866pub fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
13867    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, 0b11111111_11111111)) }
13868}
13869
/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi16&expand=1820)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
    // Elements whose bit in `k` is clear keep the corresponding element of `src`.
    unsafe { transmute(vpmovsdw(a.as_i32x16(), src.as_i16x16(), k)) }
}
13880
/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi16&expand=1819)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
    // Elements whose bit in `k` is clear are zeroed: the all-zero "src" argument is selected.
    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, k)) }
}
13891
/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi16&expand=1816)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i {
    // All-ones writemask: every element is converted, so the zero vector passed as "src" is never selected.
    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, 0b11111111)) }
}
13902
/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi16&expand=1817)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    // Elements whose bit in `k` is clear keep the corresponding element of `src`.
    unsafe { transmute(vpmovsdw256(a.as_i32x8(), src.as_i16x8(), k)) }
}
13913
/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi16&expand=1818)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
    // Elements whose bit in `k` is clear are zeroed: the all-zero "src" argument is selected.
    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, k)) }
}
13924
/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi16&expand=1813)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i {
    // All-ones writemask: every element is converted, so the zero vector passed as "src" is never selected.
    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
}
13935
/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi16&expand=1814)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    // Elements whose bit in `k` is clear keep the corresponding element of `src`.
    unsafe { transmute(vpmovsdw128(a.as_i32x4(), src.as_i16x8(), k)) }
}
13946
/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi16&expand=1815)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
    // Elements whose bit in `k` is clear are zeroed: the all-zero "src" argument is selected.
    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, k)) }
}
13957
/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi8&expand=1828)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i {
    // All-ones writemask: every element is converted, so the zero vector passed as "src" is never selected.
    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, 0b11111111_11111111)) }
}
13968
/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi8&expand=1829)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
    // Elements whose bit in `k` is clear keep the corresponding element of `src`.
    unsafe { transmute(vpmovsdb(a.as_i32x16(), src.as_i8x16(), k)) }
}
13979
/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi8&expand=1830)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
    // Elements whose bit in `k` is clear are zeroed: the all-zero "src" argument is selected.
    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, k)) }
}
13990
/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi8&expand=1825)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i {
    // All-ones writemask: every element is converted, so the zero vector passed as "src" is never selected.
    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
}
14001
/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi8&expand=1826)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    // Elements whose bit in `k` is clear keep the corresponding element of `src`.
    unsafe { transmute(vpmovsdb256(a.as_i32x8(), src.as_i8x16(), k)) }
}
14012
/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi8&expand=1827)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
    // Elements whose bit in `k` is clear are zeroed: the all-zero "src" argument is selected.
    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, k)) }
}
14023
/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi8&expand=1822)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i {
    // All-ones writemask: every element is converted, so the zero vector passed as "src" is never selected.
    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
}
14034
/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi8&expand=1823)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    // Elements whose bit in `k` is clear keep the corresponding element of `src`.
    unsafe { transmute(vpmovsdb128(a.as_i32x4(), src.as_i8x16(), k)) }
}
14045
/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi8&expand=1824)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
    // Elements whose bit in `k` is clear are zeroed: the all-zero "src" argument is selected.
    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, k)) }
}
14056
/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi32&expand=1852)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i {
    // All-ones writemask: every element is converted, so the zero vector passed as "src" is never selected.
    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, 0b11111111)) }
}
14067
/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi32&expand=1853)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
    // Elements whose bit in `k` is clear keep the corresponding element of `src`.
    unsafe { transmute(vpmovsqd(a.as_i64x8(), src.as_i32x8(), k)) }
}
14078
/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi32&expand=1854)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
    // Elements whose bit in `k` is clear are zeroed: the all-zero "src" argument is selected.
    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, k)) }
}
14089
/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi32&expand=1849)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i {
    // All-ones writemask: every element is converted, so the zero vector passed as "src" is never selected.
    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, 0b11111111)) }
}
14100
/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi32&expand=1850)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    // Elements whose bit in `k` is clear keep the corresponding element of `src`.
    unsafe { transmute(vpmovsqd256(a.as_i64x4(), src.as_i32x4(), k)) }
}
14111
/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi32&expand=1851)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
    // Elements whose bit in `k` is clear are zeroed: the all-zero "src" argument is selected.
    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, k)) }
}
14122
/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi32&expand=1846)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i {
    // All-ones writemask: every element is converted, so the zero vector passed as "src" is never selected.
    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
}
14133
/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi32&expand=1847)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    // Elements whose bit in `k` is clear keep the corresponding element of `src`.
    unsafe { transmute(vpmovsqd128(a.as_i64x2(), src.as_i32x4(), k)) }
}
14144
/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi32&expand=1848)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
    // Elements whose bit in `k` is clear are zeroed: the all-zero "src" argument is selected.
    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, k)) }
}
14155
/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi16&expand=1843)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i {
    // All-ones writemask: every element is converted, so the zero vector passed as "src" is never selected.
    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, 0b11111111)) }
}
14166
/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi16&expand=1844)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    // Elements whose bit in `k` is clear keep the corresponding element of `src`.
    unsafe { transmute(vpmovsqw(a.as_i64x8(), src.as_i16x8(), k)) }
}
14177
/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi16&expand=1845)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
    // Elements whose bit in `k` is clear are zeroed: the all-zero "src" argument is selected.
    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, k)) }
}
14188
/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi16&expand=1840)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i {
    // All-ones writemask: every element is converted, so the zero vector passed as "src" is never selected.
    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
}
14199
/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi16&expand=1841)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    // Elements whose bit in `k` is clear keep the corresponding element of `src`.
    unsafe { transmute(vpmovsqw256(a.as_i64x4(), src.as_i16x8(), k)) }
}
14210
/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi16&expand=1842)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
    // Elements whose bit in `k` is clear are zeroed: the all-zero "src" argument is selected.
    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, k)) }
}
14221
/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi16&expand=1837)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i {
    // All-ones writemask: every element is converted, so the zero vector passed as "src" is never selected.
    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
}
14232
/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi16&expand=1838)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    // Elements whose bit in `k` is clear keep the corresponding element of `src`.
    unsafe { transmute(vpmovsqw128(a.as_i64x2(), src.as_i16x8(), k)) }
}
14243
/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi16&expand=1839)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
    // Elements whose bit in `k` is clear are zeroed: the all-zero "src" argument is selected.
    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, k)) }
}
14254
/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi8&expand=1861)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i {
    // All-ones writemask: every element is converted, so the zero vector passed as "src" is never selected.
    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
}
14265
/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi8&expand=1862)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    // Elements whose bit in `k` is clear keep the corresponding element of `src`.
    unsafe { transmute(vpmovsqb(a.as_i64x8(), src.as_i8x16(), k)) }
}
14276
/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi8&expand=1863)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
    // Elements whose bit in `k` is clear are zeroed: the all-zero "src" argument is selected.
    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, k)) }
}
14287
/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi8&expand=1858)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i {
    // All-ones writemask: every element is converted, so the zero vector passed as "src" is never selected.
    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
}
14298
/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi8&expand=1859)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    // Elements whose bit in `k` is clear keep the corresponding element of `src`.
    unsafe { transmute(vpmovsqb256(a.as_i64x4(), src.as_i8x16(), k)) }
}
14309
/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi8&expand=1860)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
    // Elements whose bit in `k` is clear are zeroed: the all-zero "src" argument is selected.
    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, k)) }
}
14320
/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi8&expand=1855)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i {
    // All-ones writemask: every element is converted, so the zero vector passed as "src" is never selected.
    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
}
14331
/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi8&expand=1856)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    // Elements whose bit in `k` is clear keep the corresponding element of `src`.
    unsafe { transmute(vpmovsqb128(a.as_i64x2(), src.as_i8x16(), k)) }
}
14342
/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi8&expand=1857)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
    // Elements whose bit in `k` is clear are zeroed: the all-zero "src" argument is selected.
    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, k)) }
}
14353
/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi16&expand=2054)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i {
    // All-ones writemask: every element is converted, so the zero vector passed as "src" is never selected.
    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, 0b11111111_11111111)) }
}
14364
/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi16&expand=2055)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
    // Elements whose bit in `k` is clear keep the corresponding element of `src`.
    unsafe { transmute(vpmovusdw(a.as_u32x16(), src.as_u16x16(), k)) }
}
14375
/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi16&expand=2056)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
    // Elements whose bit in `k` is clear are zeroed: the all-zero "src" argument is selected.
    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, k)) }
}
14386
/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi16&expand=2051)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i {
    // All-ones writemask: every element is converted, so the zero vector passed as "src" is never selected.
    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, 0b11111111)) }
}
14397
14398/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14399///
14400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi16&expand=2052)
14401#[inline]
14402#[target_feature(enable = "avx512f,avx512vl")]
14403#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14404#[cfg_attr(test, assert_instr(vpmovusdw))]
14405pub fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14406    unsafe { transmute(vpmovusdw256(a.as_u32x8(), src.as_u16x8(), k)) }
14407}
14408
14409/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14410///
14411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi16&expand=2053)
14412#[inline]
14413#[target_feature(enable = "avx512f,avx512vl")]
14414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14415#[cfg_attr(test, assert_instr(vpmovusdw))]
14416pub fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
14417    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, k)) }
14418}
14419
14420/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14421///
14422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi16&expand=2048)
14423#[inline]
14424#[target_feature(enable = "avx512f,avx512vl")]
14425#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14426#[cfg_attr(test, assert_instr(vpmovusdw))]
14427pub fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i {
14428    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, 0b11111111)) }
14429}
14430
14431/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14432///
14433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi16&expand=2049)
14434#[inline]
14435#[target_feature(enable = "avx512f,avx512vl")]
14436#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14437#[cfg_attr(test, assert_instr(vpmovusdw))]
14438pub fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14439    unsafe { transmute(vpmovusdw128(a.as_u32x4(), src.as_u16x8(), k)) }
14440}
14441
14442/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14443///
14444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi16&expand=2050)
14445#[inline]
14446#[target_feature(enable = "avx512f,avx512vl")]
14447#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14448#[cfg_attr(test, assert_instr(vpmovusdw))]
14449pub fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
14450    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, k)) }
14451}
14452
14453/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14454///
14455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi8&expand=2063)
14456#[inline]
14457#[target_feature(enable = "avx512f")]
14458#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14459#[cfg_attr(test, assert_instr(vpmovusdb))]
14460pub fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i {
14461    unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, 0b11111111_11111111)) }
14462}
14463
14464/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14465///
14466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi8&expand=2064)
14467#[inline]
14468#[target_feature(enable = "avx512f")]
14469#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14470#[cfg_attr(test, assert_instr(vpmovusdb))]
14471pub fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
14472    unsafe { transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k)) }
14473}
14474
14475/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14476///
14477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi8&expand=2065)
14478#[inline]
14479#[target_feature(enable = "avx512f")]
14480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14481#[cfg_attr(test, assert_instr(vpmovusdb))]
14482pub fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
14483    unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, k)) }
14484}
14485
14486/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14487///
14488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi8&expand=2060)
14489#[inline]
14490#[target_feature(enable = "avx512f,avx512vl")]
14491#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14492#[cfg_attr(test, assert_instr(vpmovusdb))]
14493pub fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i {
14494    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, 0b11111111)) }
14495}
14496
14497/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14498///
14499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi8&expand=2061)
14500#[inline]
14501#[target_feature(enable = "avx512f,avx512vl")]
14502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14503#[cfg_attr(test, assert_instr(vpmovusdb))]
14504pub fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14505    unsafe { transmute(vpmovusdb256(a.as_u32x8(), src.as_u8x16(), k)) }
14506}
14507
14508/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14509///
14510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi8&expand=2062)
14511#[inline]
14512#[target_feature(enable = "avx512f,avx512vl")]
14513#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14514#[cfg_attr(test, assert_instr(vpmovusdb))]
14515pub fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
14516    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, k)) }
14517}
14518
14519/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14520///
14521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi8&expand=2057)
14522#[inline]
14523#[target_feature(enable = "avx512f,avx512vl")]
14524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14525#[cfg_attr(test, assert_instr(vpmovusdb))]
14526pub fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i {
14527    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, 0b11111111)) }
14528}
14529
14530/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14531///
14532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi8&expand=2058)
14533#[inline]
14534#[target_feature(enable = "avx512f,avx512vl")]
14535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14536#[cfg_attr(test, assert_instr(vpmovusdb))]
14537pub fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14538    unsafe { transmute(vpmovusdb128(a.as_u32x4(), src.as_u8x16(), k)) }
14539}
14540
14541/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14542///
14543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi8&expand=2059)
14544#[inline]
14545#[target_feature(enable = "avx512f,avx512vl")]
14546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14547#[cfg_attr(test, assert_instr(vpmovusdb))]
14548pub fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
14549    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, k)) }
14550}
14551
14552/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14553///
14554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi32&expand=2087)
14555#[inline]
14556#[target_feature(enable = "avx512f")]
14557#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14558#[cfg_attr(test, assert_instr(vpmovusqd))]
14559pub fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i {
14560    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, 0b11111111)) }
14561}
14562
14563/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14564///
14565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi32&expand=2088)
14566#[inline]
14567#[target_feature(enable = "avx512f")]
14568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14569#[cfg_attr(test, assert_instr(vpmovusqd))]
14570pub fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
14571    unsafe { transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k)) }
14572}
14573
14574/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14575///
14576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi32&expand=2089)
14577#[inline]
14578#[target_feature(enable = "avx512f")]
14579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14580#[cfg_attr(test, assert_instr(vpmovusqd))]
14581pub fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
14582    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, k)) }
14583}
14584
14585/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14586///
14587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi32&expand=2084)
14588#[inline]
14589#[target_feature(enable = "avx512f,avx512vl")]
14590#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14591#[cfg_attr(test, assert_instr(vpmovusqd))]
14592pub fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i {
14593    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, 0b11111111)) }
14594}
14595
14596/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14597///
14598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi32&expand=2085)
14599#[inline]
14600#[target_feature(enable = "avx512f,avx512vl")]
14601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14602#[cfg_attr(test, assert_instr(vpmovusqd))]
14603pub fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14604    unsafe { transmute(vpmovusqd256(a.as_u64x4(), src.as_u32x4(), k)) }
14605}
14606
14607/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14608///
14609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi32&expand=2086)
14610#[inline]
14611#[target_feature(enable = "avx512f,avx512vl")]
14612#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14613#[cfg_attr(test, assert_instr(vpmovusqd))]
14614pub fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
14615    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, k)) }
14616}
14617
14618/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14619///
14620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi32&expand=2081)
14621#[inline]
14622#[target_feature(enable = "avx512f,avx512vl")]
14623#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14624#[cfg_attr(test, assert_instr(vpmovusqd))]
14625pub fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i {
14626    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, 0b11111111)) }
14627}
14628
14629/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14630///
14631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi32&expand=2082)
14632#[inline]
14633#[target_feature(enable = "avx512f,avx512vl")]
14634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14635#[cfg_attr(test, assert_instr(vpmovusqd))]
14636pub fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14637    unsafe { transmute(vpmovusqd128(a.as_u64x2(), src.as_u32x4(), k)) }
14638}
14639
14640/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14641///
14642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi32&expand=2083)
14643#[inline]
14644#[target_feature(enable = "avx512f,avx512vl")]
14645#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14646#[cfg_attr(test, assert_instr(vpmovusqd))]
14647pub fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
14648    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, k)) }
14649}
14650
14651/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14652///
14653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi16&expand=2078)
14654#[inline]
14655#[target_feature(enable = "avx512f")]
14656#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14657#[cfg_attr(test, assert_instr(vpmovusqw))]
14658pub fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i {
14659    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, 0b11111111)) }
14660}
14661
14662/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14663///
14664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi16&expand=2079)
14665#[inline]
14666#[target_feature(enable = "avx512f")]
14667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14668#[cfg_attr(test, assert_instr(vpmovusqw))]
14669pub fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14670    unsafe { transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k)) }
14671}
14672
14673/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14674///
14675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi16&expand=2080)
14676#[inline]
14677#[target_feature(enable = "avx512f")]
14678#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14679#[cfg_attr(test, assert_instr(vpmovusqw))]
14680pub fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
14681    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, k)) }
14682}
14683
14684/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14685///
14686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi16&expand=2075)
14687#[inline]
14688#[target_feature(enable = "avx512f,avx512vl")]
14689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14690#[cfg_attr(test, assert_instr(vpmovusqw))]
14691pub fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i {
14692    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, 0b11111111)) }
14693}
14694
14695/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14696///
14697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi16&expand=2076)
14698#[inline]
14699#[target_feature(enable = "avx512f,avx512vl")]
14700#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14701#[cfg_attr(test, assert_instr(vpmovusqw))]
14702pub fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14703    unsafe { transmute(vpmovusqw256(a.as_u64x4(), src.as_u16x8(), k)) }
14704}
14705
14706/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14707///
14708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi16&expand=2077)
14709#[inline]
14710#[target_feature(enable = "avx512f,avx512vl")]
14711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14712#[cfg_attr(test, assert_instr(vpmovusqw))]
14713pub fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
14714    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, k)) }
14715}
14716
14717/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14718///
14719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi16&expand=2072)
14720#[inline]
14721#[target_feature(enable = "avx512f,avx512vl")]
14722#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14723#[cfg_attr(test, assert_instr(vpmovusqw))]
14724pub fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i {
14725    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, 0b11111111)) }
14726}
14727
14728/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14729///
14730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi16&expand=2073)
14731#[inline]
14732#[target_feature(enable = "avx512f,avx512vl")]
14733#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14734#[cfg_attr(test, assert_instr(vpmovusqw))]
14735pub fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14736    unsafe { transmute(vpmovusqw128(a.as_u64x2(), src.as_u16x8(), k)) }
14737}
14738
14739/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14740///
14741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi16&expand=2074)
14742#[inline]
14743#[target_feature(enable = "avx512f,avx512vl")]
14744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14745#[cfg_attr(test, assert_instr(vpmovusqw))]
14746pub fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
14747    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, k)) }
14748}
14749
14750/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14751///
14752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi8&expand=2096)
14753#[inline]
14754#[target_feature(enable = "avx512f")]
14755#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14756#[cfg_attr(test, assert_instr(vpmovusqb))]
14757pub fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i {
14758    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, 0b11111111)) }
14759}
14760
14761/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14762///
14763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi8&expand=2097)
14764#[inline]
14765#[target_feature(enable = "avx512f")]
14766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14767#[cfg_attr(test, assert_instr(vpmovusqb))]
14768pub fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14769    unsafe { transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k)) }
14770}
14771
14772/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14773///
14774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi8&expand=2098)
14775#[inline]
14776#[target_feature(enable = "avx512f")]
14777#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14778#[cfg_attr(test, assert_instr(vpmovusqb))]
14779pub fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
14780    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, k)) }
14781}
14782
14783/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14784///
14785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi8&expand=2093)
14786#[inline]
14787#[target_feature(enable = "avx512f,avx512vl")]
14788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14789#[cfg_attr(test, assert_instr(vpmovusqb))]
14790pub fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i {
14791    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, 0b11111111)) }
14792}
14793
14794/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14795///
14796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi8&expand=2094)
14797#[inline]
14798#[target_feature(enable = "avx512f,avx512vl")]
14799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14800#[cfg_attr(test, assert_instr(vpmovusqb))]
14801pub fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14802    unsafe { transmute(vpmovusqb256(a.as_u64x4(), src.as_u8x16(), k)) }
14803}
14804
14805/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14806///
14807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi8&expand=2095)
14808#[inline]
14809#[target_feature(enable = "avx512f,avx512vl")]
14810#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14811#[cfg_attr(test, assert_instr(vpmovusqb))]
14812pub fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
14813    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, k)) }
14814}
14815
14816/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14817///
14818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi8&expand=2090)
14819#[inline]
14820#[target_feature(enable = "avx512f,avx512vl")]
14821#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14822#[cfg_attr(test, assert_instr(vpmovusqb))]
14823pub fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i {
14824    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, 0b11111111)) }
14825}
14826
14827/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14828///
14829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi8&expand=2091)
14830#[inline]
14831#[target_feature(enable = "avx512f,avx512vl")]
14832#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14833#[cfg_attr(test, assert_instr(vpmovusqb))]
14834pub fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14835    unsafe { transmute(vpmovusqb128(a.as_u64x2(), src.as_u8x16(), k)) }
14836}
14837
14838/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14839///
14840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi8&expand=2092)
14841#[inline]
14842#[target_feature(enable = "avx512f,avx512vl")]
14843#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14844#[cfg_attr(test, assert_instr(vpmovusqb))]
14845pub fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
14846    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, k)) }
14847}
14848
14849/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
14850///
14851/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
14852/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14853/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14854/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14855/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14856/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14857///
14858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi32&expand=1335)
14859#[inline]
14860#[target_feature(enable = "avx512f")]
14861#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14862#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14863#[rustc_legacy_const_generics(1)]
14864pub fn _mm512_cvt_roundps_epi32<const ROUNDING: i32>(a: __m512) -> __m512i {
14865    unsafe {
14866        static_assert_rounding!(ROUNDING);
14867        let a = a.as_f32x16();
14868        let r = vcvtps2dq(a, i32x16::ZERO, 0b11111111_11111111, ROUNDING);
14869        transmute(r)
14870    }
14871}
14872
14873/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14874///
14875/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14876/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14877/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14878/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14879/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14880/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14881///
14882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi32&expand=1336)
14883#[inline]
14884#[target_feature(enable = "avx512f")]
14885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14886#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14887#[rustc_legacy_const_generics(3)]
14888pub fn _mm512_mask_cvt_roundps_epi32<const ROUNDING: i32>(
14889    src: __m512i,
14890    k: __mmask16,
14891    a: __m512,
14892) -> __m512i {
14893    unsafe {
14894        static_assert_rounding!(ROUNDING);
14895        let a = a.as_f32x16();
14896        let src = src.as_i32x16();
14897        let r = vcvtps2dq(a, src, k, ROUNDING);
14898        transmute(r)
14899    }
14900}
14901
14902/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14903///
14904/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14905/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14906/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14907/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14908/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14909/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14910///
14911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi32&expand=1337)
14912#[inline]
14913#[target_feature(enable = "avx512f")]
14914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14915#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14916#[rustc_legacy_const_generics(2)]
14917pub fn _mm512_maskz_cvt_roundps_epi32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
14918    unsafe {
14919        static_assert_rounding!(ROUNDING);
14920        let a = a.as_f32x16();
14921        let r = vcvtps2dq(a, i32x16::ZERO, k, ROUNDING);
14922        transmute(r)
14923    }
14924}
14925
14926/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14927///
14928/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14929/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14930/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14931/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14932/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14933/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14934///
14935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu32&expand=1341)
14936#[inline]
14937#[target_feature(enable = "avx512f")]
14938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14939#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14940#[rustc_legacy_const_generics(1)]
14941pub fn _mm512_cvt_roundps_epu32<const ROUNDING: i32>(a: __m512) -> __m512i {
14942    unsafe {
14943        static_assert_rounding!(ROUNDING);
14944        let a = a.as_f32x16();
14945        let r = vcvtps2udq(a, u32x16::ZERO, 0b11111111_11111111, ROUNDING);
14946        transmute(r)
14947    }
14948}
14949
14950/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14951///
14952/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14953/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14954/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14955/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14956/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14957/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14958///
14959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu32&expand=1342)
14960#[inline]
14961#[target_feature(enable = "avx512f")]
14962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14963#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14964#[rustc_legacy_const_generics(3)]
14965pub fn _mm512_mask_cvt_roundps_epu32<const ROUNDING: i32>(
14966    src: __m512i,
14967    k: __mmask16,
14968    a: __m512,
14969) -> __m512i {
14970    unsafe {
14971        static_assert_rounding!(ROUNDING);
14972        let a = a.as_f32x16();
14973        let src = src.as_u32x16();
14974        let r = vcvtps2udq(a, src, k, ROUNDING);
14975        transmute(r)
14976    }
14977}
14978
14979/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14980///
14981/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14982/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14983/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14984/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14985/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14986/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14987///
14988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu32&expand=1343)
14989#[inline]
14990#[target_feature(enable = "avx512f")]
14991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14992#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14993#[rustc_legacy_const_generics(2)]
14994pub fn _mm512_maskz_cvt_roundps_epu32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
14995    unsafe {
14996        static_assert_rounding!(ROUNDING);
14997        let a = a.as_f32x16();
14998        let r = vcvtps2udq(a, u32x16::ZERO, k, ROUNDING);
14999        transmute(r)
15000    }
15001}
15002
15003/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.\
15004/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15005///
15006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_pd&expand=1347)
15007#[inline]
15008#[target_feature(enable = "avx512f")]
15009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15010#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
15011#[rustc_legacy_const_generics(1)]
15012pub fn _mm512_cvt_roundps_pd<const SAE: i32>(a: __m256) -> __m512d {
15013    unsafe {
15014        static_assert_sae!(SAE);
15015        let a = a.as_f32x8();
15016        let r = vcvtps2pd(a, f64x8::ZERO, 0b11111111, SAE);
15017        transmute(r)
15018    }
15019}
15020
15021/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15022/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15023///
15024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_pd&expand=1336)
15025#[inline]
15026#[target_feature(enable = "avx512f")]
15027#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15028#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
15029#[rustc_legacy_const_generics(3)]
15030pub fn _mm512_mask_cvt_roundps_pd<const SAE: i32>(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
15031    unsafe {
15032        static_assert_sae!(SAE);
15033        let a = a.as_f32x8();
15034        let src = src.as_f64x8();
15035        let r = vcvtps2pd(a, src, k, SAE);
15036        transmute(r)
15037    }
15038}
15039
15040/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15041/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15042///
15043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_pd&expand=1337)
15044#[inline]
15045#[target_feature(enable = "avx512f")]
15046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15047#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
15048#[rustc_legacy_const_generics(2)]
15049pub fn _mm512_maskz_cvt_roundps_pd<const SAE: i32>(k: __mmask8, a: __m256) -> __m512d {
15050    unsafe {
15051        static_assert_sae!(SAE);
15052        let a = a.as_f32x8();
15053        let r = vcvtps2pd(a, f64x8::ZERO, k, SAE);
15054        transmute(r)
15055    }
15056}
15057
15058/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
15059///
15060/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15061/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15062/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15063/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15064/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15065/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15066///
15067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi32&expand=1315)
15068#[inline]
15069#[target_feature(enable = "avx512f")]
15070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15071#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
15072#[rustc_legacy_const_generics(1)]
15073pub fn _mm512_cvt_roundpd_epi32<const ROUNDING: i32>(a: __m512d) -> __m256i {
15074    unsafe {
15075        static_assert_rounding!(ROUNDING);
15076        let a = a.as_f64x8();
15077        let r = vcvtpd2dq(a, i32x8::ZERO, 0b11111111, ROUNDING);
15078        transmute(r)
15079    }
15080}
15081
15082/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15083///
15084/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15085/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15086/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15087/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15088/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15089/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15090///
15091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi32&expand=1316)
15092#[inline]
15093#[target_feature(enable = "avx512f")]
15094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15095#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
15096#[rustc_legacy_const_generics(3)]
15097pub fn _mm512_mask_cvt_roundpd_epi32<const ROUNDING: i32>(
15098    src: __m256i,
15099    k: __mmask8,
15100    a: __m512d,
15101) -> __m256i {
15102    unsafe {
15103        static_assert_rounding!(ROUNDING);
15104        let a = a.as_f64x8();
15105        let src = src.as_i32x8();
15106        let r = vcvtpd2dq(a, src, k, ROUNDING);
15107        transmute(r)
15108    }
15109}
15110
15111/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15112///
15113/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15114/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15115/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15116/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15117/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15118/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15119///
15120/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_epi32&expand=1317)
15121#[inline]
15122#[target_feature(enable = "avx512f")]
15123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15124#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
15125#[rustc_legacy_const_generics(2)]
15126pub fn _mm512_maskz_cvt_roundpd_epi32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
15127    unsafe {
15128        static_assert_rounding!(ROUNDING);
15129        let a = a.as_f64x8();
15130        let r = vcvtpd2dq(a, i32x8::ZERO, k, ROUNDING);
15131        transmute(r)
15132    }
15133}
15134
15135/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
15136///
15137/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15138/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15139/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15140/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15141/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15142/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15143///
15144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu32&expand=1321)
15145#[inline]
15146#[target_feature(enable = "avx512f")]
15147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15148#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
15149#[rustc_legacy_const_generics(1)]
15150pub fn _mm512_cvt_roundpd_epu32<const ROUNDING: i32>(a: __m512d) -> __m256i {
15151    unsafe {
15152        static_assert_rounding!(ROUNDING);
15153        let a = a.as_f64x8();
15154        let r = vcvtpd2udq(a, u32x8::ZERO, 0b11111111, ROUNDING);
15155        transmute(r)
15156    }
15157}
15158
15159/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15160///
15161/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15162/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15163/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15164/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15165/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15166/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15167///
15168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu32&expand=1322)
15169#[inline]
15170#[target_feature(enable = "avx512f")]
15171#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15172#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
15173#[rustc_legacy_const_generics(3)]
15174pub fn _mm512_mask_cvt_roundpd_epu32<const ROUNDING: i32>(
15175    src: __m256i,
15176    k: __mmask8,
15177    a: __m512d,
15178) -> __m256i {
15179    unsafe {
15180        static_assert_rounding!(ROUNDING);
15181        let a = a.as_f64x8();
15182        let src = src.as_u32x8();
15183        let r = vcvtpd2udq(a, src, k, ROUNDING);
15184        transmute(r)
15185    }
15186}
15187
15188/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15189///
15190/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15191/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15192/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15193/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15194/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15195/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15196///
15197/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_epu32&expand=1323)
15198#[inline]
15199#[target_feature(enable = "avx512f")]
15200#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15201#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
15202#[rustc_legacy_const_generics(2)]
15203pub fn _mm512_maskz_cvt_roundpd_epu32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
15204    unsafe {
15205        static_assert_rounding!(ROUNDING);
15206        let a = a.as_f64x8();
15207        let r = vcvtpd2udq(a, u32x8::ZERO, k, ROUNDING);
15208        transmute(r)
15209    }
15210}
15211
15212/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
15213///
15214/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15215/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15216/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15217/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15218/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15219/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15220///
15221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_ps&expand=1327)
15222#[inline]
15223#[target_feature(enable = "avx512f")]
15224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15225#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
15226#[rustc_legacy_const_generics(1)]
15227pub fn _mm512_cvt_roundpd_ps<const ROUNDING: i32>(a: __m512d) -> __m256 {
15228    unsafe {
15229        static_assert_rounding!(ROUNDING);
15230        let a = a.as_f64x8();
15231        let r = vcvtpd2ps(a, f32x8::ZERO, 0b11111111, ROUNDING);
15232        transmute(r)
15233    }
15234}
15235
15236/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15237///
15238/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15239/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15240/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15241/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15242/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15243/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15244///
15245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_ps&expand=1328)
15246#[inline]
15247#[target_feature(enable = "avx512f")]
15248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15249#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
15250#[rustc_legacy_const_generics(3)]
15251pub fn _mm512_mask_cvt_roundpd_ps<const ROUNDING: i32>(
15252    src: __m256,
15253    k: __mmask8,
15254    a: __m512d,
15255) -> __m256 {
15256    unsafe {
15257        static_assert_rounding!(ROUNDING);
15258        let a = a.as_f64x8();
15259        let src = src.as_f32x8();
15260        let r = vcvtpd2ps(a, src, k, ROUNDING);
15261        transmute(r)
15262    }
15263}
15264
15265/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15266///
15267/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15268/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15269/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15270/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15271/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15272/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15273///
15274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_ps&expand=1329)
15275#[inline]
15276#[target_feature(enable = "avx512f")]
15277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15278#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
15279#[rustc_legacy_const_generics(2)]
15280pub fn _mm512_maskz_cvt_roundpd_ps<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256 {
15281    unsafe {
15282        static_assert_rounding!(ROUNDING);
15283        let a = a.as_f64x8();
15284        let r = vcvtpd2ps(a, f32x8::ZERO, k, ROUNDING);
15285        transmute(r)
15286    }
15287}
15288
15289/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
15290///
15291/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15292/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15293/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15294/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15295/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15296/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15297///
15298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi32_ps&expand=1294)
15299#[inline]
15300#[target_feature(enable = "avx512f")]
15301#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15302#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
15303#[rustc_legacy_const_generics(1)]
15304pub fn _mm512_cvt_roundepi32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
15305    unsafe {
15306        static_assert_rounding!(ROUNDING);
15307        let a = a.as_i32x16();
15308        let r = vcvtdq2ps(a, ROUNDING);
15309        transmute(r)
15310    }
15311}
15312
15313/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15314///
15315/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15316/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15317/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15318/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15319/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15320/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15321///
15322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi32_ps&expand=1295)
15323#[inline]
15324#[target_feature(enable = "avx512f")]
15325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15326#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
15327#[rustc_legacy_const_generics(3)]
15328pub fn _mm512_mask_cvt_roundepi32_ps<const ROUNDING: i32>(
15329    src: __m512,
15330    k: __mmask16,
15331    a: __m512i,
15332) -> __m512 {
15333    unsafe {
15334        static_assert_rounding!(ROUNDING);
15335        let a = a.as_i32x16();
15336        let r = vcvtdq2ps(a, ROUNDING);
15337        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
15338    }
15339}
15340
15341/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15342///
15343/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15344/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15345/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15346/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15347/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15348/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15349///
15350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi32_ps&expand=1296)
15351#[inline]
15352#[target_feature(enable = "avx512f")]
15353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15354#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
15355#[rustc_legacy_const_generics(2)]
15356pub fn _mm512_maskz_cvt_roundepi32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
15357    unsafe {
15358        static_assert_rounding!(ROUNDING);
15359        let a = a.as_i32x16();
15360        let r = vcvtdq2ps(a, ROUNDING);
15361        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
15362    }
15363}
15364
15365/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
15366///
15367/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15368/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15369/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15370/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15371/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15372/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15373///
15374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu32_ps&expand=1303)
15375#[inline]
15376#[target_feature(enable = "avx512f")]
15377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15378#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15379#[rustc_legacy_const_generics(1)]
15380pub fn _mm512_cvt_roundepu32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
15381    unsafe {
15382        static_assert_rounding!(ROUNDING);
15383        let a = a.as_u32x16();
15384        let r = vcvtudq2ps(a, ROUNDING);
15385        transmute(r)
15386    }
15387}
15388
15389/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15390///
15391/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15392/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15393/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15394/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15395/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15396/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15397///
15398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu32_ps&expand=1304)
15399#[inline]
15400#[target_feature(enable = "avx512f")]
15401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15402#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15403#[rustc_legacy_const_generics(3)]
15404pub fn _mm512_mask_cvt_roundepu32_ps<const ROUNDING: i32>(
15405    src: __m512,
15406    k: __mmask16,
15407    a: __m512i,
15408) -> __m512 {
15409    unsafe {
15410        static_assert_rounding!(ROUNDING);
15411        let a = a.as_u32x16();
15412        let r = vcvtudq2ps(a, ROUNDING);
15413        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
15414    }
15415}
15416
15417/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15418///
15419/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15420/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15421/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15422/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15423/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15424/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15425///
15426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu32_ps&expand=1305)
15427#[inline]
15428#[target_feature(enable = "avx512f")]
15429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15430#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15431#[rustc_legacy_const_generics(2)]
15432pub fn _mm512_maskz_cvt_roundepu32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
15433    unsafe {
15434        static_assert_rounding!(ROUNDING);
15435        let a = a.as_u32x16();
15436        let r = vcvtudq2ps(a, ROUNDING);
15437        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
15438    }
15439}
15440
15441/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15442/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15443///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15444///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
15445///  * [`_MM_FROUND_TO_POS_INF`]    // round up
15446///  * [`_MM_FROUND_TO_ZERO`]        // truncate
15447///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15448///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15449///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
15450///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
15451///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
15452///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15453///
15454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354)
15455#[inline]
15456#[target_feature(enable = "avx512f")]
15457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15458#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15459#[rustc_legacy_const_generics(1)]
15460pub fn _mm512_cvt_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
15461    unsafe {
15462        static_assert_extended_rounding!(ROUNDING);
15463        let a = a.as_f32x16();
15464        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111);
15465        transmute(r)
15466    }
15467}
15468
15469/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15470/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15471///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15472///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
15473///  * [`_MM_FROUND_TO_POS_INF`]    // round up
15474///  * [`_MM_FROUND_TO_ZERO`]        // truncate
15475///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15476///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15477///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
15478///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
15479///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
15480///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15481///
15482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355)
15483#[inline]
15484#[target_feature(enable = "avx512f")]
15485#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15486#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15487#[rustc_legacy_const_generics(3)]
15488pub fn _mm512_mask_cvt_roundps_ph<const ROUNDING: i32>(
15489    src: __m256i,
15490    k: __mmask16,
15491    a: __m512,
15492) -> __m256i {
15493    unsafe {
15494        static_assert_extended_rounding!(ROUNDING);
15495        let a = a.as_f32x16();
15496        let src = src.as_i16x16();
15497        let r = vcvtps2ph(a, ROUNDING, src, k);
15498        transmute(r)
15499    }
15500}
15501
15502/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15503/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15504///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15505///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
15506///  * [`_MM_FROUND_TO_POS_INF`]    // round up
15507///  * [`_MM_FROUND_TO_ZERO`]        // truncate
15508///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15509///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15510///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
15511///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
15512///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
15513///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15514///
15515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356)
15516#[inline]
15517#[target_feature(enable = "avx512f")]
15518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15519#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15520#[rustc_legacy_const_generics(2)]
15521pub fn _mm512_maskz_cvt_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
15522    unsafe {
15523        static_assert_extended_rounding!(ROUNDING);
15524        let a = a.as_f32x16();
15525        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k);
15526        transmute(r)
15527    }
15528}
15529
15530/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15531/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:
15532/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15533/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15534/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15535/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15536/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15537///
15538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvt_roundps_ph&expand=1352)
15539#[inline]
15540#[target_feature(enable = "avx512f,avx512vl")]
15541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15542#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15543#[rustc_legacy_const_generics(3)]
15544pub fn _mm256_mask_cvt_roundps_ph<const IMM8: i32>(
15545    src: __m128i,
15546    k: __mmask8,
15547    a: __m256,
15548) -> __m128i {
15549    unsafe {
15550        static_assert_uimm_bits!(IMM8, 8);
15551        let a = a.as_f32x8();
15552        let src = src.as_i16x8();
15553        let r = vcvtps2ph256(a, IMM8, src, k);
15554        transmute(r)
15555    }
15556}
15557
15558/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15559/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15560/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15561/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15562/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15563/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15564/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15565///
15566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvt_roundps_ph&expand=1353)
15567#[inline]
15568#[target_feature(enable = "avx512f,avx512vl")]
15569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15570#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15571#[rustc_legacy_const_generics(2)]
15572pub fn _mm256_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15573    unsafe {
15574        static_assert_uimm_bits!(IMM8, 8);
15575        let a = a.as_f32x8();
15576        let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
15577        transmute(r)
15578    }
15579}
15580
15581/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15582/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15583/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15584/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15585/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15586/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15587/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15588///
15589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvt_roundps_ph&expand=1350)
15590#[inline]
15591#[target_feature(enable = "avx512f,avx512vl")]
15592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15593#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15594#[rustc_legacy_const_generics(3)]
15595pub fn _mm_mask_cvt_roundps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15596    unsafe {
15597        static_assert_uimm_bits!(IMM8, 8);
15598        let a = a.as_f32x4();
15599        let src = src.as_i16x8();
15600        let r = vcvtps2ph128(a, IMM8, src, k);
15601        transmute(r)
15602    }
15603}
15604
15605/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15606/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15607/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15608/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15609/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15610/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15611/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15612///
15613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvt_roundps_ph&expand=1351)
15614#[inline]
15615#[target_feature(enable = "avx512f,avx512vl")]
15616#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15617#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15618#[rustc_legacy_const_generics(2)]
15619pub fn _mm_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15620    unsafe {
15621        static_assert_uimm_bits!(IMM8, 8);
15622        let a = a.as_f32x4();
15623        let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
15624        transmute(r)
15625    }
15626}
15627
15628/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15629/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15630///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15631///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
15632///  * [`_MM_FROUND_TO_POS_INF`]    // round up
15633///  * [`_MM_FROUND_TO_ZERO`]        // truncate
15634///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15635///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15636///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
15637///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
15638///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
15639///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15640///
15641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778)
15642#[inline]
15643#[target_feature(enable = "avx512f")]
15644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15645#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15646#[rustc_legacy_const_generics(1)]
15647pub fn _mm512_cvtps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
15648    unsafe {
15649        static_assert_extended_rounding!(ROUNDING);
15650        let a = a.as_f32x16();
15651        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111);
15652        transmute(r)
15653    }
15654}
15655
15656/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15657/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15658///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15659///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
15660///  * [`_MM_FROUND_TO_POS_INF`]    // round up
15661///  * [`_MM_FROUND_TO_ZERO`]        // truncate
15662///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15663///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15664///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
15665///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
15666///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
15667///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15668///
15669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779)
15670#[inline]
15671#[target_feature(enable = "avx512f")]
15672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15673#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15674#[rustc_legacy_const_generics(3)]
15675pub fn _mm512_mask_cvtps_ph<const ROUNDING: i32>(src: __m256i, k: __mmask16, a: __m512) -> __m256i {
15676    unsafe {
15677        static_assert_extended_rounding!(ROUNDING);
15678        let a = a.as_f32x16();
15679        let src = src.as_i16x16();
15680        let r = vcvtps2ph(a, ROUNDING, src, k);
15681        transmute(r)
15682    }
15683}
15684
15685/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15686/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15687///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15688///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
15689///  * [`_MM_FROUND_TO_POS_INF`]    // round up
15690///  * [`_MM_FROUND_TO_ZERO`]        // truncate
15691///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15692///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15693///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
15694///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
15695///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
15696///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15697///
15698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780)
15699#[inline]
15700#[target_feature(enable = "avx512f")]
15701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15702#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15703#[rustc_legacy_const_generics(2)]
15704pub fn _mm512_maskz_cvtps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
15705    unsafe {
15706        static_assert_extended_rounding!(ROUNDING);
15707        let a = a.as_f32x16();
15708        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k);
15709        transmute(r)
15710    }
15711}
15712
15713/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15714/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15715/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15716/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15717/// * [`_MM_FROUND_TO_POS_INF`] : round up
15718/// * [`_MM_FROUND_TO_ZERO`] : truncate
15719/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15720///
15721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_ph&expand=1776)
15722#[inline]
15723#[target_feature(enable = "avx512f,avx512vl")]
15724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15725#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15726#[rustc_legacy_const_generics(3)]
15727pub fn _mm256_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m256) -> __m128i {
15728    unsafe {
15729        static_assert_uimm_bits!(IMM8, 8);
15730        let a = a.as_f32x8();
15731        let src = src.as_i16x8();
15732        let r = vcvtps2ph256(a, IMM8, src, k);
15733        transmute(r)
15734    }
15735}
15736
15737/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15738/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15739/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15740/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15741/// * [`_MM_FROUND_TO_POS_INF`] : round up
15742/// * [`_MM_FROUND_TO_ZERO`] : truncate
15743/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15744///
15745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_ph&expand=1777)
15746#[inline]
15747#[target_feature(enable = "avx512f,avx512vl")]
15748#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15749#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15750#[rustc_legacy_const_generics(2)]
15751pub fn _mm256_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15752    unsafe {
15753        static_assert_uimm_bits!(IMM8, 8);
15754        let a = a.as_f32x8();
15755        let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
15756        transmute(r)
15757    }
15758}
15759
15760/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15761/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15762/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15763/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15764/// * [`_MM_FROUND_TO_POS_INF`] : round up
15765/// * [`_MM_FROUND_TO_ZERO`] : truncate
15766/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15767///
15768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_ph&expand=1773)
15769#[inline]
15770#[target_feature(enable = "avx512f,avx512vl")]
15771#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15772#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15773#[rustc_legacy_const_generics(3)]
15774pub fn _mm_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15775    unsafe {
15776        static_assert_uimm_bits!(IMM8, 8);
15777        let a = a.as_f32x4();
15778        let src = src.as_i16x8();
15779        let r = vcvtps2ph128(a, IMM8, src, k);
15780        transmute(r)
15781    }
15782}
15783
15784/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15785/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15786/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15787/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15788/// * [`_MM_FROUND_TO_POS_INF`] : round up
15789/// * [`_MM_FROUND_TO_ZERO`] : truncate
15790/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15791///
15792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_ph&expand=1774)
15793#[inline]
15794#[target_feature(enable = "avx512f,avx512vl")]
15795#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15796#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15797#[rustc_legacy_const_generics(2)]
15798pub fn _mm_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15799    unsafe {
15800        static_assert_uimm_bits!(IMM8, 8);
15801        let a = a.as_f32x4();
15802        let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
15803        transmute(r)
15804    }
15805}
15806
15807/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
15808/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15809///
15810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundph_ps&expand=1332)
15811#[inline]
15812#[target_feature(enable = "avx512f")]
15813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15814#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15815#[rustc_legacy_const_generics(1)]
15816pub fn _mm512_cvt_roundph_ps<const SAE: i32>(a: __m256i) -> __m512 {
15817    unsafe {
15818        static_assert_sae!(SAE);
15819        let a = a.as_i16x16();
15820        let r = vcvtph2ps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
15821        transmute(r)
15822    }
15823}
15824
15825/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15826/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15827///
15828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundph_ps&expand=1333)
15829#[inline]
15830#[target_feature(enable = "avx512f")]
15831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15832#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15833#[rustc_legacy_const_generics(3)]
15834pub fn _mm512_mask_cvt_roundph_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15835    unsafe {
15836        static_assert_sae!(SAE);
15837        let a = a.as_i16x16();
15838        let src = src.as_f32x16();
15839        let r = vcvtph2ps(a, src, k, SAE);
15840        transmute(r)
15841    }
15842}
15843
15844/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15845/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15846///
15847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundph_ps&expand=1334)
15848#[inline]
15849#[target_feature(enable = "avx512f")]
15850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15851#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15852#[rustc_legacy_const_generics(2)]
15853pub fn _mm512_maskz_cvt_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256i) -> __m512 {
15854    unsafe {
15855        static_assert_sae!(SAE);
15856        let a = a.as_i16x16();
15857        let r = vcvtph2ps(a, f32x16::ZERO, k, SAE);
15858        transmute(r)
15859    }
15860}
15861
15862/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
15863///
15864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtph_ps&expand=1723)
15865#[inline]
15866#[target_feature(enable = "avx512f")]
15867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15868#[cfg_attr(test, assert_instr(vcvtph2ps))]
15869pub fn _mm512_cvtph_ps(a: __m256i) -> __m512 {
15870    unsafe {
15871        transmute(vcvtph2ps(
15872            a.as_i16x16(),
15873            f32x16::ZERO,
15874            0b11111111_11111111,
15875            _MM_FROUND_NO_EXC,
15876        ))
15877    }
15878}
15879
15880/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15881///
15882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtph_ps&expand=1724)
15883#[inline]
15884#[target_feature(enable = "avx512f")]
15885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15886#[cfg_attr(test, assert_instr(vcvtph2ps))]
15887pub fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15888    unsafe {
15889        transmute(vcvtph2ps(
15890            a.as_i16x16(),
15891            src.as_f32x16(),
15892            k,
15893            _MM_FROUND_NO_EXC,
15894        ))
15895    }
15896}
15897
15898/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15899///
15900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtph_ps&expand=1725)
15901#[inline]
15902#[target_feature(enable = "avx512f")]
15903#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15904#[cfg_attr(test, assert_instr(vcvtph2ps))]
15905pub fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
15906    unsafe { transmute(vcvtph2ps(a.as_i16x16(), f32x16::ZERO, k, _MM_FROUND_NO_EXC)) }
15907}
15908
15909/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15910///
15911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtph_ps&expand=1721)
15912#[inline]
15913#[target_feature(enable = "avx512f,avx512vl")]
15914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15915#[cfg_attr(test, assert_instr(vcvtph2ps))]
15916pub fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 {
15917    unsafe {
15918        let convert = _mm256_cvtph_ps(a);
15919        transmute(simd_select_bitmask(k, convert.as_f32x8(), src.as_f32x8()))
15920    }
15921}
15922
15923/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15924///
15925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtph_ps&expand=1722)
15926#[inline]
15927#[target_feature(enable = "avx512f,avx512vl")]
15928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15929#[cfg_attr(test, assert_instr(vcvtph2ps))]
15930pub fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 {
15931    unsafe {
15932        let convert = _mm256_cvtph_ps(a);
15933        transmute(simd_select_bitmask(k, convert.as_f32x8(), f32x8::ZERO))
15934    }
15935}
15936
15937/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15938///
15939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtph_ps&expand=1718)
15940#[inline]
15941#[target_feature(enable = "avx512f,avx512vl")]
15942#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15943#[cfg_attr(test, assert_instr(vcvtph2ps))]
15944pub fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
15945    unsafe {
15946        let convert = _mm_cvtph_ps(a);
15947        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
15948    }
15949}
15950
15951/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15952///
15953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtph_ps&expand=1719)
15954#[inline]
15955#[target_feature(enable = "avx512f,avx512vl")]
15956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15957#[cfg_attr(test, assert_instr(vcvtph2ps))]
15958pub fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 {
15959    unsafe {
15960        let convert = _mm_cvtph_ps(a);
15961        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
15962    }
15963}
15964
15965/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
15966/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15967///
15968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi32&expand=1916)
15969#[inline]
15970#[target_feature(enable = "avx512f")]
15971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15972#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15973#[rustc_legacy_const_generics(1)]
15974pub fn _mm512_cvtt_roundps_epi32<const SAE: i32>(a: __m512) -> __m512i {
15975    unsafe {
15976        static_assert_sae!(SAE);
15977        let a = a.as_f32x16();
15978        let r = vcvttps2dq(a, i32x16::ZERO, 0b11111111_11111111, SAE);
15979        transmute(r)
15980    }
15981}
15982
15983/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15984/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15985///
15986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi32&expand=1917)
15987#[inline]
15988#[target_feature(enable = "avx512f")]
15989#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15990#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15991#[rustc_legacy_const_generics(3)]
15992pub fn _mm512_mask_cvtt_roundps_epi32<const SAE: i32>(
15993    src: __m512i,
15994    k: __mmask16,
15995    a: __m512,
15996) -> __m512i {
15997    unsafe {
15998        static_assert_sae!(SAE);
15999        let a = a.as_f32x16();
16000        let src = src.as_i32x16();
16001        let r = vcvttps2dq(a, src, k, SAE);
16002        transmute(r)
16003    }
16004}
16005
16006/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
16007/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16008///
16009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918)
16010#[inline]
16011#[target_feature(enable = "avx512f")]
16012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16013#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
16014#[rustc_legacy_const_generics(2)]
16015pub fn _mm512_maskz_cvtt_roundps_epi32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
16016    unsafe {
16017        static_assert_sae!(SAE);
16018        let a = a.as_f32x16();
16019        let r = vcvttps2dq(a, i32x16::ZERO, k, SAE);
16020        transmute(r)
16021    }
16022}
16023
16024/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
16025/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16026///
16027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu32&expand=1922)
16028#[inline]
16029#[target_feature(enable = "avx512f")]
16030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16031#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
16032#[rustc_legacy_const_generics(1)]
16033pub fn _mm512_cvtt_roundps_epu32<const SAE: i32>(a: __m512) -> __m512i {
16034    unsafe {
16035        static_assert_sae!(SAE);
16036        let a = a.as_f32x16();
16037        let r = vcvttps2udq(a, u32x16::ZERO, 0b11111111_11111111, SAE);
16038        transmute(r)
16039    }
16040}
16041
16042/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
16043/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16044///
16045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu32&expand=1923)
16046#[inline]
16047#[target_feature(enable = "avx512f")]
16048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16049#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
16050#[rustc_legacy_const_generics(3)]
16051pub fn _mm512_mask_cvtt_roundps_epu32<const SAE: i32>(
16052    src: __m512i,
16053    k: __mmask16,
16054    a: __m512,
16055) -> __m512i {
16056    unsafe {
16057        static_assert_sae!(SAE);
16058        let a = a.as_f32x16();
16059        let src = src.as_u32x16();
16060        let r = vcvttps2udq(a, src, k, SAE);
16061        transmute(r)
16062    }
16063}
16064
16065/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
16066/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16067///
16068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu32&expand=1924)
16069#[inline]
16070#[target_feature(enable = "avx512f")]
16071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16072#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
16073#[rustc_legacy_const_generics(2)]
16074pub fn _mm512_maskz_cvtt_roundps_epu32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
16075    unsafe {
16076        static_assert_sae!(SAE);
16077        let a = a.as_f32x16();
16078        let r = vcvttps2udq(a, u32x16::ZERO, k, SAE);
16079        transmute(r)
16080    }
16081}
16082
16083/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
16084/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16085///
16086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi32&expand=1904)
16087#[inline]
16088#[target_feature(enable = "avx512f")]
16089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16090#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
16091#[rustc_legacy_const_generics(1)]
16092pub fn _mm512_cvtt_roundpd_epi32<const SAE: i32>(a: __m512d) -> __m256i {
16093    unsafe {
16094        static_assert_sae!(SAE);
16095        let a = a.as_f64x8();
16096        let r = vcvttpd2dq(a, i32x8::ZERO, 0b11111111, SAE);
16097        transmute(r)
16098    }
16099}
16100
16101/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
16102/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16103///
16104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi32&expand=1905)
16105#[inline]
16106#[target_feature(enable = "avx512f")]
16107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16108#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
16109#[rustc_legacy_const_generics(3)]
16110pub fn _mm512_mask_cvtt_roundpd_epi32<const SAE: i32>(
16111    src: __m256i,
16112    k: __mmask8,
16113    a: __m512d,
16114) -> __m256i {
16115    unsafe {
16116        static_assert_sae!(SAE);
16117        let a = a.as_f64x8();
16118        let src = src.as_i32x8();
16119        let r = vcvttpd2dq(a, src, k, SAE);
16120        transmute(r)
16121    }
16122}
16123
/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi32&expand=1906)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvtt_roundpd_epi32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        // Zero pass-through vector: masked-off lanes come out as 0.
        let r = vcvttpd2dq(a, i32x8::ZERO, k, SAE);
        transmute(r)
    }
}
16141
16142/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
16143/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16144///
16145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu32&expand=1910)
16146#[inline]
16147#[target_feature(enable = "avx512f")]
16148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16149#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
16150#[rustc_legacy_const_generics(1)]
16151pub fn _mm512_cvtt_roundpd_epu32<const SAE: i32>(a: __m512d) -> __m256i {
16152    unsafe {
16153        static_assert_sae!(SAE);
16154        let a = a.as_f64x8();
16155        let r = vcvttpd2udq(a, i32x8::ZERO, 0b11111111, SAE);
16156        transmute(r)
16157    }
16158}
16159
16160/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
16161/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16162///
16163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu32&expand=1911)
16164#[inline]
16165#[target_feature(enable = "avx512f")]
16166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16167#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
16168#[rustc_legacy_const_generics(3)]
16169pub fn _mm512_mask_cvtt_roundpd_epu32<const SAE: i32>(
16170    src: __m256i,
16171    k: __mmask8,
16172    a: __m512d,
16173) -> __m256i {
16174    unsafe {
16175        static_assert_sae!(SAE);
16176        let a = a.as_f64x8();
16177        let src = src.as_i32x8();
16178        let r = vcvttpd2udq(a, src, k, SAE);
16179        transmute(r)
16180    }
16181}
16182
16183/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
16184///
16185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi32&expand=1984)
16186#[inline]
16187#[target_feature(enable = "avx512f")]
16188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16189#[cfg_attr(test, assert_instr(vcvttps2dq))]
16190pub fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
16191    unsafe {
16192        transmute(vcvttps2dq(
16193            a.as_f32x16(),
16194            i32x16::ZERO,
16195            0b11111111_11111111,
16196            _MM_FROUND_CUR_DIRECTION,
16197        ))
16198    }
16199}
16200
16201/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16202///
16203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi32&expand=1985)
16204#[inline]
16205#[target_feature(enable = "avx512f")]
16206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16207#[cfg_attr(test, assert_instr(vcvttps2dq))]
16208pub fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
16209    unsafe {
16210        transmute(vcvttps2dq(
16211            a.as_f32x16(),
16212            src.as_i32x16(),
16213            k,
16214            _MM_FROUND_CUR_DIRECTION,
16215        ))
16216    }
16217}
16218
16219/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16220///
16221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi32&expand=1986)
16222#[inline]
16223#[target_feature(enable = "avx512f")]
16224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16225#[cfg_attr(test, assert_instr(vcvttps2dq))]
16226pub fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
16227    unsafe {
16228        transmute(vcvttps2dq(
16229            a.as_f32x16(),
16230            i32x16::ZERO,
16231            k,
16232            _MM_FROUND_CUR_DIRECTION,
16233        ))
16234    }
16235}
16236
16237/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16238///
16239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi32&expand=1982)
16240#[inline]
16241#[target_feature(enable = "avx512f,avx512vl")]
16242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16243#[cfg_attr(test, assert_instr(vcvttps2dq))]
16244pub fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
16245    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), k)) }
16246}
16247
16248/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16249///
16250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi32&expand=1983)
16251#[inline]
16252#[target_feature(enable = "avx512f,avx512vl")]
16253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16254#[cfg_attr(test, assert_instr(vcvttps2dq))]
16255pub fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
16256    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), i32x8::ZERO, k)) }
16257}
16258
16259/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16260///
16261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi32&expand=1979)
16262#[inline]
16263#[target_feature(enable = "avx512f,avx512vl")]
16264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16265#[cfg_attr(test, assert_instr(vcvttps2dq))]
16266pub fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
16267    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), k)) }
16268}
16269
16270/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16271///
16272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi32&expand=1980)
16273#[inline]
16274#[target_feature(enable = "avx512f,avx512vl")]
16275#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16276#[cfg_attr(test, assert_instr(vcvttps2dq))]
16277pub fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
16278    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), i32x4::ZERO, k)) }
16279}
16280
16281/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16282///
16283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu32&expand=2002)
16284#[inline]
16285#[target_feature(enable = "avx512f")]
16286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16287#[cfg_attr(test, assert_instr(vcvttps2udq))]
16288pub fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
16289    unsafe {
16290        transmute(vcvttps2udq(
16291            a.as_f32x16(),
16292            u32x16::ZERO,
16293            0b11111111_11111111,
16294            _MM_FROUND_CUR_DIRECTION,
16295        ))
16296    }
16297}
16298
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu32&expand=2003)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2udq(
            a.as_f32x16(),
            src.as_u32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
16316
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu32&expand=2004)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
16334
16335/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16336///
16337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu32&expand=1999)
16338#[inline]
16339#[target_feature(enable = "avx512f,avx512vl")]
16340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16341#[cfg_attr(test, assert_instr(vcvttps2udq))]
16342pub fn _mm256_cvttps_epu32(a: __m256) -> __m256i {
16343    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
16344}
16345
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu32&expand=2000)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
}
16356
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu32&expand=2001)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
}
16367
16368/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16369///
16370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu32&expand=1996)
16371#[inline]
16372#[target_feature(enable = "avx512f,avx512vl")]
16373#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16374#[cfg_attr(test, assert_instr(vcvttps2udq))]
16375pub fn _mm_cvttps_epu32(a: __m128) -> __m128i {
16376    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
16377}
16378
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu32&expand=1997)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
}
16389
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu32&expand=1998)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
}
16400
16401/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
16402/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16403///
16404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu32&expand=1912)
16405#[inline]
16406#[target_feature(enable = "avx512f")]
16407#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16408#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
16409#[rustc_legacy_const_generics(2)]
16410pub fn _mm512_maskz_cvtt_roundpd_epu32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
16411    unsafe {
16412        static_assert_sae!(SAE);
16413        let a = a.as_f64x8();
16414        let r = vcvttpd2udq(a, i32x8::ZERO, k, SAE);
16415        transmute(r)
16416    }
16417}
16418
16419/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
16420///
16421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi32&expand=1947)
16422#[inline]
16423#[target_feature(enable = "avx512f")]
16424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16425#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16426pub fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
16427    unsafe {
16428        transmute(vcvttpd2dq(
16429            a.as_f64x8(),
16430            i32x8::ZERO,
16431            0b11111111,
16432            _MM_FROUND_CUR_DIRECTION,
16433        ))
16434    }
16435}
16436
16437/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16438///
16439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi32&expand=1948)
16440#[inline]
16441#[target_feature(enable = "avx512f")]
16442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16443#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16444pub fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
16445    unsafe {
16446        transmute(vcvttpd2dq(
16447            a.as_f64x8(),
16448            src.as_i32x8(),
16449            k,
16450            _MM_FROUND_CUR_DIRECTION,
16451        ))
16452    }
16453}
16454
16455/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16456///
16457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi32&expand=1949)
16458#[inline]
16459#[target_feature(enable = "avx512f")]
16460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16461#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16462pub fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
16463    unsafe {
16464        transmute(vcvttpd2dq(
16465            a.as_f64x8(),
16466            i32x8::ZERO,
16467            k,
16468            _MM_FROUND_CUR_DIRECTION,
16469        ))
16470    }
16471}
16472
16473/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16474///
16475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi32&expand=1945)
16476#[inline]
16477#[target_feature(enable = "avx512f,avx512vl")]
16478#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16479#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16480pub fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
16481    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k)) }
16482}
16483
16484/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16485///
16486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi32&expand=1946)
16487#[inline]
16488#[target_feature(enable = "avx512f,avx512vl")]
16489#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16490#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16491pub fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
16492    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), i32x4::ZERO, k)) }
16493}
16494
16495/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16496///
16497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi32&expand=1942)
16498#[inline]
16499#[target_feature(enable = "avx512f,avx512vl")]
16500#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16501#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16502pub fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
16503    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), k)) }
16504}
16505
16506/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16507///
16508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi32&expand=1943)
16509#[inline]
16510#[target_feature(enable = "avx512f,avx512vl")]
16511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16512#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16513pub fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
16514    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), i32x4::ZERO, k)) }
16515}
16516
16517/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16518///
16519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu32&expand=1965)
16520#[inline]
16521#[target_feature(enable = "avx512f")]
16522#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16523#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16524pub fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
16525    unsafe {
16526        transmute(vcvttpd2udq(
16527            a.as_f64x8(),
16528            i32x8::ZERO,
16529            0b11111111,
16530            _MM_FROUND_CUR_DIRECTION,
16531        ))
16532    }
16533}
16534
16535/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16536///
16537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu32&expand=1966)
16538#[inline]
16539#[target_feature(enable = "avx512f")]
16540#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16541#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16542pub fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
16543    unsafe {
16544        transmute(vcvttpd2udq(
16545            a.as_f64x8(),
16546            src.as_i32x8(),
16547            k,
16548            _MM_FROUND_CUR_DIRECTION,
16549        ))
16550    }
16551}
16552
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu32&expand=1967)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
    // SAFETY: the intrinsic only requires avx512f, which the
    // `target_feature` attribute guarantees is enabled for this function.
    unsafe {
        transmute(vcvttpd2udq(
            a.as_f64x8(),
            // Zero "src": lanes whose bit in `k` is clear come out as zero.
            i32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
16570
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu32&expand=1962)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i {
    // SAFETY: requires only avx512f+avx512vl, enabled via `target_feature`.
    // The all-ones mask converts every lane, so the zero "src" vector is
    // never observed in the result.
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, 0b11111111)) }
}
16581
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu32&expand=1963)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    // SAFETY: requires only avx512f+avx512vl, enabled via `target_feature`.
    // Lanes whose bit in `k` is clear keep the corresponding `src` lane.
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), k)) }
}
16592
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu32&expand=1964)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
    // SAFETY: requires only avx512f+avx512vl, enabled via `target_feature`.
    // Zero "src": lanes whose bit in `k` is clear come out as zero.
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, k)) }
}
16603
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu32&expand=1959)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm_cvttpd_epu32(a: __m128d) -> __m128i {
    // SAFETY: requires only avx512f+avx512vl, enabled via `target_feature`.
    // The all-ones mask converts every lane, so the zero "src" vector is
    // never observed in the result.
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, 0b11111111)) }
}
16614
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu32&expand=1960)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    // SAFETY: requires only avx512f+avx512vl, enabled via `target_feature`.
    // Lanes whose bit in `k` is clear keep the corresponding `src` lane.
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), k)) }
}
16625
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu32&expand=1961)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
    // SAFETY: requires only avx512f+avx512vl, enabled via `target_feature`.
    // Zero "src": lanes whose bit in `k` is clear come out as zero.
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, k)) }
}
16636
/// Returns vector of type `__m512d` with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_pd&expand=5018)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxorps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setzero_pd() -> __m512d {
    // All-0 is a properly initialized __m512d
    // SAFETY: the all-zero bit pattern is a valid value for every lane, so
    // `mem::zeroed` yields a fully initialized vector; the `const` block
    // evaluates it at compile time.
    unsafe { const { mem::zeroed() } }
}
16649
/// Returns vector of type `__m512` with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_ps&expand=5021)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxorps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setzero_ps() -> __m512 {
    // All-0 is a properly initialized __m512
    // SAFETY: the all-zero bit pattern is a valid value for every lane, so
    // `mem::zeroed` yields a fully initialized vector; the `const` block
    // evaluates it at compile time.
    unsafe { const { mem::zeroed() } }
}
16662
/// Return vector of type `__m512` with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero&expand=5014)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxorps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setzero() -> __m512 {
    // All-0 is a properly initialized __m512
    // SAFETY: the all-zero bit pattern is a valid value for every lane, so
    // `mem::zeroed` yields a fully initialized vector; the `const` block
    // evaluates it at compile time.
    unsafe { const { mem::zeroed() } }
}
16675
/// Returns vector of type `__m512i` with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_si512&expand=5024)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxorps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setzero_si512() -> __m512i {
    // All-0 is a properly initialized __m512i
    // SAFETY: the all-zero bit pattern is a valid value for every lane, so
    // `mem::zeroed` yields a fully initialized vector; the `const` block
    // evaluates it at compile time.
    unsafe { const { mem::zeroed() } }
}
16688
/// Return vector of type `__m512i` with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_epi32&expand=5015)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxorps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setzero_epi32() -> __m512i {
    // All-0 is a properly initialized __m512i
    // SAFETY: the all-zero bit pattern is a valid value for every lane, so
    // `mem::zeroed` yields a fully initialized vector; the `const` block
    // evaluates it at compile time.
    unsafe { const { mem::zeroed() } }
}
16701
16702/// Sets packed 32-bit integers in `dst` with the supplied values in reverse
16703/// order.
16704///
16705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi32&expand=4991)
16706#[inline]
16707#[target_feature(enable = "avx512f")]
16708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16709#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16710pub const fn _mm512_setr_epi32(
16711    e15: i32,
16712    e14: i32,
16713    e13: i32,
16714    e12: i32,
16715    e11: i32,
16716    e10: i32,
16717    e9: i32,
16718    e8: i32,
16719    e7: i32,
16720    e6: i32,
16721    e5: i32,
16722    e4: i32,
16723    e3: i32,
16724    e2: i32,
16725    e1: i32,
16726    e0: i32,
16727) -> __m512i {
16728    unsafe {
16729        let r = i32x16::new(
16730            e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
16731        );
16732        transmute(r)
16733    }
16734}
16735
/// Set packed 8-bit integers in dst with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi8&expand=4915)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set_epi8(
    e63: i8,
    e62: i8,
    e61: i8,
    e60: i8,
    e59: i8,
    e58: i8,
    e57: i8,
    e56: i8,
    e55: i8,
    e54: i8,
    e53: i8,
    e52: i8,
    e51: i8,
    e50: i8,
    e49: i8,
    e48: i8,
    e47: i8,
    e46: i8,
    e45: i8,
    e44: i8,
    e43: i8,
    e42: i8,
    e41: i8,
    e40: i8,
    e39: i8,
    e38: i8,
    e37: i8,
    e36: i8,
    e35: i8,
    e34: i8,
    e33: i8,
    e32: i8,
    e31: i8,
    e30: i8,
    e29: i8,
    e28: i8,
    e27: i8,
    e26: i8,
    e25: i8,
    e24: i8,
    e23: i8,
    e22: i8,
    e21: i8,
    e20: i8,
    e19: i8,
    e18: i8,
    e17: i8,
    e16: i8,
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m512i {
    // Arguments are given most-significant element first (e63 … e0), while
    // `i8x64::new` takes lanes in memory order (lane 0 first), so the
    // argument order is reversed below.
    // SAFETY: `i8x64` and `__m512i` are both 512-bit plain-data vector
    // types, so the transmute is a bit-for-bit reinterpretation.
    unsafe {
        let r = i8x64::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35,
            e36, e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52,
            e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63,
        );
        transmute(r)
    }
}
16819
/// Set packed 16-bit integers in dst with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi16&expand=4905)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set_epi16(
    e31: i16,
    e30: i16,
    e29: i16,
    e28: i16,
    e27: i16,
    e26: i16,
    e25: i16,
    e24: i16,
    e23: i16,
    e22: i16,
    e21: i16,
    e20: i16,
    e19: i16,
    e18: i16,
    e17: i16,
    e16: i16,
    e15: i16,
    e14: i16,
    e13: i16,
    e12: i16,
    e11: i16,
    e10: i16,
    e9: i16,
    e8: i16,
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m512i {
    // Arguments are given most-significant element first (e31 … e0), while
    // `i16x32::new` takes lanes in memory order (lane 0 first), so the
    // argument order is reversed below.
    // SAFETY: `i16x32` and `__m512i` are both 512-bit plain-data vector
    // types, so the transmute is a bit-for-bit reinterpretation.
    unsafe {
        let r = i16x32::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
        );
        transmute(r)
    }
}
16869
/// Set packed 32-bit integers in dst with the repeated 4 element sequence.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi32&expand=4982)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
    // Repeat the 4-element sequence four times to fill all 16 lanes;
    // `_mm512_set_epi32` takes elements most-significant first.
    _mm512_set_epi32(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
}
16880
/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_ps&expand=4985)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
    // Repeat the 4-element sequence four times to fill all 16 lanes;
    // `_mm512_set_ps` takes elements most-significant first.
    _mm512_set_ps(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
}
16891
/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_pd&expand=4984)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
    // Repeat the 4-element sequence twice to fill all 8 lanes;
    // `_mm512_set_pd` takes elements most-significant first.
    _mm512_set_pd(d, c, b, a, d, c, b, a)
}
16902
/// Set packed 32-bit integers in dst with the repeated 4 element sequence in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi32&expand=5009)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
    // Same repeat as `_mm512_set4_epi32` but with the 4-element sequence
    // reversed (a, b, c, d) before being handed to `_mm512_set_epi32`.
    _mm512_set_epi32(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
}
16913
/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_ps&expand=5012)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
    // Same repeat as `_mm512_set4_ps` but with the 4-element sequence
    // reversed (a, b, c, d) before being handed to `_mm512_set_ps`.
    _mm512_set_ps(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
}
16924
/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_pd&expand=5011)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
    // Same repeat as `_mm512_set4_pd` but with the 4-element sequence
    // reversed (a, b, c, d) before being handed to `_mm512_set_pd`.
    _mm512_set_pd(a, b, c, d, a, b, c, d)
}
16935
16936/// Set packed 64-bit integers in dst with the supplied values.
16937///
16938/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi64&expand=4910)
16939#[inline]
16940#[target_feature(enable = "avx512f")]
16941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16942#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16943pub const fn _mm512_set_epi64(
16944    e0: i64,
16945    e1: i64,
16946    e2: i64,
16947    e3: i64,
16948    e4: i64,
16949    e5: i64,
16950    e6: i64,
16951    e7: i64,
16952) -> __m512i {
16953    _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
16954}
16955
16956/// Set packed 64-bit integers in dst with the supplied values in reverse order.
16957///
16958/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi64&expand=4993)
16959#[inline]
16960#[target_feature(enable = "avx512f")]
16961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16962#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16963pub const fn _mm512_setr_epi64(
16964    e0: i64,
16965    e1: i64,
16966    e2: i64,
16967    e3: i64,
16968    e4: i64,
16969    e5: i64,
16970    e6: i64,
16971    e7: i64,
16972) -> __m512i {
16973    unsafe {
16974        let r = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
16975        transmute(r)
16976    }
16977}
16978
/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_pd&expand=3002)
///
/// # Safety
///
/// Every address formed from `slice` plus an element of `offsets` scaled by
/// `SCALE` bytes must be valid for a read of the element type — all eight
/// lanes are gathered.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i32gather_pd<const SCALE: i32>(
    offsets: __m256i,
    slice: *const f64,
) -> __m512d {
    // Compile-time validation of the SCALE immediate.
    static_assert_imm8_scale!(SCALE);
    let zero = f64x8::ZERO;
    // All-ones mask (-1): every lane is gathered, so `zero` never shows up
    // in the result.
    let neg_one = -1;
    // The intrinsic addresses at byte granularity, hence the `*const i8` cast.
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x8();
    let r = vgatherdpd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}
16999
/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_pd&expand=3003)
///
/// # Safety
///
/// Every address selected by a set bit of `mask` (formed from `slice` plus
/// the offset element scaled by `SCALE` bytes) must be valid for a read of
/// the element type; lanes with a clear mask bit are copied from `src`.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32gather_pd<const SCALE: i32>(
    src: __m512d,
    mask: __mmask8,
    offsets: __m256i,
    slice: *const f64,
) -> __m512d {
    // Compile-time validation of the SCALE immediate.
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f64x8();
    // The intrinsic addresses at byte granularity, hence the `*const i8` cast.
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x8();
    // `mask as i8` reinterprets the bitmask for the intrinsic's signed
    // mask parameter.
    let r = vgatherdpd(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
}
17021
/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_pd&expand=3092)
///
/// # Safety
///
/// Every address formed from `slice` plus an element of `offsets` scaled by
/// `SCALE` bytes must be valid for a read of the element type — all eight
/// lanes are gathered.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i64gather_pd<const SCALE: i32>(
    offsets: __m512i,
    slice: *const f64,
) -> __m512d {
    // Compile-time validation of the SCALE immediate.
    static_assert_imm8_scale!(SCALE);
    let zero = f64x8::ZERO;
    // All-ones mask (-1): every lane is gathered, so `zero` never shows up
    // in the result.
    let neg_one = -1;
    // The intrinsic addresses at byte granularity, hence the `*const i8` cast.
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    let r = vgatherqpd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}
17042
/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_pd&expand=3093)
///
/// # Safety
///
/// Every address selected by a set bit of `mask` (formed from `slice` plus
/// the offset element scaled by `SCALE` bytes) must be valid for a read of
/// the element type; lanes with a clear mask bit are copied from `src`.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i64gather_pd<const SCALE: i32>(
    src: __m512d,
    mask: __mmask8,
    offsets: __m512i,
    slice: *const f64,
) -> __m512d {
    // Compile-time validation of the SCALE immediate.
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f64x8();
    // The intrinsic addresses at byte granularity, hence the `*const i8` cast.
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    // `mask as i8` reinterprets the bitmask for the intrinsic's signed
    // mask parameter.
    let r = vgatherqpd(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
}
17064
/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_ps&expand=3100)
///
/// # Safety
///
/// Every address formed from `slice` plus an element of `offsets` scaled by
/// `SCALE` bytes must be valid for a read of the element type — all eight
/// lanes are gathered.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i64gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m256 {
    // Compile-time validation of the SCALE immediate.
    static_assert_imm8_scale!(SCALE);
    let zero = f32x8::ZERO;
    // All-ones mask (-1): every lane is gathered, so `zero` never shows up
    // in the result.
    let neg_one = -1;
    // The intrinsic addresses at byte granularity, hence the `*const i8` cast.
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    let r = vgatherqps(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}
17082
/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_ps&expand=3101)
///
/// # Safety
///
/// Every address selected by a set bit of `mask` (formed from `slice` plus
/// the offset element scaled by `SCALE` bytes) must be valid for a read of
/// the element type; lanes with a clear mask bit are copied from `src`.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i64gather_ps<const SCALE: i32>(
    src: __m256,
    mask: __mmask8,
    offsets: __m512i,
    slice: *const f32,
) -> __m256 {
    // Compile-time validation of the SCALE immediate.
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f32x8();
    // The intrinsic addresses at byte granularity, hence the `*const i8` cast.
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    // `mask as i8` reinterprets the bitmask for the intrinsic's signed
    // mask parameter.
    let r = vgatherqps(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
}
17104
/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_ps&expand=3010)
///
/// # Safety
///
/// Every address formed from `slice` plus an element of `offsets` scaled by
/// `SCALE` bytes must be valid for a read of the element type — all sixteen
/// lanes are gathered.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i32gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m512 {
    // Compile-time validation of the SCALE immediate.
    static_assert_imm8_scale!(SCALE);
    let zero = f32x16::ZERO;
    // All-ones mask (-1): every lane is gathered, so `zero` never shows up
    // in the result.
    let neg_one = -1;
    // The intrinsic addresses at byte granularity, hence the `*const i8` cast.
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x16();
    let r = vgatherdps(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}
17122
/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_ps&expand=3011)
///
/// # Safety
///
/// Every address selected by a set bit of `mask` (formed from `slice` plus
/// the offset element scaled by `SCALE` bytes) must be valid for a read of
/// the element type; lanes with a clear mask bit are copied from `src`.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32gather_ps<const SCALE: i32>(
    src: __m512,
    mask: __mmask16,
    offsets: __m512i,
    slice: *const f32,
) -> __m512 {
    // Compile-time validation of the SCALE immediate.
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f32x16();
    // The intrinsic addresses at byte granularity, hence the `*const i8` cast.
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x16();
    // `mask as i16` reinterprets the bitmask for the intrinsic's signed
    // mask parameter.
    let r = vgatherdps(src, slice, offsets, mask as i16, SCALE);
    transmute(r)
}
17144
/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi32&expand=2986)
///
/// # Safety
///
/// Every address formed from `slice` plus an element of `offsets` scaled by
/// `SCALE` bytes must be valid for a read of the element type — all sixteen
/// lanes are gathered.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i32gather_epi32<const SCALE: i32>(
    offsets: __m512i,
    slice: *const i32,
) -> __m512i {
    // Compile-time validation of the SCALE immediate.
    static_assert_imm8_scale!(SCALE);
    let zero = i32x16::ZERO;
    // All-ones mask (-1): every lane is gathered, so `zero` never shows up
    // in the result.
    let neg_one = -1;
    // The intrinsic addresses at byte granularity, hence the `*const i8` cast.
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x16();
    let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}
17165
/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi32&expand=2987)
///
/// # Safety
///
/// Every address selected by a set bit of `mask` (formed from `slice` plus
/// the offset element scaled by `SCALE` bytes) must be valid for a read of
/// the element type; lanes with a clear mask bit are copied from `src`.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32gather_epi32<const SCALE: i32>(
    src: __m512i,
    mask: __mmask16,
    offsets: __m512i,
    slice: *const i32,
) -> __m512i {
    // Compile-time validation of the SCALE immediate.
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x16();
    // `mask as i16` reinterprets the bitmask for the intrinsic's signed
    // mask parameter.
    let mask = mask as i16;
    // The intrinsic addresses at byte granularity, hence the `*const i8` cast.
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x16();
    let r = vpgatherdd(src, slice, offsets, mask, SCALE);
    transmute(r)
}
17188
/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi64&expand=2994)
///
/// # Safety
///
/// Every address formed from `slice` plus an element of `offsets` scaled by
/// `SCALE` bytes must be valid for a read of the element type — all eight
/// lanes are gathered.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i32gather_epi64<const SCALE: i32>(
    offsets: __m256i,
    slice: *const i64,
) -> __m512i {
    // Compile-time validation of the SCALE immediate.
    static_assert_imm8_scale!(SCALE);
    let zero = i64x8::ZERO;
    // All-ones mask (-1): every lane is gathered, so `zero` never shows up
    // in the result.
    let neg_one = -1;
    // The intrinsic addresses at byte granularity, hence the `*const i8` cast.
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x8();
    let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}
17209
/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi64&expand=2995)
///
/// # Safety
///
/// Every address selected by a set bit of `mask` (formed from `slice` plus
/// the offset element scaled by `SCALE` bytes) must be valid for a read of
/// the element type; lanes with a clear mask bit are copied from `src`.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32gather_epi64<const SCALE: i32>(
    src: __m512i,
    mask: __mmask8,
    offsets: __m256i,
    slice: *const i64,
) -> __m512i {
    // Compile-time validation of the SCALE immediate.
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x8();
    // `mask as i8` reinterprets the bitmask for the intrinsic's signed
    // mask parameter.
    let mask = mask as i8;
    // The intrinsic addresses at byte granularity, hence the `*const i8` cast.
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x8();
    let r = vpgatherdq(src, slice, offsets, mask, SCALE);
    transmute(r)
}
17232
/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi64&expand=3084)
///
/// # Safety
///
/// Every address formed from `slice` plus an element of `offsets` scaled by
/// `SCALE` bytes must be valid for a read of the element type — all eight
/// lanes are gathered.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i64gather_epi64<const SCALE: i32>(
    offsets: __m512i,
    slice: *const i64,
) -> __m512i {
    // Compile-time validation of the SCALE immediate.
    static_assert_imm8_scale!(SCALE);
    let zero = i64x8::ZERO;
    // All-ones mask (-1): every lane is gathered, so `zero` never shows up
    // in the result.
    let neg_one = -1;
    // The intrinsic addresses at byte granularity, hence the `*const i8` cast.
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}
17253
17254/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17255///
17256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi64&expand=3085)
17257#[inline]
17258#[target_feature(enable = "avx512f")]
17259#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17260#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
17261#[rustc_legacy_const_generics(4)]
17262pub unsafe fn _mm512_mask_i64gather_epi64<const SCALE: i32>(
17263    src: __m512i,
17264    mask: __mmask8,
17265    offsets: __m512i,
17266    slice: *const i64,
17267) -> __m512i {
17268    static_assert_imm8_scale!(SCALE);
17269    let src = src.as_i64x8();
17270    let mask = mask as i8;
17271    let slice = slice as *const i8;
17272    let offsets = offsets.as_i64x8();
17273    let r = vpgatherqq(src, slice, offsets, mask, SCALE);
17274    transmute(r)
17275}
17276
17277/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17278///
17279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi32&expand=3074)
17280#[inline]
17281#[target_feature(enable = "avx512f")]
17282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17283#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
17284#[rustc_legacy_const_generics(2)]
17285pub unsafe fn _mm512_i64gather_epi32<const SCALE: i32>(
17286    offsets: __m512i,
17287    slice: *const i32,
17288) -> __m256i {
17289    static_assert_imm8_scale!(SCALE);
17290    let zeros = i32x8::ZERO;
17291    let neg_one = -1;
17292    let slice = slice as *const i8;
17293    let offsets = offsets.as_i64x8();
17294    let r = vpgatherqd(zeros, slice, offsets, neg_one, SCALE);
17295    transmute(r)
17296}
17297
17298/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17299///
17300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi32&expand=3075)
17301#[inline]
17302#[target_feature(enable = "avx512f")]
17303#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17304#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
17305#[rustc_legacy_const_generics(4)]
17306pub unsafe fn _mm512_mask_i64gather_epi32<const SCALE: i32>(
17307    src: __m256i,
17308    mask: __mmask8,
17309    offsets: __m512i,
17310    slice: *const i32,
17311) -> __m256i {
17312    static_assert_imm8_scale!(SCALE);
17313    let src = src.as_i32x8();
17314    let mask = mask as i8;
17315    let slice = slice as *const i8;
17316    let offsets = offsets.as_i64x8();
17317    let r = vpgatherqd(src, slice, offsets, mask, SCALE);
17318    transmute(r)
17319}
17320
17321/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17322///
17323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_pd&expand=3044)
17324#[inline]
17325#[target_feature(enable = "avx512f")]
17326#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17327#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17328#[rustc_legacy_const_generics(3)]
17329pub unsafe fn _mm512_i32scatter_pd<const SCALE: i32>(
17330    slice: *mut f64,
17331    offsets: __m256i,
17332    src: __m512d,
17333) {
17334    static_assert_imm8_scale!(SCALE);
17335    let src = src.as_f64x8();
17336    let neg_one = -1;
17337    let slice = slice as *mut i8;
17338    let offsets = offsets.as_i32x8();
17339    vscatterdpd(slice, neg_one, offsets, src, SCALE);
17340}
17341
17342/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17343///
17344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_pd&expand=3045)
17345#[inline]
17346#[target_feature(enable = "avx512f")]
17347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17348#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17349#[rustc_legacy_const_generics(4)]
17350pub unsafe fn _mm512_mask_i32scatter_pd<const SCALE: i32>(
17351    slice: *mut f64,
17352    mask: __mmask8,
17353    offsets: __m256i,
17354    src: __m512d,
17355) {
17356    static_assert_imm8_scale!(SCALE);
17357    let src = src.as_f64x8();
17358    let slice = slice as *mut i8;
17359    let offsets = offsets.as_i32x8();
17360    vscatterdpd(slice, mask as i8, offsets, src, SCALE);
17361}
17362
17363/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17364///
17365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_pd&expand=3122)
17366#[inline]
17367#[target_feature(enable = "avx512f")]
17368#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17369#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17370#[rustc_legacy_const_generics(3)]
17371pub unsafe fn _mm512_i64scatter_pd<const SCALE: i32>(
17372    slice: *mut f64,
17373    offsets: __m512i,
17374    src: __m512d,
17375) {
17376    static_assert_imm8_scale!(SCALE);
17377    let src = src.as_f64x8();
17378    let neg_one = -1;
17379    let slice = slice as *mut i8;
17380    let offsets = offsets.as_i64x8();
17381    vscatterqpd(slice, neg_one, offsets, src, SCALE);
17382}
17383
17384/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17385///
17386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_pd&expand=3123)
17387#[inline]
17388#[target_feature(enable = "avx512f")]
17389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17390#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17391#[rustc_legacy_const_generics(4)]
17392pub unsafe fn _mm512_mask_i64scatter_pd<const SCALE: i32>(
17393    slice: *mut f64,
17394    mask: __mmask8,
17395    offsets: __m512i,
17396    src: __m512d,
17397) {
17398    static_assert_imm8_scale!(SCALE);
17399    let src = src.as_f64x8();
17400    let slice = slice as *mut i8;
17401    let offsets = offsets.as_i64x8();
17402    vscatterqpd(slice, mask as i8, offsets, src, SCALE);
17403}
17404
17405/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17406///
17407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_ps&expand=3050)
17408#[inline]
17409#[target_feature(enable = "avx512f")]
17410#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17411#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17412#[rustc_legacy_const_generics(3)]
17413pub unsafe fn _mm512_i32scatter_ps<const SCALE: i32>(
17414    slice: *mut f32,
17415    offsets: __m512i,
17416    src: __m512,
17417) {
17418    static_assert_imm8_scale!(SCALE);
17419    let src = src.as_f32x16();
17420    let neg_one = -1;
17421    let slice = slice as *mut i8;
17422    let offsets = offsets.as_i32x16();
17423    vscatterdps(slice, neg_one, offsets, src, SCALE);
17424}
17425
17426/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17427///
17428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_ps&expand=3051)
17429#[inline]
17430#[target_feature(enable = "avx512f")]
17431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17432#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17433#[rustc_legacy_const_generics(4)]
17434pub unsafe fn _mm512_mask_i32scatter_ps<const SCALE: i32>(
17435    slice: *mut f32,
17436    mask: __mmask16,
17437    offsets: __m512i,
17438    src: __m512,
17439) {
17440    static_assert_imm8_scale!(SCALE);
17441    let src = src.as_f32x16();
17442    let slice = slice as *mut i8;
17443    let offsets = offsets.as_i32x16();
17444    vscatterdps(slice, mask as i16, offsets, src, SCALE);
17445}
17446
/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_ps&expand=3128)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_i64scatter_ps<const SCALE: i32>(
    slice: *mut f32,
    offsets: __m512i,
    src: __m256,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f32x8();
    // This is the unmasked variant: -1 enables all lanes, so every element is stored.
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    vscatterqps(slice, neg_one, offsets, src, SCALE);
}
17467
17468/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17469///
17470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_ps&expand=3129)
17471#[inline]
17472#[target_feature(enable = "avx512f")]
17473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17474#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17475#[rustc_legacy_const_generics(4)]
17476pub unsafe fn _mm512_mask_i64scatter_ps<const SCALE: i32>(
17477    slice: *mut f32,
17478    mask: __mmask8,
17479    offsets: __m512i,
17480    src: __m256,
17481) {
17482    static_assert_imm8_scale!(SCALE);
17483    let src = src.as_f32x8();
17484    let slice = slice as *mut i8;
17485    let offsets = offsets.as_i64x8();
17486    vscatterqps(slice, mask as i8, offsets, src, SCALE);
17487}
17488
17489/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17490///
17491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi64&expand=3038)
17492#[inline]
17493#[target_feature(enable = "avx512f")]
17494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17495#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17496#[rustc_legacy_const_generics(3)]
17497pub unsafe fn _mm512_i32scatter_epi64<const SCALE: i32>(
17498    slice: *mut i64,
17499    offsets: __m256i,
17500    src: __m512i,
17501) {
17502    static_assert_imm8_scale!(SCALE);
17503    let src = src.as_i64x8();
17504    let neg_one = -1;
17505    let slice = slice as *mut i8;
17506    let offsets = offsets.as_i32x8();
17507    vpscatterdq(slice, neg_one, offsets, src, SCALE);
17508}
17509
17510/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17511///
17512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi64&expand=3039)
17513#[inline]
17514#[target_feature(enable = "avx512f")]
17515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17516#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17517#[rustc_legacy_const_generics(4)]
17518pub unsafe fn _mm512_mask_i32scatter_epi64<const SCALE: i32>(
17519    slice: *mut i64,
17520    mask: __mmask8,
17521    offsets: __m256i,
17522    src: __m512i,
17523) {
17524    static_assert_imm8_scale!(SCALE);
17525    let src = src.as_i64x8();
17526    let mask = mask as i8;
17527    let slice = slice as *mut i8;
17528    let offsets = offsets.as_i32x8();
17529    vpscatterdq(slice, mask, offsets, src, SCALE);
17530}
17531
17532/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17533///
17534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi64&expand=3116)
17535#[inline]
17536#[target_feature(enable = "avx512f")]
17537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17538#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17539#[rustc_legacy_const_generics(3)]
17540pub unsafe fn _mm512_i64scatter_epi64<const SCALE: i32>(
17541    slice: *mut i64,
17542    offsets: __m512i,
17543    src: __m512i,
17544) {
17545    static_assert_imm8_scale!(SCALE);
17546    let src = src.as_i64x8();
17547    let neg_one = -1;
17548    let slice = slice as *mut i8;
17549    let offsets = offsets.as_i64x8();
17550    vpscatterqq(slice, neg_one, offsets, src, SCALE);
17551}
17552
17553/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17554///
17555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi64&expand=3117)
17556#[inline]
17557#[target_feature(enable = "avx512f")]
17558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17559#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17560#[rustc_legacy_const_generics(4)]
17561pub unsafe fn _mm512_mask_i64scatter_epi64<const SCALE: i32>(
17562    slice: *mut i64,
17563    mask: __mmask8,
17564    offsets: __m512i,
17565    src: __m512i,
17566) {
17567    static_assert_imm8_scale!(SCALE);
17568    let src = src.as_i64x8();
17569    let mask = mask as i8;
17570    let slice = slice as *mut i8;
17571    let offsets = offsets.as_i64x8();
17572    vpscatterqq(slice, mask, offsets, src, SCALE);
17573}
17574
17575/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17576///
17577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi32&expand=3032)
17578#[inline]
17579#[target_feature(enable = "avx512f")]
17580#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17581#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17582#[rustc_legacy_const_generics(3)]
17583pub unsafe fn _mm512_i32scatter_epi32<const SCALE: i32>(
17584    slice: *mut i32,
17585    offsets: __m512i,
17586    src: __m512i,
17587) {
17588    static_assert_imm8_scale!(SCALE);
17589    let src = src.as_i32x16();
17590    let neg_one = -1;
17591    let slice = slice as *mut i8;
17592    let offsets = offsets.as_i32x16();
17593    vpscatterdd(slice, neg_one, offsets, src, SCALE);
17594}
17595
17596/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17597///
17598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi32&expand=3033)
17599#[inline]
17600#[target_feature(enable = "avx512f")]
17601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17602#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17603#[rustc_legacy_const_generics(4)]
17604pub unsafe fn _mm512_mask_i32scatter_epi32<const SCALE: i32>(
17605    slice: *mut i32,
17606    mask: __mmask16,
17607    offsets: __m512i,
17608    src: __m512i,
17609) {
17610    static_assert_imm8_scale!(SCALE);
17611    let src = src.as_i32x16();
17612    let mask = mask as i16;
17613    let slice = slice as *mut i8;
17614    let offsets = offsets.as_i32x16();
17615    vpscatterdd(slice, mask, offsets, src, SCALE);
17616}
17617
17618/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17619///
17620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi32&expand=3108)
17621#[inline]
17622#[target_feature(enable = "avx512f")]
17623#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17624#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17625#[rustc_legacy_const_generics(3)]
17626pub unsafe fn _mm512_i64scatter_epi32<const SCALE: i32>(
17627    slice: *mut i32,
17628    offsets: __m512i,
17629    src: __m256i,
17630) {
17631    static_assert_imm8_scale!(SCALE);
17632    let src = src.as_i32x8();
17633    let neg_one = -1;
17634    let slice = slice as *mut i8;
17635    let offsets = offsets.as_i64x8();
17636    vpscatterqd(slice, neg_one, offsets, src, SCALE);
17637}
17638
17639/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17640///
17641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi32&expand=3109)
17642#[inline]
17643#[target_feature(enable = "avx512f")]
17644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17645#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17646#[rustc_legacy_const_generics(4)]
17647pub unsafe fn _mm512_mask_i64scatter_epi32<const SCALE: i32>(
17648    slice: *mut i32,
17649    mask: __mmask8,
17650    offsets: __m512i,
17651    src: __m256i,
17652) {
17653    static_assert_imm8_scale!(SCALE);
17654    let src = src.as_i32x8();
17655    let mask = mask as i8;
17656    let slice = slice as *mut i8;
17657    let offsets = offsets.as_i64x8();
17658    vpscatterqd(slice, mask, offsets, src, SCALE);
17659}
17660
17661/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17662/// indices stored in the lower half of vindex scaled by scale and stores them in dst.
17663///
17664/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64)
17665#[inline]
17666#[target_feature(enable = "avx512f")]
17667#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17668#[rustc_legacy_const_generics(2)]
17669#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17670pub unsafe fn _mm512_i32logather_epi64<const SCALE: i32>(
17671    vindex: __m512i,
17672    base_addr: *const i64,
17673) -> __m512i {
17674    _mm512_i32gather_epi64::<SCALE>(_mm512_castsi512_si256(vindex), base_addr)
17675}
17676
17677/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17678/// indices stored in the lower half of vindex scaled by scale and stores them in dst using writemask k
17679/// (elements are copied from src when the corresponding mask bit is not set).
17680///
17681/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64)
17682#[inline]
17683#[target_feature(enable = "avx512f")]
17684#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17685#[rustc_legacy_const_generics(4)]
17686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17687pub unsafe fn _mm512_mask_i32logather_epi64<const SCALE: i32>(
17688    src: __m512i,
17689    k: __mmask8,
17690    vindex: __m512i,
17691    base_addr: *const i64,
17692) -> __m512i {
17693    _mm512_mask_i32gather_epi64::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr)
17694}
17695
17696/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17697/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst.
17698///
17699/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd)
17700#[inline]
17701#[target_feature(enable = "avx512f")]
17702#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17703#[rustc_legacy_const_generics(2)]
17704#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17705pub unsafe fn _mm512_i32logather_pd<const SCALE: i32>(
17706    vindex: __m512i,
17707    base_addr: *const f64,
17708) -> __m512d {
17709    _mm512_i32gather_pd::<SCALE>(_mm512_castsi512_si256(vindex), base_addr)
17710}
17711
17712/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17713/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst
17714/// using writemask k (elements are copied from src when the corresponding mask bit is not set).
17715///
17716/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
17717#[inline]
17718#[target_feature(enable = "avx512f")]
17719#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17720#[rustc_legacy_const_generics(4)]
17721#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17722pub unsafe fn _mm512_mask_i32logather_pd<const SCALE: i32>(
17723    src: __m512d,
17724    k: __mmask8,
17725    vindex: __m512i,
17726    base_addr: *const f64,
17727) -> __m512d {
17728    _mm512_mask_i32gather_pd::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr)
17729}
17730
17731/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17732/// indices stored in the lower half of vindex scaled by scale.
17733///
17734/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64)
17735#[inline]
17736#[target_feature(enable = "avx512f")]
17737#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17738#[rustc_legacy_const_generics(3)]
17739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17740pub unsafe fn _mm512_i32loscatter_epi64<const SCALE: i32>(
17741    base_addr: *mut i64,
17742    vindex: __m512i,
17743    a: __m512i,
17744) {
17745    _mm512_i32scatter_epi64::<SCALE>(base_addr, _mm512_castsi512_si256(vindex), a)
17746}
17747
17748/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17749/// indices stored in the lower half of vindex scaled by scale using writemask k (elements whose corresponding
17750/// mask bit is not set are not written to memory).
17751///
17752/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
17753#[inline]
17754#[target_feature(enable = "avx512f")]
17755#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17756#[rustc_legacy_const_generics(4)]
17757#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17758pub unsafe fn _mm512_mask_i32loscatter_epi64<const SCALE: i32>(
17759    base_addr: *mut i64,
17760    k: __mmask8,
17761    vindex: __m512i,
17762    a: __m512i,
17763) {
17764    _mm512_mask_i32scatter_epi64::<SCALE>(base_addr, k, _mm512_castsi512_si256(vindex), a)
17765}
17766
17767/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17768/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale.
17769///
17770/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd)
17771#[inline]
17772#[target_feature(enable = "avx512f")]
17773#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17774#[rustc_legacy_const_generics(3)]
17775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17776pub unsafe fn _mm512_i32loscatter_pd<const SCALE: i32>(
17777    base_addr: *mut f64,
17778    vindex: __m512i,
17779    a: __m512d,
17780) {
17781    _mm512_i32scatter_pd::<SCALE>(base_addr, _mm512_castsi512_si256(vindex), a)
17782}
17783
17784/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17785/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale using writemask k
17786/// (elements whose corresponding mask bit is not set are not written to memory).
17787///
17788/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd)
17789#[inline]
17790#[target_feature(enable = "avx512f")]
17791#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17792#[rustc_legacy_const_generics(4)]
17793#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17794pub unsafe fn _mm512_mask_i32loscatter_pd<const SCALE: i32>(
17795    base_addr: *mut f64,
17796    k: __mmask8,
17797    vindex: __m512i,
17798    a: __m512d,
17799) {
17800    _mm512_mask_i32scatter_pd::<SCALE>(base_addr, k, _mm512_castsi512_si256(vindex), a)
17801}
17802
17803/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17804/// indices stored in vindex scaled by scale
17805///
17806/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_epi32)
17807#[inline]
17808#[target_feature(enable = "avx512f,avx512vl")]
17809#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17810#[rustc_legacy_const_generics(3)]
17811#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17812pub unsafe fn _mm256_i32scatter_epi32<const SCALE: i32>(
17813    base_addr: *mut i32,
17814    vindex: __m256i,
17815    a: __m256i,
17816) {
17817    static_assert_imm8_scale!(SCALE);
17818    vpscatterdd_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_i32x8(), SCALE)
17819}
17820
17821/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17822/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17823/// are not written to memory).
17824///
17825/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi32)
17826#[inline]
17827#[target_feature(enable = "avx512f,avx512vl")]
17828#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17829#[rustc_legacy_const_generics(4)]
17830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17831pub unsafe fn _mm256_mask_i32scatter_epi32<const SCALE: i32>(
17832    base_addr: *mut i32,
17833    k: __mmask8,
17834    vindex: __m256i,
17835    a: __m256i,
17836) {
17837    static_assert_imm8_scale!(SCALE);
17838    vpscatterdd_256(base_addr as _, k, vindex.as_i32x8(), a.as_i32x8(), SCALE)
17839}
17840
17841/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17842///
17843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32scatter_epi64&expand=4099)
17844#[inline]
17845#[target_feature(enable = "avx512f,avx512vl")]
17846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17847#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17848#[rustc_legacy_const_generics(3)]
17849pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
17850    slice: *mut i64,
17851    offsets: __m128i,
17852    src: __m256i,
17853) {
17854    static_assert_imm8_scale!(SCALE);
17855    let src = src.as_i64x4();
17856    let slice = slice as *mut i8;
17857    let offsets = offsets.as_i32x4();
17858    vpscatterdq_256(slice, 0xff, offsets, src, SCALE);
17859}
17860
17861/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17862/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17863/// are not written to memory).
17864///
17865/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi64)
17866#[inline]
17867#[target_feature(enable = "avx512f,avx512vl")]
17868#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17869#[rustc_legacy_const_generics(4)]
17870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17871pub unsafe fn _mm256_mask_i32scatter_epi64<const SCALE: i32>(
17872    base_addr: *mut i64,
17873    k: __mmask8,
17874    vindex: __m128i,
17875    a: __m256i,
17876) {
17877    static_assert_imm8_scale!(SCALE);
17878    vpscatterdq_256(base_addr as _, k, vindex.as_i32x4(), a.as_i64x4(), SCALE)
17879}
17880
17881/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17882/// at packed 32-bit integer indices stored in vindex scaled by scale
17883///
17884/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_pd)
17885#[inline]
17886#[target_feature(enable = "avx512f,avx512vl")]
17887#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17888#[rustc_legacy_const_generics(3)]
17889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17890pub unsafe fn _mm256_i32scatter_pd<const SCALE: i32>(
17891    base_addr: *mut f64,
17892    vindex: __m128i,
17893    a: __m256d,
17894) {
17895    static_assert_imm8_scale!(SCALE);
17896    vscatterdpd_256(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x4(), SCALE)
17897}
17898
17899/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17900/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17901/// mask bit is not set are not written to memory).
17902///
17903/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_pd)
17904#[inline]
17905#[target_feature(enable = "avx512f,avx512vl")]
17906#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17907#[rustc_legacy_const_generics(4)]
17908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17909pub unsafe fn _mm256_mask_i32scatter_pd<const SCALE: i32>(
17910    base_addr: *mut f64,
17911    k: __mmask8,
17912    vindex: __m128i,
17913    a: __m256d,
17914) {
17915    static_assert_imm8_scale!(SCALE);
17916    vscatterdpd_256(base_addr as _, k, vindex.as_i32x4(), a.as_f64x4(), SCALE)
17917}
17918
17919/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17920/// at packed 32-bit integer indices stored in vindex scaled by scale
17921///
17922/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_ps)
17923#[inline]
17924#[target_feature(enable = "avx512f,avx512vl")]
17925#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17926#[rustc_legacy_const_generics(3)]
17927#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17928pub unsafe fn _mm256_i32scatter_ps<const SCALE: i32>(
17929    base_addr: *mut f32,
17930    vindex: __m256i,
17931    a: __m256,
17932) {
17933    static_assert_imm8_scale!(SCALE);
17934    vscatterdps_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_f32x8(), SCALE)
17935}
17936
17937/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17938/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17939/// mask bit is not set are not written to memory).
17940///
17941/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_ps)
17942#[inline]
17943#[target_feature(enable = "avx512f,avx512vl")]
17944#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17945#[rustc_legacy_const_generics(4)]
17946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17947pub unsafe fn _mm256_mask_i32scatter_ps<const SCALE: i32>(
17948    base_addr: *mut f32,
17949    k: __mmask8,
17950    vindex: __m256i,
17951    a: __m256,
17952) {
17953    static_assert_imm8_scale!(SCALE);
17954    vscatterdps_256(base_addr as _, k, vindex.as_i32x8(), a.as_f32x8(), SCALE)
17955}
17956
17957/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17958/// indices stored in vindex scaled by scale
17959///
17960/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi32)
17961#[inline]
17962#[target_feature(enable = "avx512f,avx512vl")]
17963#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17964#[rustc_legacy_const_generics(3)]
17965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17966pub unsafe fn _mm256_i64scatter_epi32<const SCALE: i32>(
17967    base_addr: *mut i32,
17968    vindex: __m256i,
17969    a: __m128i,
17970) {
17971    static_assert_imm8_scale!(SCALE);
17972    vpscatterqd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i32x4(), SCALE)
17973}
17974
17975/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17976/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17977/// are not written to memory).
17978///
17979/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi32)
17980#[inline]
17981#[target_feature(enable = "avx512f,avx512vl")]
17982#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17983#[rustc_legacy_const_generics(4)]
17984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17985pub unsafe fn _mm256_mask_i64scatter_epi32<const SCALE: i32>(
17986    base_addr: *mut i32,
17987    k: __mmask8,
17988    vindex: __m256i,
17989    a: __m128i,
17990) {
17991    static_assert_imm8_scale!(SCALE);
17992    vpscatterqd_256(base_addr as _, k, vindex.as_i64x4(), a.as_i32x4(), SCALE)
17993}
17994
17995/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17996/// indices stored in vindex scaled by scale
17997///
17998/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi64)
17999#[inline]
18000#[target_feature(enable = "avx512f,avx512vl")]
18001#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18002#[rustc_legacy_const_generics(3)]
18003#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18004pub unsafe fn _mm256_i64scatter_epi64<const SCALE: i32>(
18005    base_addr: *mut i64,
18006    vindex: __m256i,
18007    a: __m256i,
18008) {
18009    static_assert_imm8_scale!(SCALE);
18010    vpscatterqq_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i64x4(), SCALE)
18011}
18012
18013/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18014/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18015/// are not written to memory).
18016///
18017/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi64)
18018#[inline]
18019#[target_feature(enable = "avx512f,avx512vl")]
18020#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18021#[rustc_legacy_const_generics(4)]
18022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18023pub unsafe fn _mm256_mask_i64scatter_epi64<const SCALE: i32>(
18024    base_addr: *mut i64,
18025    k: __mmask8,
18026    vindex: __m256i,
18027    a: __m256i,
18028) {
18029    static_assert_imm8_scale!(SCALE);
18030    vpscatterqq_256(base_addr as _, k, vindex.as_i64x4(), a.as_i64x4(), SCALE)
18031}
18032
18033/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18034/// at packed 64-bit integer indices stored in vindex scaled by scale
18035///
18036/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_pd)
18037#[inline]
18038#[target_feature(enable = "avx512f,avx512vl")]
18039#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18040#[rustc_legacy_const_generics(3)]
18041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18042pub unsafe fn _mm256_i64scatter_pd<const SCALE: i32>(
18043    base_addr: *mut f64,
18044    vindex: __m256i,
18045    a: __m256d,
18046) {
18047    static_assert_imm8_scale!(SCALE);
18048    vscatterqpd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f64x4(), SCALE)
18049}
18050
18051/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18052/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18053/// mask bit is not set are not written to memory).
18054///
18055/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_pd)
18056#[inline]
18057#[target_feature(enable = "avx512f,avx512vl")]
18058#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18059#[rustc_legacy_const_generics(4)]
18060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18061pub unsafe fn _mm256_mask_i64scatter_pd<const SCALE: i32>(
18062    base_addr: *mut f64,
18063    k: __mmask8,
18064    vindex: __m256i,
18065    a: __m256d,
18066) {
18067    static_assert_imm8_scale!(SCALE);
18068    vscatterqpd_256(base_addr as _, k, vindex.as_i64x4(), a.as_f64x4(), SCALE)
18069}
18070
18071/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18072/// at packed 64-bit integer indices stored in vindex scaled by scale
18073///
18074/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_ps)
18075#[inline]
18076#[target_feature(enable = "avx512f,avx512vl")]
18077#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18078#[rustc_legacy_const_generics(3)]
18079#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18080pub unsafe fn _mm256_i64scatter_ps<const SCALE: i32>(
18081    base_addr: *mut f32,
18082    vindex: __m256i,
18083    a: __m128,
18084) {
18085    static_assert_imm8_scale!(SCALE);
18086    vscatterqps_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f32x4(), SCALE)
18087}
18088
18089/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18090/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18091/// mask bit is not set are not written to memory).
18092///
18093/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_ps)
18094#[inline]
18095#[target_feature(enable = "avx512f,avx512vl")]
18096#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18097#[rustc_legacy_const_generics(4)]
18098#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18099pub unsafe fn _mm256_mask_i64scatter_ps<const SCALE: i32>(
18100    base_addr: *mut f32,
18101    k: __mmask8,
18102    vindex: __m256i,
18103    a: __m128,
18104) {
18105    static_assert_imm8_scale!(SCALE);
18106    vscatterqps_256(base_addr as _, k, vindex.as_i64x4(), a.as_f32x4(), SCALE)
18107}
18108
18109/// Loads 8 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18110/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18111/// mask bit is not set).
18112///
18113/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi32)
18114#[inline]
18115#[target_feature(enable = "avx512f,avx512vl")]
18116#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
18117#[rustc_legacy_const_generics(4)]
18118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18119pub unsafe fn _mm256_mmask_i32gather_epi32<const SCALE: i32>(
18120    src: __m256i,
18121    k: __mmask8,
18122    vindex: __m256i,
18123    base_addr: *const i32,
18124) -> __m256i {
18125    static_assert_imm8_scale!(SCALE);
18126    transmute(vpgatherdd_256(
18127        src.as_i32x8(),
18128        base_addr as _,
18129        vindex.as_i32x8(),
18130        k,
18131        SCALE,
18132    ))
18133}
18134
18135/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18136/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18137/// mask bit is not set).
18138///
18139/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi64)
18140#[inline]
18141#[target_feature(enable = "avx512f,avx512vl")]
18142#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
18143#[rustc_legacy_const_generics(4)]
18144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18145pub unsafe fn _mm256_mmask_i32gather_epi64<const SCALE: i32>(
18146    src: __m256i,
18147    k: __mmask8,
18148    vindex: __m128i,
18149    base_addr: *const i64,
18150) -> __m256i {
18151    static_assert_imm8_scale!(SCALE);
18152    transmute(vpgatherdq_256(
18153        src.as_i64x4(),
18154        base_addr as _,
18155        vindex.as_i32x4(),
18156        k,
18157        SCALE,
18158    ))
18159}
18160
18161/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18162/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18163/// from src when the corresponding mask bit is not set).
18164///
18165/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_pd)
18166#[inline]
18167#[target_feature(enable = "avx512f,avx512vl")]
18168#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
18169#[rustc_legacy_const_generics(4)]
18170#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18171pub unsafe fn _mm256_mmask_i32gather_pd<const SCALE: i32>(
18172    src: __m256d,
18173    k: __mmask8,
18174    vindex: __m128i,
18175    base_addr: *const f64,
18176) -> __m256d {
18177    static_assert_imm8_scale!(SCALE);
18178    transmute(vgatherdpd_256(
18179        src.as_f64x4(),
18180        base_addr as _,
18181        vindex.as_i32x4(),
18182        k,
18183        SCALE,
18184    ))
18185}
18186
18187/// Loads 8 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18188/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18189/// from src when the corresponding mask bit is not set).
18190///
18191/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_ps)
18192#[inline]
18193#[target_feature(enable = "avx512f,avx512vl")]
18194#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
18195#[rustc_legacy_const_generics(4)]
18196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18197pub unsafe fn _mm256_mmask_i32gather_ps<const SCALE: i32>(
18198    src: __m256,
18199    k: __mmask8,
18200    vindex: __m256i,
18201    base_addr: *const f32,
18202) -> __m256 {
18203    static_assert_imm8_scale!(SCALE);
18204    transmute(vgatherdps_256(
18205        src.as_f32x8(),
18206        base_addr as _,
18207        vindex.as_i32x8(),
18208        k,
18209        SCALE,
18210    ))
18211}
18212
18213/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18214/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18215/// mask bit is not set).
18216///
18217/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi32)
18218#[inline]
18219#[target_feature(enable = "avx512f,avx512vl")]
18220#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
18221#[rustc_legacy_const_generics(4)]
18222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18223pub unsafe fn _mm256_mmask_i64gather_epi32<const SCALE: i32>(
18224    src: __m128i,
18225    k: __mmask8,
18226    vindex: __m256i,
18227    base_addr: *const i32,
18228) -> __m128i {
18229    static_assert_imm8_scale!(SCALE);
18230    transmute(vpgatherqd_256(
18231        src.as_i32x4(),
18232        base_addr as _,
18233        vindex.as_i64x4(),
18234        k,
18235        SCALE,
18236    ))
18237}
18238
18239/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18240/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18241/// mask bit is not set).
18242///
18243/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64)
18244#[inline]
18245#[target_feature(enable = "avx512f,avx512vl")]
18246#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
18247#[rustc_legacy_const_generics(4)]
18248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18249pub unsafe fn _mm256_mmask_i64gather_epi64<const SCALE: i32>(
18250    src: __m256i,
18251    k: __mmask8,
18252    vindex: __m256i,
18253    base_addr: *const i64,
18254) -> __m256i {
18255    static_assert_imm8_scale!(SCALE);
18256    transmute(vpgatherqq_256(
18257        src.as_i64x4(),
18258        base_addr as _,
18259        vindex.as_i64x4(),
18260        k,
18261        SCALE,
18262    ))
18263}
18264
18265/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18266/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18267/// from src when the corresponding mask bit is not set).
18268///
18269/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd)
18270#[inline]
18271#[target_feature(enable = "avx512f,avx512vl")]
18272#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
18273#[rustc_legacy_const_generics(4)]
18274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18275pub unsafe fn _mm256_mmask_i64gather_pd<const SCALE: i32>(
18276    src: __m256d,
18277    k: __mmask8,
18278    vindex: __m256i,
18279    base_addr: *const f64,
18280) -> __m256d {
18281    static_assert_imm8_scale!(SCALE);
18282    transmute(vgatherqpd_256(
18283        src.as_f64x4(),
18284        base_addr as _,
18285        vindex.as_i64x4(),
18286        k,
18287        SCALE,
18288    ))
18289}
18290
18291/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18292/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18293/// from src when the corresponding mask bit is not set).
18294///
18295/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps)
18296#[inline]
18297#[target_feature(enable = "avx512f,avx512vl")]
18298#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
18299#[rustc_legacy_const_generics(4)]
18300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18301pub unsafe fn _mm256_mmask_i64gather_ps<const SCALE: i32>(
18302    src: __m128,
18303    k: __mmask8,
18304    vindex: __m256i,
18305    base_addr: *const f32,
18306) -> __m128 {
18307    static_assert_imm8_scale!(SCALE);
18308    transmute(vgatherqps_256(
18309        src.as_f32x4(),
18310        base_addr as _,
18311        vindex.as_i64x4(),
18312        k,
18313        SCALE,
18314    ))
18315}
18316
18317/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
18318/// indices stored in vindex scaled by scale
18319///
18320/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32)
18321#[inline]
18322#[target_feature(enable = "avx512f,avx512vl")]
18323#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
18324#[rustc_legacy_const_generics(3)]
18325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18326pub unsafe fn _mm_i32scatter_epi32<const SCALE: i32>(
18327    base_addr: *mut i32,
18328    vindex: __m128i,
18329    a: __m128i,
18330) {
18331    static_assert_imm8_scale!(SCALE);
18332    vpscatterdd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i32x4(), SCALE)
18333}
18334
18335/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
18336/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18337/// are not written to memory).
18338///
18339/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32)
18340#[inline]
18341#[target_feature(enable = "avx512f,avx512vl")]
18342#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
18343#[rustc_legacy_const_generics(4)]
18344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18345pub unsafe fn _mm_mask_i32scatter_epi32<const SCALE: i32>(
18346    base_addr: *mut i32,
18347    k: __mmask8,
18348    vindex: __m128i,
18349    a: __m128i,
18350) {
18351    static_assert_imm8_scale!(SCALE);
18352    vpscatterdd_128(base_addr as _, k, vindex.as_i32x4(), a.as_i32x4(), SCALE)
18353}
18354
18355/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
18356/// indices stored in vindex scaled by scale
18357///
18358/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64)
18359#[inline]
18360#[target_feature(enable = "avx512f,avx512vl")]
18361#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
18362#[rustc_legacy_const_generics(3)]
18363#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18364pub unsafe fn _mm_i32scatter_epi64<const SCALE: i32>(
18365    base_addr: *mut i64,
18366    vindex: __m128i,
18367    a: __m128i,
18368) {
18369    static_assert_imm8_scale!(SCALE);
18370    vpscatterdq_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i64x2(), SCALE)
18371}
18372
18373/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
18374/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18375/// are not written to memory).
18376///
18377/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64)
18378#[inline]
18379#[target_feature(enable = "avx512f,avx512vl")]
18380#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
18381#[rustc_legacy_const_generics(4)]
18382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18383pub unsafe fn _mm_mask_i32scatter_epi64<const SCALE: i32>(
18384    base_addr: *mut i64,
18385    k: __mmask8,
18386    vindex: __m128i,
18387    a: __m128i,
18388) {
18389    static_assert_imm8_scale!(SCALE);
18390    vpscatterdq_128(base_addr as _, k, vindex.as_i32x4(), a.as_i64x2(), SCALE)
18391}
18392
18393/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18394/// at packed 32-bit integer indices stored in vindex scaled by scale
18395///
18396/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd)
18397#[inline]
18398#[target_feature(enable = "avx512f,avx512vl")]
18399#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
18400#[rustc_legacy_const_generics(3)]
18401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18402pub unsafe fn _mm_i32scatter_pd<const SCALE: i32>(
18403    base_addr: *mut f64,
18404    vindex: __m128i,
18405    a: __m128d,
18406) {
18407    static_assert_imm8_scale!(SCALE);
18408    vscatterdpd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x2(), SCALE)
18409}
18410
18411/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18412/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18413/// mask bit is not set are not written to memory).
18414///
18415/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd)
18416#[inline]
18417#[target_feature(enable = "avx512f,avx512vl")]
18418#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
18419#[rustc_legacy_const_generics(4)]
18420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18421pub unsafe fn _mm_mask_i32scatter_pd<const SCALE: i32>(
18422    base_addr: *mut f64,
18423    k: __mmask8,
18424    vindex: __m128i,
18425    a: __m128d,
18426) {
18427    static_assert_imm8_scale!(SCALE);
18428    vscatterdpd_128(base_addr as _, k, vindex.as_i32x4(), a.as_f64x2(), SCALE)
18429}
18430
18431/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18432/// at packed 32-bit integer indices stored in vindex scaled by scale
18433///
18434/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_ps)
18435#[inline]
18436#[target_feature(enable = "avx512f,avx512vl")]
18437#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
18438#[rustc_legacy_const_generics(3)]
18439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18440pub unsafe fn _mm_i32scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
18441    static_assert_imm8_scale!(SCALE);
18442    vscatterdps_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f32x4(), SCALE)
18443}
18444
18445/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18446/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18447/// mask bit is not set are not written to memory).
18448///
18449/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_ps)
18450#[inline]
18451#[target_feature(enable = "avx512f,avx512vl")]
18452#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
18453#[rustc_legacy_const_generics(4)]
18454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18455pub unsafe fn _mm_mask_i32scatter_ps<const SCALE: i32>(
18456    base_addr: *mut f32,
18457    k: __mmask8,
18458    vindex: __m128i,
18459    a: __m128,
18460) {
18461    static_assert_imm8_scale!(SCALE);
18462    vscatterdps_128(base_addr as _, k, vindex.as_i32x4(), a.as_f32x4(), SCALE)
18463}
18464
18465/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18466/// indices stored in vindex scaled by scale
18467///
18468/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi32)
18469#[inline]
18470#[target_feature(enable = "avx512f,avx512vl")]
18471#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
18472#[rustc_legacy_const_generics(3)]
18473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18474pub unsafe fn _mm_i64scatter_epi32<const SCALE: i32>(
18475    base_addr: *mut i32,
18476    vindex: __m128i,
18477    a: __m128i,
18478) {
18479    static_assert_imm8_scale!(SCALE);
18480    vpscatterqd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i32x4(), SCALE)
18481}
18482
18483/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18484/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18485/// are not written to memory).
18486///
18487/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi32)
18488#[inline]
18489#[target_feature(enable = "avx512f,avx512vl")]
18490#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
18491#[rustc_legacy_const_generics(4)]
18492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18493pub unsafe fn _mm_mask_i64scatter_epi32<const SCALE: i32>(
18494    base_addr: *mut i32,
18495    k: __mmask8,
18496    vindex: __m128i,
18497    a: __m128i,
18498) {
18499    static_assert_imm8_scale!(SCALE);
18500    vpscatterqd_128(base_addr as _, k, vindex.as_i64x2(), a.as_i32x4(), SCALE)
18501}
18502
18503/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18504/// indices stored in vindex scaled by scale
18505///
18506/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64)
18507#[inline]
18508#[target_feature(enable = "avx512f,avx512vl")]
18509#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18510#[rustc_legacy_const_generics(3)]
18511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18512pub unsafe fn _mm_i64scatter_epi64<const SCALE: i32>(
18513    base_addr: *mut i64,
18514    vindex: __m128i,
18515    a: __m128i,
18516) {
18517    static_assert_imm8_scale!(SCALE);
18518    vpscatterqq_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i64x2(), SCALE)
18519}
18520
18521/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18522/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18523/// are not written to memory).
18524///
18525/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64)
18526#[inline]
18527#[target_feature(enable = "avx512f,avx512vl")]
18528#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18529#[rustc_legacy_const_generics(4)]
18530#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18531pub unsafe fn _mm_mask_i64scatter_epi64<const SCALE: i32>(
18532    base_addr: *mut i64,
18533    k: __mmask8,
18534    vindex: __m128i,
18535    a: __m128i,
18536) {
18537    static_assert_imm8_scale!(SCALE);
18538    vpscatterqq_128(base_addr as _, k, vindex.as_i64x2(), a.as_i64x2(), SCALE)
18539}
18540
18541/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18542/// at packed 64-bit integer indices stored in vindex scaled by scale
18543///
18544/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd)
18545#[inline]
18546#[target_feature(enable = "avx512f,avx512vl")]
18547#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18548#[rustc_legacy_const_generics(3)]
18549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18550pub unsafe fn _mm_i64scatter_pd<const SCALE: i32>(
18551    base_addr: *mut f64,
18552    vindex: __m128i,
18553    a: __m128d,
18554) {
18555    static_assert_imm8_scale!(SCALE);
18556    vscatterqpd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f64x2(), SCALE)
18557}
18558
18559/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18560/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18561/// mask bit is not set are not written to memory).
18562///
18563/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd)
18564#[inline]
18565#[target_feature(enable = "avx512f,avx512vl")]
18566#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18567#[rustc_legacy_const_generics(4)]
18568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18569pub unsafe fn _mm_mask_i64scatter_pd<const SCALE: i32>(
18570    base_addr: *mut f64,
18571    k: __mmask8,
18572    vindex: __m128i,
18573    a: __m128d,
18574) {
18575    static_assert_imm8_scale!(SCALE);
18576    vscatterqpd_128(base_addr as _, k, vindex.as_i64x2(), a.as_f64x2(), SCALE)
18577}
18578
18579/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18580/// at packed 64-bit integer indices stored in vindex scaled by scale
18581///
18582/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps)
18583#[inline]
18584#[target_feature(enable = "avx512f,avx512vl")]
18585#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18586#[rustc_legacy_const_generics(3)]
18587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18588pub unsafe fn _mm_i64scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
18589    static_assert_imm8_scale!(SCALE);
18590    vscatterqps_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f32x4(), SCALE)
18591}
18592
/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_i64scatter_ps<const SCALE: i32>(
    base_addr: *mut f32,
    k: __mmask8,
    vindex: __m128i,
    a: __m128,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqps_128(base_addr as _, k, vindex.as_i64x2(), a.as_f32x4(), SCALE)
}
18611
18612/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18613/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18614/// mask bit is not set).
18615///
18616/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32)
18617#[inline]
18618#[target_feature(enable = "avx512f,avx512vl")]
18619#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
18620#[rustc_legacy_const_generics(4)]
18621#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18622pub unsafe fn _mm_mmask_i32gather_epi32<const SCALE: i32>(
18623    src: __m128i,
18624    k: __mmask8,
18625    vindex: __m128i,
18626    base_addr: *const i32,
18627) -> __m128i {
18628    static_assert_imm8_scale!(SCALE);
18629    transmute(vpgatherdd_128(
18630        src.as_i32x4(),
18631        base_addr as _,
18632        vindex.as_i32x4(),
18633        k,
18634        SCALE,
18635    ))
18636}
18637
18638/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18639/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18640/// mask bit is not set).
18641///
18642/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64)
18643#[inline]
18644#[target_feature(enable = "avx512f,avx512vl")]
18645#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
18646#[rustc_legacy_const_generics(4)]
18647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18648pub unsafe fn _mm_mmask_i32gather_epi64<const SCALE: i32>(
18649    src: __m128i,
18650    k: __mmask8,
18651    vindex: __m128i,
18652    base_addr: *const i64,
18653) -> __m128i {
18654    static_assert_imm8_scale!(SCALE);
18655    transmute(vpgatherdq_128(
18656        src.as_i64x2(),
18657        base_addr as _,
18658        vindex.as_i32x4(),
18659        k,
18660        SCALE,
18661    ))
18662}
18663
18664/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18665/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18666/// from src when the corresponding mask bit is not set).
18667///
18668/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd)
18669#[inline]
18670#[target_feature(enable = "avx512f,avx512vl")]
18671#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
18672#[rustc_legacy_const_generics(4)]
18673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18674pub unsafe fn _mm_mmask_i32gather_pd<const SCALE: i32>(
18675    src: __m128d,
18676    k: __mmask8,
18677    vindex: __m128i,
18678    base_addr: *const f64,
18679) -> __m128d {
18680    static_assert_imm8_scale!(SCALE);
18681    transmute(vgatherdpd_128(
18682        src.as_f64x2(),
18683        base_addr as _,
18684        vindex.as_i32x4(),
18685        k,
18686        SCALE,
18687    ))
18688}
18689
18690/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18691/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18692/// from src when the corresponding mask bit is not set).
18693///
18694/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_ps)
18695#[inline]
18696#[target_feature(enable = "avx512f,avx512vl")]
18697#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
18698#[rustc_legacy_const_generics(4)]
18699#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18700pub unsafe fn _mm_mmask_i32gather_ps<const SCALE: i32>(
18701    src: __m128,
18702    k: __mmask8,
18703    vindex: __m128i,
18704    base_addr: *const f32,
18705) -> __m128 {
18706    static_assert_imm8_scale!(SCALE);
18707    transmute(vgatherdps_128(
18708        src.as_f32x4(),
18709        base_addr as _,
18710        vindex.as_i32x4(),
18711        k,
18712        SCALE,
18713    ))
18714}
18715
18716/// Loads 2 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18717/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18718/// mask bit is not set).
18719///
18720/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi32)
18721#[inline]
18722#[target_feature(enable = "avx512f,avx512vl")]
18723#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
18724#[rustc_legacy_const_generics(4)]
18725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18726pub unsafe fn _mm_mmask_i64gather_epi32<const SCALE: i32>(
18727    src: __m128i,
18728    k: __mmask8,
18729    vindex: __m128i,
18730    base_addr: *const i32,
18731) -> __m128i {
18732    static_assert_imm8_scale!(SCALE);
18733    transmute(vpgatherqd_128(
18734        src.as_i32x4(),
18735        base_addr as _,
18736        vindex.as_i64x2(),
18737        k,
18738        SCALE,
18739    ))
18740}
18741
18742/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18743/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18744/// mask bit is not set).
18745///
18746/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi64)
18747#[inline]
18748#[target_feature(enable = "avx512f,avx512vl")]
18749#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
18750#[rustc_legacy_const_generics(4)]
18751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18752pub unsafe fn _mm_mmask_i64gather_epi64<const SCALE: i32>(
18753    src: __m128i,
18754    k: __mmask8,
18755    vindex: __m128i,
18756    base_addr: *const i64,
18757) -> __m128i {
18758    static_assert_imm8_scale!(SCALE);
18759    transmute(vpgatherqq_128(
18760        src.as_i64x2(),
18761        base_addr as _,
18762        vindex.as_i64x2(),
18763        k,
18764        SCALE,
18765    ))
18766}
18767
18768/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18769/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18770/// from src when the corresponding mask bit is not set).
18771///
18772/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_pd)
18773#[inline]
18774#[target_feature(enable = "avx512f,avx512vl")]
18775#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
18776#[rustc_legacy_const_generics(4)]
18777#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18778pub unsafe fn _mm_mmask_i64gather_pd<const SCALE: i32>(
18779    src: __m128d,
18780    k: __mmask8,
18781    vindex: __m128i,
18782    base_addr: *const f64,
18783) -> __m128d {
18784    static_assert_imm8_scale!(SCALE);
18785    transmute(vgatherqpd_128(
18786        src.as_f64x2(),
18787        base_addr as _,
18788        vindex.as_i64x2(),
18789        k,
18790        SCALE,
18791    ))
18792}
18793
18794/// Loads 2 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18795/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18796/// from src when the corresponding mask bit is not set).
18797///
18798/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_ps)
18799#[inline]
18800#[target_feature(enable = "avx512f,avx512vl")]
18801#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
18802#[rustc_legacy_const_generics(4)]
18803#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18804pub unsafe fn _mm_mmask_i64gather_ps<const SCALE: i32>(
18805    src: __m128,
18806    k: __mmask8,
18807    vindex: __m128i,
18808    base_addr: *const f32,
18809) -> __m128 {
18810    static_assert_imm8_scale!(SCALE);
18811    transmute(vgatherqps_128(
18812        src.as_f32x4(),
18813        base_addr as _,
18814        vindex.as_i64x2(),
18815        k,
18816        SCALE,
18817    ))
18818}
18819
18820/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18821///
18822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi32&expand=1198)
18823#[inline]
18824#[target_feature(enable = "avx512f")]
18825#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18826#[cfg_attr(test, assert_instr(vpcompressd))]
18827pub fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
18828    unsafe { transmute(vpcompressd(a.as_i32x16(), src.as_i32x16(), k)) }
18829}
18830
18831/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18832///
18833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi32&expand=1199)
18834#[inline]
18835#[target_feature(enable = "avx512f")]
18836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18837#[cfg_attr(test, assert_instr(vpcompressd))]
18838pub fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i {
18839    unsafe { transmute(vpcompressd(a.as_i32x16(), i32x16::ZERO, k)) }
18840}
18841
18842/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18843///
18844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi32&expand=1196)
18845#[inline]
18846#[target_feature(enable = "avx512f,avx512vl")]
18847#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18848#[cfg_attr(test, assert_instr(vpcompressd))]
18849pub fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18850    unsafe { transmute(vpcompressd256(a.as_i32x8(), src.as_i32x8(), k)) }
18851}
18852
18853/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18854///
18855/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi32&expand=1197)
18856#[inline]
18857#[target_feature(enable = "avx512f,avx512vl")]
18858#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18859#[cfg_attr(test, assert_instr(vpcompressd))]
18860pub fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i {
18861    unsafe { transmute(vpcompressd256(a.as_i32x8(), i32x8::ZERO, k)) }
18862}
18863
18864/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18865///
18866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi32&expand=1194)
18867#[inline]
18868#[target_feature(enable = "avx512f,avx512vl")]
18869#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18870#[cfg_attr(test, assert_instr(vpcompressd))]
18871pub fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18872    unsafe { transmute(vpcompressd128(a.as_i32x4(), src.as_i32x4(), k)) }
18873}
18874
18875/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18876///
18877/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi32&expand=1195)
18878#[inline]
18879#[target_feature(enable = "avx512f,avx512vl")]
18880#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18881#[cfg_attr(test, assert_instr(vpcompressd))]
18882pub fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i {
18883    unsafe { transmute(vpcompressd128(a.as_i32x4(), i32x4::ZERO, k)) }
18884}
18885
18886/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18887///
18888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi64&expand=1204)
18889#[inline]
18890#[target_feature(enable = "avx512f")]
18891#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18892#[cfg_attr(test, assert_instr(vpcompressq))]
18893pub fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
18894    unsafe { transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k)) }
18895}
18896
18897/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18898///
18899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi64&expand=1205)
18900#[inline]
18901#[target_feature(enable = "avx512f")]
18902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18903#[cfg_attr(test, assert_instr(vpcompressq))]
18904pub fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i {
18905    unsafe { transmute(vpcompressq(a.as_i64x8(), i64x8::ZERO, k)) }
18906}
18907
18908/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18909///
18910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi64&expand=1202)
18911#[inline]
18912#[target_feature(enable = "avx512f,avx512vl")]
18913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18914#[cfg_attr(test, assert_instr(vpcompressq))]
18915pub fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18916    unsafe { transmute(vpcompressq256(a.as_i64x4(), src.as_i64x4(), k)) }
18917}
18918
18919/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18920///
18921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi64&expand=1203)
18922#[inline]
18923#[target_feature(enable = "avx512f,avx512vl")]
18924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18925#[cfg_attr(test, assert_instr(vpcompressq))]
18926pub fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i {
18927    unsafe { transmute(vpcompressq256(a.as_i64x4(), i64x4::ZERO, k)) }
18928}
18929
18930/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18931///
18932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi64&expand=1200)
18933#[inline]
18934#[target_feature(enable = "avx512f,avx512vl")]
18935#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18936#[cfg_attr(test, assert_instr(vpcompressq))]
18937pub fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18938    unsafe { transmute(vpcompressq128(a.as_i64x2(), src.as_i64x2(), k)) }
18939}
18940
18941/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18942///
18943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi64&expand=1201)
18944#[inline]
18945#[target_feature(enable = "avx512f,avx512vl")]
18946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18947#[cfg_attr(test, assert_instr(vpcompressq))]
18948pub fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i {
18949    unsafe { transmute(vpcompressq128(a.as_i64x2(), i64x2::ZERO, k)) }
18950}
18951
18952/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18953///
18954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_ps&expand=1222)
18955#[inline]
18956#[target_feature(enable = "avx512f")]
18957#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18958#[cfg_attr(test, assert_instr(vcompressps))]
18959pub fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
18960    unsafe { transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k)) }
18961}
18962
18963/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18964///
18965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_ps&expand=1223)
18966#[inline]
18967#[target_feature(enable = "avx512f")]
18968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18969#[cfg_attr(test, assert_instr(vcompressps))]
18970pub fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 {
18971    unsafe { transmute(vcompressps(a.as_f32x16(), f32x16::ZERO, k)) }
18972}
18973
18974/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18975///
18976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_ps&expand=1220)
18977#[inline]
18978#[target_feature(enable = "avx512f,avx512vl")]
18979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18980#[cfg_attr(test, assert_instr(vcompressps))]
18981pub fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
18982    unsafe { transmute(vcompressps256(a.as_f32x8(), src.as_f32x8(), k)) }
18983}
18984
18985/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18986///
18987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_ps&expand=1221)
18988#[inline]
18989#[target_feature(enable = "avx512f,avx512vl")]
18990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18991#[cfg_attr(test, assert_instr(vcompressps))]
18992pub fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 {
18993    unsafe { transmute(vcompressps256(a.as_f32x8(), f32x8::ZERO, k)) }
18994}
18995
18996/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18997///
18998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_ps&expand=1218)
18999#[inline]
19000#[target_feature(enable = "avx512f,avx512vl")]
19001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19002#[cfg_attr(test, assert_instr(vcompressps))]
19003pub fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
19004    unsafe { transmute(vcompressps128(a.as_f32x4(), src.as_f32x4(), k)) }
19005}
19006
19007/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
19008///
19009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_ps&expand=1219)
19010#[inline]
19011#[target_feature(enable = "avx512f,avx512vl")]
19012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19013#[cfg_attr(test, assert_instr(vcompressps))]
19014pub fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 {
19015    unsafe { transmute(vcompressps128(a.as_f32x4(), f32x4::ZERO, k)) }
19016}
19017
19018/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
19019///
19020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_pd&expand=1216)
19021#[inline]
19022#[target_feature(enable = "avx512f")]
19023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19024#[cfg_attr(test, assert_instr(vcompresspd))]
19025pub fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
19026    unsafe { transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k)) }
19027}
19028
19029/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
19030///
19031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_pd&expand=1217)
19032#[inline]
19033#[target_feature(enable = "avx512f")]
19034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19035#[cfg_attr(test, assert_instr(vcompresspd))]
19036pub fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d {
19037    unsafe { transmute(vcompresspd(a.as_f64x8(), f64x8::ZERO, k)) }
19038}
19039
19040/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
19041///
19042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_pd&expand=1214)
19043#[inline]
19044#[target_feature(enable = "avx512f,avx512vl")]
19045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19046#[cfg_attr(test, assert_instr(vcompresspd))]
19047pub fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
19048    unsafe { transmute(vcompresspd256(a.as_f64x4(), src.as_f64x4(), k)) }
19049}
19050
19051/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
19052///
19053/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_pd&expand=1215)
19054#[inline]
19055#[target_feature(enable = "avx512f,avx512vl")]
19056#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19057#[cfg_attr(test, assert_instr(vcompresspd))]
19058pub fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d {
19059    unsafe { transmute(vcompresspd256(a.as_f64x4(), f64x4::ZERO, k)) }
19060}
19061
19062/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
19063///
19064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_pd&expand=1212)
19065#[inline]
19066#[target_feature(enable = "avx512f,avx512vl")]
19067#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19068#[cfg_attr(test, assert_instr(vcompresspd))]
19069pub fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
19070    unsafe { transmute(vcompresspd128(a.as_f64x2(), src.as_f64x2(), k)) }
19071}
19072
19073/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
19074///
19075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_pd&expand=1213)
19076#[inline]
19077#[target_feature(enable = "avx512f,avx512vl")]
19078#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19079#[cfg_attr(test, assert_instr(vcompresspd))]
19080pub fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d {
19081    unsafe { transmute(vcompresspd128(a.as_f64x2(), f64x2::ZERO, k)) }
19082}
19083
19084/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19085///
19086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi32)
19087#[inline]
19088#[target_feature(enable = "avx512f")]
19089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19090#[cfg_attr(test, assert_instr(vpcompressd))]
19091pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask16, a: __m512i) {
19092    vcompressstored(base_addr as *mut _, a.as_i32x16(), k)
19093}
19094
19095/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19096///
19097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi32)
19098#[inline]
19099#[target_feature(enable = "avx512f,avx512vl")]
19100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19101#[cfg_attr(test, assert_instr(vpcompressd))]
19102pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m256i) {
19103    vcompressstored256(base_addr as *mut _, a.as_i32x8(), k)
19104}
19105
19106/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19107///
19108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi32)
19109#[inline]
19110#[target_feature(enable = "avx512f,avx512vl")]
19111#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19112#[cfg_attr(test, assert_instr(vpcompressd))]
19113pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m128i) {
19114    vcompressstored128(base_addr as *mut _, a.as_i32x4(), k)
19115}
19116
19117/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19118///
19119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi64)
19120#[inline]
19121#[target_feature(enable = "avx512f")]
19122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19123#[cfg_attr(test, assert_instr(vpcompressq))]
19124pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m512i) {
19125    vcompressstoreq(base_addr as *mut _, a.as_i64x8(), k)
19126}
19127
19128/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19129///
19130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi64)
19131#[inline]
19132#[target_feature(enable = "avx512f,avx512vl")]
19133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19134#[cfg_attr(test, assert_instr(vpcompressq))]
19135pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m256i) {
19136    vcompressstoreq256(base_addr as *mut _, a.as_i64x4(), k)
19137}
19138
19139/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19140///
19141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi64)
19142#[inline]
19143#[target_feature(enable = "avx512f,avx512vl")]
19144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19145#[cfg_attr(test, assert_instr(vpcompressq))]
19146pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m128i) {
19147    vcompressstoreq128(base_addr as *mut _, a.as_i64x2(), k)
19148}
19149
19150/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19151///
19152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_ps)
19153#[inline]
19154#[target_feature(enable = "avx512f")]
19155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19156#[cfg_attr(test, assert_instr(vcompressps))]
19157pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask16, a: __m512) {
19158    vcompressstoreps(base_addr as *mut _, a.as_f32x16(), k)
19159}
19160
19161/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19162///
19163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_ps)
19164#[inline]
19165#[target_feature(enable = "avx512f,avx512vl")]
19166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19167#[cfg_attr(test, assert_instr(vcompressps))]
19168pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m256) {
19169    vcompressstoreps256(base_addr as *mut _, a.as_f32x8(), k)
19170}
19171
19172/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19173///
19174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_ps)
19175#[inline]
19176#[target_feature(enable = "avx512f,avx512vl")]
19177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19178#[cfg_attr(test, assert_instr(vcompressps))]
19179pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m128) {
19180    vcompressstoreps128(base_addr as *mut _, a.as_f32x4(), k)
19181}
19182
19183/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19184///
19185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_pd)
19186#[inline]
19187#[target_feature(enable = "avx512f")]
19188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19189#[cfg_attr(test, assert_instr(vcompresspd))]
19190pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m512d) {
19191    vcompressstorepd(base_addr as *mut _, a.as_f64x8(), k)
19192}
19193
19194/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19195///
19196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_pd)
19197#[inline]
19198#[target_feature(enable = "avx512f,avx512vl")]
19199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19200#[cfg_attr(test, assert_instr(vcompresspd))]
19201pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m256d) {
19202    vcompressstorepd256(base_addr as *mut _, a.as_f64x4(), k)
19203}
19204
19205/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19206///
19207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_pd)
19208#[inline]
19209#[target_feature(enable = "avx512f,avx512vl")]
19210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19211#[cfg_attr(test, assert_instr(vcompresspd))]
19212pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m128d) {
19213    vcompressstorepd128(base_addr as *mut _, a.as_f64x2(), k)
19214}
19215
19216/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19217///
19218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi32&expand=2316)
19219#[inline]
19220#[target_feature(enable = "avx512f")]
19221#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19222#[cfg_attr(test, assert_instr(vpexpandd))]
19223pub fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19224    unsafe { transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k)) }
19225}
19226
19227/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19228///
19229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi32&expand=2317)
19230#[inline]
19231#[target_feature(enable = "avx512f")]
19232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19233#[cfg_attr(test, assert_instr(vpexpandd))]
19234pub fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i {
19235    unsafe { transmute(vpexpandd(a.as_i32x16(), i32x16::ZERO, k)) }
19236}
19237
19238/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19239///
19240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi32&expand=2314)
19241#[inline]
19242#[target_feature(enable = "avx512f,avx512vl")]
19243#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19244#[cfg_attr(test, assert_instr(vpexpandd))]
19245pub fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19246    unsafe { transmute(vpexpandd256(a.as_i32x8(), src.as_i32x8(), k)) }
19247}
19248
19249/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19250///
19251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi32&expand=2315)
19252#[inline]
19253#[target_feature(enable = "avx512f,avx512vl")]
19254#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19255#[cfg_attr(test, assert_instr(vpexpandd))]
19256pub fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i {
19257    unsafe { transmute(vpexpandd256(a.as_i32x8(), i32x8::ZERO, k)) }
19258}
19259
19260/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19261///
19262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi32&expand=2312)
19263#[inline]
19264#[target_feature(enable = "avx512f,avx512vl")]
19265#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19266#[cfg_attr(test, assert_instr(vpexpandd))]
19267pub fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19268    unsafe { transmute(vpexpandd128(a.as_i32x4(), src.as_i32x4(), k)) }
19269}
19270
19271/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19272///
19273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi32&expand=2313)
19274#[inline]
19275#[target_feature(enable = "avx512f,avx512vl")]
19276#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19277#[cfg_attr(test, assert_instr(vpexpandd))]
19278pub fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i {
19279    unsafe { transmute(vpexpandd128(a.as_i32x4(), i32x4::ZERO, k)) }
19280}
19281
19282/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19283///
19284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi64&expand=2322)
19285#[inline]
19286#[target_feature(enable = "avx512f")]
19287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19288#[cfg_attr(test, assert_instr(vpexpandq))]
19289pub fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19290    unsafe { transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k)) }
19291}
19292
19293/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19294///
19295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi64&expand=2323)
19296#[inline]
19297#[target_feature(enable = "avx512f")]
19298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19299#[cfg_attr(test, assert_instr(vpexpandq))]
19300pub fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i {
19301    unsafe { transmute(vpexpandq(a.as_i64x8(), i64x8::ZERO, k)) }
19302}
19303
19304/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19305///
19306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi64&expand=2320)
19307#[inline]
19308#[target_feature(enable = "avx512f,avx512vl")]
19309#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19310#[cfg_attr(test, assert_instr(vpexpandq))]
19311pub fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19312    unsafe { transmute(vpexpandq256(a.as_i64x4(), src.as_i64x4(), k)) }
19313}
19314
19315/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19316///
19317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi64&expand=2321)
19318#[inline]
19319#[target_feature(enable = "avx512f,avx512vl")]
19320#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19321#[cfg_attr(test, assert_instr(vpexpandq))]
19322pub fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i {
19323    unsafe { transmute(vpexpandq256(a.as_i64x4(), i64x4::ZERO, k)) }
19324}
19325
19326/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19327///
19328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi64&expand=2318)
19329#[inline]
19330#[target_feature(enable = "avx512f,avx512vl")]
19331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19332#[cfg_attr(test, assert_instr(vpexpandq))]
19333pub fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19334    unsafe { transmute(vpexpandq128(a.as_i64x2(), src.as_i64x2(), k)) }
19335}
19336
19337/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19338///
19339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi64&expand=2319)
19340#[inline]
19341#[target_feature(enable = "avx512f,avx512vl")]
19342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19343#[cfg_attr(test, assert_instr(vpexpandq))]
19344pub fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i {
19345    unsafe { transmute(vpexpandq128(a.as_i64x2(), i64x2::ZERO, k)) }
19346}
19347
19348/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19349///
19350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_ps&expand=2340)
19351#[inline]
19352#[target_feature(enable = "avx512f")]
19353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19354#[cfg_attr(test, assert_instr(vexpandps))]
19355pub fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
19356    unsafe { transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k)) }
19357}
19358
19359/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19360///
19361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_ps&expand=2341)
19362#[inline]
19363#[target_feature(enable = "avx512f")]
19364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19365#[cfg_attr(test, assert_instr(vexpandps))]
19366pub fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 {
19367    unsafe { transmute(vexpandps(a.as_f32x16(), f32x16::ZERO, k)) }
19368}
19369
19370/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19371///
19372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_ps&expand=2338)
19373#[inline]
19374#[target_feature(enable = "avx512f,avx512vl")]
19375#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19376#[cfg_attr(test, assert_instr(vexpandps))]
19377pub fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
19378    unsafe { transmute(vexpandps256(a.as_f32x8(), src.as_f32x8(), k)) }
19379}
19380
19381/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19382///
19383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_ps&expand=2339)
19384#[inline]
19385#[target_feature(enable = "avx512f,avx512vl")]
19386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19387#[cfg_attr(test, assert_instr(vexpandps))]
19388pub fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 {
19389    unsafe { transmute(vexpandps256(a.as_f32x8(), f32x8::ZERO, k)) }
19390}
19391
19392/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19393///
19394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_ps&expand=2336)
19395#[inline]
19396#[target_feature(enable = "avx512f,avx512vl")]
19397#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19398#[cfg_attr(test, assert_instr(vexpandps))]
19399pub fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
19400    unsafe { transmute(vexpandps128(a.as_f32x4(), src.as_f32x4(), k)) }
19401}
19402
19403/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19404///
19405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_ps&expand=2337)
19406#[inline]
19407#[target_feature(enable = "avx512f,avx512vl")]
19408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19409#[cfg_attr(test, assert_instr(vexpandps))]
19410pub fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 {
19411    unsafe { transmute(vexpandps128(a.as_f32x4(), f32x4::ZERO, k)) }
19412}
19413
19414/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19415///
19416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_pd&expand=2334)
19417#[inline]
19418#[target_feature(enable = "avx512f")]
19419#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19420#[cfg_attr(test, assert_instr(vexpandpd))]
19421pub fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
19422    unsafe { transmute(vexpandpd(a.as_f64x8(), src.as_f64x8(), k)) }
19423}
19424
19425/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19426///
19427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_pd&expand=2335)
19428#[inline]
19429#[target_feature(enable = "avx512f")]
19430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19431#[cfg_attr(test, assert_instr(vexpandpd))]
19432pub fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d {
19433    unsafe { transmute(vexpandpd(a.as_f64x8(), f64x8::ZERO, k)) }
19434}
19435
19436/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19437///
19438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_pd&expand=2332)
19439#[inline]
19440#[target_feature(enable = "avx512f,avx512vl")]
19441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19442#[cfg_attr(test, assert_instr(vexpandpd))]
19443pub fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
19444    unsafe { transmute(vexpandpd256(a.as_f64x4(), src.as_f64x4(), k)) }
19445}
19446
19447/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19448///
19449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_pd&expand=2333)
19450#[inline]
19451#[target_feature(enable = "avx512f,avx512vl")]
19452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19453#[cfg_attr(test, assert_instr(vexpandpd))]
19454pub fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d {
19455    unsafe { transmute(vexpandpd256(a.as_f64x4(), f64x4::ZERO, k)) }
19456}
19457
19458/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19459///
19460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_pd&expand=2330)
19461#[inline]
19462#[target_feature(enable = "avx512f,avx512vl")]
19463#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19464#[cfg_attr(test, assert_instr(vexpandpd))]
19465pub fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
19466    unsafe { transmute(vexpandpd128(a.as_f64x2(), src.as_f64x2(), k)) }
19467}
19468
19469/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19470///
19471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_pd&expand=2331)
19472#[inline]
19473#[target_feature(enable = "avx512f,avx512vl")]
19474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19475#[cfg_attr(test, assert_instr(vexpandpd))]
19476pub fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d {
19477    unsafe { transmute(vexpandpd128(a.as_f64x2(), f64x2::ZERO, k)) }
19478}
19479
19480/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19481///
19482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi32&expand=4685)
19483#[inline]
19484#[target_feature(enable = "avx512f")]
19485#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19486#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19487#[rustc_legacy_const_generics(1)]
19488#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19489pub const fn _mm512_rol_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19490    static_assert_uimm_bits!(IMM8, 8);
19491    _mm512_rolv_epi32(a, _mm512_set1_epi32(IMM8))
19492}
19493
19494/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19495///
19496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi32&expand=4683)
19497#[inline]
19498#[target_feature(enable = "avx512f")]
19499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19500#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19501#[rustc_legacy_const_generics(3)]
19502#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19503pub const fn _mm512_mask_rol_epi32<const IMM8: i32>(
19504    src: __m512i,
19505    k: __mmask16,
19506    a: __m512i,
19507) -> __m512i {
19508    static_assert_uimm_bits!(IMM8, 8);
19509    _mm512_mask_rolv_epi32(src, k, a, _mm512_set1_epi32(IMM8))
19510}
19511
19512/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19513///
19514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi32&expand=4684)
19515#[inline]
19516#[target_feature(enable = "avx512f")]
19517#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19518#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19519#[rustc_legacy_const_generics(2)]
19520#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19521pub const fn _mm512_maskz_rol_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19522    static_assert_uimm_bits!(IMM8, 8);
19523    _mm512_maskz_rolv_epi32(k, a, _mm512_set1_epi32(IMM8))
19524}
19525
19526/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19527///
19528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi32&expand=4682)
19529#[inline]
19530#[target_feature(enable = "avx512f,avx512vl")]
19531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19532#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19533#[rustc_legacy_const_generics(1)]
19534#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19535pub const fn _mm256_rol_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19536    static_assert_uimm_bits!(IMM8, 8);
19537    _mm256_rolv_epi32(a, _mm256_set1_epi32(IMM8))
19538}
19539
19540/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19541///
19542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi32&expand=4680)
19543#[inline]
19544#[target_feature(enable = "avx512f,avx512vl")]
19545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19546#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19547#[rustc_legacy_const_generics(3)]
19548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19549pub const fn _mm256_mask_rol_epi32<const IMM8: i32>(
19550    src: __m256i,
19551    k: __mmask8,
19552    a: __m256i,
19553) -> __m256i {
19554    static_assert_uimm_bits!(IMM8, 8);
19555    _mm256_mask_rolv_epi32(src, k, a, _mm256_set1_epi32(IMM8))
19556}
19557
19558/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19559///
19560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi32&expand=4681)
19561#[inline]
19562#[target_feature(enable = "avx512f,avx512vl")]
19563#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19564#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19565#[rustc_legacy_const_generics(2)]
19566#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19567pub const fn _mm256_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19568    static_assert_uimm_bits!(IMM8, 8);
19569    _mm256_maskz_rolv_epi32(k, a, _mm256_set1_epi32(IMM8))
19570}
19571
19572/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19573///
19574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi32&expand=4679)
19575#[inline]
19576#[target_feature(enable = "avx512f,avx512vl")]
19577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19578#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19579#[rustc_legacy_const_generics(1)]
19580#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19581pub const fn _mm_rol_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19582    static_assert_uimm_bits!(IMM8, 8);
19583    _mm_rolv_epi32(a, _mm_set1_epi32(IMM8))
19584}
19585
19586/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19587///
19588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi32&expand=4677)
19589#[inline]
19590#[target_feature(enable = "avx512f,avx512vl")]
19591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19592#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19593#[rustc_legacy_const_generics(3)]
19594#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19595pub const fn _mm_mask_rol_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19596    static_assert_uimm_bits!(IMM8, 8);
19597    _mm_mask_rolv_epi32(src, k, a, _mm_set1_epi32(IMM8))
19598}
19599
19600/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19601///
19602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi32&expand=4678)
19603#[inline]
19604#[target_feature(enable = "avx512f,avx512vl")]
19605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19606#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19607#[rustc_legacy_const_generics(2)]
19608#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19609pub const fn _mm_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19610    static_assert_uimm_bits!(IMM8, 8);
19611    _mm_maskz_rolv_epi32(k, a, _mm_set1_epi32(IMM8))
19612}
19613
19614/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19615///
19616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi32&expand=4721)
19617#[inline]
19618#[target_feature(enable = "avx512f")]
19619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19620#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19621#[rustc_legacy_const_generics(1)]
19622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19623pub const fn _mm512_ror_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19624    static_assert_uimm_bits!(IMM8, 8);
19625    _mm512_rorv_epi32(a, _mm512_set1_epi32(IMM8))
19626}
19627
19628/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19629///
19630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi32&expand=4719)
19631#[inline]
19632#[target_feature(enable = "avx512f")]
19633#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19634#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19635#[rustc_legacy_const_generics(3)]
19636#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19637pub const fn _mm512_mask_ror_epi32<const IMM8: i32>(
19638    src: __m512i,
19639    k: __mmask16,
19640    a: __m512i,
19641) -> __m512i {
19642    static_assert_uimm_bits!(IMM8, 8);
19643    _mm512_mask_rorv_epi32(src, k, a, _mm512_set1_epi32(IMM8))
19644}
19645
19646/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19647///
19648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi32&expand=4720)
19649#[inline]
19650#[target_feature(enable = "avx512f")]
19651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19652#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19653#[rustc_legacy_const_generics(2)]
19654#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19655pub const fn _mm512_maskz_ror_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19656    static_assert_uimm_bits!(IMM8, 8);
19657    _mm512_maskz_rorv_epi32(k, a, _mm512_set1_epi32(IMM8))
19658}
19659
19660/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19661///
19662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi32&expand=4718)
19663#[inline]
19664#[target_feature(enable = "avx512f,avx512vl")]
19665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19666#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19667#[rustc_legacy_const_generics(1)]
19668#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19669pub const fn _mm256_ror_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19670    static_assert_uimm_bits!(IMM8, 8);
19671    _mm256_rorv_epi32(a, _mm256_set1_epi32(IMM8))
19672}
19673
19674/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19675///
19676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi32&expand=4716)
19677#[inline]
19678#[target_feature(enable = "avx512f,avx512vl")]
19679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19680#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19681#[rustc_legacy_const_generics(3)]
19682#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19683pub const fn _mm256_mask_ror_epi32<const IMM8: i32>(
19684    src: __m256i,
19685    k: __mmask8,
19686    a: __m256i,
19687) -> __m256i {
19688    static_assert_uimm_bits!(IMM8, 8);
19689    _mm256_mask_rorv_epi32(src, k, a, _mm256_set1_epi32(IMM8))
19690}
19691
19692/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19693///
19694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi32&expand=4717)
19695#[inline]
19696#[target_feature(enable = "avx512f,avx512vl")]
19697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19698#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19699#[rustc_legacy_const_generics(2)]
19700#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19701pub const fn _mm256_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19702    static_assert_uimm_bits!(IMM8, 8);
19703    _mm256_maskz_rorv_epi32(k, a, _mm256_set1_epi32(IMM8))
19704}
19705
19706/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19707///
19708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi32&expand=4715)
19709#[inline]
19710#[target_feature(enable = "avx512f,avx512vl")]
19711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19712#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19713#[rustc_legacy_const_generics(1)]
19714#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19715pub const fn _mm_ror_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19716    static_assert_uimm_bits!(IMM8, 8);
19717    _mm_rorv_epi32(a, _mm_set1_epi32(IMM8))
19718}
19719
19720/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19721///
19722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi32&expand=4713)
19723#[inline]
19724#[target_feature(enable = "avx512f,avx512vl")]
19725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19726#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19727#[rustc_legacy_const_generics(3)]
19728#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19729pub const fn _mm_mask_ror_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19730    static_assert_uimm_bits!(IMM8, 8);
19731    _mm_mask_rorv_epi32(src, k, a, _mm_set1_epi32(IMM8))
19732}
19733
19734/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19735///
19736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi32&expand=4714)
19737#[inline]
19738#[target_feature(enable = "avx512f,avx512vl")]
19739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19740#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19741#[rustc_legacy_const_generics(2)]
19742#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19743pub const fn _mm_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19744    static_assert_uimm_bits!(IMM8, 8);
19745    _mm_maskz_rorv_epi32(k, a, _mm_set1_epi32(IMM8))
19746}
19747
19748/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19749///
19750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi64&expand=4694)
19751#[inline]
19752#[target_feature(enable = "avx512f")]
19753#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19754#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19755#[rustc_legacy_const_generics(1)]
19756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19757pub const fn _mm512_rol_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19758    static_assert_uimm_bits!(IMM8, 8);
19759    _mm512_rolv_epi64(a, _mm512_set1_epi64(IMM8 as i64))
19760}
19761
19762/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19763///
19764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi64&expand=4692)
19765#[inline]
19766#[target_feature(enable = "avx512f")]
19767#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19768#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19769#[rustc_legacy_const_generics(3)]
19770#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19771pub const fn _mm512_mask_rol_epi64<const IMM8: i32>(
19772    src: __m512i,
19773    k: __mmask8,
19774    a: __m512i,
19775) -> __m512i {
19776    static_assert_uimm_bits!(IMM8, 8);
19777    _mm512_mask_rolv_epi64(src, k, a, _mm512_set1_epi64(IMM8 as i64))
19778}
19779
19780/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19781///
19782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi64&expand=4693)
19783#[inline]
19784#[target_feature(enable = "avx512f")]
19785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19786#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19787#[rustc_legacy_const_generics(2)]
19788#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19789pub const fn _mm512_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19790    static_assert_uimm_bits!(IMM8, 8);
19791    _mm512_maskz_rolv_epi64(k, a, _mm512_set1_epi64(IMM8 as i64))
19792}
19793
19794/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19795///
19796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi64&expand=4691)
19797#[inline]
19798#[target_feature(enable = "avx512f,avx512vl")]
19799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19800#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19801#[rustc_legacy_const_generics(1)]
19802#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19803pub const fn _mm256_rol_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19804    static_assert_uimm_bits!(IMM8, 8);
19805    _mm256_rolv_epi64(a, _mm256_set1_epi64x(IMM8 as i64))
19806}
19807
19808/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19809///
19810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi64&expand=4689)
19811#[inline]
19812#[target_feature(enable = "avx512f,avx512vl")]
19813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19814#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19815#[rustc_legacy_const_generics(3)]
19816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19817pub const fn _mm256_mask_rol_epi64<const IMM8: i32>(
19818    src: __m256i,
19819    k: __mmask8,
19820    a: __m256i,
19821) -> __m256i {
19822    static_assert_uimm_bits!(IMM8, 8);
19823    _mm256_mask_rolv_epi64(src, k, a, _mm256_set1_epi64x(IMM8 as i64))
19824}
19825
19826/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19827///
19828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi64&expand=4690)
19829#[inline]
19830#[target_feature(enable = "avx512f,avx512vl")]
19831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19832#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19833#[rustc_legacy_const_generics(2)]
19834#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19835pub const fn _mm256_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19836    static_assert_uimm_bits!(IMM8, 8);
19837    _mm256_maskz_rolv_epi64(k, a, _mm256_set1_epi64x(IMM8 as i64))
19838}
19839
19840/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19841///
19842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi64&expand=4688)
19843#[inline]
19844#[target_feature(enable = "avx512f,avx512vl")]
19845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19846#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19847#[rustc_legacy_const_generics(1)]
19848#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19849pub const fn _mm_rol_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19850    static_assert_uimm_bits!(IMM8, 8);
19851    _mm_rolv_epi64(a, _mm_set1_epi64x(IMM8 as i64))
19852}
19853
19854/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19855///
19856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi64&expand=4686)
19857#[inline]
19858#[target_feature(enable = "avx512f,avx512vl")]
19859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19860#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19861#[rustc_legacy_const_generics(3)]
19862#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19863pub const fn _mm_mask_rol_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19864    static_assert_uimm_bits!(IMM8, 8);
19865    _mm_mask_rolv_epi64(src, k, a, _mm_set1_epi64x(IMM8 as i64))
19866}
19867
19868/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19869///
19870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi64&expand=4687)
19871#[inline]
19872#[target_feature(enable = "avx512f,avx512vl")]
19873#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19874#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19875#[rustc_legacy_const_generics(2)]
19876#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19877pub const fn _mm_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19878    static_assert_uimm_bits!(IMM8, 8);
19879    _mm_maskz_rolv_epi64(k, a, _mm_set1_epi64x(IMM8 as i64))
19880}
19881
19882/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19883///
19884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi64&expand=4730)
19885#[inline]
19886#[target_feature(enable = "avx512f")]
19887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19888#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19889#[rustc_legacy_const_generics(1)]
19890#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19891pub const fn _mm512_ror_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19892    static_assert_uimm_bits!(IMM8, 8);
19893    _mm512_rorv_epi64(a, _mm512_set1_epi64(IMM8 as i64))
19894}
19895
19896/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19897///
19898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi64&expand=4728)
19899#[inline]
19900#[target_feature(enable = "avx512f")]
19901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19902#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19903#[rustc_legacy_const_generics(3)]
19904#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19905pub const fn _mm512_mask_ror_epi64<const IMM8: i32>(
19906    src: __m512i,
19907    k: __mmask8,
19908    a: __m512i,
19909) -> __m512i {
19910    static_assert_uimm_bits!(IMM8, 8);
19911    _mm512_mask_rorv_epi64(src, k, a, _mm512_set1_epi64(IMM8 as i64))
19912}
19913
19914/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19915///
19916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi64&expand=4729)
19917#[inline]
19918#[target_feature(enable = "avx512f")]
19919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19920#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19921#[rustc_legacy_const_generics(2)]
19922#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19923pub const fn _mm512_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19924    static_assert_uimm_bits!(IMM8, 8);
19925    _mm512_maskz_rorv_epi64(k, a, _mm512_set1_epi64(IMM8 as i64))
19926}
19927
19928/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19929///
19930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi64&expand=4727)
19931#[inline]
19932#[target_feature(enable = "avx512f,avx512vl")]
19933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19934#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19935#[rustc_legacy_const_generics(1)]
19936#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19937pub const fn _mm256_ror_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19938    static_assert_uimm_bits!(IMM8, 8);
19939    _mm256_rorv_epi64(a, _mm256_set1_epi64x(IMM8 as i64))
19940}
19941
19942/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19943///
19944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi64&expand=4725)
19945#[inline]
19946#[target_feature(enable = "avx512f,avx512vl")]
19947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19948#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19949#[rustc_legacy_const_generics(3)]
19950#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19951pub const fn _mm256_mask_ror_epi64<const IMM8: i32>(
19952    src: __m256i,
19953    k: __mmask8,
19954    a: __m256i,
19955) -> __m256i {
19956    static_assert_uimm_bits!(IMM8, 8);
19957    _mm256_mask_rorv_epi64(src, k, a, _mm256_set1_epi64x(IMM8 as i64))
19958}
19959
19960/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19961///
19962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi64&expand=4726)
19963#[inline]
19964#[target_feature(enable = "avx512f,avx512vl")]
19965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19966#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19967#[rustc_legacy_const_generics(2)]
19968#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19969pub const fn _mm256_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19970    static_assert_uimm_bits!(IMM8, 8);
19971    _mm256_maskz_rorv_epi64(k, a, _mm256_set1_epi64x(IMM8 as i64))
19972}
19973
19974/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19975///
19976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi64&expand=4724)
19977#[inline]
19978#[target_feature(enable = "avx512f,avx512vl")]
19979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19980#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19981#[rustc_legacy_const_generics(1)]
19982#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19983pub const fn _mm_ror_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19984    static_assert_uimm_bits!(IMM8, 8);
19985    _mm_rorv_epi64(a, _mm_set1_epi64x(IMM8 as i64))
19986}
19987
19988/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19989///
19990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi64&expand=4722)
19991#[inline]
19992#[target_feature(enable = "avx512f,avx512vl")]
19993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19994#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19995#[rustc_legacy_const_generics(3)]
19996#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19997pub const fn _mm_mask_ror_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19998    static_assert_uimm_bits!(IMM8, 8);
19999    _mm_mask_rorv_epi64(src, k, a, _mm_set1_epi64x(IMM8 as i64))
20000}
20001
20002/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20003///
20004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi64&expand=4723)
20005#[inline]
20006#[target_feature(enable = "avx512f,avx512vl")]
20007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20008#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
20009#[rustc_legacy_const_generics(2)]
20010#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20011pub const fn _mm_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
20012    static_assert_uimm_bits!(IMM8, 8);
20013    _mm_maskz_rorv_epi64(k, a, _mm_set1_epi64x(IMM8 as i64))
20014}
20015
20016/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
20017///
20018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi32&expand=5310)
20019#[inline]
20020#[target_feature(enable = "avx512f")]
20021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20022#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20023#[rustc_legacy_const_generics(1)]
20024#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20025pub const fn _mm512_slli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
20026    unsafe {
20027        static_assert_uimm_bits!(IMM8, 8);
20028        if IMM8 >= 32 {
20029            _mm512_setzero_si512()
20030        } else {
20031            transmute(simd_shl(a.as_u32x16(), u32x16::splat(IMM8)))
20032        }
20033    }
20034}
20035
20036/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20037///
20038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi32&expand=5308)
20039#[inline]
20040#[target_feature(enable = "avx512f")]
20041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20042#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20043#[rustc_legacy_const_generics(3)]
20044#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20045pub const fn _mm512_mask_slli_epi32<const IMM8: u32>(
20046    src: __m512i,
20047    k: __mmask16,
20048    a: __m512i,
20049) -> __m512i {
20050    unsafe {
20051        static_assert_uimm_bits!(IMM8, 8);
20052        let shf = if IMM8 >= 32 {
20053            u32x16::ZERO
20054        } else {
20055            simd_shl(a.as_u32x16(), u32x16::splat(IMM8))
20056        };
20057        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
20058    }
20059}
20060
20061/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20062///
20063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi32&expand=5309)
20064#[inline]
20065#[target_feature(enable = "avx512f")]
20066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20067#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20068#[rustc_legacy_const_generics(2)]
20069#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20070pub const fn _mm512_maskz_slli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
20071    unsafe {
20072        static_assert_uimm_bits!(IMM8, 8);
20073        if IMM8 >= 32 {
20074            _mm512_setzero_si512()
20075        } else {
20076            let shf = simd_shl(a.as_u32x16(), u32x16::splat(IMM8));
20077            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
20078        }
20079    }
20080}
20081
20082/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20083///
20084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi32&expand=5305)
20085#[inline]
20086#[target_feature(enable = "avx512f,avx512vl")]
20087#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20088#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20089#[rustc_legacy_const_generics(3)]
20090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20091pub const fn _mm256_mask_slli_epi32<const IMM8: u32>(
20092    src: __m256i,
20093    k: __mmask8,
20094    a: __m256i,
20095) -> __m256i {
20096    unsafe {
20097        static_assert_uimm_bits!(IMM8, 8);
20098        let r = if IMM8 >= 32 {
20099            u32x8::ZERO
20100        } else {
20101            simd_shl(a.as_u32x8(), u32x8::splat(IMM8))
20102        };
20103        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
20104    }
20105}
20106
20107/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20108///
20109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi32&expand=5306)
20110#[inline]
20111#[target_feature(enable = "avx512f,avx512vl")]
20112#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20113#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20114#[rustc_legacy_const_generics(2)]
20115#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20116pub const fn _mm256_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20117    unsafe {
20118        static_assert_uimm_bits!(IMM8, 8);
20119        if IMM8 >= 32 {
20120            _mm256_setzero_si256()
20121        } else {
20122            let r = simd_shl(a.as_u32x8(), u32x8::splat(IMM8));
20123            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
20124        }
20125    }
20126}
20127
20128/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20129///
20130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi32&expand=5302)
20131#[inline]
20132#[target_feature(enable = "avx512f,avx512vl")]
20133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20134#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20135#[rustc_legacy_const_generics(3)]
20136#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20137pub const fn _mm_mask_slli_epi32<const IMM8: u32>(
20138    src: __m128i,
20139    k: __mmask8,
20140    a: __m128i,
20141) -> __m128i {
20142    unsafe {
20143        static_assert_uimm_bits!(IMM8, 8);
20144        let r = if IMM8 >= 32 {
20145            u32x4::ZERO
20146        } else {
20147            simd_shl(a.as_u32x4(), u32x4::splat(IMM8))
20148        };
20149        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
20150    }
20151}
20152
20153/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20154///
20155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi32&expand=5303)
20156#[inline]
20157#[target_feature(enable = "avx512f,avx512vl")]
20158#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20159#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20160#[rustc_legacy_const_generics(2)]
20161#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20162pub const fn _mm_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20163    unsafe {
20164        static_assert_uimm_bits!(IMM8, 8);
20165        if IMM8 >= 32 {
20166            _mm_setzero_si128()
20167        } else {
20168            let r = simd_shl(a.as_u32x4(), u32x4::splat(IMM8));
20169            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
20170        }
20171    }
20172}
20173
20174/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
20175///
20176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi32&expand=5522)
20177#[inline]
20178#[target_feature(enable = "avx512f")]
20179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20180#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20181#[rustc_legacy_const_generics(1)]
20182#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20183pub const fn _mm512_srli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
20184    unsafe {
20185        static_assert_uimm_bits!(IMM8, 8);
20186        if IMM8 >= 32 {
20187            _mm512_setzero_si512()
20188        } else {
20189            transmute(simd_shr(a.as_u32x16(), u32x16::splat(IMM8)))
20190        }
20191    }
20192}
20193
20194/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20195///
20196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi32&expand=5520)
20197#[inline]
20198#[target_feature(enable = "avx512f")]
20199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20200#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20201#[rustc_legacy_const_generics(3)]
20202#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20203pub const fn _mm512_mask_srli_epi32<const IMM8: u32>(
20204    src: __m512i,
20205    k: __mmask16,
20206    a: __m512i,
20207) -> __m512i {
20208    unsafe {
20209        static_assert_uimm_bits!(IMM8, 8);
20210        let shf = if IMM8 >= 32 {
20211            u32x16::ZERO
20212        } else {
20213            simd_shr(a.as_u32x16(), u32x16::splat(IMM8))
20214        };
20215        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
20216    }
20217}
20218
20219/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20220///
20221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi32&expand=5521)
20222#[inline]
20223#[target_feature(enable = "avx512f")]
20224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20225#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20226#[rustc_legacy_const_generics(2)]
20227#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20228pub const fn _mm512_maskz_srli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
20229    unsafe {
20230        static_assert_uimm_bits!(IMM8, 8);
20231        if IMM8 >= 32 {
20232            _mm512_setzero_si512()
20233        } else {
20234            let shf = simd_shr(a.as_u32x16(), u32x16::splat(IMM8));
20235            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
20236        }
20237    }
20238}
20239
20240/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20241///
20242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi32&expand=5517)
20243#[inline]
20244#[target_feature(enable = "avx512f,avx512vl")]
20245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20246#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20247#[rustc_legacy_const_generics(3)]
20248#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20249pub const fn _mm256_mask_srli_epi32<const IMM8: u32>(
20250    src: __m256i,
20251    k: __mmask8,
20252    a: __m256i,
20253) -> __m256i {
20254    unsafe {
20255        static_assert_uimm_bits!(IMM8, 8);
20256        let r = if IMM8 >= 32 {
20257            u32x8::ZERO
20258        } else {
20259            simd_shr(a.as_u32x8(), u32x8::splat(IMM8))
20260        };
20261        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
20262    }
20263}
20264
/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi32&expand=5518)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        // The immediate must fit in 8 bits (compile-time check).
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 32 {
            // Counts >= 32 clear every lane, regardless of the mask.
            _mm256_setzero_si256()
        } else {
            let r = simd_shr(a.as_u32x8(), u32x8::splat(IMM8));
            // Zero-masking: lanes with a clear mask bit become 0.
            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
        }
    }
}
20285
/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi32&expand=5514)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_srli_epi32<const IMM8: u32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        // The immediate must fit in 8 bits (compile-time check).
        static_assert_uimm_bits!(IMM8, 8);
        // Counts >= 32 clear every lane; `simd_shr` is only given in-range counts.
        let r = if IMM8 >= 32 {
            u32x4::ZERO
        } else {
            simd_shr(a.as_u32x4(), u32x4::splat(IMM8))
        };
        // Per-lane blend: shifted value where the mask bit is set, else copied from `src`.
        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
    }
}
20310
/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi32&expand=5515)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        // The immediate must fit in 8 bits (compile-time check).
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 32 {
            // Counts >= 32 clear every lane, regardless of the mask.
            _mm_setzero_si128()
        } else {
            let r = simd_shr(a.as_u32x4(), u32x4::splat(IMM8));
            // Zero-masking: lanes with a clear mask bit become 0.
            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
        }
    }
}
20331
/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi64&expand=5319)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_slli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        // The immediate must fit in 8 bits (compile-time check).
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            // Counts >= 64 clear every lane; `simd_shl` is only given in-range counts.
            _mm512_setzero_si512()
        } else {
            transmute(simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
        }
    }
}
20351
/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi64&expand=5317)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_slli_epi64<const IMM8: u32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
) -> __m512i {
    unsafe {
        // The immediate must fit in 8 bits (compile-time check).
        static_assert_uimm_bits!(IMM8, 8);
        // Counts >= 64 clear every lane; `simd_shl` is only given in-range counts.
        let shf = if IMM8 >= 64 {
            u64x8::ZERO
        } else {
            simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64))
        };
        // Per-lane blend: shifted value where the mask bit is set, else copied from `src`.
        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
    }
}
20376
/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi64&expand=5318)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        // The immediate must fit in 8 bits (compile-time check).
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            // Counts >= 64 clear every lane, regardless of the mask.
            _mm512_setzero_si512()
        } else {
            let shf = simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64));
            // Zero-masking: lanes with a clear mask bit become 0.
            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
        }
    }
}
20397
/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi64&expand=5314)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_slli_epi64<const IMM8: u32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    unsafe {
        // The immediate must fit in 8 bits (compile-time check).
        static_assert_uimm_bits!(IMM8, 8);
        // Counts >= 64 clear every lane; `simd_shl` is only given in-range counts.
        let r = if IMM8 >= 64 {
            u64x4::ZERO
        } else {
            simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64))
        };
        // Per-lane blend: shifted value where the mask bit is set, else copied from `src`.
        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
    }
}
20422
/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi64&expand=5315)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        // The immediate must fit in 8 bits (compile-time check).
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            // Counts >= 64 clear every lane, regardless of the mask.
            _mm256_setzero_si256()
        } else {
            let r = simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64));
            // Zero-masking: lanes with a clear mask bit become 0.
            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
        }
    }
}
20443
/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi64&expand=5311)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_slli_epi64<const IMM8: u32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        // The immediate must fit in 8 bits (compile-time check).
        static_assert_uimm_bits!(IMM8, 8);
        // Counts >= 64 clear every lane; `simd_shl` is only given in-range counts.
        let r = if IMM8 >= 64 {
            u64x2::ZERO
        } else {
            simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64))
        };
        // Per-lane blend: shifted value where the mask bit is set, else copied from `src`.
        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
    }
}
20468
/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi64&expand=5312)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        // The immediate must fit in 8 bits (compile-time check).
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            // Counts >= 64 clear every lane, regardless of the mask.
            _mm_setzero_si128()
        } else {
            let r = simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64));
            // Zero-masking: lanes with a clear mask bit become 0.
            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
        }
    }
}
20489
/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi64&expand=5531)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        // The immediate must fit in 8 bits (compile-time check).
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            // Counts >= 64 clear every lane; `simd_shr` is only given in-range counts.
            _mm512_setzero_si512()
        } else {
            transmute(simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
        }
    }
}
20509
/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi64&expand=5529)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_srli_epi64<const IMM8: u32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
) -> __m512i {
    unsafe {
        // The immediate must fit in 8 bits (compile-time check).
        static_assert_uimm_bits!(IMM8, 8);
        // Counts >= 64 clear every lane; `simd_shr` is only given in-range counts.
        let shf = if IMM8 >= 64 {
            u64x8::ZERO
        } else {
            simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64))
        };
        // Per-lane blend: shifted value where the mask bit is set, else copied from `src`.
        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
    }
}
20534
/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi64&expand=5530)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        // The immediate must fit in 8 bits (compile-time check).
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            // Counts >= 64 clear every lane, regardless of the mask.
            _mm512_setzero_si512()
        } else {
            let shf = simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64));
            // Zero-masking: lanes with a clear mask bit become 0.
            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
        }
    }
}
20555
/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi64&expand=5526)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_srli_epi64<const IMM8: u32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    unsafe {
        // The immediate must fit in 8 bits (compile-time check).
        static_assert_uimm_bits!(IMM8, 8);
        // Counts >= 64 clear every lane; `simd_shr` is only given in-range counts.
        let r = if IMM8 >= 64 {
            u64x4::ZERO
        } else {
            simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64))
        };
        // Per-lane blend: shifted value where the mask bit is set, else copied from `src`.
        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
    }
}
20580
/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi64&expand=5527)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        // The immediate must fit in 8 bits (compile-time check).
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            // Counts >= 64 clear every lane, regardless of the mask.
            _mm256_setzero_si256()
        } else {
            let r = simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64));
            // Zero-masking: lanes with a clear mask bit become 0.
            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
        }
    }
}
20601
/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi64&expand=5523)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_srli_epi64<const IMM8: u32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        // The immediate must fit in 8 bits (compile-time check).
        static_assert_uimm_bits!(IMM8, 8);
        // Counts >= 64 clear every lane; `simd_shr` is only given in-range counts.
        let r = if IMM8 >= 64 {
            u64x2::ZERO
        } else {
            simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64))
        };
        // Per-lane blend: shifted value where the mask bit is set, else copied from `src`.
        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
    }
}
20626
/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi64&expand=5524)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        // The immediate must fit in 8 bits (compile-time check).
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            // Counts >= 64 clear every lane, regardless of the mask.
            _mm_setzero_si128()
        } else {
            let r = simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64));
            // Zero-masking: lanes with a clear mask bit become 0.
            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
        }
    }
}
20647
20648/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
20649///
20650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi32&expand=5280)
20651#[inline]
20652#[target_feature(enable = "avx512f")]
20653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20654#[cfg_attr(test, assert_instr(vpslld))]
20655pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
20656    unsafe { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) }
20657}
20658
20659/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20660///
20661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi32&expand=5278)
20662#[inline]
20663#[target_feature(enable = "avx512f")]
20664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20665#[cfg_attr(test, assert_instr(vpslld))]
20666pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20667    unsafe {
20668        let shf = _mm512_sll_epi32(a, count).as_i32x16();
20669        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20670    }
20671}
20672
20673/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20674///
20675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi32&expand=5279)
20676#[inline]
20677#[target_feature(enable = "avx512f")]
20678#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20679#[cfg_attr(test, assert_instr(vpslld))]
20680pub fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20681    unsafe {
20682        let shf = _mm512_sll_epi32(a, count).as_i32x16();
20683        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20684    }
20685}
20686
20687/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20688///
20689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi32&expand=5275)
20690#[inline]
20691#[target_feature(enable = "avx512f,avx512vl")]
20692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20693#[cfg_attr(test, assert_instr(vpslld))]
20694pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20695    unsafe {
20696        let shf = _mm256_sll_epi32(a, count).as_i32x8();
20697        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20698    }
20699}
20700
20701/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20702///
20703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi32&expand=5276)
20704#[inline]
20705#[target_feature(enable = "avx512f,avx512vl")]
20706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20707#[cfg_attr(test, assert_instr(vpslld))]
20708pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20709    unsafe {
20710        let shf = _mm256_sll_epi32(a, count).as_i32x8();
20711        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20712    }
20713}
20714
20715/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20716///
20717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi32&expand=5272)
20718#[inline]
20719#[target_feature(enable = "avx512f,avx512vl")]
20720#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20721#[cfg_attr(test, assert_instr(vpslld))]
20722pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20723    unsafe {
20724        let shf = _mm_sll_epi32(a, count).as_i32x4();
20725        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20726    }
20727}
20728
20729/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20730///
20731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi32&expand=5273)
20732#[inline]
20733#[target_feature(enable = "avx512f,avx512vl")]
20734#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20735#[cfg_attr(test, assert_instr(vpslld))]
20736pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20737    unsafe {
20738        let shf = _mm_sll_epi32(a, count).as_i32x4();
20739        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20740    }
20741}
20742
20743/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
20744///
20745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi32&expand=5492)
20746#[inline]
20747#[target_feature(enable = "avx512f")]
20748#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20749#[cfg_attr(test, assert_instr(vpsrld))]
20750pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
20751    unsafe { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) }
20752}
20753
20754/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20755///
20756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi32&expand=5490)
20757#[inline]
20758#[target_feature(enable = "avx512f")]
20759#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20760#[cfg_attr(test, assert_instr(vpsrld))]
20761pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20762    unsafe {
20763        let shf = _mm512_srl_epi32(a, count).as_i32x16();
20764        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20765    }
20766}
20767
20768/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20769///
20770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi32&expand=5491)
20771#[inline]
20772#[target_feature(enable = "avx512f")]
20773#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20774#[cfg_attr(test, assert_instr(vpsrld))]
20775pub fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20776    unsafe {
20777        let shf = _mm512_srl_epi32(a, count).as_i32x16();
20778        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20779    }
20780}
20781
20782/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20783///
20784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi32&expand=5487)
20785#[inline]
20786#[target_feature(enable = "avx512f,avx512vl")]
20787#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20788#[cfg_attr(test, assert_instr(vpsrld))]
20789pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20790    unsafe {
20791        let shf = _mm256_srl_epi32(a, count).as_i32x8();
20792        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20793    }
20794}
20795
20796/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20797///
20798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi32&expand=5488)
20799#[inline]
20800#[target_feature(enable = "avx512f,avx512vl")]
20801#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20802#[cfg_attr(test, assert_instr(vpsrld))]
20803pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20804    unsafe {
20805        let shf = _mm256_srl_epi32(a, count).as_i32x8();
20806        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20807    }
20808}
20809
20810/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20811///
20812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi32&expand=5484)
20813#[inline]
20814#[target_feature(enable = "avx512f,avx512vl")]
20815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20816#[cfg_attr(test, assert_instr(vpsrld))]
20817pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20818    unsafe {
20819        let shf = _mm_srl_epi32(a, count).as_i32x4();
20820        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20821    }
20822}
20823
20824/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20825///
20826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi32&expand=5485)
20827#[inline]
20828#[target_feature(enable = "avx512f,avx512vl")]
20829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20830#[cfg_attr(test, assert_instr(vpsrld))]
20831pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20832    unsafe {
20833        let shf = _mm_srl_epi32(a, count).as_i32x4();
20834        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20835    }
20836}
20837
20838/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
20839///
20840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi64&expand=5289)
20841#[inline]
20842#[target_feature(enable = "avx512f")]
20843#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20844#[cfg_attr(test, assert_instr(vpsllq))]
20845pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
20846    unsafe { transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) }
20847}
20848
20849/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20850///
20851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi64&expand=5287)
20852#[inline]
20853#[target_feature(enable = "avx512f")]
20854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20855#[cfg_attr(test, assert_instr(vpsllq))]
20856pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20857    unsafe {
20858        let shf = _mm512_sll_epi64(a, count).as_i64x8();
20859        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20860    }
20861}
20862
20863/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20864///
20865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi64&expand=5288)
20866#[inline]
20867#[target_feature(enable = "avx512f")]
20868#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20869#[cfg_attr(test, assert_instr(vpsllq))]
20870pub fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20871    unsafe {
20872        let shf = _mm512_sll_epi64(a, count).as_i64x8();
20873        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20874    }
20875}
20876
20877/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20878///
20879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi64&expand=5284)
20880#[inline]
20881#[target_feature(enable = "avx512f,avx512vl")]
20882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20883#[cfg_attr(test, assert_instr(vpsllq))]
20884pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20885    unsafe {
20886        let shf = _mm256_sll_epi64(a, count).as_i64x4();
20887        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20888    }
20889}
20890
20891/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20892///
20893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi64&expand=5285)
20894#[inline]
20895#[target_feature(enable = "avx512f,avx512vl")]
20896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20897#[cfg_attr(test, assert_instr(vpsllq))]
20898pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20899    unsafe {
20900        let shf = _mm256_sll_epi64(a, count).as_i64x4();
20901        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20902    }
20903}
20904
20905/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20906///
20907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi64&expand=5281)
20908#[inline]
20909#[target_feature(enable = "avx512f,avx512vl")]
20910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20911#[cfg_attr(test, assert_instr(vpsllq))]
20912pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20913    unsafe {
20914        let shf = _mm_sll_epi64(a, count).as_i64x2();
20915        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20916    }
20917}
20918
20919/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20920///
20921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi64&expand=5282)
20922#[inline]
20923#[target_feature(enable = "avx512f,avx512vl")]
20924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20925#[cfg_attr(test, assert_instr(vpsllq))]
20926pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20927    unsafe {
20928        let shf = _mm_sll_epi64(a, count).as_i64x2();
20929        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20930    }
20931}
20932
/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi64&expand=5501)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
    // Delegates to the LLVM `vpsrlq` intrinsic. The shift amount is taken from
    // the low 64 bits of `count`; per Intel's documentation (linked above),
    // amounts greater than 63 zero every lane, so a plain `simd_shr` (which
    // would be UB for oversized shifts) cannot be used here.
    unsafe { transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) }
}
20943
20944/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20945///
20946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi64&expand=5499)
20947#[inline]
20948#[target_feature(enable = "avx512f")]
20949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20950#[cfg_attr(test, assert_instr(vpsrlq))]
20951pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20952    unsafe {
20953        let shf = _mm512_srl_epi64(a, count).as_i64x8();
20954        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20955    }
20956}
20957
20958/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20959///
20960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi64&expand=5500)
20961#[inline]
20962#[target_feature(enable = "avx512f")]
20963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20964#[cfg_attr(test, assert_instr(vpsrlq))]
20965pub fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20966    unsafe {
20967        let shf = _mm512_srl_epi64(a, count).as_i64x8();
20968        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20969    }
20970}
20971
20972/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20973///
20974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi64&expand=5496)
20975#[inline]
20976#[target_feature(enable = "avx512f,avx512vl")]
20977#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20978#[cfg_attr(test, assert_instr(vpsrlq))]
20979pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20980    unsafe {
20981        let shf = _mm256_srl_epi64(a, count).as_i64x4();
20982        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20983    }
20984}
20985
20986/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20987///
20988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi64&expand=5497)
20989#[inline]
20990#[target_feature(enable = "avx512f,avx512vl")]
20991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20992#[cfg_attr(test, assert_instr(vpsrlq))]
20993pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20994    unsafe {
20995        let shf = _mm256_srl_epi64(a, count).as_i64x4();
20996        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20997    }
20998}
20999
21000/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21001///
21002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi64&expand=5493)
21003#[inline]
21004#[target_feature(enable = "avx512f,avx512vl")]
21005#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21006#[cfg_attr(test, assert_instr(vpsrlq))]
21007pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21008    unsafe {
21009        let shf = _mm_srl_epi64(a, count).as_i64x2();
21010        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21011    }
21012}
21013
21014/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21015///
21016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi64&expand=5494)
21017#[inline]
21018#[target_feature(enable = "avx512f,avx512vl")]
21019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21020#[cfg_attr(test, assert_instr(vpsrlq))]
21021pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21022    unsafe {
21023        let shf = _mm_srl_epi64(a, count).as_i64x2();
21024        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21025    }
21026}
21027
/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi32&expand=5407)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
    // Delegates to the LLVM `vpsrad` intrinsic. The shift amount is taken from
    // the low 64 bits of `count`; per Intel's documentation (linked above),
    // amounts greater than 31 fill every lane with its sign bit, so a plain
    // `simd_shr` (UB for oversized shifts) cannot be used here.
    unsafe { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) }
}
21038
21039/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21040///
21041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi32&expand=5405)
21042#[inline]
21043#[target_feature(enable = "avx512f")]
21044#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21045#[cfg_attr(test, assert_instr(vpsrad))]
21046pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
21047    unsafe {
21048        let shf = _mm512_sra_epi32(a, count).as_i32x16();
21049        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21050    }
21051}
21052
21053/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21054///
21055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi32&expand=5406)
21056#[inline]
21057#[target_feature(enable = "avx512f")]
21058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21059#[cfg_attr(test, assert_instr(vpsrad))]
21060pub fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
21061    unsafe {
21062        let shf = _mm512_sra_epi32(a, count).as_i32x16();
21063        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21064    }
21065}
21066
21067/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21068///
21069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi32&expand=5402)
21070#[inline]
21071#[target_feature(enable = "avx512f,avx512vl")]
21072#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21073#[cfg_attr(test, assert_instr(vpsrad))]
21074pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
21075    unsafe {
21076        let shf = _mm256_sra_epi32(a, count).as_i32x8();
21077        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21078    }
21079}
21080
21081/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21082///
21083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi32&expand=5403)
21084#[inline]
21085#[target_feature(enable = "avx512f,avx512vl")]
21086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21087#[cfg_attr(test, assert_instr(vpsrad))]
21088pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
21089    unsafe {
21090        let shf = _mm256_sra_epi32(a, count).as_i32x8();
21091        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21092    }
21093}
21094
21095/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21096///
21097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi32&expand=5399)
21098#[inline]
21099#[target_feature(enable = "avx512f,avx512vl")]
21100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21101#[cfg_attr(test, assert_instr(vpsrad))]
21102pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21103    unsafe {
21104        let shf = _mm_sra_epi32(a, count).as_i32x4();
21105        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21106    }
21107}
21108
21109/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21110///
21111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi32&expand=5400)
21112#[inline]
21113#[target_feature(enable = "avx512f,avx512vl")]
21114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21115#[cfg_attr(test, assert_instr(vpsrad))]
21116pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21117    unsafe {
21118        let shf = _mm_sra_epi32(a, count).as_i32x4();
21119        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21120    }
21121}
21122
/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi64&expand=5416)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
    // Delegates to the LLVM `vpsraq` intrinsic. The shift amount is taken from
    // the low 64 bits of `count`; per Intel's documentation (linked above),
    // amounts greater than 63 fill every lane with its sign bit, so a plain
    // `simd_shr` (UB for oversized shifts) cannot be used here.
    unsafe { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) }
}
21133
21134/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21135///
21136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi64&expand=5414)
21137#[inline]
21138#[target_feature(enable = "avx512f")]
21139#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21140#[cfg_attr(test, assert_instr(vpsraq))]
21141pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
21142    unsafe {
21143        let shf = _mm512_sra_epi64(a, count).as_i64x8();
21144        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21145    }
21146}
21147
21148/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21149///
21150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi64&expand=5415)
21151#[inline]
21152#[target_feature(enable = "avx512f")]
21153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21154#[cfg_attr(test, assert_instr(vpsraq))]
21155pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
21156    unsafe {
21157        let shf = _mm512_sra_epi64(a, count).as_i64x8();
21158        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21159    }
21160}
21161
/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi64&expand=5413)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i {
    // 256-bit variant of the 64-bit arithmetic right shift; unlike the 32-bit
    // forms, this has no SSE/AVX2 equivalent, so it delegates to the AVX-512VL
    // `vpsraq` LLVM intrinsic. The shift amount is the low 64 bits of `count`.
    unsafe { transmute(vpsraq256(a.as_i64x4(), count.as_i64x2())) }
}
21172
21173/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21174///
21175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi64&expand=5411)
21176#[inline]
21177#[target_feature(enable = "avx512f,avx512vl")]
21178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21179#[cfg_attr(test, assert_instr(vpsraq))]
21180pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
21181    unsafe {
21182        let shf = _mm256_sra_epi64(a, count).as_i64x4();
21183        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21184    }
21185}
21186
21187/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21188///
21189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi64&expand=5412)
21190#[inline]
21191#[target_feature(enable = "avx512f,avx512vl")]
21192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21193#[cfg_attr(test, assert_instr(vpsraq))]
21194pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
21195    unsafe {
21196        let shf = _mm256_sra_epi64(a, count).as_i64x4();
21197        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21198    }
21199}
21200
/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi64&expand=5410)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
    // 128-bit variant of the 64-bit arithmetic right shift; unlike the 32-bit
    // forms, this has no SSE/AVX2 equivalent, so it delegates to the AVX-512VL
    // `vpsraq` LLVM intrinsic. The shift amount is the low 64 bits of `count`.
    unsafe { transmute(vpsraq128(a.as_i64x2(), count.as_i64x2())) }
}
21211
21212/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21213///
21214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi64&expand=5408)
21215#[inline]
21216#[target_feature(enable = "avx512f,avx512vl")]
21217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21218#[cfg_attr(test, assert_instr(vpsraq))]
21219pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21220    unsafe {
21221        let shf = _mm_sra_epi64(a, count).as_i64x2();
21222        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21223    }
21224}
21225
21226/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21227///
21228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi64&expand=5409)
21229#[inline]
21230#[target_feature(enable = "avx512f,avx512vl")]
21231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21232#[cfg_attr(test, assert_instr(vpsraq))]
21233pub fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21234    unsafe {
21235        let shf = _mm_sra_epi64(a, count).as_i64x2();
21236        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21237    }
21238}
21239
21240/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
21241///
21242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi32&expand=5436)
21243#[inline]
21244#[target_feature(enable = "avx512f")]
21245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21246#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21247#[rustc_legacy_const_generics(1)]
21248#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21249pub const fn _mm512_srai_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
21250    unsafe {
21251        static_assert_uimm_bits!(IMM8, 8);
21252        transmute(simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)))
21253    }
21254}
21255
21256/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21257///
21258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi32&expand=5434)
21259#[inline]
21260#[target_feature(enable = "avx512f")]
21261#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21262#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21263#[rustc_legacy_const_generics(3)]
21264#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21265pub const fn _mm512_mask_srai_epi32<const IMM8: u32>(
21266    src: __m512i,
21267    k: __mmask16,
21268    a: __m512i,
21269) -> __m512i {
21270    unsafe {
21271        static_assert_uimm_bits!(IMM8, 8);
21272        let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
21273        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
21274    }
21275}
21276
21277/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21278///
21279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi32&expand=5435)
21280#[inline]
21281#[target_feature(enable = "avx512f")]
21282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21283#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21284#[rustc_legacy_const_generics(2)]
21285#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21286pub const fn _mm512_maskz_srai_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
21287    unsafe {
21288        static_assert_uimm_bits!(IMM8, 8);
21289        let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
21290        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
21291    }
21292}
21293
21294/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21295///
21296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi32&expand=5431)
21297#[inline]
21298#[target_feature(enable = "avx512f,avx512vl")]
21299#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21300#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21301#[rustc_legacy_const_generics(3)]
21302#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21303pub const fn _mm256_mask_srai_epi32<const IMM8: u32>(
21304    src: __m256i,
21305    k: __mmask8,
21306    a: __m256i,
21307) -> __m256i {
21308    unsafe {
21309        let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
21310        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
21311    }
21312}
21313
21314/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21315///
21316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi32&expand=5432)
21317#[inline]
21318#[target_feature(enable = "avx512f,avx512vl")]
21319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21320#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21321#[rustc_legacy_const_generics(2)]
21322#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21323pub const fn _mm256_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
21324    unsafe {
21325        let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
21326        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
21327    }
21328}
21329
21330/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21331///
21332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi32&expand=5428)
21333#[inline]
21334#[target_feature(enable = "avx512f,avx512vl")]
21335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21336#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21337#[rustc_legacy_const_generics(3)]
21338#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21339pub const fn _mm_mask_srai_epi32<const IMM8: u32>(
21340    src: __m128i,
21341    k: __mmask8,
21342    a: __m128i,
21343) -> __m128i {
21344    unsafe {
21345        let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
21346        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
21347    }
21348}
21349
21350/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21351///
21352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi32&expand=5429)
21353#[inline]
21354#[target_feature(enable = "avx512f,avx512vl")]
21355#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21356#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21357#[rustc_legacy_const_generics(2)]
21358#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21359pub const fn _mm_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
21360    unsafe {
21361        let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
21362        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
21363    }
21364}
21365
21366/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
21367///
21368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi64&expand=5445)
21369#[inline]
21370#[target_feature(enable = "avx512f")]
21371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21372#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21373#[rustc_legacy_const_generics(1)]
21374#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21375pub const fn _mm512_srai_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
21376    unsafe {
21377        static_assert_uimm_bits!(IMM8, 8);
21378        transmute(simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)))
21379    }
21380}
21381
21382/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21383///
21384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi64&expand=5443)
21385#[inline]
21386#[target_feature(enable = "avx512f")]
21387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21388#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21389#[rustc_legacy_const_generics(3)]
21390#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21391pub const fn _mm512_mask_srai_epi64<const IMM8: u32>(
21392    src: __m512i,
21393    k: __mmask8,
21394    a: __m512i,
21395) -> __m512i {
21396    unsafe {
21397        static_assert_uimm_bits!(IMM8, 8);
21398        let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
21399        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21400    }
21401}
21402
21403/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21404///
21405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi64&expand=5444)
21406#[inline]
21407#[target_feature(enable = "avx512f")]
21408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21409#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21410#[rustc_legacy_const_generics(2)]
21411#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21412pub const fn _mm512_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
21413    unsafe {
21414        static_assert_uimm_bits!(IMM8, 8);
21415        let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
21416        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21417    }
21418}
21419
21420/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
21421///
21422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi64&expand=5442)
21423#[inline]
21424#[target_feature(enable = "avx512f,avx512vl")]
21425#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21426#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21427#[rustc_legacy_const_generics(1)]
21428#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21429pub const fn _mm256_srai_epi64<const IMM8: u32>(a: __m256i) -> __m256i {
21430    unsafe {
21431        static_assert_uimm_bits!(IMM8, 8);
21432        transmute(simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)))
21433    }
21434}
21435
21436/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21437///
21438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi64&expand=5440)
21439#[inline]
21440#[target_feature(enable = "avx512f,avx512vl")]
21441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21442#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21443#[rustc_legacy_const_generics(3)]
21444#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21445pub const fn _mm256_mask_srai_epi64<const IMM8: u32>(
21446    src: __m256i,
21447    k: __mmask8,
21448    a: __m256i,
21449) -> __m256i {
21450    unsafe {
21451        static_assert_uimm_bits!(IMM8, 8);
21452        let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
21453        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21454    }
21455}
21456
21457/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21458///
21459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi64&expand=5441)
21460#[inline]
21461#[target_feature(enable = "avx512f,avx512vl")]
21462#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21463#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21464#[rustc_legacy_const_generics(2)]
21465#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21466pub const fn _mm256_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
21467    unsafe {
21468        static_assert_uimm_bits!(IMM8, 8);
21469        let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
21470        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21471    }
21472}
21473
/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi64&expand=5439)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srai_epi64<const IMM8: u32>(a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Clamp to 63: arithmetic shift by 63 is all-sign-bits, matching
        // vpsraq saturation for counts >= 64.
        transmute(simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)))
    }
}
21489
/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi64&expand=5437)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_srai_epi64<const IMM8: u32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Clamp to 63: arithmetic shift by 63 is all-sign-bits, matching
        // vpsraq saturation for counts >= 64.
        let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
        // Lanes with a clear mask bit are copied from `src`.
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
    }
}
21510
/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi64&expand=5438)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Clamp to 63: arithmetic shift by 63 is all-sign-bits, matching
        // vpsraq saturation for counts >= 64.
        let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
        // Lanes with a clear mask bit are zeroed.
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}
21527
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi32&expand=5465)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        // Treat counts as unsigned so a single `< 32` compare also catches
        // negative (i.e. huge unsigned) counts.
        let count = count.as_u32x16();
        let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
        // Lanes with count >= 32 shift by 31 instead: an arithmetic shift by
        // 31 is all-sign-bits, matching vpsravd saturation (and avoiding UB).
        // `transmute` only reinterprets the in-range u32 counts as i32.
        let count = simd_select(no_overflow, transmute(count), i32x16::splat(31));
        simd_shr(a.as_i32x16(), count).as_m512i()
    }
}
21544
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi32&expand=5463)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_srav_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    unsafe {
        // Delegate to the unmasked form, then blend per the writemask:
        // lanes with a clear mask bit are copied from `src`.
        let shf = _mm512_srav_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
    }
}
21564
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi32&expand=5464)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        // Delegate to the unmasked form; lanes with a clear mask bit are zeroed.
        let shf = _mm512_srav_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}
21579
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi32&expand=5460)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_srav_epi32(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    unsafe {
        // Delegate to the AVX2 unmasked form, then blend per the writemask:
        // lanes with a clear mask bit are copied from `src`.
        let shf = _mm256_srav_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
    }
}
21599
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi32&expand=5461)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        // Delegate to the AVX2 unmasked form; lanes with a clear mask bit are zeroed.
        let shf = _mm256_srav_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}
21614
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi32&expand=5457)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_srav_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        // Delegate to the AVX2 unmasked form, then blend per the writemask:
        // lanes with a clear mask bit are copied from `src`.
        let shf = _mm_srav_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
    }
}
21629
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi32&expand=5458)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        // Delegate to the AVX2 unmasked form; lanes with a clear mask bit are zeroed.
        let shf = _mm_srav_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}
21644
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi64&expand=5474)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        // Treat counts as unsigned so a single `< 64` compare also catches
        // negative (i.e. huge unsigned) counts.
        let count = count.as_u64x8();
        let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
        // Lanes with count >= 64 shift by 63 instead: an arithmetic shift by
        // 63 is all-sign-bits, matching vpsravq saturation (and avoiding UB).
        let count = simd_select(no_overflow, transmute(count), i64x8::splat(63));
        simd_shr(a.as_i64x8(), count).as_m512i()
    }
}
21661
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi64&expand=5472)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_srav_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    unsafe {
        // Delegate to the unmasked form, then blend per the writemask:
        // lanes with a clear mask bit are copied from `src`.
        let shf = _mm512_srav_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
    }
}
21681
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi64&expand=5473)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        // Delegate to the unmasked form; lanes with a clear mask bit are zeroed.
        let shf = _mm512_srav_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}
21696
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi64&expand=5471)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        // Treat counts as unsigned so a single `< 64` compare also catches
        // negative (i.e. huge unsigned) counts.
        let count = count.as_u64x4();
        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
        // Lanes with count >= 64 shift by 63 instead: an arithmetic shift by
        // 63 is all-sign-bits, matching vpsravq saturation (and avoiding UB).
        let count = simd_select(no_overflow, transmute(count), i64x4::splat(63));
        simd_shr(a.as_i64x4(), count).as_m256i()
    }
}
21713
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi64&expand=5469)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_srav_epi64(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    unsafe {
        // Delegate to the unmasked form, then blend per the writemask:
        // lanes with a clear mask bit are copied from `src`.
        let shf = _mm256_srav_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
    }
}
21733
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi64&expand=5470)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        // Delegate to the unmasked form; lanes with a clear mask bit are zeroed.
        let shf = _mm256_srav_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}
21748
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi64&expand=5468)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        // Treat counts as unsigned so a single `< 64` compare also catches
        // negative (i.e. huge unsigned) counts.
        let count = count.as_u64x2();
        let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
        // Lanes with count >= 64 shift by 63 instead: an arithmetic shift by
        // 63 is all-sign-bits, matching vpsravq saturation (and avoiding UB).
        let count = simd_select(no_overflow, transmute(count), i64x2::splat(63));
        simd_shr(a.as_i64x2(), count).as_m128i()
    }
}
21765
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi64&expand=5466)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_srav_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        // Delegate to the unmasked form, then blend per the writemask:
        // lanes with a clear mask bit are copied from `src`.
        let shf = _mm_srav_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
    }
}
21780
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi64&expand=5467)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        // Delegate to the unmasked form; lanes with a clear mask bit are zeroed.
        let shf = _mm_srav_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}
21795
/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi32&expand=4703)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // A funnel shift left with both inputs equal to `a` is a left rotate.
        // Rotation counts are taken modulo 32 via the `& 31` mask, matching
        // vprolvd (and keeping the funnel-shift amount in range).
        transmute(simd_funnel_shl(
            a.as_u32x16(),
            a.as_u32x16(),
            simd_and(b.as_u32x16(), u32x16::splat(31)),
        ))
    }
}
21813
/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi32&expand=4701)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Delegate to the unmasked form, then blend per the writemask:
        // lanes with a clear mask bit are copied from `src`.
        let rol = _mm512_rolv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
    }
}
21828
/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi32&expand=4702)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Delegate to the unmasked form; lanes with a clear mask bit are zeroed.
        let rol = _mm512_rolv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, rol, i32x16::ZERO))
    }
}
21843
/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi32&expand=4700)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // A funnel shift left with both inputs equal to `a` is a left rotate;
        // counts are taken modulo 32 via the `& 31` mask, matching vprolvd.
        transmute(simd_funnel_shl(
            a.as_u32x8(),
            a.as_u32x8(),
            simd_and(b.as_u32x8(), u32x8::splat(31)),
        ))
    }
}
21861
/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi32&expand=4698)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Delegate to the unmasked form, then blend per the writemask:
        // lanes with a clear mask bit are copied from `src`.
        let rol = _mm256_rolv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, rol, src.as_i32x8()))
    }
}
21876
/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi32&expand=4699)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Delegate to the unmasked form; lanes with a clear mask bit are zeroed.
        let rol = _mm256_rolv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, rol, i32x8::ZERO))
    }
}
21891
/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi32&expand=4697)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // A funnel shift left with both inputs equal to `a` is a left rotate;
        // counts are taken modulo 32 via the `& 31` mask, matching vprolvd.
        transmute(simd_funnel_shl(
            a.as_u32x4(),
            a.as_u32x4(),
            simd_and(b.as_u32x4(), u32x4::splat(31)),
        ))
    }
}
21909
/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi32&expand=4695)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Delegate to the unmasked form, then blend per the writemask:
        // lanes with a clear mask bit are copied from `src`.
        let rol = _mm_rolv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, rol, src.as_i32x4()))
    }
}
21924
/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi32&expand=4696)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Delegate to the unmasked form; lanes with a clear mask bit are zeroed.
        let rol = _mm_rolv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, rol, i32x4::ZERO))
    }
}
21939
/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi32&expand=4739)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // A funnel shift right with both inputs equal to `a` is a right rotate;
        // counts are taken modulo 32 via the `& 31` mask, matching vprorvd.
        transmute(simd_funnel_shr(
            a.as_u32x16(),
            a.as_u32x16(),
            simd_and(b.as_u32x16(), u32x16::splat(31)),
        ))
    }
}
21957
/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi32&expand=4737)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Delegate to the unmasked form, then blend per the writemask:
        // lanes with a clear mask bit are copied from `src`.
        let ror = _mm512_rorv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
    }
}
21972
/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi32&expand=4738)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Delegate to the unmasked form; lanes with a clear mask bit are zeroed.
        let ror = _mm512_rorv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, ror, i32x16::ZERO))
    }
}
21987
/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi32&expand=4736)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // A funnel shift right with both inputs equal to `a` is a right rotate;
        // counts are taken modulo 32 via the `& 31` mask, matching vprorvd.
        transmute(simd_funnel_shr(
            a.as_u32x8(),
            a.as_u32x8(),
            simd_and(b.as_u32x8(), u32x8::splat(31)),
        ))
    }
}
22005
/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi32&expand=4734)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Delegate to the unmasked form, then blend per the writemask:
        // lanes with a clear mask bit are copied from `src`.
        let ror = _mm256_rorv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, ror, src.as_i32x8()))
    }
}
22020
/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi32&expand=4735)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Delegate to the unmasked form; lanes with a clear mask bit are zeroed.
        let ror = _mm256_rorv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, ror, i32x8::ZERO))
    }
}
22035
/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi32&expand=4733)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // A funnel shift right with both inputs equal to `a` is a right rotate;
        // counts are taken modulo 32 via the `& 31` mask, matching vprorvd.
        transmute(simd_funnel_shr(
            a.as_u32x4(),
            a.as_u32x4(),
            simd_and(b.as_u32x4(), u32x4::splat(31)),
        ))
    }
}
22053
/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi32&expand=4731)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: all vectors are 128-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Rotate every lane, then keep the rotated lane where `k` is set and
        // copy the lane from `src` where it is clear.
        let ror = _mm_rorv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, ror, src.as_i32x4()))
    }
}
22068
/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi32&expand=4732)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: all vectors are 128-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Rotate every lane, then zero the lanes whose mask bit is clear.
        let ror = _mm_rorv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, ror, i32x4::ZERO))
    }
}
22083
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi64&expand=4712)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
    // A funnel shift left with both halves equal to `a` is a left rotate.
    // The per-lane count is reduced mod 64 (`& 63`), matching `vprolvq`.
    // SAFETY: all vectors are 512-bit; `transmute` between same-sized SIMD
    // vectors is sound.
    unsafe {
        transmute(simd_funnel_shl(
            a.as_u64x8(),
            a.as_u64x8(),
            simd_and(b.as_u64x8(), u64x8::splat(63)),
        ))
    }
}
22101
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi64&expand=4710)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    // SAFETY: all vectors are 512-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Rotate every lane, then keep the rotated lane where `k` is set and
        // copy the lane from `src` where it is clear.
        let rol = _mm512_rolv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
    }
}
22116
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi64&expand=4711)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    // SAFETY: all vectors are 512-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Rotate every lane, then zero the lanes whose mask bit is clear.
        let rol = _mm512_rolv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, rol, i64x8::ZERO))
    }
}
22131
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi64&expand=4709)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
    // A funnel shift left with both halves equal to `a` is a left rotate.
    // The per-lane count is reduced mod 64 (`& 63`), matching `vprolvq`.
    // SAFETY: all vectors are 256-bit; `transmute` between same-sized SIMD
    // vectors is sound.
    unsafe {
        transmute(simd_funnel_shl(
            a.as_u64x4(),
            a.as_u64x4(),
            simd_and(b.as_u64x4(), u64x4::splat(63)),
        ))
    }
}
22149
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi64&expand=4707)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    // SAFETY: all vectors are 256-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Rotate every lane, then keep the rotated lane where `k` is set and
        // copy the lane from `src` where it is clear.
        let rol = _mm256_rolv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, rol, src.as_i64x4()))
    }
}
22164
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi64&expand=4708)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    // SAFETY: all vectors are 256-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Rotate every lane, then zero the lanes whose mask bit is clear.
        let rol = _mm256_rolv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, rol, i64x4::ZERO))
    }
}
22179
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi64&expand=4706)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
    // A funnel shift left with both halves equal to `a` is a left rotate.
    // The per-lane count is reduced mod 64 (`& 63`), matching `vprolvq`.
    // SAFETY: all vectors are 128-bit; `transmute` between same-sized SIMD
    // vectors is sound.
    unsafe {
        transmute(simd_funnel_shl(
            a.as_u64x2(),
            a.as_u64x2(),
            simd_and(b.as_u64x2(), u64x2::splat(63)),
        ))
    }
}
22197
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi64&expand=4704)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: all vectors are 128-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Rotate every lane, then keep the rotated lane where `k` is set and
        // copy the lane from `src` where it is clear.
        let rol = _mm_rolv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, rol, src.as_i64x2()))
    }
}
22212
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi64&expand=4705)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: all vectors are 128-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Rotate every lane, then zero the lanes whose mask bit is clear.
        let rol = _mm_rolv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, rol, i64x2::ZERO))
    }
}
22227
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi64&expand=4748)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
    // A funnel shift right with both halves equal to `a` is a right rotate.
    // The per-lane count is reduced mod 64 (`& 63`), matching `vprorvq`.
    // SAFETY: all vectors are 512-bit; `transmute` between same-sized SIMD
    // vectors is sound.
    unsafe {
        transmute(simd_funnel_shr(
            a.as_u64x8(),
            a.as_u64x8(),
            simd_and(b.as_u64x8(), u64x8::splat(63)),
        ))
    }
}
22245
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi64&expand=4746)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    // SAFETY: all vectors are 512-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Rotate every lane, then keep the rotated lane where `k` is set and
        // copy the lane from `src` where it is clear.
        let ror = _mm512_rorv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
    }
}
22260
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi64&expand=4747)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    // SAFETY: all vectors are 512-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Rotate every lane, then zero the lanes whose mask bit is clear.
        let ror = _mm512_rorv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, ror, i64x8::ZERO))
    }
}
22275
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi64&expand=4745)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
    // A funnel shift right with both halves equal to `a` is a right rotate.
    // The per-lane count is reduced mod 64 (`& 63`), matching `vprorvq`.
    // SAFETY: all vectors are 256-bit; `transmute` between same-sized SIMD
    // vectors is sound.
    unsafe {
        transmute(simd_funnel_shr(
            a.as_u64x4(),
            a.as_u64x4(),
            simd_and(b.as_u64x4(), u64x4::splat(63)),
        ))
    }
}
22293
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi64&expand=4743)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    // SAFETY: all vectors are 256-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Rotate every lane, then keep the rotated lane where `k` is set and
        // copy the lane from `src` where it is clear.
        let ror = _mm256_rorv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, ror, src.as_i64x4()))
    }
}
22308
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi64&expand=4744)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    // SAFETY: all vectors are 256-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Rotate every lane, then zero the lanes whose mask bit is clear.
        let ror = _mm256_rorv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, ror, i64x4::ZERO))
    }
}
22323
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi64&expand=4742)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
    // A funnel shift right with both halves equal to `a` is a right rotate.
    // The per-lane count is reduced mod 64 (`& 63`), matching `vprorvq`.
    // SAFETY: all vectors are 128-bit; `transmute` between same-sized SIMD
    // vectors is sound.
    unsafe {
        transmute(simd_funnel_shr(
            a.as_u64x2(),
            a.as_u64x2(),
            simd_and(b.as_u64x2(), u64x2::splat(63)),
        ))
    }
}
22341
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi64&expand=4740)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: all vectors are 128-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Rotate every lane, then keep the rotated lane where `k` is set and
        // copy the lane from `src` where it is clear.
        let ror = _mm_rorv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, ror, src.as_i64x2()))
    }
}
22356
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi64&expand=4741)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: all vectors are 128-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Rotate every lane, then zero the lanes whose mask bit is clear.
        let ror = _mm_rorv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, ror, i64x2::ZERO))
    }
}
22371
/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi32&expand=5342)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
    // SAFETY: the oversized counts are masked out below, so `simd_shl` never
    // sees a shift amount >= 32; vector casts are between same-sized types.
    unsafe {
        let count = count.as_u32x16();
        // x86 semantics: a per-lane count >= 32 yields 0, but `simd_shl` is
        // UB for oversized counts. Clamp those counts to 0 first, then zero
        // the affected result lanes.
        let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
        let count = simd_select(no_overflow, count, u32x16::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u32x16(), count), u32x16::ZERO).as_m512i()
    }
}
22388
/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi32&expand=5340)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_sllv_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    // SAFETY: all vectors are 512-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Shift every lane, then keep the shifted lane where `k` is set and
        // copy the lane from `src` where it is clear.
        let shf = _mm512_sllv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
    }
}
22408
/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi32&expand=5341)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
    // SAFETY: all vectors are 512-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Shift every lane, then zero the lanes whose mask bit is clear.
        let shf = _mm512_sllv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}
22423
/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi32&expand=5337)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_sllv_epi32(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    // SAFETY: all vectors are 256-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Shift every lane (AVX2 intrinsic already zeroes lanes with counts
        // >= 32), then blend with `src` under the writemask.
        let shf = _mm256_sllv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
    }
}
22443
/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi32&expand=5338)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    // SAFETY: all vectors are 256-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Shift every lane, then zero the lanes whose mask bit is clear.
        let shf = _mm256_sllv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}
22458
/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi32&expand=5334)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_sllv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    // SAFETY: all vectors are 128-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Shift every lane, then blend with `src` under the writemask.
        let shf = _mm_sllv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
    }
}
22473
/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi32&expand=5335)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    // SAFETY: all vectors are 128-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Shift every lane, then zero the lanes whose mask bit is clear.
        let shf = _mm_sllv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}
22488
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi32&expand=5554)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
    // SAFETY: the oversized counts are masked out below, so `simd_shr` never
    // sees a shift amount >= 32; vector casts are between same-sized types.
    unsafe {
        let count = count.as_u32x16();
        // x86 semantics: a per-lane count >= 32 yields 0, but `simd_shr` is
        // UB for oversized counts. Clamp those counts to 0 first, then zero
        // the affected result lanes. The shift is on the `u32` view, so it is
        // a logical (zero-filling) right shift.
        let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
        let count = simd_select(no_overflow, count, u32x16::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u32x16(), count), u32x16::ZERO).as_m512i()
    }
}
22505
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi32&expand=5552)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_srlv_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    // SAFETY: all vectors are 512-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Shift every lane, then keep the shifted lane where `k` is set and
        // copy the lane from `src` where it is clear.
        let shf = _mm512_srlv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
    }
}
22525
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi32&expand=5553)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
    // SAFETY: all vectors are 512-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Shift every lane, then zero the lanes whose mask bit is clear.
        let shf = _mm512_srlv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}
22540
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi32&expand=5549)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_srlv_epi32(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    // SAFETY: all vectors are 256-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Shift every lane (AVX2 intrinsic already zeroes lanes with counts
        // >= 32), then blend with `src` under the writemask.
        let shf = _mm256_srlv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
    }
}
22560
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi32&expand=5550)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    // SAFETY: all vectors are 256-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Shift every lane, then zero the lanes whose mask bit is clear.
        let shf = _mm256_srlv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}
22575
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi32&expand=5546)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_srlv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    // SAFETY: all vectors are 128-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Shift every lane, then blend with `src` under the writemask.
        let shf = _mm_srlv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
    }
}
22590
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi32&expand=5547)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    // SAFETY: all vectors are 128-bit; `transmute` between same-sized SIMD
    // vectors is sound, and the mask select operates lane-wise.
    unsafe {
        // Shift every lane, then zero the lanes whose mask bit is clear.
        let shf = _mm_srlv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}
22605
/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi64&expand=5351)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
    // SAFETY: the oversized counts are masked out below, so `simd_shl` never
    // sees a shift amount >= 64; vector casts are between same-sized types.
    unsafe {
        let count = count.as_u64x8();
        // x86 semantics: a per-lane count >= 64 yields 0, but `simd_shl` is
        // UB for oversized counts. Clamp those counts to 0 first, then zero
        // the affected result lanes.
        let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
        let count = simd_select(no_overflow, count, u64x8::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u64x8(), count), u64x8::ZERO).as_m512i()
    }
}
22622
22623/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22624///
22625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi64&expand=5349)
22626#[inline]
22627#[target_feature(enable = "avx512f")]
22628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22629#[cfg_attr(test, assert_instr(vpsllvq))]
22630#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22631pub const fn _mm512_mask_sllv_epi64(
22632    src: __m512i,
22633    k: __mmask8,
22634    a: __m512i,
22635    count: __m512i,
22636) -> __m512i {
22637    unsafe {
22638        let shf = _mm512_sllv_epi64(a, count).as_i64x8();
22639        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
22640    }
22641}
22642
22643/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22644///
22645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi64&expand=5350)
22646#[inline]
22647#[target_feature(enable = "avx512f")]
22648#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22649#[cfg_attr(test, assert_instr(vpsllvq))]
22650#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22651pub const fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
22652    unsafe {
22653        let shf = _mm512_sllv_epi64(a, count).as_i64x8();
22654        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
22655    }
22656}
22657
22658/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22659///
22660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi64&expand=5346)
22661#[inline]
22662#[target_feature(enable = "avx512f,avx512vl")]
22663#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22664#[cfg_attr(test, assert_instr(vpsllvq))]
22665#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22666pub const fn _mm256_mask_sllv_epi64(
22667    src: __m256i,
22668    k: __mmask8,
22669    a: __m256i,
22670    count: __m256i,
22671) -> __m256i {
22672    unsafe {
22673        let shf = _mm256_sllv_epi64(a, count).as_i64x4();
22674        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
22675    }
22676}
22677
22678/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22679///
22680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi64&expand=5347)
22681#[inline]
22682#[target_feature(enable = "avx512f,avx512vl")]
22683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22684#[cfg_attr(test, assert_instr(vpsllvq))]
22685#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22686pub const fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22687    unsafe {
22688        let shf = _mm256_sllv_epi64(a, count).as_i64x4();
22689        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
22690    }
22691}
22692
22693/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22694///
22695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi64&expand=5343)
22696#[inline]
22697#[target_feature(enable = "avx512f,avx512vl")]
22698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22699#[cfg_attr(test, assert_instr(vpsllvq))]
22700#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22701pub const fn _mm_mask_sllv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22702    unsafe {
22703        let shf = _mm_sllv_epi64(a, count).as_i64x2();
22704        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
22705    }
22706}
22707
22708/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22709///
22710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi64&expand=5344)
22711#[inline]
22712#[target_feature(enable = "avx512f,avx512vl")]
22713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22714#[cfg_attr(test, assert_instr(vpsllvq))]
22715#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22716pub const fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22717    unsafe {
22718        let shf = _mm_sllv_epi64(a, count).as_i64x2();
22719        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
22720    }
22721}
22722
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi64&expand=5563)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u64x8();
        // Lanes with a shift amount >= 64 must yield 0 (as vpsrlvq does), but an
        // oversized simd_shr is undefined; flag those lanes up front.
        let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
        // Clamp oversized counts to 0 so the shift below is always well-defined.
        let count = simd_select(no_overflow, count, u64x8::ZERO);
        // Unsigned (logical) shift, then force the flagged lanes to 0.
        simd_select(no_overflow, simd_shr(a.as_u64x8(), count), u64x8::ZERO).as_m512i()
    }
}
22739
22740/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22741///
22742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi64&expand=5561)
22743#[inline]
22744#[target_feature(enable = "avx512f")]
22745#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22746#[cfg_attr(test, assert_instr(vpsrlvq))]
22747#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22748pub const fn _mm512_mask_srlv_epi64(
22749    src: __m512i,
22750    k: __mmask8,
22751    a: __m512i,
22752    count: __m512i,
22753) -> __m512i {
22754    unsafe {
22755        let shf = _mm512_srlv_epi64(a, count).as_i64x8();
22756        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
22757    }
22758}
22759
22760/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22761///
22762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi64&expand=5562)
22763#[inline]
22764#[target_feature(enable = "avx512f")]
22765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22766#[cfg_attr(test, assert_instr(vpsrlvq))]
22767#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22768pub const fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
22769    unsafe {
22770        let shf = _mm512_srlv_epi64(a, count).as_i64x8();
22771        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
22772    }
22773}
22774
22775/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22776///
22777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi64&expand=5558)
22778#[inline]
22779#[target_feature(enable = "avx512f,avx512vl")]
22780#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22781#[cfg_attr(test, assert_instr(vpsrlvq))]
22782#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22783pub const fn _mm256_mask_srlv_epi64(
22784    src: __m256i,
22785    k: __mmask8,
22786    a: __m256i,
22787    count: __m256i,
22788) -> __m256i {
22789    unsafe {
22790        let shf = _mm256_srlv_epi64(a, count).as_i64x4();
22791        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
22792    }
22793}
22794
22795/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22796///
22797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi64&expand=5559)
22798#[inline]
22799#[target_feature(enable = "avx512f,avx512vl")]
22800#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22801#[cfg_attr(test, assert_instr(vpsrlvq))]
22802#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22803pub const fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22804    unsafe {
22805        let shf = _mm256_srlv_epi64(a, count).as_i64x4();
22806        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
22807    }
22808}
22809
22810/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22811///
22812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi64&expand=5555)
22813#[inline]
22814#[target_feature(enable = "avx512f,avx512vl")]
22815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22816#[cfg_attr(test, assert_instr(vpsrlvq))]
22817#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22818pub const fn _mm_mask_srlv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22819    unsafe {
22820        let shf = _mm_srlv_epi64(a, count).as_i64x2();
22821        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
22822    }
22823}
22824
22825/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22826///
22827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi64&expand=5556)
22828#[inline]
22829#[target_feature(enable = "avx512f,avx512vl")]
22830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22831#[cfg_attr(test, assert_instr(vpsrlvq))]
22832#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22833pub const fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22834    unsafe {
22835        let shf = _mm_srlv_epi64(a, count).as_i64x2();
22836        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
22837    }
22838}
22839
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_ps&expand=4170)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // All four 128-bit lanes reuse the same four 2-bit selectors from MASK;
        // lane n offsets its selectors by 4*n so each lane shuffles only its
        // own four elements.
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11),
                ((MASK as u32 >> 6) & 0b11),
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
                (MASK as u32 & 0b11) + 8,
                ((MASK as u32 >> 2) & 0b11) + 8,
                ((MASK as u32 >> 4) & 0b11) + 8,
                ((MASK as u32 >> 6) & 0b11) + 8,
                (MASK as u32 & 0b11) + 12,
                ((MASK as u32 >> 2) & 0b11) + 12,
                ((MASK as u32 >> 4) & 0b11) + 12,
                ((MASK as u32 >> 6) & 0b11) + 12,
            ],
        )
    }
}
22876
22877/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22878///
22879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_ps&expand=4168)
22880#[inline]
22881#[target_feature(enable = "avx512f")]
22882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22883#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22884#[rustc_legacy_const_generics(3)]
22885#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22886pub const fn _mm512_mask_permute_ps<const MASK: i32>(
22887    src: __m512,
22888    k: __mmask16,
22889    a: __m512,
22890) -> __m512 {
22891    unsafe {
22892        static_assert_uimm_bits!(MASK, 8);
22893        let r = _mm512_permute_ps::<MASK>(a);
22894        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
22895    }
22896}
22897
22898/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22899///
22900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_ps&expand=4169)
22901#[inline]
22902#[target_feature(enable = "avx512f")]
22903#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22904#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22905#[rustc_legacy_const_generics(2)]
22906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22907pub const fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m512 {
22908    unsafe {
22909        static_assert_uimm_bits!(MASK, 8);
22910        let r = _mm512_permute_ps::<MASK>(a);
22911        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
22912    }
22913}
22914
22915/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22916///
22917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_ps&expand=4165)
22918#[inline]
22919#[target_feature(enable = "avx512f,avx512vl")]
22920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22921#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22922#[rustc_legacy_const_generics(3)]
22923#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22924pub const fn _mm256_mask_permute_ps<const MASK: i32>(
22925    src: __m256,
22926    k: __mmask8,
22927    a: __m256,
22928) -> __m256 {
22929    unsafe {
22930        let r = _mm256_permute_ps::<MASK>(a);
22931        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
22932    }
22933}
22934
22935/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22936///
22937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_ps&expand=4166)
22938#[inline]
22939#[target_feature(enable = "avx512f,avx512vl")]
22940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22941#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22942#[rustc_legacy_const_generics(2)]
22943#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22944pub const fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m256 {
22945    unsafe {
22946        let r = _mm256_permute_ps::<MASK>(a);
22947        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
22948    }
22949}
22950
22951/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22952///
22953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_ps&expand=4162)
22954#[inline]
22955#[target_feature(enable = "avx512f,avx512vl")]
22956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22957#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22958#[rustc_legacy_const_generics(3)]
22959#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22960pub const fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
22961    unsafe {
22962        let r = _mm_permute_ps::<MASK>(a);
22963        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
22964    }
22965}
22966
22967/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22968///
22969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_ps&expand=4163)
22970#[inline]
22971#[target_feature(enable = "avx512f,avx512vl")]
22972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22973#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22974#[rustc_legacy_const_generics(2)]
22975#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22976pub const fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 {
22977    unsafe {
22978        let r = _mm_permute_ps::<MASK>(a);
22979        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
22980    }
22981}
22982
/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_pd&expand=4161)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Each 128-bit lane holds two f64s; MASK supplies one selector bit per
        // element (bits 2n and 2n+1 control lane n), and lane n adds offset 2*n
        // so it only selects within itself.
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b1,
                ((MASK as u32 >> 1) & 0b1),
                ((MASK as u32 >> 2) & 0b1) + 2,
                ((MASK as u32 >> 3) & 0b1) + 2,
                ((MASK as u32 >> 4) & 0b1) + 4,
                ((MASK as u32 >> 5) & 0b1) + 4,
                ((MASK as u32 >> 6) & 0b1) + 6,
                ((MASK as u32 >> 7) & 0b1) + 6,
            ],
        )
    }
}
23011
23012/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23013///
23014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_pd&expand=4159)
23015#[inline]
23016#[target_feature(enable = "avx512f")]
23017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23018#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
23019#[rustc_legacy_const_generics(3)]
23020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23021pub const fn _mm512_mask_permute_pd<const MASK: i32>(
23022    src: __m512d,
23023    k: __mmask8,
23024    a: __m512d,
23025) -> __m512d {
23026    unsafe {
23027        static_assert_uimm_bits!(MASK, 8);
23028        let r = _mm512_permute_pd::<MASK>(a);
23029        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
23030    }
23031}
23032
23033/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23034///
23035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_pd&expand=4160)
23036#[inline]
23037#[target_feature(enable = "avx512f")]
23038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23039#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
23040#[rustc_legacy_const_generics(2)]
23041#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23042pub const fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
23043    unsafe {
23044        static_assert_uimm_bits!(MASK, 8);
23045        let r = _mm512_permute_pd::<MASK>(a);
23046        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
23047    }
23048}
23049
23050/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23051///
23052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_pd&expand=4156)
23053#[inline]
23054#[target_feature(enable = "avx512f,avx512vl")]
23055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23056#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
23057#[rustc_legacy_const_generics(3)]
23058#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23059pub const fn _mm256_mask_permute_pd<const MASK: i32>(
23060    src: __m256d,
23061    k: __mmask8,
23062    a: __m256d,
23063) -> __m256d {
23064    unsafe {
23065        static_assert_uimm_bits!(MASK, 4);
23066        let r = _mm256_permute_pd::<MASK>(a);
23067        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
23068    }
23069}
23070
23071/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23072///
23073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_pd&expand=4157)
23074#[inline]
23075#[target_feature(enable = "avx512f,avx512vl")]
23076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23077#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
23078#[rustc_legacy_const_generics(2)]
23079#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23080pub const fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
23081    unsafe {
23082        static_assert_uimm_bits!(MASK, 4);
23083        let r = _mm256_permute_pd::<MASK>(a);
23084        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
23085    }
23086}
23087
23088/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23089///
23090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_pd&expand=4153)
23091#[inline]
23092#[target_feature(enable = "avx512f,avx512vl")]
23093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23094#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
23095#[rustc_legacy_const_generics(3)]
23096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23097pub const fn _mm_mask_permute_pd<const IMM2: i32>(
23098    src: __m128d,
23099    k: __mmask8,
23100    a: __m128d,
23101) -> __m128d {
23102    unsafe {
23103        static_assert_uimm_bits!(IMM2, 2);
23104        let r = _mm_permute_pd::<IMM2>(a);
23105        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
23106    }
23107}
23108
23109/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23110///
23111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_pd&expand=4154)
23112#[inline]
23113#[target_feature(enable = "avx512f,avx512vl")]
23114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23115#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
23116#[rustc_legacy_const_generics(2)]
23117#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23118pub const fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d {
23119    unsafe {
23120        static_assert_uimm_bits!(IMM2, 2);
23121        let r = _mm_permute_pd::<IMM2>(a);
23122        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
23123    }
23124}
23125
/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_epi64&expand=4208)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Both 256-bit halves reuse the same four 2-bit selectors from MASK;
        // the upper half offsets its indices by 4 so each half only selects
        // among its own four quadwords.
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11),
                ((MASK as u32 >> 6) & 0b11),
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
            ],
        )
    }
}
23154
23155/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23156///
23157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_epi64&expand=4206)
23158#[inline]
23159#[target_feature(enable = "avx512f")]
23160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23161#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
23162#[rustc_legacy_const_generics(3)]
23163#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23164pub const fn _mm512_mask_permutex_epi64<const MASK: i32>(
23165    src: __m512i,
23166    k: __mmask8,
23167    a: __m512i,
23168) -> __m512i {
23169    unsafe {
23170        static_assert_uimm_bits!(MASK, 8);
23171        let r = _mm512_permutex_epi64::<MASK>(a);
23172        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
23173    }
23174}
23175
23176/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23177///
23178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_epi64&expand=4207)
23179#[inline]
23180#[target_feature(enable = "avx512f")]
23181#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23182#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
23183#[rustc_legacy_const_generics(2)]
23184#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23185pub const fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) -> __m512i {
23186    unsafe {
23187        static_assert_uimm_bits!(MASK, 8);
23188        let r = _mm512_permutex_epi64::<MASK>(a);
23189        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
23190    }
23191}
23192
/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_epi64&expand=4205)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Four 2-bit selectors in MASK each pick one of the vector's four
        // 64-bit elements (the whole 256-bit vector is a single lane here).
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11),
                ((MASK as u32 >> 6) & 0b11),
            ],
        )
    }
}
23217
23218/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23219///
23220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_epi64&expand=4203)
23221#[inline]
23222#[target_feature(enable = "avx512f,avx512vl")]
23223#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23224#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
23225#[rustc_legacy_const_generics(3)]
23226#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23227pub const fn _mm256_mask_permutex_epi64<const MASK: i32>(
23228    src: __m256i,
23229    k: __mmask8,
23230    a: __m256i,
23231) -> __m256i {
23232    unsafe {
23233        static_assert_uimm_bits!(MASK, 8);
23234        let r = _mm256_permutex_epi64::<MASK>(a);
23235        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
23236    }
23237}
23238
23239/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23240///
23241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_epi64&expand=4204)
23242#[inline]
23243#[target_feature(enable = "avx512f,avx512vl")]
23244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23245#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
23246#[rustc_legacy_const_generics(2)]
23247#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23248pub const fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) -> __m256i {
23249    unsafe {
23250        static_assert_uimm_bits!(MASK, 8);
23251        let r = _mm256_permutex_epi64::<MASK>(a);
23252        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
23253    }
23254}
23255
/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_pd&expand=4214)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Each 2-bit field of `MASK` selects one of the four 64-bit elements
        // within a 256-bit lane. The same selection pattern is applied to both
        // halves of the 512-bit vector: entries 0..=3 index the low lane, and
        // the `+ 4` entries repeat the pattern for the high lane.
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11),
                ((MASK as u32 >> 6) & 0b11),
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
            ],
        )
    }
}
23284
23285/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23286///
23287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_pd&expand=4212)
23288#[inline]
23289#[target_feature(enable = "avx512f")]
23290#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23291#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
23292#[rustc_legacy_const_generics(3)]
23293#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23294pub const fn _mm512_mask_permutex_pd<const MASK: i32>(
23295    src: __m512d,
23296    k: __mmask8,
23297    a: __m512d,
23298) -> __m512d {
23299    unsafe {
23300        let r = _mm512_permutex_pd::<MASK>(a);
23301        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
23302    }
23303}
23304
23305/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23306///
23307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_pd&expand=4213)
23308#[inline]
23309#[target_feature(enable = "avx512f")]
23310#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23311#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
23312#[rustc_legacy_const_generics(2)]
23313#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23314pub const fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
23315    unsafe {
23316        let r = _mm512_permutex_pd::<MASK>(a);
23317        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
23318    }
23319}
23320
23321/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
23322///
23323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_pd&expand=4211)
23324#[inline]
23325#[target_feature(enable = "avx512f,avx512vl")]
23326#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23327#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
23328#[rustc_legacy_const_generics(1)]
23329#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23330pub const fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d {
23331    unsafe {
23332        static_assert_uimm_bits!(MASK, 8);
23333        simd_shuffle!(
23334            a,
23335            a,
23336            [
23337                MASK as u32 & 0b11,
23338                (MASK as u32 >> 2) & 0b11,
23339                ((MASK as u32 >> 4) & 0b11),
23340                ((MASK as u32 >> 6) & 0b11),
23341            ],
23342        )
23343    }
23344}
23345
23346/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23347///
23348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_pd&expand=4209)
23349#[inline]
23350#[target_feature(enable = "avx512f,avx512vl")]
23351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23352#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
23353#[rustc_legacy_const_generics(3)]
23354#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23355pub const fn _mm256_mask_permutex_pd<const MASK: i32>(
23356    src: __m256d,
23357    k: __mmask8,
23358    a: __m256d,
23359) -> __m256d {
23360    unsafe {
23361        static_assert_uimm_bits!(MASK, 8);
23362        let r = _mm256_permutex_pd::<MASK>(a);
23363        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
23364    }
23365}
23366
23367/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23368///
23369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_pd&expand=4210)
23370#[inline]
23371#[target_feature(enable = "avx512f,avx512vl")]
23372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23373#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
23374#[rustc_legacy_const_generics(2)]
23375#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23376pub const fn _mm256_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
23377    unsafe {
23378        static_assert_uimm_bits!(MASK, 8);
23379        let r = _mm256_permutex_pd::<MASK>(a);
23380        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
23381    }
23382}
23383
/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_epi32&expand=4182)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
pub fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
    // Note the argument swap: this API takes (indices, data) but the
    // underlying `vpermd` intrinsic takes (data, indices).
    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
}
23394
23395/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
23396///
23397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_epi32&expand=4181)
23398#[inline]
23399#[target_feature(enable = "avx512f")]
23400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23401#[cfg_attr(test, assert_instr(vpermd))]
23402pub fn _mm512_mask_permutevar_epi32(
23403    src: __m512i,
23404    k: __mmask16,
23405    idx: __m512i,
23406    a: __m512i,
23407) -> __m512i {
23408    unsafe {
23409        let permute = _mm512_permutevar_epi32(idx, a).as_i32x16();
23410        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
23411    }
23412}
23413
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_ps&expand=4200)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
    // Thin wrapper over the compiler intrinsic that lowers to `vpermilps`
    // (per-128-bit-lane variable shuffle, per the doc comment above).
    unsafe { transmute(vpermilps(a.as_f32x16(), b.as_i32x16())) }
}
23424
23425/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23426///
23427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_ps&expand=4198)
23428#[inline]
23429#[target_feature(enable = "avx512f")]
23430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23431#[cfg_attr(test, assert_instr(vpermilps))]
23432pub fn _mm512_mask_permutevar_ps(src: __m512, k: __mmask16, a: __m512, b: __m512i) -> __m512 {
23433    unsafe {
23434        let permute = _mm512_permutevar_ps(a, b).as_f32x16();
23435        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
23436    }
23437}
23438
23439/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23440///
23441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_ps&expand=4199)
23442#[inline]
23443#[target_feature(enable = "avx512f")]
23444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23445#[cfg_attr(test, assert_instr(vpermilps))]
23446pub fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
23447    unsafe {
23448        let permute = _mm512_permutevar_ps(a, b).as_f32x16();
23449        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
23450    }
23451}
23452
23453/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23454///
23455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm256_mask_permutevar_ps&expand=4195)
23456#[inline]
23457#[target_feature(enable = "avx512f,avx512vl")]
23458#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23459#[cfg_attr(test, assert_instr(vpermilps))]
23460pub fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: __m256i) -> __m256 {
23461    unsafe {
23462        let permute = _mm256_permutevar_ps(a, b).as_f32x8();
23463        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
23464    }
23465}
23466
23467/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23468///
23469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_ps&expand=4196)
23470#[inline]
23471#[target_feature(enable = "avx512f,avx512vl")]
23472#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23473#[cfg_attr(test, assert_instr(vpermilps))]
23474pub fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 {
23475    unsafe {
23476        let permute = _mm256_permutevar_ps(a, b).as_f32x8();
23477        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23478    }
23479}
23480
23481/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23482///
23483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_ps&expand=4192)
23484#[inline]
23485#[target_feature(enable = "avx512f,avx512vl")]
23486#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23487#[cfg_attr(test, assert_instr(vpermilps))]
23488pub fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m128i) -> __m128 {
23489    unsafe {
23490        let permute = _mm_permutevar_ps(a, b).as_f32x4();
23491        transmute(simd_select_bitmask(k, permute, src.as_f32x4()))
23492    }
23493}
23494
23495/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23496///
23497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_ps&expand=4193)
23498#[inline]
23499#[target_feature(enable = "avx512f,avx512vl")]
23500#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23501#[cfg_attr(test, assert_instr(vpermilps))]
23502pub fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 {
23503    unsafe {
23504        let permute = _mm_permutevar_ps(a, b).as_f32x4();
23505        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
23506    }
23507}
23508
/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_pd&expand=4191)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
    // Thin wrapper over the compiler intrinsic that lowers to `vpermilpd`
    // (per-128-bit-lane variable shuffle, per the doc comment above).
    unsafe { transmute(vpermilpd(a.as_f64x8(), b.as_i64x8())) }
}
23519
23520/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23521///
23522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_pd&expand=4189)
23523#[inline]
23524#[target_feature(enable = "avx512f")]
23525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23526#[cfg_attr(test, assert_instr(vpermilpd))]
23527pub fn _mm512_mask_permutevar_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
23528    unsafe {
23529        let permute = _mm512_permutevar_pd(a, b).as_f64x8();
23530        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
23531    }
23532}
23533
23534/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23535///
23536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_pd&expand=4190)
23537#[inline]
23538#[target_feature(enable = "avx512f")]
23539#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23540#[cfg_attr(test, assert_instr(vpermilpd))]
23541pub fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
23542    unsafe {
23543        let permute = _mm512_permutevar_pd(a, b).as_f64x8();
23544        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23545    }
23546}
23547
23548/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23549///
23550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_pd&expand=4186)
23551#[inline]
23552#[target_feature(enable = "avx512f,avx512vl")]
23553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23554#[cfg_attr(test, assert_instr(vpermilpd))]
23555pub fn _mm256_mask_permutevar_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
23556    unsafe {
23557        let permute = _mm256_permutevar_pd(a, b).as_f64x4();
23558        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
23559    }
23560}
23561
23562/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23563///
23564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_pd&expand=4187)
23565#[inline]
23566#[target_feature(enable = "avx512f,avx512vl")]
23567#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23568#[cfg_attr(test, assert_instr(vpermilpd))]
23569pub fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
23570    unsafe {
23571        let permute = _mm256_permutevar_pd(a, b).as_f64x4();
23572        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23573    }
23574}
23575
23576/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23577///
23578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_pd&expand=4183)
23579#[inline]
23580#[target_feature(enable = "avx512f,avx512vl")]
23581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23582#[cfg_attr(test, assert_instr(vpermilpd))]
23583pub fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
23584    unsafe {
23585        let permute = _mm_permutevar_pd(a, b).as_f64x2();
23586        transmute(simd_select_bitmask(k, permute, src.as_f64x2()))
23587    }
23588}
23589
23590/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23591///
23592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_pd&expand=4184)
23593#[inline]
23594#[target_feature(enable = "avx512f,avx512vl")]
23595#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23596#[cfg_attr(test, assert_instr(vpermilpd))]
23597pub fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
23598    unsafe {
23599        let permute = _mm_permutevar_pd(a, b).as_f64x2();
23600        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
23601    }
23602}
23603
/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi32&expand=4301)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
pub fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
    // Note the argument swap: this API takes (indices, data) but the
    // underlying `vpermd` intrinsic takes (data, indices).
    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
}
23614
23615/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23616///
23617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi32&expand=4299)
23618#[inline]
23619#[target_feature(enable = "avx512f")]
23620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23621#[cfg_attr(test, assert_instr(vpermd))]
23622pub fn _mm512_mask_permutexvar_epi32(
23623    src: __m512i,
23624    k: __mmask16,
23625    idx: __m512i,
23626    a: __m512i,
23627) -> __m512i {
23628    unsafe {
23629        let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
23630        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
23631    }
23632}
23633
23634/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23635///
23636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi32&expand=4300)
23637#[inline]
23638#[target_feature(enable = "avx512f")]
23639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23640#[cfg_attr(test, assert_instr(vpermd))]
23641pub fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
23642    unsafe {
23643        let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
23644        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
23645    }
23646}
23647
/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi32&expand=4298)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
pub fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i {
    // Delegates to the AVX2 intrinsic (note the swapped argument order:
    // that API takes (data, indices)), which lowers to the same `vpermd`.
    _mm256_permutevar8x32_epi32(a, idx) // llvm use llvm.x86.avx2.permd
}
23658
23659/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23660///
23661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi32&expand=4296)
23662#[inline]
23663#[target_feature(enable = "avx512f,avx512vl")]
23664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23665#[cfg_attr(test, assert_instr(vpermd))]
23666pub fn _mm256_mask_permutexvar_epi32(
23667    src: __m256i,
23668    k: __mmask8,
23669    idx: __m256i,
23670    a: __m256i,
23671) -> __m256i {
23672    unsafe {
23673        let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
23674        transmute(simd_select_bitmask(k, permute, src.as_i32x8()))
23675    }
23676}
23677
23678/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23679///
23680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi32&expand=4297)
23681#[inline]
23682#[target_feature(enable = "avx512f,avx512vl")]
23683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23684#[cfg_attr(test, assert_instr(vpermd))]
23685pub fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
23686    unsafe {
23687        let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
23688        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
23689    }
23690}
23691
/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi64&expand=4307)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
pub fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
    // Note the argument swap: this API takes (indices, data) but the
    // underlying `vpermq` intrinsic takes (data, indices).
    unsafe { transmute(vpermq(a.as_i64x8(), idx.as_i64x8())) }
}
23702
23703/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23704///
23705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi64&expand=4305)
23706#[inline]
23707#[target_feature(enable = "avx512f")]
23708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23709#[cfg_attr(test, assert_instr(vpermq))]
23710pub fn _mm512_mask_permutexvar_epi64(
23711    src: __m512i,
23712    k: __mmask8,
23713    idx: __m512i,
23714    a: __m512i,
23715) -> __m512i {
23716    unsafe {
23717        let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
23718        transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
23719    }
23720}
23721
23722/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23723///
23724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi64&expand=4306)
23725#[inline]
23726#[target_feature(enable = "avx512f")]
23727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23728#[cfg_attr(test, assert_instr(vpermq))]
23729pub fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
23730    unsafe {
23731        let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
23732        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
23733    }
23734}
23735
/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi64&expand=4304)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
pub fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i {
    // Note the argument swap: this API takes (indices, data) but the
    // underlying 256-bit `vpermq` intrinsic takes (data, indices).
    unsafe { transmute(vpermq256(a.as_i64x4(), idx.as_i64x4())) }
}
23746
23747/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23748///
23749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi64&expand=4302)
23750#[inline]
23751#[target_feature(enable = "avx512f,avx512vl")]
23752#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23753#[cfg_attr(test, assert_instr(vpermq))]
23754pub fn _mm256_mask_permutexvar_epi64(
23755    src: __m256i,
23756    k: __mmask8,
23757    idx: __m256i,
23758    a: __m256i,
23759) -> __m256i {
23760    unsafe {
23761        let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
23762        transmute(simd_select_bitmask(k, permute, src.as_i64x4()))
23763    }
23764}
23765
23766/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23767///
23768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi64&expand=4303)
23769#[inline]
23770#[target_feature(enable = "avx512f,avx512vl")]
23771#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23772#[cfg_attr(test, assert_instr(vpermq))]
23773pub fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
23774    unsafe {
23775        let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
23776        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
23777    }
23778}
23779
/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_ps&expand=4200)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermps))]
pub fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
    // Note the argument swap: this API takes (indices, data) but the
    // underlying `vpermps` intrinsic takes (data, indices).
    unsafe { transmute(vpermps(a.as_f32x16(), idx.as_i32x16())) }
}
23790
23791/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23792///
23793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_ps&expand=4326)
23794#[inline]
23795#[target_feature(enable = "avx512f")]
23796#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23797#[cfg_attr(test, assert_instr(vpermps))]
23798pub fn _mm512_mask_permutexvar_ps(src: __m512, k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
23799    unsafe {
23800        let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
23801        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
23802    }
23803}
23804
23805/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23806///
23807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_ps&expand=4327)
23808#[inline]
23809#[target_feature(enable = "avx512f")]
23810#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23811#[cfg_attr(test, assert_instr(vpermps))]
23812pub fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
23813    unsafe {
23814        let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
23815        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
23816    }
23817}
23818
/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_ps&expand=4325)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermps))]
pub fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 {
    // Same operation as the AVX2 intrinsic (only the argument order differs),
    // so reuse it rather than binding a second FFI declaration.
    _mm256_permutevar8x32_ps(a, idx) //llvm.x86.avx2.permps
}
23829
23830/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23831///
23832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_ps&expand=4323)
23833#[inline]
23834#[target_feature(enable = "avx512f,avx512vl")]
23835#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23836#[cfg_attr(test, assert_instr(vpermps))]
23837pub fn _mm256_mask_permutexvar_ps(src: __m256, k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23838    unsafe {
23839        let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
23840        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
23841    }
23842}
23843
23844/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23845///
23846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_ps&expand=4324)
23847#[inline]
23848#[target_feature(enable = "avx512f,avx512vl")]
23849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23850#[cfg_attr(test, assert_instr(vpermps))]
23851pub fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23852    unsafe {
23853        let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
23854        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23855    }
23856}
23857
/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_pd&expand=4322)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
    // Forwards to the `vpermpd` compiler intrinsic. Note the argument order:
    // data vector first, index vector second (opposite of this fn's signature).
    unsafe { transmute(vpermpd(a.as_f64x8(), idx.as_i64x8())) }
}
23868
23869/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23870///
23871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_pd&expand=4320)
23872#[inline]
23873#[target_feature(enable = "avx512f")]
23874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23875#[cfg_attr(test, assert_instr(vpermpd))]
23876pub fn _mm512_mask_permutexvar_pd(src: __m512d, k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23877    unsafe {
23878        let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
23879        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
23880    }
23881}
23882
23883/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23884///
23885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_pd&expand=4321)
23886#[inline]
23887#[target_feature(enable = "avx512f")]
23888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23889#[cfg_attr(test, assert_instr(vpermpd))]
23890pub fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23891    unsafe {
23892        let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
23893        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23894    }
23895}
23896
/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_pd&expand=4319)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d {
    // Forwards to the 256-bit `vpermpd` compiler intrinsic. Note the argument
    // order: data vector first, index vector second.
    unsafe { transmute(vpermpd256(a.as_f64x4(), idx.as_i64x4())) }
}
23907
23908/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23909///
23910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_pd&expand=4317)
23911#[inline]
23912#[target_feature(enable = "avx512f,avx512vl")]
23913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23914#[cfg_attr(test, assert_instr(vpermpd))]
23915pub fn _mm256_mask_permutexvar_pd(src: __m256d, k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23916    unsafe {
23917        let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
23918        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
23919    }
23920}
23921
23922/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23923///
23924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_pd&expand=4318)
23925#[inline]
23926#[target_feature(enable = "avx512f,avx512vl")]
23927#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23928#[cfg_attr(test, assert_instr(vpermpd))]
23929pub fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23930    unsafe {
23931        let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
23932        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23933    }
23934}
23935
/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi32&expand=4238)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
pub fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    // Forwards to the vpermi2d compiler intrinsic (argument order: a, idx, b).
    // LLVM may emit either vpermi2d or vpermt2d, hence the loose assert above.
    unsafe { transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16())) }
}
23946
23947/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23948///
23949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi32&expand=4235)
23950#[inline]
23951#[target_feature(enable = "avx512f")]
23952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23953#[cfg_attr(test, assert_instr(vpermt2d))]
23954pub fn _mm512_mask_permutex2var_epi32(
23955    a: __m512i,
23956    k: __mmask16,
23957    idx: __m512i,
23958    b: __m512i,
23959) -> __m512i {
23960    unsafe {
23961        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23962        transmute(simd_select_bitmask(k, permute, a.as_i32x16()))
23963    }
23964}
23965
23966/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23967///
23968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi32&expand=4237)
23969#[inline]
23970#[target_feature(enable = "avx512f")]
23971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23972#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23973pub fn _mm512_maskz_permutex2var_epi32(
23974    k: __mmask16,
23975    a: __m512i,
23976    idx: __m512i,
23977    b: __m512i,
23978) -> __m512i {
23979    unsafe {
23980        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23981        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
23982    }
23983}
23984
23985/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23986///
23987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi32&expand=4236)
23988#[inline]
23989#[target_feature(enable = "avx512f")]
23990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23991#[cfg_attr(test, assert_instr(vpermi2d))]
23992pub fn _mm512_mask2_permutex2var_epi32(
23993    a: __m512i,
23994    idx: __m512i,
23995    k: __mmask16,
23996    b: __m512i,
23997) -> __m512i {
23998    unsafe {
23999        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
24000        transmute(simd_select_bitmask(k, permute, idx.as_i32x16()))
24001    }
24002}
24003
/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi32&expand=4234)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
pub fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    // Forwards to the 256-bit vpermi2d compiler intrinsic (order: a, idx, b).
    // LLVM may emit either vpermi2d or vpermt2d, hence the loose assert above.
    unsafe { transmute(vpermi2d256(a.as_i32x8(), idx.as_i32x8(), b.as_i32x8())) }
}
24014
24015/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24016///
24017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi32&expand=4231)
24018#[inline]
24019#[target_feature(enable = "avx512f,avx512vl")]
24020#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24021#[cfg_attr(test, assert_instr(vpermt2d))]
24022pub fn _mm256_mask_permutex2var_epi32(
24023    a: __m256i,
24024    k: __mmask8,
24025    idx: __m256i,
24026    b: __m256i,
24027) -> __m256i {
24028    unsafe {
24029        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
24030        transmute(simd_select_bitmask(k, permute, a.as_i32x8()))
24031    }
24032}
24033
24034/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24035///
24036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi32&expand=4233)
24037#[inline]
24038#[target_feature(enable = "avx512f,avx512vl")]
24039#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24040#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
24041pub fn _mm256_maskz_permutex2var_epi32(
24042    k: __mmask8,
24043    a: __m256i,
24044    idx: __m256i,
24045    b: __m256i,
24046) -> __m256i {
24047    unsafe {
24048        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
24049        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
24050    }
24051}
24052
24053/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24054///
24055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi32&expand=4232)
24056#[inline]
24057#[target_feature(enable = "avx512f,avx512vl")]
24058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24059#[cfg_attr(test, assert_instr(vpermi2d))]
24060pub fn _mm256_mask2_permutex2var_epi32(
24061    a: __m256i,
24062    idx: __m256i,
24063    k: __mmask8,
24064    b: __m256i,
24065) -> __m256i {
24066    unsafe {
24067        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
24068        transmute(simd_select_bitmask(k, permute, idx.as_i32x8()))
24069    }
24070}
24071
/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi32&expand=4230)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
pub fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    // Forwards to the 128-bit vpermi2d compiler intrinsic (order: a, idx, b).
    // LLVM may emit either vpermi2d or vpermt2d, hence the loose assert above.
    unsafe { transmute(vpermi2d128(a.as_i32x4(), idx.as_i32x4(), b.as_i32x4())) }
}
24082
24083/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24084///
24085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi32&expand=4227)
24086#[inline]
24087#[target_feature(enable = "avx512f,avx512vl")]
24088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24089#[cfg_attr(test, assert_instr(vpermt2d))]
24090pub fn _mm_mask_permutex2var_epi32(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
24091    unsafe {
24092        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
24093        transmute(simd_select_bitmask(k, permute, a.as_i32x4()))
24094    }
24095}
24096
24097/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24098///
24099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi32&expand=4229)
24100#[inline]
24101#[target_feature(enable = "avx512f,avx512vl")]
24102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24103#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
24104pub fn _mm_maskz_permutex2var_epi32(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
24105    unsafe {
24106        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
24107        transmute(simd_select_bitmask(k, permute, i32x4::ZERO))
24108    }
24109}
24110
24111/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24112///
24113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi32&expand=4228)
24114#[inline]
24115#[target_feature(enable = "avx512f,avx512vl")]
24116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24117#[cfg_attr(test, assert_instr(vpermi2d))]
24118pub fn _mm_mask2_permutex2var_epi32(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
24119    unsafe {
24120        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
24121        transmute(simd_select_bitmask(k, permute, idx.as_i32x4()))
24122    }
24123}
24124
/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi64&expand=4250)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
pub fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    // Forwards to the vpermi2q compiler intrinsic (argument order: a, idx, b).
    // LLVM may emit either vpermi2q or vpermt2q, hence the loose assert above.
    unsafe { transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8())) }
}
24135
24136/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24137///
24138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi64&expand=4247)
24139#[inline]
24140#[target_feature(enable = "avx512f")]
24141#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24142#[cfg_attr(test, assert_instr(vpermt2q))]
24143pub fn _mm512_mask_permutex2var_epi64(
24144    a: __m512i,
24145    k: __mmask8,
24146    idx: __m512i,
24147    b: __m512i,
24148) -> __m512i {
24149    unsafe {
24150        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
24151        transmute(simd_select_bitmask(k, permute, a.as_i64x8()))
24152    }
24153}
24154
24155/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24156///
24157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi64&expand=4249)
24158#[inline]
24159#[target_feature(enable = "avx512f")]
24160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24161#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24162pub fn _mm512_maskz_permutex2var_epi64(
24163    k: __mmask8,
24164    a: __m512i,
24165    idx: __m512i,
24166    b: __m512i,
24167) -> __m512i {
24168    unsafe {
24169        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
24170        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
24171    }
24172}
24173
24174/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24175///
24176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi64&expand=4248)
24177#[inline]
24178#[target_feature(enable = "avx512f")]
24179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24180#[cfg_attr(test, assert_instr(vpermi2q))]
24181pub fn _mm512_mask2_permutex2var_epi64(
24182    a: __m512i,
24183    idx: __m512i,
24184    k: __mmask8,
24185    b: __m512i,
24186) -> __m512i {
24187    unsafe {
24188        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
24189        transmute(simd_select_bitmask(k, permute, idx.as_i64x8()))
24190    }
24191}
24192
/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi64&expand=4246)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
pub fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    // Forwards to the 256-bit vpermi2q compiler intrinsic (order: a, idx, b).
    // LLVM may emit either vpermi2q or vpermt2q, hence the loose assert above.
    unsafe { transmute(vpermi2q256(a.as_i64x4(), idx.as_i64x4(), b.as_i64x4())) }
}
24203
24204/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24205///
24206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi64&expand=4243)
24207#[inline]
24208#[target_feature(enable = "avx512f,avx512vl")]
24209#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24210#[cfg_attr(test, assert_instr(vpermt2q))]
24211pub fn _mm256_mask_permutex2var_epi64(
24212    a: __m256i,
24213    k: __mmask8,
24214    idx: __m256i,
24215    b: __m256i,
24216) -> __m256i {
24217    unsafe {
24218        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
24219        transmute(simd_select_bitmask(k, permute, a.as_i64x4()))
24220    }
24221}
24222
24223/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24224///
24225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi64&expand=4245)
24226#[inline]
24227#[target_feature(enable = "avx512f,avx512vl")]
24228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24229#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24230pub fn _mm256_maskz_permutex2var_epi64(
24231    k: __mmask8,
24232    a: __m256i,
24233    idx: __m256i,
24234    b: __m256i,
24235) -> __m256i {
24236    unsafe {
24237        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
24238        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
24239    }
24240}
24241
24242/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24243///
24244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi64&expand=4244)
24245#[inline]
24246#[target_feature(enable = "avx512f,avx512vl")]
24247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24248#[cfg_attr(test, assert_instr(vpermi2q))]
24249pub fn _mm256_mask2_permutex2var_epi64(
24250    a: __m256i,
24251    idx: __m256i,
24252    k: __mmask8,
24253    b: __m256i,
24254) -> __m256i {
24255    unsafe {
24256        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
24257        transmute(simd_select_bitmask(k, permute, idx.as_i64x4()))
24258    }
24259}
24260
/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi64&expand=4242)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
pub fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    // Forwards to the 128-bit vpermi2q compiler intrinsic (order: a, idx, b).
    // LLVM may emit either vpermi2q or vpermt2q, hence the loose assert above.
    unsafe { transmute(vpermi2q128(a.as_i64x2(), idx.as_i64x2(), b.as_i64x2())) }
}
24271
24272/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24273///
24274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi64&expand=4239)
24275#[inline]
24276#[target_feature(enable = "avx512f,avx512vl")]
24277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24278#[cfg_attr(test, assert_instr(vpermt2q))]
24279pub fn _mm_mask_permutex2var_epi64(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
24280    unsafe {
24281        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
24282        transmute(simd_select_bitmask(k, permute, a.as_i64x2()))
24283    }
24284}
24285
24286/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24287///
24288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi64&expand=4241)
24289#[inline]
24290#[target_feature(enable = "avx512f,avx512vl")]
24291#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24292#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24293pub fn _mm_maskz_permutex2var_epi64(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
24294    unsafe {
24295        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
24296        transmute(simd_select_bitmask(k, permute, i64x2::ZERO))
24297    }
24298}
24299
24300/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24301///
24302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi64&expand=4240)
24303#[inline]
24304#[target_feature(enable = "avx512f,avx512vl")]
24305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24306#[cfg_attr(test, assert_instr(vpermi2q))]
24307pub fn _mm_mask2_permutex2var_epi64(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
24308    unsafe {
24309        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
24310        transmute(simd_select_bitmask(k, permute, idx.as_i64x2()))
24311    }
24312}
24313
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_ps&expand=4286)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
pub fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
    // Forwards to the vpermi2ps compiler intrinsic (argument order: a, idx, b).
    // LLVM may emit either vpermi2ps or vpermt2ps, hence the loose assert above.
    unsafe { transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16())) }
}
24324
24325/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24326///
24327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_ps&expand=4283)
24328#[inline]
24329#[target_feature(enable = "avx512f")]
24330#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24331#[cfg_attr(test, assert_instr(vpermt2ps))]
24332pub fn _mm512_mask_permutex2var_ps(a: __m512, k: __mmask16, idx: __m512i, b: __m512) -> __m512 {
24333    unsafe {
24334        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
24335        transmute(simd_select_bitmask(k, permute, a.as_f32x16()))
24336    }
24337}
24338
24339/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24340///
24341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_ps&expand=4285)
24342#[inline]
24343#[target_feature(enable = "avx512f")]
24344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24345#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
24346pub fn _mm512_maskz_permutex2var_ps(k: __mmask16, a: __m512, idx: __m512i, b: __m512) -> __m512 {
24347    unsafe {
24348        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
24349        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
24350    }
24351}
24352
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_ps&expand=4284)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
pub fn _mm512_mask2_permutex2var_ps(a: __m512, idx: __m512i, k: __mmask16, b: __m512) -> __m512 {
    unsafe {
        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
        // vpermi2ps merge semantics: masked-off lanes keep the raw index
        // bits, reinterpreted (bit-cast, not converted) as f32 lanes.
        let idx = _mm512_castsi512_ps(idx).as_f32x16();
        transmute(simd_select_bitmask(k, permute, idx))
    }
}
24367
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_ps&expand=4282)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
pub fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m256 {
    // Delegates to the LLVM intrinsic for the 256-bit vpermi2ps: each output
    // lane is selected from the 16-element concatenation of `a` and `b`.
    unsafe { transmute(vpermi2ps256(a.as_f32x8(), idx.as_i32x8(), b.as_f32x8())) }
}
24378
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_ps&expand=4279)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2ps))]
pub fn _mm256_mask_permutex2var_ps(a: __m256, k: __mmask8, idx: __m256i, b: __m256) -> __m256 {
    unsafe {
        // Full permute, then merge-masking: clear mask bits keep `a`.
        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, a.as_f32x8()))
    }
}
24392
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_ps&expand=4281)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
pub fn _mm256_maskz_permutex2var_ps(k: __mmask8, a: __m256, idx: __m256i, b: __m256) -> __m256 {
    unsafe {
        // Full permute, then zero-masking: clear mask bits become 0.0.
        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
    }
}
24406
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_ps&expand=4280)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
pub fn _mm256_mask2_permutex2var_ps(a: __m256, idx: __m256i, k: __mmask8, b: __m256) -> __m256 {
    unsafe {
        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
        // Masked-off lanes keep the raw index bits, bit-cast to f32.
        let idx = _mm256_castsi256_ps(idx).as_f32x8();
        transmute(simd_select_bitmask(k, permute, idx))
    }
}
24421
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_ps&expand=4278)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
pub fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 {
    // Delegates to the LLVM intrinsic for the 128-bit vpermi2ps: each output
    // lane is selected from the 8-element concatenation of `a` and `b`.
    unsafe { transmute(vpermi2ps128(a.as_f32x4(), idx.as_i32x4(), b.as_f32x4())) }
}
24432
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_ps&expand=4275)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2ps))]
pub fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: __m128) -> __m128 {
    unsafe {
        // Full permute, then merge-masking: clear mask bits keep `a`.
        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, a.as_f32x4()))
    }
}
24446
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_ps&expand=4277)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
pub fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 {
    unsafe {
        // Full permute, then zero-masking: clear mask bits become 0.0.
        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
    }
}
24460
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_ps&expand=4276)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
pub fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: __m128) -> __m128 {
    unsafe {
        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
        // Masked-off lanes keep the raw index bits, bit-cast to f32.
        let idx = _mm_castsi128_ps(idx).as_f32x4();
        transmute(simd_select_bitmask(k, permute, idx))
    }
}
24475
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_pd&expand=4274)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
pub fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
    // Delegates to the LLVM intrinsic for the 512-bit vpermi2pd: each output
    // lane is selected from the 16-element concatenation of `a` and `b`.
    unsafe { transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8())) }
}
24486
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_pd&expand=4271)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2pd))]
pub fn _mm512_mask_permutex2var_pd(a: __m512d, k: __mmask8, idx: __m512i, b: __m512d) -> __m512d {
    unsafe {
        // Full permute, then merge-masking: clear mask bits keep `a`
        // (vpermt2pd semantics, where `a` doubles as the destination).
        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, a.as_f64x8()))
    }
}
24500
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_pd&expand=4273)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
pub fn _mm512_maskz_permutex2var_pd(k: __mmask8, a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
    unsafe {
        // Full permute, then zero-masking: clear mask bits become 0.0.
        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
    }
}
24514
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set)
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_pd&expand=4272)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
pub fn _mm512_mask2_permutex2var_pd(a: __m512d, idx: __m512i, k: __mmask8, b: __m512d) -> __m512d {
    unsafe {
        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
        // Masked-off lanes keep the raw index bits, bit-cast to f64.
        let idx = _mm512_castsi512_pd(idx).as_f64x8();
        transmute(simd_select_bitmask(k, permute, idx))
    }
}
24529
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_pd&expand=4270)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
pub fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
    // Delegates to the LLVM intrinsic for the 256-bit vpermi2pd: each output
    // lane is selected from the 8-element concatenation of `a` and `b`.
    unsafe { transmute(vpermi2pd256(a.as_f64x4(), idx.as_i64x4(), b.as_f64x4())) }
}
24540
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_pd&expand=4267)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2pd))]
pub fn _mm256_mask_permutex2var_pd(a: __m256d, k: __mmask8, idx: __m256i, b: __m256d) -> __m256d {
    unsafe {
        // Full permute, then merge-masking: clear mask bits keep `a`.
        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, a.as_f64x4()))
    }
}
24554
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_pd&expand=4269)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
pub fn _mm256_maskz_permutex2var_pd(k: __mmask8, a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
    unsafe {
        // Full permute, then zero-masking: clear mask bits become 0.0.
        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
    }
}
24568
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set)
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_pd&expand=4268)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
pub fn _mm256_mask2_permutex2var_pd(a: __m256d, idx: __m256i, k: __mmask8, b: __m256d) -> __m256d {
    unsafe {
        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
        // Masked-off lanes keep the raw index bits, bit-cast to f64.
        let idx = _mm256_castsi256_pd(idx).as_f64x4();
        transmute(simd_select_bitmask(k, permute, idx))
    }
}
24583
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_pd&expand=4266)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
pub fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
    // Delegates to the LLVM intrinsic for the 128-bit vpermi2pd: each output
    // lane is selected from the 4-element concatenation of `a` and `b`.
    unsafe { transmute(vpermi2pd128(a.as_f64x2(), idx.as_i64x2(), b.as_f64x2())) }
}
24594
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_pd&expand=4263)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2pd))]
pub fn _mm_mask_permutex2var_pd(a: __m128d, k: __mmask8, idx: __m128i, b: __m128d) -> __m128d {
    unsafe {
        // Full permute, then merge-masking: clear mask bits keep `a`.
        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, a.as_f64x2()))
    }
}
24608
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_pd&expand=4265)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
pub fn _mm_maskz_permutex2var_pd(k: __mmask8, a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
    unsafe {
        // Full permute, then zero-masking: clear mask bits become 0.0.
        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
    }
}
24622
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set)
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_pd&expand=4264)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
pub fn _mm_mask2_permutex2var_pd(a: __m128d, idx: __m128i, k: __mmask8, b: __m128d) -> __m128d {
    unsafe {
        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
        // Masked-off lanes keep the raw index bits, bit-cast to f64.
        let idx = _mm_castsi128_pd(idx).as_f64x2();
        transmute(simd_select_bitmask(k, permute, idx))
    }
}
24637
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi32&expand=5150)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 9))] //should be vpshufd
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_epi32<const MASK: _MM_PERM_ENUM>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // MASK packs four 2-bit lane selectors. The same 4-element
        // permutation is applied independently to each of the four 128-bit
        // lanes; the +4/+8/+12 offsets rebase the selectors into lanes 1..=3.
        let r: i32x16 = simd_shuffle!(
            a.as_i32x16(),
            a.as_i32x16(),
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                (MASK as u32 >> 4) & 0b11,
                (MASK as u32 >> 6) & 0b11,
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
                (MASK as u32 & 0b11) + 8,
                ((MASK as u32 >> 2) & 0b11) + 8,
                ((MASK as u32 >> 4) & 0b11) + 8,
                ((MASK as u32 >> 6) & 0b11) + 8,
                (MASK as u32 & 0b11) + 12,
                ((MASK as u32 >> 2) & 0b11) + 12,
                ((MASK as u32 >> 4) & 0b11) + 12,
                ((MASK as u32 >> 6) & 0b11) + 12,
            ],
        );
        transmute(r)
    }
}
24675
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi32&expand=5148)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle first, then merge: set mask bits take the shuffled lane,
        // clear bits keep the corresponding lane of `src`.
        let r = _mm512_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
    }
}
24696
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi32&expand=5149)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
    k: __mmask16,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle first, then zero-mask: clear mask bits become 0.
        let r = _mm512_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
    }
}
24716
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi32&expand=5145)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Reuse the AVX2 shuffle, then merge: clear mask bits keep `src`.
        let r = _mm256_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
    }
}
24737
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi32&expand=5146)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Reuse the AVX2 shuffle, then zero-mask: clear mask bits become 0.
        let r = _mm256_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
    }
}
24757
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi32&expand=5142)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Reuse the SSE2 shuffle, then merge: clear mask bits keep `src`.
        let r = _mm_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
    }
}
24778
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi32&expand=5143)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Reuse the SSE2 shuffle, then zero-mask: clear mask bits become 0.
        let r = _mm_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
    }
}
24798
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_ps&expand=5203)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Per 128-bit lane (vshufps semantics): the low two outputs are
        // selected from `a` (selectors MASK[1:0], MASK[3:2]) and the high two
        // from `b` (selectors MASK[5:4], MASK[7:6]; the +16 offset indexes
        // into `b` in the 32-element a‖b shuffle space). The +4/+8/+12 and
        // +20/+24/+28 offsets rebase the selectors into lanes 1..=3.
        simd_shuffle!(
            a,
            b,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11) + 16,
                ((MASK as u32 >> 6) & 0b11) + 16,
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 20,
                ((MASK as u32 >> 6) & 0b11) + 20,
                (MASK as u32 & 0b11) + 8,
                ((MASK as u32 >> 2) & 0b11) + 8,
                ((MASK as u32 >> 4) & 0b11) + 24,
                ((MASK as u32 >> 6) & 0b11) + 24,
                (MASK as u32 & 0b11) + 12,
                ((MASK as u32 >> 2) & 0b11) + 12,
                ((MASK as u32 >> 4) & 0b11) + 28,
                ((MASK as u32 >> 6) & 0b11) + 28,
            ],
        )
    }
}
24835
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_ps&expand=5201)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_shuffle_ps<const MASK: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle first, then merge: clear mask bits keep `src`.
        let r = _mm512_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
    }
}
24857
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_ps&expand=5202)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_shuffle_ps<const MASK: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle first, then zero-mask: clear mask bits become 0.0.
        let r = _mm512_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
    }
}
24878
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_ps&expand=5198)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_shuffle_ps<const MASK: i32>(
    src: __m256,
    k: __mmask8,
    a: __m256,
    b: __m256,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Reuse the AVX shuffle, then merge: clear mask bits keep `src`.
        let r = _mm256_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
    }
}
24900
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_ps&expand=5199)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Reuse the AVX shuffle, then zero-mask: clear mask bits become 0.0.
        let r = _mm256_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
    }
}
24917
24918/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24919///
24920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_ps&expand=5195)
24921#[inline]
24922#[target_feature(enable = "avx512f,avx512vl")]
24923#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24924#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24925#[rustc_legacy_const_generics(4)]
24926#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24927pub const fn _mm_mask_shuffle_ps<const MASK: i32>(
24928    src: __m128,
24929    k: __mmask8,
24930    a: __m128,
24931    b: __m128,
24932) -> __m128 {
24933    unsafe {
24934        static_assert_uimm_bits!(MASK, 8);
24935        let r = _mm_shuffle_ps::<MASK>(a, b);
24936        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24937    }
24938}
24939
24940/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24941///
24942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_ps&expand=5196)
24943#[inline]
24944#[target_feature(enable = "avx512f,avx512vl")]
24945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24946#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24947#[rustc_legacy_const_generics(3)]
24948#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24949pub const fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
24950    unsafe {
24951        static_assert_uimm_bits!(MASK, 8);
24952        let r = _mm_shuffle_ps::<MASK>(a, b);
24953        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24954    }
24955}
24956
/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_pd&expand=5192)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Indices >= 8 refer to `b` in the concatenated [a, b] vector. Per 128-bit
        // lane i (i = 0..4): imm8 bit 2*i picks the low/high element of `a`'s lane,
        // bit 2*i+1 picks the low/high element of `b`'s lane, so even result
        // elements come from `a` and odd result elements from `b`.
        simd_shuffle!(
            a,
            b,
            [
                MASK as u32 & 0b1,
                ((MASK as u32 >> 1) & 0b1) + 8,
                ((MASK as u32 >> 2) & 0b1) + 2,
                ((MASK as u32 >> 3) & 0b1) + 10,
                ((MASK as u32 >> 4) & 0b1) + 4,
                ((MASK as u32 >> 5) & 0b1) + 12,
                ((MASK as u32 >> 6) & 0b1) + 6,
                ((MASK as u32 >> 7) & 0b1) + 14,
            ],
        )
    }
}
24985
24986/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24987///
24988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_pd&expand=5190)
24989#[inline]
24990#[target_feature(enable = "avx512f")]
24991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24992#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24993#[rustc_legacy_const_generics(4)]
24994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24995pub const fn _mm512_mask_shuffle_pd<const MASK: i32>(
24996    src: __m512d,
24997    k: __mmask8,
24998    a: __m512d,
24999    b: __m512d,
25000) -> __m512d {
25001    unsafe {
25002        static_assert_uimm_bits!(MASK, 8);
25003        let r = _mm512_shuffle_pd::<MASK>(a, b);
25004        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
25005    }
25006}
25007
25008/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25009///
25010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_pd&expand=5191)
25011#[inline]
25012#[target_feature(enable = "avx512f")]
25013#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25014#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
25015#[rustc_legacy_const_generics(3)]
25016#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25017pub const fn _mm512_maskz_shuffle_pd<const MASK: i32>(
25018    k: __mmask8,
25019    a: __m512d,
25020    b: __m512d,
25021) -> __m512d {
25022    unsafe {
25023        static_assert_uimm_bits!(MASK, 8);
25024        let r = _mm512_shuffle_pd::<MASK>(a, b);
25025        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
25026    }
25027}
25028
25029/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25030///
25031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_pd&expand=5187)
25032#[inline]
25033#[target_feature(enable = "avx512f,avx512vl")]
25034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25035#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
25036#[rustc_legacy_const_generics(4)]
25037#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25038pub const fn _mm256_mask_shuffle_pd<const MASK: i32>(
25039    src: __m256d,
25040    k: __mmask8,
25041    a: __m256d,
25042    b: __m256d,
25043) -> __m256d {
25044    unsafe {
25045        static_assert_uimm_bits!(MASK, 8);
25046        let r = _mm256_shuffle_pd::<MASK>(a, b);
25047        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
25048    }
25049}
25050
25051/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25052///
25053/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_pd&expand=5188)
25054#[inline]
25055#[target_feature(enable = "avx512f,avx512vl")]
25056#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25057#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
25058#[rustc_legacy_const_generics(3)]
25059#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25060pub const fn _mm256_maskz_shuffle_pd<const MASK: i32>(
25061    k: __mmask8,
25062    a: __m256d,
25063    b: __m256d,
25064) -> __m256d {
25065    unsafe {
25066        static_assert_uimm_bits!(MASK, 8);
25067        let r = _mm256_shuffle_pd::<MASK>(a, b);
25068        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
25069    }
25070}
25071
25072/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25073///
25074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_pd&expand=5184)
25075#[inline]
25076#[target_feature(enable = "avx512f,avx512vl")]
25077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25078#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
25079#[rustc_legacy_const_generics(4)]
25080#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25081pub const fn _mm_mask_shuffle_pd<const MASK: i32>(
25082    src: __m128d,
25083    k: __mmask8,
25084    a: __m128d,
25085    b: __m128d,
25086) -> __m128d {
25087    unsafe {
25088        static_assert_uimm_bits!(MASK, 8);
25089        let r = _mm_shuffle_pd::<MASK>(a, b);
25090        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
25091    }
25092}
25093
25094/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25095///
25096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_pd&expand=5185)
25097#[inline]
25098#[target_feature(enable = "avx512f,avx512vl")]
25099#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25100#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
25101#[rustc_legacy_const_generics(3)]
25102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25103pub const fn _mm_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
25104    unsafe {
25105        static_assert_uimm_bits!(MASK, 8);
25106        let r = _mm_shuffle_pd::<MASK>(a, b);
25107        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
25108    }
25109}
25110
/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i32x4&expand=5177)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_i32x4<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        // Each 128-bit result lane holds 4 consecutive i32 elements. imm8[1:0] and
        // imm8[3:2] select which lane of `a` fills result lanes 0 and 1; imm8[5:4]
        // and imm8[7:6] select which lane of `b` fills result lanes 2 and 3 (the
        // `+ 16` offset indexes `b` within the concatenated [a, b] vector).
        let r: i32x16 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b11) * 4 + 0,
                (MASK as u32 & 0b11) * 4 + 1,
                (MASK as u32 & 0b11) * 4 + 2,
                (MASK as u32 & 0b11) * 4 + 3,
                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
            ],
        );
        transmute(r)
    }
}
25150
25151/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25152///
25153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i32x4&expand=5175)
25154#[inline]
25155#[target_feature(enable = "avx512f")]
25156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25157#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
25158#[rustc_legacy_const_generics(4)]
25159#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25160pub const fn _mm512_mask_shuffle_i32x4<const MASK: i32>(
25161    src: __m512i,
25162    k: __mmask16,
25163    a: __m512i,
25164    b: __m512i,
25165) -> __m512i {
25166    unsafe {
25167        static_assert_uimm_bits!(MASK, 8);
25168        let r = _mm512_shuffle_i32x4::<MASK>(a, b);
25169        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
25170    }
25171}
25172
25173/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25174///
25175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i32x4&expand=5176)
25176#[inline]
25177#[target_feature(enable = "avx512f")]
25178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25179#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
25180#[rustc_legacy_const_generics(3)]
25181#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25182pub const fn _mm512_maskz_shuffle_i32x4<const MASK: i32>(
25183    k: __mmask16,
25184    a: __m512i,
25185    b: __m512i,
25186) -> __m512i {
25187    unsafe {
25188        static_assert_uimm_bits!(MASK, 8);
25189        let r = _mm512_shuffle_i32x4::<MASK>(a, b);
25190        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
25191    }
25192}
25193
/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i32x4&expand=5174)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] //should be vshufi32x4
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shuffle_i32x4<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        // imm8 bit 0 selects which 128-bit lane of `a` (4 i32s) fills the low result
        // lane; bit 1 selects which lane of `b` fills the high result lane (the `+ 8`
        // offset indexes `b` within the concatenated [a, b] vector).
        let r: i32x8 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b1) * 4 + 0,
                (MASK as u32 & 0b1) * 4 + 1,
                (MASK as u32 & 0b1) * 4 + 2,
                (MASK as u32 & 0b1) * 4 + 3,
                ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
                ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
                ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
                ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
            ],
        );
        transmute(r)
    }
}
25225
25226/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25227///
25228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i32x4&expand=5172)
25229#[inline]
25230#[target_feature(enable = "avx512f,avx512vl")]
25231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25232#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
25233#[rustc_legacy_const_generics(4)]
25234#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25235pub const fn _mm256_mask_shuffle_i32x4<const MASK: i32>(
25236    src: __m256i,
25237    k: __mmask8,
25238    a: __m256i,
25239    b: __m256i,
25240) -> __m256i {
25241    unsafe {
25242        static_assert_uimm_bits!(MASK, 8);
25243        let r = _mm256_shuffle_i32x4::<MASK>(a, b);
25244        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
25245    }
25246}
25247
25248/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25249///
25250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i32x4&expand=5173)
25251#[inline]
25252#[target_feature(enable = "avx512f,avx512vl")]
25253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25254#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
25255#[rustc_legacy_const_generics(3)]
25256#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25257pub const fn _mm256_maskz_shuffle_i32x4<const MASK: i32>(
25258    k: __mmask8,
25259    a: __m256i,
25260    b: __m256i,
25261) -> __m256i {
25262    unsafe {
25263        static_assert_uimm_bits!(MASK, 8);
25264        let r = _mm256_shuffle_i32x4::<MASK>(a, b);
25265        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
25266    }
25267}
25268
/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i64x2&expand=5183)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_i64x2<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        // Each 128-bit result lane holds 2 consecutive i64 elements. imm8[1:0] and
        // imm8[3:2] select which lane of `a` fills result lanes 0 and 1; imm8[5:4]
        // and imm8[7:6] select which lane of `b` fills result lanes 2 and 3 (the
        // `+ 8` offset indexes `b` within the concatenated [a, b] vector).
        let r: i64x8 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b11) * 2 + 0,
                (MASK as u32 & 0b11) * 2 + 1,
                ((MASK as u32 >> 2) & 0b11) * 2 + 0,
                ((MASK as u32 >> 2) & 0b11) * 2 + 1,
                ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
                ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
                ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
                ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
            ],
        );
        transmute(r)
    }
}
25300
25301/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25302///
25303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i64x2&expand=5181)
25304#[inline]
25305#[target_feature(enable = "avx512f")]
25306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25307#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
25308#[rustc_legacy_const_generics(4)]
25309#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25310pub const fn _mm512_mask_shuffle_i64x2<const MASK: i32>(
25311    src: __m512i,
25312    k: __mmask8,
25313    a: __m512i,
25314    b: __m512i,
25315) -> __m512i {
25316    unsafe {
25317        static_assert_uimm_bits!(MASK, 8);
25318        let r = _mm512_shuffle_i64x2::<MASK>(a, b);
25319        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
25320    }
25321}
25322
25323/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25324///
25325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i64x2&expand=5182)
25326#[inline]
25327#[target_feature(enable = "avx512f")]
25328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25329#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
25330#[rustc_legacy_const_generics(3)]
25331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25332pub const fn _mm512_maskz_shuffle_i64x2<const MASK: i32>(
25333    k: __mmask8,
25334    a: __m512i,
25335    b: __m512i,
25336) -> __m512i {
25337    unsafe {
25338        static_assert_uimm_bits!(MASK, 8);
25339        let r = _mm512_shuffle_i64x2::<MASK>(a, b);
25340        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
25341    }
25342}
25343
/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i64x2&expand=5180)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshufi64x2
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shuffle_i64x2<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        // imm8 bit 0 selects which 128-bit lane of `a` (2 i64s) fills the low result
        // lane; bit 1 selects which lane of `b` fills the high result lane (the `+ 4`
        // offset indexes `b` within the concatenated [a, b] vector).
        let r: i64x4 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b1) * 2 + 0,
                (MASK as u32 & 0b1) * 2 + 1,
                ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
                ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
            ],
        );
        transmute(r)
    }
}
25371
25372/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25373///
25374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i64x2&expand=5178)
25375#[inline]
25376#[target_feature(enable = "avx512f,avx512vl")]
25377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25378#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
25379#[rustc_legacy_const_generics(4)]
25380#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25381pub const fn _mm256_mask_shuffle_i64x2<const MASK: i32>(
25382    src: __m256i,
25383    k: __mmask8,
25384    a: __m256i,
25385    b: __m256i,
25386) -> __m256i {
25387    unsafe {
25388        static_assert_uimm_bits!(MASK, 8);
25389        let r = _mm256_shuffle_i64x2::<MASK>(a, b);
25390        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
25391    }
25392}
25393
25394/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25395///
25396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i64x2&expand=5179)
25397#[inline]
25398#[target_feature(enable = "avx512f,avx512vl")]
25399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25400#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
25401#[rustc_legacy_const_generics(3)]
25402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25403pub const fn _mm256_maskz_shuffle_i64x2<const MASK: i32>(
25404    k: __mmask8,
25405    a: __m256i,
25406    b: __m256i,
25407) -> __m256i {
25408    unsafe {
25409        static_assert_uimm_bits!(MASK, 8);
25410        let r = _mm256_shuffle_i64x2::<MASK>(a, b);
25411        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
25412    }
25413}
25414
/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f32x4&expand=5165)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] //should be vshuff32x4, but generate vshuff64x2
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_f32x4<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        // Each 128-bit result lane holds 4 consecutive f32 elements. imm8[1:0] and
        // imm8[3:2] select which lane of `a` fills result lanes 0 and 1; imm8[5:4]
        // and imm8[7:6] select which lane of `b` fills result lanes 2 and 3 (the
        // `+ 16` offset indexes `b` within the concatenated [a, b] vector).
        let r: f32x16 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b11) * 4 + 0,
                (MASK as u32 & 0b11) * 4 + 1,
                (MASK as u32 & 0b11) * 4 + 2,
                (MASK as u32 & 0b11) * 4 + 3,
                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
            ],
        );
        transmute(r)
    }
}
25454
25455/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25456///
25457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f32x4&expand=5163)
25458#[inline]
25459#[target_feature(enable = "avx512f")]
25460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25461#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
25462#[rustc_legacy_const_generics(4)]
25463#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25464pub const fn _mm512_mask_shuffle_f32x4<const MASK: i32>(
25465    src: __m512,
25466    k: __mmask16,
25467    a: __m512,
25468    b: __m512,
25469) -> __m512 {
25470    unsafe {
25471        static_assert_uimm_bits!(MASK, 8);
25472        let r = _mm512_shuffle_f32x4::<MASK>(a, b);
25473        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
25474    }
25475}
25476
25477/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25478///
25479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f32x4&expand=5164)
25480#[inline]
25481#[target_feature(enable = "avx512f")]
25482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25483#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
25484#[rustc_legacy_const_generics(3)]
25485#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25486pub const fn _mm512_maskz_shuffle_f32x4<const MASK: i32>(
25487    k: __mmask16,
25488    a: __m512,
25489    b: __m512,
25490) -> __m512 {
25491    unsafe {
25492        static_assert_uimm_bits!(MASK, 8);
25493        let r = _mm512_shuffle_f32x4::<MASK>(a, b);
25494        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
25495    }
25496}
25497
/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f32x4&expand=5162)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff32x4
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shuffle_f32x4<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_f32x8();
        let b = b.as_f32x8();
        // imm8 bit 0 selects which 128-bit lane of `a` (4 f32s) fills the low result
        // lane; bit 1 selects which lane of `b` fills the high result lane (the `+ 8`
        // offset indexes `b` within the concatenated [a, b] vector).
        let r: f32x8 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b1) * 4 + 0,
                (MASK as u32 & 0b1) * 4 + 1,
                (MASK as u32 & 0b1) * 4 + 2,
                (MASK as u32 & 0b1) * 4 + 3,
                ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
                ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
                ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
                ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
            ],
        );
        transmute(r)
    }
}
25529
25530/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25531///
25532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f32x4&expand=5160)
25533#[inline]
25534#[target_feature(enable = "avx512f,avx512vl")]
25535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25536#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
25537#[rustc_legacy_const_generics(4)]
25538#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25539pub const fn _mm256_mask_shuffle_f32x4<const MASK: i32>(
25540    src: __m256,
25541    k: __mmask8,
25542    a: __m256,
25543    b: __m256,
25544) -> __m256 {
25545    unsafe {
25546        static_assert_uimm_bits!(MASK, 8);
25547        let r = _mm256_shuffle_f32x4::<MASK>(a, b);
25548        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
25549    }
25550}
25551
/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f32x4&expand=5161)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_shuffle_f32x4<const MASK: i32>(
    k: __mmask8,
    a: __m256,
    b: __m256,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Perform the unmasked shuffle, then zero every f32 lane whose mask
        // bit in `k` is clear.
        let r = _mm256_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
    }
}
25572
/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f64x2&expand=5171)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_f64x2<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        // Each 2-bit field of MASK selects one 128-bit lane (a pair of f64
        // elements). Fields 0-1 select lanes from `a`; fields 2-3 select
        // lanes from `b`, hence the `+ 8` offset into the concatenated
        // [a, b] index space that `simd_shuffle!` uses.
        let r: f64x8 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b11) * 2 + 0,
                (MASK as u32 & 0b11) * 2 + 1,
                ((MASK as u32 >> 2) & 0b11) * 2 + 0,
                ((MASK as u32 >> 2) & 0b11) * 2 + 1,
                ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
                ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
                ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
                ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
            ],
        );
        transmute(r)
    }
}
25604
/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f64x2&expand=5169)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_shuffle_f64x2<const MASK: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Perform the unmasked shuffle, then blend with `src` per bit of `k`.
        let r = _mm512_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
    }
}
25626
/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f64x2&expand=5170)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_shuffle_f64x2<const MASK: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Perform the unmasked shuffle, then zero lanes whose `k` bit is clear.
        let r = _mm512_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
    }
}
25647
/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f64x2&expand=5168)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff64x2
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shuffle_f64x2<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_f64x4();
        let b = b.as_f64x4();
        // Bit 0 of MASK picks which 128-bit lane (pair of f64s) of `a` forms
        // the low half; bit 1 picks the lane of `b` for the high half. The
        // `+ 4` offset addresses `b` in the concatenated [a, b] index space.
        let r: f64x4 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b1) * 2 + 0,
                (MASK as u32 & 0b1) * 2 + 1,
                ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
                ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
            ],
        );
        transmute(r)
    }
}
25675
/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f64x2&expand=5166)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_shuffle_f64x2<const MASK: i32>(
    src: __m256d,
    k: __mmask8,
    a: __m256d,
    b: __m256d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Perform the unmasked shuffle, then blend with `src` per bit of `k`.
        let r = _mm256_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
    }
}
25697
/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f64x2&expand=5167)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_shuffle_f64x2<const MASK: i32>(
    k: __mmask8,
    a: __m256d,
    b: __m256d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Perform the unmasked shuffle, then zero lanes whose `k` bit is clear.
        let r = _mm256_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
    }
}
25718
/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x4_ps&expand=2442)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        // IMM8 selects one of the four 128-bit lanes of `a`. The second
        // shuffle operand is undefined because none of its indices (16..32)
        // are ever selected.
        match IMM8 & 0x3 {
            0 => simd_shuffle!(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
            1 => simd_shuffle!(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
            2 => simd_shuffle!(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
            _ => simd_shuffle!(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
        }
    }
}
25739
/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x4_ps&expand=2443)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_extractf32x4_ps<const IMM8: i32>(
    src: __m128,
    k: __mmask8,
    a: __m512,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        // Extract the selected lane, then blend with `src` per bit of `k`.
        let r = _mm512_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
    }
}
25760
/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x4_ps&expand=2444)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        // Extract the selected lane, then zero elements whose `k` bit is clear.
        let r = _mm512_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
    }
}
25777
/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf32x4_ps&expand=2439)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vextract, IMM8 = 1) //should be vextractf32x4
)]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // IMM8 selects the low (0) or high (1) 128-bit lane of `a`; the
        // undefined second operand's indices (8..16) are never selected.
        match IMM8 & 0x1 {
            0 => simd_shuffle!(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
            _ => simd_shuffle!(a, _mm256_undefined_ps(), [4, 5, 6, 7]),
        }
    }
}
25799
/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf32x4_ps&expand=2440)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_extractf32x4_ps<const IMM8: i32>(
    src: __m128,
    k: __mmask8,
    a: __m256,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Extract the selected lane, then blend with `src` per bit of `k`.
        let r = _mm256_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
    }
}
25820
/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf32x4_ps&expand=2441)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Extract the selected lane, then zero elements whose `k` bit is clear.
        let r = _mm256_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
    }
}
25837
/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x4_epi64&expand=2473)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vextractf64x4, IMM1 = 1) //should be vextracti64x4
)]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        // IMM1 selects the low (0) or high (1) 256-bit half of `a`. The
        // zeroed second operand is only a shuffle placeholder; its indices
        // (8..16) are never selected.
        match IMM1 {
            0 => simd_shuffle!(a, _mm512_setzero_si512(), [0, 1, 2, 3]),
            _ => simd_shuffle!(a, _mm512_setzero_si512(), [4, 5, 6, 7]),
        }
    }
}
25859
/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x4_epi64&expand=2474)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        // Extract the selected half, then blend with `src` per bit of `k`.
        let r = _mm512_extracti64x4_epi64::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
    }
}
25880
/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x4_epi64&expand=2475)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        // Extract the selected half, then zero elements whose `k` bit is clear.
        let r = _mm512_extracti64x4_epi64::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
    }
}
25897
/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x4_pd&expand=2454)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // IMM8 selects the low (0) or high (1) 256-bit half of `a`; the
        // undefined second operand's indices (8..16) are never selected.
        match IMM8 & 0x1 {
            0 => simd_shuffle!(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
            _ => simd_shuffle!(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
        }
    }
}
25916
/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x4_pd&expand=2455)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_extractf64x4_pd<const IMM8: i32>(
    src: __m256d,
    k: __mmask8,
    a: __m512d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Extract the selected half, then blend with `src` per bit of `k`.
        let r = _mm512_extractf64x4_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
    }
}
25937
/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x4_pd&expand=2456)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Extract the selected half, then zero elements whose `k` bit is clear.
        let r = _mm512_extractf64x4_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
    }
}
25954
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x4_epi32&expand=2461)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vextractf32x4, IMM2 = 3) //should be vextracti32x4
)]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM2, 2);
        let a = a.as_i32x16();
        // The zeroed second operand is only a shuffle placeholder; its
        // indices (16..32) are never selected. IMM2 picks one of the four
        // 128-bit lanes of `a`.
        let zero = i32x16::ZERO;
        let extract: i32x4 = match IMM2 {
            0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
            1 => simd_shuffle!(a, zero, [4, 5, 6, 7]),
            2 => simd_shuffle!(a, zero, [8, 9, 10, 11]),
            _ => simd_shuffle!(a, zero, [12, 13, 14, 15]),
        };
        transmute(extract)
    }
}
25981
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x4_epi32&expand=2462)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m512i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM2, 2);
        // Extract the selected lane, then blend with `src` per bit of `k`.
        let r = _mm512_extracti32x4_epi32::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
    }
}
26002
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x4_epi32&expand=2463)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM2, 2);
        // Extract the selected lane, then zero elements whose `k` bit is clear.
        let r = _mm512_extracti32x4_epi32::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
    }
}
26019
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti32x4_epi32&expand=2458)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vextract, IMM1 = 1) //should be vextracti32x4
)]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        let a = a.as_i32x8();
        // The zeroed second operand is only a shuffle placeholder; its
        // indices (8..16) are never selected. IMM1 picks the low (0) or
        // high (1) 128-bit lane of `a`.
        let zero = i32x8::ZERO;
        let extract: i32x4 = match IMM1 {
            0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
            _ => simd_shuffle!(a, zero, [4, 5, 6, 7]),
        };
        transmute(extract)
    }
}
26044
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti32x4_epi32&expand=2459)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m256i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        // Extract the selected lane, then blend with `src` per bit of `k`.
        let r = _mm256_extracti32x4_epi32::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
    }
}
26065
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti32x4_epi32&expand=2460)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        // Extract the selected lane, then zero elements whose `k` bit is clear.
        let r = _mm256_extracti32x4_epi32::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
    }
}
26082
/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_moveldup_ps&expand=3862)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_moveldup_ps(a: __m512) -> __m512 {
    unsafe {
        // Copy each even-indexed element into itself and the odd slot above
        // it: dst[2i] = dst[2i+1] = a[2i].
        let r: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(r)
    }
}
26097
26098/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26099///
26100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_moveldup_ps&expand=3860)
26101#[inline]
26102#[target_feature(enable = "avx512f")]
26103#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26104#[cfg_attr(test, assert_instr(vmovsldup))]
26105#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26106pub const fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
26107    unsafe {
26108        let mov: f32x16 =
26109            simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
26110        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
26111    }
26112}
26113
26114/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26115///
26116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_moveldup_ps&expand=3861)
26117#[inline]
26118#[target_feature(enable = "avx512f")]
26119#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26120#[cfg_attr(test, assert_instr(vmovsldup))]
26121#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26122pub const fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
26123    unsafe {
26124        let mov: f32x16 =
26125            simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
26126        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
26127    }
26128}
26129
/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_moveldup_ps&expand=3857)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        // Unmasked duplication of even-indexed elements, then blend with
        // `src` per bit of `k`.
        let mov = _mm256_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
    }
}
26144
/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_moveldup_ps&expand=3858)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        // Unmasked duplication of even-indexed elements, then zero lanes
        // whose `k` bit is clear.
        let mov = _mm256_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
    }
}
26159
/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_moveldup_ps&expand=3854)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        // Unmasked duplication of even-indexed elements, then blend with
        // `src` per bit of `k`.
        let mov = _mm_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
    }
}
26174
/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_moveldup_ps&expand=3855)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        // Unmasked duplication of even-indexed elements, then zero lanes
        // whose `k` bit is clear.
        let mov = _mm_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
    }
}
26189
/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movehdup_ps&expand=3852)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_movehdup_ps(a: __m512) -> __m512 {
    unsafe {
        // Each odd index appears twice: result lanes (2i, 2i+1) both receive
        // a[2i+1], i.e. every odd-indexed element overwrites the even slot
        // directly below it. This shuffle is what selects VMOVSHDUP.
        let r: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(r)
    }
}
26204
26205/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26206///
26207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movehdup_ps&expand=3850)
26208#[inline]
26209#[target_feature(enable = "avx512f")]
26210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26211#[cfg_attr(test, assert_instr(vmovshdup))]
26212#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26213pub const fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
26214    unsafe {
26215        let mov: f32x16 =
26216            simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
26217        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
26218    }
26219}
26220
26221/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26222///
26223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movehdup_ps&expand=3851)
26224#[inline]
26225#[target_feature(enable = "avx512f")]
26226#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26227#[cfg_attr(test, assert_instr(vmovshdup))]
26228#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26229pub const fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
26230    unsafe {
26231        let mov: f32x16 =
26232            simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
26233        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
26234    }
26235}
26236
26237/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26238///
26239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movehdup_ps&expand=3847)
26240#[inline]
26241#[target_feature(enable = "avx512f,avx512vl")]
26242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26243#[cfg_attr(test, assert_instr(vmovshdup))]
26244#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26245pub const fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
26246    unsafe {
26247        let mov = _mm256_movehdup_ps(a);
26248        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
26249    }
26250}
26251
26252/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26253///
26254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movehdup_ps&expand=3848)
26255#[inline]
26256#[target_feature(enable = "avx512f,avx512vl")]
26257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26258#[cfg_attr(test, assert_instr(vmovshdup))]
26259#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26260pub const fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 {
26261    unsafe {
26262        let mov = _mm256_movehdup_ps(a);
26263        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
26264    }
26265}
26266
26267/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26268///
26269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movehdup_ps&expand=3844)
26270#[inline]
26271#[target_feature(enable = "avx512f,avx512vl")]
26272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26273#[cfg_attr(test, assert_instr(vmovshdup))]
26274#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26275pub const fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
26276    unsafe {
26277        let mov = _mm_movehdup_ps(a);
26278        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
26279    }
26280}
26281
26282/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26283///
26284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movehdup_ps&expand=3845)
26285#[inline]
26286#[target_feature(enable = "avx512f,avx512vl")]
26287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26288#[cfg_attr(test, assert_instr(vmovshdup))]
26289#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26290pub const fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
26291    unsafe {
26292        let mov = _mm_movehdup_ps(a);
26293        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
26294    }
26295}
26296
/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movedup_pd&expand=3843)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_movedup_pd(a: __m512d) -> __m512d {
    unsafe {
        // Each even index appears twice: result lanes (2i, 2i+1) both receive
        // a[2i], i.e. every even-indexed element overwrites the odd slot above
        // it. This shuffle is what selects VMOVDDUP.
        let r: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
        transmute(r)
    }
}
26311
26312/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26313///
26314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movedup_pd&expand=3841)
26315#[inline]
26316#[target_feature(enable = "avx512f")]
26317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26318#[cfg_attr(test, assert_instr(vmovddup))]
26319#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26320pub const fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
26321    unsafe {
26322        let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
26323        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
26324    }
26325}
26326
26327/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26328///
26329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movedup_pd&expand=3842)
26330#[inline]
26331#[target_feature(enable = "avx512f")]
26332#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26333#[cfg_attr(test, assert_instr(vmovddup))]
26334#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26335pub const fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
26336    unsafe {
26337        let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
26338        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
26339    }
26340}
26341
26342/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26343///
26344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movedup_pd&expand=3838)
26345#[inline]
26346#[target_feature(enable = "avx512f,avx512vl")]
26347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26348#[cfg_attr(test, assert_instr(vmovddup))]
26349#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26350pub const fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
26351    unsafe {
26352        let mov = _mm256_movedup_pd(a);
26353        transmute(simd_select_bitmask(k, mov.as_f64x4(), src.as_f64x4()))
26354    }
26355}
26356
26357/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26358///
26359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movedup_pd&expand=3839)
26360#[inline]
26361#[target_feature(enable = "avx512f,avx512vl")]
26362#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26363#[cfg_attr(test, assert_instr(vmovddup))]
26364#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26365pub const fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d {
26366    unsafe {
26367        let mov = _mm256_movedup_pd(a);
26368        transmute(simd_select_bitmask(k, mov.as_f64x4(), f64x4::ZERO))
26369    }
26370}
26371
26372/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26373///
26374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movedup_pd&expand=3835)
26375#[inline]
26376#[target_feature(enable = "avx512f,avx512vl")]
26377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26378#[cfg_attr(test, assert_instr(vmovddup))]
26379#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26380pub const fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
26381    unsafe {
26382        let mov = _mm_movedup_pd(a);
26383        transmute(simd_select_bitmask(k, mov.as_f64x2(), src.as_f64x2()))
26384    }
26385}
26386
26387/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26388///
26389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movedup_pd&expand=3836)
26390#[inline]
26391#[target_feature(enable = "avx512f,avx512vl")]
26392#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26393#[cfg_attr(test, assert_instr(vmovddup))]
26394#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26395pub const fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d {
26396    unsafe {
26397        let mov = _mm_movedup_pd(a);
26398        transmute(simd_select_bitmask(k, mov.as_f64x2(), f64x2::ZERO))
26399    }
26400}
26401
/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x4&expand=3174)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] //should be vinserti32x4
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let a = a.as_i32x16();
        // Widen `b` to 512 bits so both shuffle operands have the same lane
        // count. Only shuffle indices 16..=19 (b's low four i32 elements) are
        // ever used, so the cast's upper lanes never reach the result.
        let b = _mm512_castsi128_si512(b).as_i32x16();
        // In each arm, indices 0..=15 select from `a` and 16..=19 select `b`'s
        // payload; exactly one 128-bit lane (four consecutive i32) of `a` is
        // replaced, chosen by IMM8.
        let ret: i32x16 = match IMM8 & 0b11 {
            0 => {
                // Replace lane 0 (elements 0..=3).
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            1 => {
                // Replace lane 1 (elements 4..=7).
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            2 => {
                // Replace lane 2 (elements 8..=11).
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
                )
            }
            _ => {
                // IMM8 == 3: replace lane 3 (elements 12..=15).
                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
            }
        };
        transmute(ret)
    }
}
26445
26446/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26447///
26448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x4&expand=3175)
26449#[inline]
26450#[target_feature(enable = "avx512f")]
26451#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26452#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
26453#[rustc_legacy_const_generics(4)]
26454#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26455pub const fn _mm512_mask_inserti32x4<const IMM8: i32>(
26456    src: __m512i,
26457    k: __mmask16,
26458    a: __m512i,
26459    b: __m128i,
26460) -> __m512i {
26461    unsafe {
26462        static_assert_uimm_bits!(IMM8, 2);
26463        let r = _mm512_inserti32x4::<IMM8>(a, b);
26464        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
26465    }
26466}
26467
26468/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26469///
26470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x4&expand=3176)
26471#[inline]
26472#[target_feature(enable = "avx512f")]
26473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26474#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
26475#[rustc_legacy_const_generics(3)]
26476#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26477pub const fn _mm512_maskz_inserti32x4<const IMM8: i32>(
26478    k: __mmask16,
26479    a: __m512i,
26480    b: __m128i,
26481) -> __m512i {
26482    unsafe {
26483        static_assert_uimm_bits!(IMM8, 2);
26484        let r = _mm512_inserti32x4::<IMM8>(a, b);
26485        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
26486    }
26487}
26488
/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti32x4&expand=3171)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vinsert, IMM8 = 1) //should be vinserti32x4
)]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i32x8();
        // Widen `b` to 256 bits so both shuffle operands have the same lane
        // count; only indices 8..=11 (b's four i32 elements) are used, so the
        // cast's upper lanes never reach the result.
        let b = _mm256_castsi128_si256(b).as_i32x8();
        // Indices 0..=7 select from `a`, 8..=11 select `b`'s payload; IMM8
        // picks which 128-bit half of `a` is replaced.
        let ret: i32x8 = match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        };
        transmute(ret)
    }
}
26513
26514/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26515///
26516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti32x4&expand=3172)
26517#[inline]
26518#[target_feature(enable = "avx512f,avx512vl")]
26519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26520#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
26521#[rustc_legacy_const_generics(4)]
26522#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26523pub const fn _mm256_mask_inserti32x4<const IMM8: i32>(
26524    src: __m256i,
26525    k: __mmask8,
26526    a: __m256i,
26527    b: __m128i,
26528) -> __m256i {
26529    unsafe {
26530        static_assert_uimm_bits!(IMM8, 1);
26531        let r = _mm256_inserti32x4::<IMM8>(a, b);
26532        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
26533    }
26534}
26535
26536/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26537///
26538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti32x4&expand=3173)
26539#[inline]
26540#[target_feature(enable = "avx512f,avx512vl")]
26541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26542#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
26543#[rustc_legacy_const_generics(3)]
26544#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26545pub const fn _mm256_maskz_inserti32x4<const IMM8: i32>(
26546    k: __mmask8,
26547    a: __m256i,
26548    b: __m128i,
26549) -> __m256i {
26550    unsafe {
26551        static_assert_uimm_bits!(IMM8, 1);
26552        let r = _mm256_inserti32x4::<IMM8>(a, b);
26553        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
26554    }
26555}
26556
/// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x4&expand=3186)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] //should be vinserti64x4
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Widen `b` to 512 bits so both shuffle operands have the same lane
        // count; only indices 8..=11 (b's four i64 elements) are used, so the
        // cast's upper lanes never reach the result.
        let b = _mm512_castsi256_si512(b);
        // Indices 0..=7 select from `a`, 8..=11 select `b`'s payload; IMM8
        // picks which 256-bit half of `a` is replaced.
        match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        }
    }
}
26576
26577/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26578///
26579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x4&expand=3187)
26580#[inline]
26581#[target_feature(enable = "avx512f")]
26582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26583#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
26584#[rustc_legacy_const_generics(4)]
26585#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26586pub const fn _mm512_mask_inserti64x4<const IMM8: i32>(
26587    src: __m512i,
26588    k: __mmask8,
26589    a: __m512i,
26590    b: __m256i,
26591) -> __m512i {
26592    unsafe {
26593        static_assert_uimm_bits!(IMM8, 1);
26594        let r = _mm512_inserti64x4::<IMM8>(a, b);
26595        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
26596    }
26597}
26598
26599/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26600///
26601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x4&expand=3188)
26602#[inline]
26603#[target_feature(enable = "avx512f")]
26604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26605#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
26606#[rustc_legacy_const_generics(3)]
26607#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26608pub const fn _mm512_maskz_inserti64x4<const IMM8: i32>(
26609    k: __mmask8,
26610    a: __m512i,
26611    b: __m256i,
26612) -> __m512i {
26613    unsafe {
26614        static_assert_uimm_bits!(IMM8, 1);
26615        let r = _mm512_inserti64x4::<IMM8>(a, b);
26616        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
26617    }
26618}
26619
/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x4&expand=3155)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        // Widen `b` to 512 bits so both shuffle operands have the same lane
        // count; only indices 16..=19 (b's four f32 elements) are used, so the
        // cast's upper lanes never reach the result.
        let b = _mm512_castps128_ps512(b);
        // Indices 0..=15 select from `a`, 16..=19 select `b`'s payload; each
        // arm replaces exactly one 128-bit lane of `a`, chosen by IMM8.
        match IMM8 & 0b11 {
            0 => {
                // Replace lane 0 (elements 0..=3).
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            1 => {
                // Replace lane 1 (elements 4..=7).
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            2 => {
                // Replace lane 2 (elements 8..=11).
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
                )
            }
            _ => {
                // IMM8 == 3: replace lane 3 (elements 12..=15).
                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
            }
        }
    }
}
26661
26662/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26663///
26664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x4&expand=3156)
26665#[inline]
26666#[target_feature(enable = "avx512f")]
26667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26668#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
26669#[rustc_legacy_const_generics(4)]
26670#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26671pub const fn _mm512_mask_insertf32x4<const IMM8: i32>(
26672    src: __m512,
26673    k: __mmask16,
26674    a: __m512,
26675    b: __m128,
26676) -> __m512 {
26677    unsafe {
26678        static_assert_uimm_bits!(IMM8, 2);
26679        let r = _mm512_insertf32x4::<IMM8>(a, b);
26680        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
26681    }
26682}
26683
26684/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26685///
26686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x4&expand=3157)
26687#[inline]
26688#[target_feature(enable = "avx512f")]
26689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26690#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
26691#[rustc_legacy_const_generics(3)]
26692#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26693pub const fn _mm512_maskz_insertf32x4<const IMM8: i32>(
26694    k: __mmask16,
26695    a: __m512,
26696    b: __m128,
26697) -> __m512 {
26698    unsafe {
26699        static_assert_uimm_bits!(IMM8, 2);
26700        let r = _mm512_insertf32x4::<IMM8>(a, b);
26701        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
26702    }
26703}
26704
/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf32x4&expand=3152)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vinsert, IMM8 = 1) //should be vinsertf32x4
)]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Widen `b` to 256 bits so both shuffle operands have the same lane
        // count; only indices 8..=11 (b's four f32 elements) are used, so the
        // cast's upper lanes never reach the result.
        let b = _mm256_castps128_ps256(b);
        // Indices 0..=7 select from `a`, 8..=11 select `b`'s payload; IMM8
        // picks which 128-bit half of `a` is replaced.
        match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        }
    }
}
26727
26728/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26729///
26730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf32x4&expand=3153)
26731#[inline]
26732#[target_feature(enable = "avx512f,avx512vl")]
26733#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26734#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
26735#[rustc_legacy_const_generics(4)]
26736#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26737pub const fn _mm256_mask_insertf32x4<const IMM8: i32>(
26738    src: __m256,
26739    k: __mmask8,
26740    a: __m256,
26741    b: __m128,
26742) -> __m256 {
26743    unsafe {
26744        static_assert_uimm_bits!(IMM8, 1);
26745        let r = _mm256_insertf32x4::<IMM8>(a, b);
26746        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
26747    }
26748}
26749
26750/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26751///
26752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf32x4&expand=3154)
26753#[inline]
26754#[target_feature(enable = "avx512f,avx512vl")]
26755#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26756#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
26757#[rustc_legacy_const_generics(3)]
26758#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26759pub const fn _mm256_maskz_insertf32x4<const IMM8: i32>(
26760    k: __mmask8,
26761    a: __m256,
26762    b: __m128,
26763) -> __m256 {
26764    unsafe {
26765        static_assert_uimm_bits!(IMM8, 1);
26766        let r = _mm256_insertf32x4::<IMM8>(a, b);
26767        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
26768    }
26769}
26770
/// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x4&expand=3167)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Widen `b` to 512 bits so both shuffle operands have the same lane
        // count; only indices 8..=11 (b's four f64 elements) are used, so the
        // cast's upper lanes never reach the result.
        let b = _mm512_castpd256_pd512(b);
        // Indices 0..=7 select from `a`, 8..=11 select `b`'s payload; IMM8
        // picks which 256-bit half of `a` is replaced.
        match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        }
    }
}
26790
/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x4&expand=3168)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_insertf64x4<const IMM8: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m256d,
) -> __m512d {
    // SAFETY: the 8-bit mask matches the 8 lanes of f64x8, and the transmute
    // is between same-size vector types.
    unsafe {
        // IMM8 must be a 1-bit immediate (0 or 1).
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm512_insertf64x4::<IMM8>(a, b);
        // Per lane i: bit i of `k` set -> inserted result, clear -> `src` lane.
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
    }
}
26812
/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x4&expand=3169)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_insertf64x4<const IMM8: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m256d,
) -> __m512d {
    // SAFETY: the 8-bit mask matches the 8 lanes of f64x8, and the transmute
    // is between same-size vector types.
    unsafe {
        // IMM8 must be a 1-bit immediate (0 or 1).
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm512_insertf64x4::<IMM8>(a, b);
        // Per lane i: bit i of `k` set -> inserted result, clear -> zero.
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
    }
}
26833
/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi32&expand=6021)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))] //should be vpunpckhdq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
    // SAFETY: all shuffle indices are < 32 (the concatenated lane count of
    // `a` and `b`), and the transmute is between same-size vector types.
    unsafe {
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        // Within each 128-bit lane (4 i32 elements), interleave elements 2
        // and 3 of `a` (indices 0..16) with elements 2 and 3 of `b`
        // (indices 16..32); `+4/+8/+12` step to the next 128-bit lane.
        #[rustfmt::skip]
        let r: i32x16 = simd_shuffle!(
            a, b,
            [ 2, 18, 3, 19,
              2 + 4, 18 + 4, 3 + 4, 19 + 4,
              2 + 8, 18 + 8, 3 + 8, 19 + 8,
              2 + 12, 18 + 12, 3 + 12, 19 + 12],
        );
        transmute(r)
    }
}
26857
/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi32&expand=6019)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_unpackhi_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    // SAFETY: the 16-bit mask matches the 16 lanes of i32x16, and the
    // transmute is between same-size vector types.
    unsafe {
        let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> `src`.
        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16()))
    }
}
26877
/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi32&expand=6020)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    // SAFETY: the 16-bit mask matches the 16 lanes of i32x16, and the
    // transmute is between same-size vector types.
    unsafe {
        let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> zero.
        transmute(simd_select_bitmask(k, unpackhi, i32x16::ZERO))
    }
}
26892
/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi32&expand=6016)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_unpackhi_epi32(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    // SAFETY: the 8-bit mask matches the 8 lanes of i32x8, and the transmute
    // is between same-size vector types.
    unsafe {
        let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> `src`.
        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x8()))
    }
}
26912
/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi32&expand=6017)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    // SAFETY: the 8-bit mask matches the 8 lanes of i32x8, and the transmute
    // is between same-size vector types.
    unsafe {
        let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> zero.
        transmute(simd_select_bitmask(k, unpackhi, i32x8::ZERO))
    }
}
26927
/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi32&expand=6013)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_unpackhi_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: the low 4 bits of `k` select the 4 lanes of i32x4, and the
    // transmute is between same-size vector types.
    unsafe {
        let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> `src`.
        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x4()))
    }
}
26942
/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi32&expand=6014)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: the low 4 bits of `k` select the 4 lanes of i32x4, and the
    // transmute is between same-size vector types.
    unsafe {
        let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> zero.
        transmute(simd_select_bitmask(k, unpackhi, i32x4::ZERO))
    }
}
26957
/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi64&expand=6030)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))] //should be vpunpckhqdq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
    // SAFETY: all shuffle indices are < 16, the concatenated lane count of
    // the two 8-lane operands. Within each 128-bit lane (2 i64 elements),
    // take element 1 of `a` (indices 0..8) then element 1 of `b`
    // (indices 8..16); `+2/+4/+6` step to the next 128-bit lane.
    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
}
26969
/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi64&expand=6028)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_unpackhi_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    // SAFETY: the 8-bit mask matches the 8 lanes of i64x8, and the transmute
    // is between same-size vector types.
    unsafe {
        let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> `src`.
        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8()))
    }
}
26989
/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi64&expand=6029)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    // SAFETY: the 8-bit mask matches the 8 lanes of i64x8, and the transmute
    // is between same-size vector types.
    unsafe {
        let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> zero.
        transmute(simd_select_bitmask(k, unpackhi, i64x8::ZERO))
    }
}
27004
/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi64&expand=6025)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_unpackhi_epi64(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    // SAFETY: the low 4 bits of `k` select the 4 lanes of i64x4, and the
    // transmute is between same-size vector types.
    unsafe {
        let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> `src`.
        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x4()))
    }
}
27024
/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi64&expand=6026)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    // SAFETY: the low 4 bits of `k` select the 4 lanes of i64x4, and the
    // transmute is between same-size vector types.
    unsafe {
        let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> zero.
        transmute(simd_select_bitmask(k, unpackhi, i64x4::ZERO))
    }
}
27039
/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi64&expand=6022)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_unpackhi_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: the low 2 bits of `k` select the 2 lanes of i64x2, and the
    // transmute is between same-size vector types.
    unsafe {
        let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> `src`.
        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x2()))
    }
}
27054
/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi64&expand=6023)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: the low 2 bits of `k` select the 2 lanes of i64x2, and the
    // transmute is between same-size vector types.
    unsafe {
        let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> zero.
        transmute(simd_select_bitmask(k, unpackhi, i64x2::ZERO))
    }
}
27069
/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_ps&expand=6060)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
    // SAFETY: all shuffle indices are < 32, the concatenated lane count of
    // the two 16-lane f32 operands. Within each 128-bit lane (4 f32
    // elements), interleave elements 2 and 3 of `a` (indices 0..16) with
    // elements 2 and 3 of `b` (indices 16..32).
    unsafe {
        #[rustfmt::skip]
        simd_shuffle!(
            a, b,
            [ 2, 18, 3, 19,
              2 + 4, 18 + 4, 3 + 4, 19 + 4,
              2 + 8, 18 + 8, 3 + 8, 19 + 8,
              2 + 12, 18 + 12, 3 + 12, 19 + 12],
        )
    }
}
27090
/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_ps&expand=6058)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    // SAFETY: the 16-bit mask matches the 16 lanes of f32x16, and the
    // transmute is between same-size vector types.
    unsafe {
        let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> `src`.
        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16()))
    }
}
27105
/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_ps&expand=6059)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    // SAFETY: the 16-bit mask matches the 16 lanes of f32x16, and the
    // transmute is between same-size vector types.
    unsafe {
        let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> zero.
        transmute(simd_select_bitmask(k, unpackhi, f32x16::ZERO))
    }
}
27120
/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_ps&expand=6055)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    // SAFETY: the 8-bit mask matches the 8 lanes of f32x8, and the transmute
    // is between same-size vector types.
    unsafe {
        let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> `src`.
        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x8()))
    }
}
27135
/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_ps&expand=6056)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    // SAFETY: the 8-bit mask matches the 8 lanes of f32x8, and the transmute
    // is between same-size vector types.
    unsafe {
        let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> zero.
        transmute(simd_select_bitmask(k, unpackhi, f32x8::ZERO))
    }
}
27150
/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_ps&expand=6052)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    // SAFETY: the low 4 bits of `k` select the 4 lanes of f32x4, and the
    // transmute is between same-size vector types.
    unsafe {
        let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> `src`.
        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x4()))
    }
}
27165
/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_ps&expand=6053)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    // SAFETY: the low 4 bits of `k` select the 4 lanes of f32x4, and the
    // transmute is between same-size vector types.
    unsafe {
        let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> zero.
        transmute(simd_select_bitmask(k, unpackhi, f32x4::ZERO))
    }
}
27180
/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_pd&expand=6048)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
    // SAFETY: all shuffle indices are < 16, the concatenated lane count of
    // the two 8-lane f64 operands. Within each 128-bit lane (2 f64
    // elements), take element 1 of `a` (indices 0..8) then element 1 of `b`
    // (indices 8..16); `+2/+4/+6` step to the next 128-bit lane.
    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
}
27192
/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_pd&expand=6046)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_unpackhi_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    // SAFETY: the 8-bit mask matches the 8 lanes of f64x8, and the transmute
    // is between same-size vector types.
    unsafe {
        let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> `src`.
        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8()))
    }
}
27207
/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_pd&expand=6047)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    // SAFETY: the 8-bit mask matches the 8 lanes of f64x8, and the transmute
    // is between same-size vector types.
    unsafe {
        let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> zero.
        transmute(simd_select_bitmask(k, unpackhi, f64x8::ZERO))
    }
}
27222
/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_pd&expand=6043)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_unpackhi_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    // SAFETY: the low 4 bits of `k` select the 4 lanes of f64x4, and the
    // transmute is between same-size vector types.
    unsafe {
        let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> `src`.
        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x4()))
    }
}
27237
/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_pd&expand=6044)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    // SAFETY: the low 4 bits of `k` select the 4 lanes of f64x4, and the
    // transmute is between same-size vector types.
    unsafe {
        let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> zero.
        transmute(simd_select_bitmask(k, unpackhi, f64x4::ZERO))
    }
}
27252
/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_pd&expand=6040)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    // SAFETY: the low 2 bits of `k` select the 2 lanes of f64x2, and the
    // transmute is between same-size vector types.
    unsafe {
        let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> `src`.
        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x2()))
    }
}
27267
/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_pd&expand=6041)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    // SAFETY: the low 2 bits of `k` select the 2 lanes of f64x2, and the
    // transmute is between same-size vector types.
    unsafe {
        let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> zero.
        transmute(simd_select_bitmask(k, unpackhi, f64x2::ZERO))
    }
}
27282
/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi32&expand=6078)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))] //should be vpunpckldq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
    // SAFETY: all shuffle indices are < 32 (the concatenated lane count of
    // `a` and `b`), and the transmute is between same-size vector types.
    unsafe {
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        // Within each 128-bit lane (4 i32 elements), interleave elements 0
        // and 1 of `a` (indices 0..16) with elements 0 and 1 of `b`
        // (indices 16..32); `+4/+8/+12` step to the next 128-bit lane.
        #[rustfmt::skip]
        let r: i32x16 = simd_shuffle!(
            a, b,
            [ 0, 16, 1, 17,
              0 + 4, 16 + 4, 1 + 4, 17 + 4,
              0 + 8, 16 + 8, 1 + 8, 17 + 8,
              0 + 12, 16 + 12, 1 + 12, 17 + 12],
        );
        transmute(r)
    }
}
27306
/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi32&expand=6076)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_unpacklo_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    // SAFETY: the 16-bit mask matches the 16 lanes of i32x16, and the
    // transmute is between same-size vector types.
    unsafe {
        let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> `src`.
        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16()))
    }
}
27326
/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi32&expand=6077)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    // SAFETY: the 16-bit mask matches the 16 lanes of i32x16, and the
    // transmute is between same-size vector types.
    unsafe {
        let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> zero.
        transmute(simd_select_bitmask(k, unpacklo, i32x16::ZERO))
    }
}
27341
/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi32&expand=6073)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_unpacklo_epi32(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    // SAFETY: the 8-bit mask matches the 8 lanes of i32x8, and the transmute
    // is between same-size vector types.
    unsafe {
        let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> `src`.
        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x8()))
    }
}
27361
/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi32&expand=6074)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    // SAFETY: the 8-bit mask matches the 8 lanes of i32x8, and the transmute
    // is between same-size vector types.
    unsafe {
        let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
        // Per lane i: bit i of `k` set -> interleaved result, clear -> zero.
        transmute(simd_select_bitmask(k, unpacklo, i32x8::ZERO))
    }
}
27376
/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi32&expand=6070)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_unpacklo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Mask bit i set -> unpacked element i; clear -> element i of `src`.
        // Only the low 4 bits of the 8-bit mask are meaningful for 4 lanes.
        let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x4()))
    }
}
27391
/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi32&expand=6071)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Mask bit i set -> unpacked element i; clear -> zero.
        let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpacklo, i32x4::ZERO))
    }
}
27406
/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi64&expand=6087)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
    // Indices are written as `base + lane_offset`: within each 128-bit lane
    // (2 qwords) the result is [a_lo, b_lo], i.e. pairs {0,8}, {2,10}, {4,12},
    // {6,14} across the four lanes.
    unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
}
27418
/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi64&expand=6085)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_unpacklo_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        // Mask bit i set -> unpacked element i; clear -> element i of `src`.
        let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8()))
    }
}
27438
/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi64&expand=6086)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Mask bit i set -> unpacked element i; clear -> zero.
        let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpacklo, i64x8::ZERO))
    }
}
27453
/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi64&expand=6082)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_unpacklo_epi64(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        // Mask bit i set -> unpacked element i; clear -> element i of `src`.
        // Only the low 4 bits of the 8-bit mask are meaningful for 4 lanes.
        let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x4()))
    }
}
27473
/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi64&expand=6083)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Mask bit i set -> unpacked element i; clear -> zero.
        let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpacklo, i64x4::ZERO))
    }
}
27488
/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi64&expand=6079)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_unpacklo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Mask bit i set -> unpacked element i; clear -> element i of `src`.
        // Only the low 2 bits of the 8-bit mask are meaningful for 2 lanes.
        let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x2()))
    }
}
27503
/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi64&expand=6080)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Mask bit i set -> unpacked element i; clear -> zero.
        let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpacklo, i64x2::ZERO))
    }
}
27518
/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_ps&expand=6117)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        // Each row below is one 128-bit lane (4 floats): the base pattern
        // [a0, b0, a1, b1] shifted by the lane offset (+4, +8, +12).
        // Indices >= 16 select from `b`.
        #[rustfmt::skip]
        simd_shuffle!(a, b,
                       [ 0, 16, 1, 17,
                         0 + 4, 16 + 4, 1 + 4, 17 + 4,
                         0 + 8, 16 + 8, 1 + 8, 17 + 8,
                         0 + 12, 16 + 12, 1 + 12, 17 + 12],
        )
    }
}
27538
/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_ps&expand=6115)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        // Mask bit i set -> unpacked element i; clear -> element i of `src`.
        let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16()))
    }
}
27553
/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_ps&expand=6116)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        // Mask bit i set -> unpacked element i; clear -> zero.
        let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpacklo, f32x16::ZERO))
    }
}
27568
/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_ps&expand=6112)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        // Mask bit i set -> unpacked element i; clear -> element i of `src`.
        let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x8()))
    }
}
27583
/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_ps&expand=6113)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        // Mask bit i set -> unpacked element i; clear -> zero.
        let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpacklo, f32x8::ZERO))
    }
}
27598
/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_ps&expand=6109)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        // Mask bit i set -> unpacked element i; clear -> element i of `src`.
        // Only the low 4 bits of the 8-bit mask are meaningful for 4 lanes.
        let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x4()))
    }
}
27613
/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_ps&expand=6110)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        // Mask bit i set -> unpacked element i; clear -> zero.
        let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpacklo, f32x4::ZERO))
    }
}
27628
/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_pd&expand=6105)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
    // Same index pattern as `_mm512_unpacklo_epi64`: per 128-bit lane the
    // result is [a_lo, b_lo]; indices >= 8 select from `b`.
    unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
}
27640
/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_pd&expand=6103)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_unpacklo_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        // Mask bit i set -> unpacked element i; clear -> element i of `src`.
        let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8()))
    }
}
27655
/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_pd&expand=6104)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        // Mask bit i set -> unpacked element i; clear -> zero.
        let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpacklo, f64x8::ZERO))
    }
}
27670
/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_pd&expand=6100)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_unpacklo_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        // Mask bit i set -> unpacked element i; clear -> element i of `src`.
        // Only the low 4 bits of the 8-bit mask are meaningful for 4 lanes.
        let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x4()))
    }
}
27685
/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_pd&expand=6101)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        // Mask bit i set -> unpacked element i; clear -> zero.
        let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpacklo, f64x4::ZERO))
    }
}
27700
/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_pd&expand=6097)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        // Mask bit i set -> unpacked element i; clear -> element i of `src`.
        // Only the low 2 bits of the 8-bit mask are meaningful for 2 lanes.
        let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x2()))
    }
}
27715
/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_pd&expand=6098)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        // Mask bit i set -> unpacked element i; clear -> zero.
        let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpacklo, f64x2::ZERO))
    }
}
27730
/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps128_ps512&expand=621)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps128_ps512(a: __m128) -> __m512 {
    unsafe {
        // Indices 0-3 copy `a`; index 4 (the first element of the second
        // shuffle operand) repeatedly fills the upper 12 lanes from the
        // undefined vector.
        simd_shuffle!(
            a,
            _mm_undefined_ps(),
            [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
        )
    }
}
27753
/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps256_ps512&expand=623)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps256_ps512(a: __m256) -> __m512 {
    unsafe {
        // Indices 0-7 copy `a`; index 8 (the first element of the second
        // shuffle operand) fills the upper 8 lanes from the undefined vector.
        simd_shuffle!(
            a,
            _mm256_undefined_ps(),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
        )
    }
}
27776
/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps128_ps512&expand=6196)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
    unsafe {
        // Same shuffle as the cast variant, but the second operand is an
        // all-zero vector, so the upper 12 lanes are guaranteed zero.
        simd_shuffle!(
            a,
            _mm_set1_ps(0.),
            [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
        )
    }
}
27793
/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps256_ps512&expand=6197)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
    unsafe {
        // Same shuffle as the cast variant, but the second operand is an
        // all-zero vector, so the upper 8 lanes are guaranteed zero.
        simd_shuffle!(
            a,
            _mm256_set1_ps(0.),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
        )
    }
}
27810
/// Cast vector of type __m512 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps128&expand=624)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps512_ps128(a: __m512) -> __m128 {
    // Truncating shuffle: keep only the lowest 4 of 16 f32 lanes.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
}
27821
/// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps256&expand=625)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps512_ps256(a: __m512) -> __m256 {
    // Truncating shuffle: keep only the lowest 8 of 16 f32 lanes.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
}
27832
/// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_pd&expand=616)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps_pd(a: __m512) -> __m512d {
    // Bit-preserving reinterpretation; both types are 512 bits wide.
    unsafe { transmute(a) }
}
27843
/// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_si512&expand=619)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps_si512(a: __m512) -> __m512i {
    // Bit-preserving reinterpretation; both types are 512 bits wide.
    unsafe { transmute(a) }
}
27854
/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd128_pd512&expand=609)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
    // Indices 0-1 copy `a`; index 2 (first element of the second operand)
    // fills the upper 6 lanes from the undefined vector.
    unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) }
}
27871
/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd256_pd512&expand=611)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
    // Indices 0-3 copy `a`; index 4 (first element of the second operand)
    // fills the upper 4 lanes from the undefined vector.
    unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4]) }
}
27888
/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd128_pd512&expand=6193)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
    // Second operand is all zeros, so the upper 6 lanes are guaranteed zero.
    unsafe { simd_shuffle!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2]) }
}
27899
/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd256_pd512&expand=6194)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
    // Second operand is all zeros, so the upper 4 lanes are guaranteed zero.
    unsafe { simd_shuffle!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4]) }
}
27910
/// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd128&expand=612)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
    // Truncating shuffle: keep only the lowest 2 of 8 f64 lanes.
    unsafe { simd_shuffle!(a, a, [0, 1]) }
}
27921
/// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd256&expand=613)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
    // Truncating shuffle: keep only the lowest 4 of 8 f64 lanes.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
}
27932
/// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_ps&expand=604)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd_ps(a: __m512d) -> __m512 {
    // Pure bit-level reinterpretation: both types are 512 bits wide, so this
    // compiles to nothing — no value conversion takes place.
    unsafe { transmute(a) }
}
27943
/// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_si512&expand=607)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd_si512(a: __m512d) -> __m512i {
    // Pure bit-level reinterpretation of the 512-bit value; no conversion.
    unsafe { transmute(a) }
}
27954
/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi128_si512&expand=629)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
    // Lanes 0-1 come from `a`; indices >= 2 pull from an "undefined" second
    // operand, so the upper 384 bits carry no specified value.
    unsafe { simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
}
27971
/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi256_si512&expand=633)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
    // Lanes 0-3 come from `a`; indices >= 4 pull from an "undefined" second
    // operand, so the upper 256 bits carry no specified value.
    unsafe { simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
}
27988
/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi128_si512&expand=6199)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
    // Lanes 0-1 come from `a`; indices >= 2 select from the zero vector,
    // zeroing the upper 384 bits.
    unsafe { simd_shuffle!(a, _mm_setzero_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
}
27999
/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi256_si512&expand=6200)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
    // Lanes 0-3 come from `a`; indices >= 4 select from the zero vector,
    // zeroing the upper 256 bits.
    unsafe { simd_shuffle!(a, _mm256_setzero_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
}
28010
/// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si128&expand=636)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
    // Keep only the two low 64-bit lanes of `a` (the low 128 bits).
    unsafe { simd_shuffle!(a, a, [0, 1]) }
}
28021
/// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si256&expand=637)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
    // Keep only the four low 64-bit lanes of `a` (the low 256 bits).
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
}
28032
/// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_ps&expand=635)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
    // Pure bit-level reinterpretation of the 512-bit value; no conversion.
    unsafe { transmute(a) }
}
28043
/// Cast vector of type __m512i to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_pd&expand=634)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
    // Pure bit-level reinterpretation of the 512-bit value; no conversion.
    unsafe { transmute(a) }
}
28054
/// Copy the lower 32-bit integer in a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsi512_si32&expand=1882)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
    // Extract lane 0 of the 16 x i32 view of `a`.
    unsafe { simd_extract!(a.as_i32x16(), 0) }
}
28066
/// Copy the lower single-precision (32-bit) floating-point element of a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtss_f32)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtss_f32(a: __m512) -> f32 {
    // Extract lane 0 (the lowest f32) of `a`.
    unsafe { simd_extract!(a, 0) }
}
28077
/// Copy the lower double-precision (64-bit) floating-point element of a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsd_f64)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtsd_f64(a: __m512d) -> f64 {
    // Extract lane 0 (the lowest f64) of `a`.
    unsafe { simd_extract!(a, 0) }
}
28088
/// Broadcast the low packed 32-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastd_epi32&expand=545)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
    unsafe {
        // Widen to 512 bits first, then replicate lane 0 across all 16 lanes.
        let a = _mm512_castsi128_si512(a).as_i32x16();
        let ret: i32x16 = simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
        transmute(ret)
    }
}
28104
/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastd_epi32&expand=546)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
        // Bit i of `k` picks lane i: 1 -> broadcast value, 0 -> lane from `src`.
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
    }
}
28119
/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastd_epi32&expand=547)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
        // Bit i of `k` picks lane i: 1 -> broadcast value, 0 -> zero.
        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
    }
}
28134
/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastd_epi32&expand=543)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
        // Bit i of `k` picks lane i: 1 -> broadcast value, 0 -> lane from `src`.
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
    }
}
28149
/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastd_epi32&expand=544)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
        // Bit i of `k` picks lane i: 1 -> broadcast value, 0 -> zero.
        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
    }
}
28164
/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastd_epi32&expand=540)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
        // Only the low 4 bits of `k` are meaningful for a 4-lane vector:
        // 1 -> broadcast value, 0 -> lane from `src`.
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x4()))
    }
}
28179
/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastd_epi32&expand=541)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
        // Only the low 4 bits of `k` are meaningful for a 4-lane vector:
        // 1 -> broadcast value, 0 -> zero.
        transmute(simd_select_bitmask(k, broadcast, i32x4::ZERO))
    }
}
28194
/// Broadcast the low packed 64-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastq_epi64&expand=560)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
    // Replicate lane 0 of `a` into all 8 output lanes; the 8-element index
    // list also widens the 128-bit input to a 512-bit result.
    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
}
28206
/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastq_epi64&expand=561)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
        // Bit i of `k` picks lane i: 1 -> broadcast value, 0 -> lane from `src`.
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
    }
}
28221
/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastq_epi64&expand=562)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
        // Bit i of `k` picks lane i: 1 -> broadcast value, 0 -> zero.
        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
    }
}
28236
/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastq_epi64&expand=558)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
        // Only the low 4 bits of `k` matter for 4 lanes:
        // 1 -> broadcast value, 0 -> lane from `src`.
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x4()))
    }
}
28251
/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastq_epi64&expand=559)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
        // Only the low 4 bits of `k` matter for 4 lanes:
        // 1 -> broadcast value, 0 -> zero.
        transmute(simd_select_bitmask(k, broadcast, i64x4::ZERO))
    }
}
28266
/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastq_epi64&expand=555)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
        // Only the low 2 bits of `k` matter for 2 lanes:
        // 1 -> broadcast value, 0 -> lane from `src`.
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x2()))
    }
}
28281
/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastq_epi64&expand=556)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
        // Only the low 2 bits of `k` matter for 2 lanes:
        // 1 -> broadcast value, 0 -> zero.
        transmute(simd_select_bitmask(k, broadcast, i64x2::ZERO))
    }
}
28296
/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastss_ps&expand=578)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
    // Replicate lane 0 of `a` into all 16 output lanes; the 16-element index
    // list also widens the 128-bit input to a 512-bit result.
    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) }
}
28308
/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastss_ps&expand=579)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
    unsafe {
        let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
        // Bit i of `k` picks lane i: 1 -> broadcast value, 0 -> lane from `src`.
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
    }
}
28323
/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastss_ps&expand=580)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
    unsafe {
        let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
        // Bit i of `k` picks lane i: 1 -> broadcast value, 0 -> zero.
        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
    }
}
28338
/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastss_ps&expand=576)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
        // Bit i of `k` picks lane i: 1 -> broadcast value, 0 -> lane from `src`.
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
    }
}
28353
/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastss_ps&expand=577)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
        // Bit i of `k` picks lane i: 1 -> broadcast value, 0 -> zero.
        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
    }
}
28368
/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastss_ps&expand=573)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let broadcast = _mm_broadcastss_ps(a).as_f32x4();
        // Only the low 4 bits of `k` matter for 4 lanes:
        // 1 -> broadcast value, 0 -> lane from `src`.
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x4()))
    }
}
28383
/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastss_ps&expand=574)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let broadcast = _mm_broadcastss_ps(a).as_f32x4();
        // Only the low 4 bits of `k` matter for 4 lanes:
        // 1 -> broadcast value, 0 -> zero.
        transmute(simd_select_bitmask(k, broadcast, f32x4::ZERO))
    }
}
28398
/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastsd_pd&expand=567)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
    // Replicate lane 0 of `a` into all 8 output lanes; the 8-element index
    // list also widens the 128-bit input to a 512-bit result.
    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
}
28410
/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastsd_pd&expand=568)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
    unsafe {
        let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
        // Bit i of `k` picks lane i: 1 -> broadcast value, 0 -> lane from `src`.
        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
    }
}
28425
/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastsd_pd&expand=569)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
    unsafe {
        let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
        // Bit i of `k` picks lane i: 1 -> broadcast value, 0 -> zero.
        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
    }
}
28440
/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastsd_pd&expand=565)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
    unsafe {
        let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
        // Only the low 4 bits of `k` matter for 4 lanes:
        // 1 -> broadcast value, 0 -> lane from `src`.
        transmute(simd_select_bitmask(k, broadcast, src.as_f64x4()))
    }
}
28455
/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastsd_pd&expand=566)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
    unsafe {
        let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
        // Only the low 4 bits of `k` matter for 4 lanes:
        // 1 -> broadcast value, 0 -> zero.
        transmute(simd_select_bitmask(k, broadcast, f64x4::ZERO))
    }
}
28470
/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x4&expand=510)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_i32x4();
        // Tile the 4-lane input four times to fill all 16 output lanes,
        // i.e. repeat the whole 128-bit value across the 512-bit result.
        let ret: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
        transmute(ret)
    }
}
28485
28486/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28487///
28488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x4&expand=511)
28489#[inline]
28490#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
28491#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28492#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28493pub const fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
28494    unsafe {
28495        let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
28496        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
28497    }
28498}
28499
28500/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28501///
28502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x4&expand=512)
28503#[inline]
28504#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
28505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28506#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28507pub const fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
28508    unsafe {
28509        let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
28510        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
28511    }
28512}
28513
/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x4&expand=507)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i32x4();
        // Repeat the 4 source lanes twice to fill all 8 destination lanes.
        let ret: i32x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]);
        transmute(ret)
    }
}
28528
28529/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28530///
28531/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x4&expand=508)
28532#[inline]
28533#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
28534#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28535#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28536pub const fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
28537    unsafe {
28538        let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
28539        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
28540    }
28541}
28542
28543/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28544///
28545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x4&expand=509)
28546#[inline]
28547#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
28548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28549#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28550pub const fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
28551    unsafe {
28552        let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
28553        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
28554    }
28555}
28556
/// Broadcast the 4 packed 64-bit integers from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x4&expand=522)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
    // Repeat the 4 source lanes twice to fill all 8 destination lanes.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
}
28567
28568/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28569///
28570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x4&expand=523)
28571#[inline]
28572#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
28573#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28574#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28575pub const fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
28576    unsafe {
28577        let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
28578        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
28579    }
28580}
28581
28582/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28583///
28584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x4&expand=524)
28585#[inline]
28586#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
28587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28588#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28589pub const fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
28590    unsafe {
28591        let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
28592        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
28593    }
28594}
28595
/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x4&expand=483)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
    // Repeat the 4 source lanes four times to fill all 16 destination lanes.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) }
}
28606
28607/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28608///
28609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x4&expand=484)
28610#[inline]
28611#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshu
28612#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28613#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28614pub const fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
28615    unsafe {
28616        let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
28617        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
28618    }
28619}
28620
28621/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28622///
28623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x4&expand=485)
28624#[inline]
28625#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshu
28626#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28627#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28628pub const fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
28629    unsafe {
28630        let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
28631        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
28632    }
28633}
28634
/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x4&expand=480)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
    // Repeat the 4 source lanes twice to fill all 8 destination lanes.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
}
28645
28646/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28647///
28648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x4&expand=481)
28649#[inline]
28650#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshu
28651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28652#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28653pub const fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 {
28654    unsafe {
28655        let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
28656        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
28657    }
28658}
28659
28660/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28661///
28662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x4&expand=482)
28663#[inline]
28664#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshu
28665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28666#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28667pub const fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
28668    unsafe {
28669        let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
28670        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
28671    }
28672}
28673
/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x4&expand=495)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
    // Repeat the 4 source lanes twice to fill all 8 destination lanes.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
}
28684
28685/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28686///
28687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x4&expand=496)
28688#[inline]
28689#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vper
28690#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28691#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28692pub const fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
28693    unsafe {
28694        let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
28695        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
28696    }
28697}
28698
28699/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28700///
28701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x4&expand=497)
28702#[inline]
28703#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vper
28704#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28705#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28706pub const fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
28707    unsafe {
28708        let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
28709        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
28710    }
28711}
28712
28713/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
28714///
28715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi32&expand=435)
28716#[inline]
28717#[target_feature(enable = "avx512f")]
28718#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28719#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
28720#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28721pub const fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28722    unsafe { transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16())) }
28723}
28724
28725/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
28726///
28727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi32&expand=434)
28728#[inline]
28729#[target_feature(enable = "avx512f,avx512vl")]
28730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28731#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
28732#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28733pub const fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28734    unsafe { transmute(simd_select_bitmask(k, b.as_i32x8(), a.as_i32x8())) }
28735}
28736
28737/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
28738///
28739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi32&expand=432)
28740#[inline]
28741#[target_feature(enable = "avx512f,avx512vl")]
28742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28743#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
28744#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28745pub const fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28746    unsafe { transmute(simd_select_bitmask(k, b.as_i32x4(), a.as_i32x4())) }
28747}
28748
28749/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
28750///
28751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi64&expand=438)
28752#[inline]
28753#[target_feature(enable = "avx512f")]
28754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28755#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
28756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28757pub const fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28758    unsafe { transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8())) }
28759}
28760
28761/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
28762///
28763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi64&expand=437)
28764#[inline]
28765#[target_feature(enable = "avx512f,avx512vl")]
28766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28767#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
28768#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28769pub const fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28770    unsafe { transmute(simd_select_bitmask(k, b.as_i64x4(), a.as_i64x4())) }
28771}
28772
28773/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
28774///
28775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi64&expand=436)
28776#[inline]
28777#[target_feature(enable = "avx512f,avx512vl")]
28778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28779#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
28780#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28781pub const fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28782    unsafe { transmute(simd_select_bitmask(k, b.as_i64x2(), a.as_i64x2())) }
28783}
28784
28785/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28786///
28787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_ps&expand=451)
28788#[inline]
28789#[target_feature(enable = "avx512f")]
28790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28791#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
28792#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28793pub const fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
28794    unsafe { transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16())) }
28795}
28796
28797/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28798///
28799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_ps&expand=450)
28800#[inline]
28801#[target_feature(enable = "avx512f,avx512vl")]
28802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28803#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
28804#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28805pub const fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
28806    unsafe { transmute(simd_select_bitmask(k, b.as_f32x8(), a.as_f32x8())) }
28807}
28808
28809/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28810///
28811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_ps&expand=448)
28812#[inline]
28813#[target_feature(enable = "avx512f,avx512vl")]
28814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28815#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
28816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28817pub const fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
28818    unsafe { transmute(simd_select_bitmask(k, b.as_f32x4(), a.as_f32x4())) }
28819}
28820
28821/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28822///
28823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_pd&expand=446)
28824#[inline]
28825#[target_feature(enable = "avx512f")]
28826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28827#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
28828#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28829pub const fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
28830    unsafe { transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8())) }
28831}
28832
28833/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28834///
28835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_pd&expand=445)
28836#[inline]
28837#[target_feature(enable = "avx512f,avx512vl")]
28838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28839#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
28840#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28841pub const fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
28842    unsafe { transmute(simd_select_bitmask(k, b.as_f64x4(), a.as_f64x4())) }
28843}
28844
28845/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28846///
28847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_pd&expand=443)
28848#[inline]
28849#[target_feature(enable = "avx512f,avx512vl")]
28850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28851#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
28852#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28853pub const fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
28854    unsafe { transmute(simd_select_bitmask(k, b.as_f64x2(), a.as_f64x2())) }
28855}
28856
/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst.
///
/// <div class="warning">Only lowest <strong>4 bits</strong> are used from the immediate (shift at maximum by 60 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi32&expand=245)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        // Like the hardware instruction, only the low 4 bits of the immediate matter.
        let imm8: i32 = IMM8 % 16;
        // In each `simd_shuffle!(a, b, ...)` arm, indices 0-15 select lanes of
        // `a` and indices 16-31 select lanes of `b`; shifting the 32-lane
        // concatenation right by `imm8` lanes takes lanes (16 + imm8)..32 of
        // `b` followed by lanes 0..imm8 of `a`.
        let r: i32x16 = match imm8 {
            0 => simd_shuffle!(
                a,
                b,
                [
                    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                ],
            ),
            1 => simd_shuffle!(
                a,
                b,
                [
                    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
                ],
            ),
            2 => simd_shuffle!(
                a,
                b,
                [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
            ),
            3 => simd_shuffle!(
                a,
                b,
                [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
            ),
            4 => simd_shuffle!(
                a,
                b,
                [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
            ),
            5 => simd_shuffle!(
                a,
                b,
                [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
            ),
            6 => simd_shuffle!(
                a,
                b,
                [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
            ),
            7 => simd_shuffle!(
                a,
                b,
                [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
            ),
            8 => simd_shuffle!(
                a,
                b,
                [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
            ),
            9 => simd_shuffle!(
                a,
                b,
                [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
            ),
            10 => simd_shuffle!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
            11 => simd_shuffle!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
            12 => simd_shuffle!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
            13 => simd_shuffle!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
            14 => simd_shuffle!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
            15 => simd_shuffle!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
            // SAFETY: `static_assert_uimm_bits!` guarantees 0 <= IMM8 < 256,
            // so `IMM8 % 16` can only be 0..=15, all covered above.
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}
28940
28941/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28942///
28943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi32&expand=246)
28944#[inline]
28945#[target_feature(enable = "avx512f")]
28946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28947#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
28948#[rustc_legacy_const_generics(4)]
28949#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28950pub const fn _mm512_mask_alignr_epi32<const IMM8: i32>(
28951    src: __m512i,
28952    k: __mmask16,
28953    a: __m512i,
28954    b: __m512i,
28955) -> __m512i {
28956    unsafe {
28957        static_assert_uimm_bits!(IMM8, 8);
28958        let r = _mm512_alignr_epi32::<IMM8>(a, b);
28959        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
28960    }
28961}
28962
28963/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and stores the low 64 bytes (16 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28964///
28965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi32&expand=247)
28966#[inline]
28967#[target_feature(enable = "avx512f")]
28968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28969#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
28970#[rustc_legacy_const_generics(3)]
28971#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28972pub const fn _mm512_maskz_alignr_epi32<const IMM8: i32>(
28973    k: __mmask16,
28974    a: __m512i,
28975    b: __m512i,
28976) -> __m512i {
28977    unsafe {
28978        static_assert_uimm_bits!(IMM8, 8);
28979        let r = _mm512_alignr_epi32::<IMM8>(a, b);
28980        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
28981    }
28982}
28983
/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst.
///
/// <div class="warning">Only lowest <strong>3 bits</strong> are used from the immediate (shift at maximum by 28 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi32&expand=242)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        // Like the hardware instruction, only the low 3 bits of the immediate matter.
        let imm8: i32 = IMM8 % 8;
        // In each arm, shuffle indices 0-7 select lanes of `a` and 8-15 select
        // lanes of `b`: lanes (8 + imm8)..16 of `b` followed by lanes 0..imm8 of `a`.
        let r: i32x8 = match imm8 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
            1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
            2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
            3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
            4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
            5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
            6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
            7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
            // SAFETY: `static_assert_uimm_bits!` guarantees 0 <= IMM8 < 256,
            // so `IMM8 % 8` can only be 0..=7, all covered above.
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}
29015
29016/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29017///
29018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi32&expand=243)
29019#[inline]
29020#[target_feature(enable = "avx512f,avx512vl")]
29021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29022#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
29023#[rustc_legacy_const_generics(4)]
29024#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29025pub const fn _mm256_mask_alignr_epi32<const IMM8: i32>(
29026    src: __m256i,
29027    k: __mmask8,
29028    a: __m256i,
29029    b: __m256i,
29030) -> __m256i {
29031    unsafe {
29032        static_assert_uimm_bits!(IMM8, 8);
29033        let r = _mm256_alignr_epi32::<IMM8>(a, b);
29034        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
29035    }
29036}
29037
29038/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29039///
29040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi32&expand=244)
29041#[inline]
29042#[target_feature(enable = "avx512f,avx512vl")]
29043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29044#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
29045#[rustc_legacy_const_generics(3)]
29046#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29047pub const fn _mm256_maskz_alignr_epi32<const IMM8: i32>(
29048    k: __mmask8,
29049    a: __m256i,
29050    b: __m256i,
29051) -> __m256i {
29052    unsafe {
29053        static_assert_uimm_bits!(IMM8, 8);
29054        let r = _mm256_alignr_epi32::<IMM8>(a, b);
29055        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
29056    }
29057}
29058
/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst.
///
/// <div class="warning">Only lowest <strong>2 bits</strong> are used from the mask (shift at maximum by 12 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi32&expand=239)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignd
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        // Only the shift count modulo the lane count (4) matters, so reduce
        // IMM8 before dispatching to a constant shuffle pattern.
        let imm8: i32 = IMM8 % 4;
        // In `simd_shuffle!(a, b, ...)` indices 0..=3 select lanes of `a` and
        // 4..=7 select lanes of `b`. A shift by `n` lanes yields b[n..4]
        // followed by a[0..n] (the concatenated a:b value shifted right).
        let r: i32x4 = match imm8 {
            0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
            1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
            2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
            3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
            // SAFETY: IMM8 is asserted non-negative (8-bit unsigned immediate)
            // above, so `IMM8 % 4` is always in 0..=3 and this arm is dead.
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}
29086
29087/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29088///
29089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi32&expand=240)
29090#[inline]
29091#[target_feature(enable = "avx512f,avx512vl")]
29092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29093#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
29094#[rustc_legacy_const_generics(4)]
29095#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29096pub const fn _mm_mask_alignr_epi32<const IMM8: i32>(
29097    src: __m128i,
29098    k: __mmask8,
29099    a: __m128i,
29100    b: __m128i,
29101) -> __m128i {
29102    unsafe {
29103        static_assert_uimm_bits!(IMM8, 8);
29104        let r = _mm_alignr_epi32::<IMM8>(a, b);
29105        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
29106    }
29107}
29108
29109/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29110///
29111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi32&expand=241)
29112#[inline]
29113#[target_feature(enable = "avx512f,avx512vl")]
29114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29115#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
29116#[rustc_legacy_const_generics(3)]
29117#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29118pub const fn _mm_maskz_alignr_epi32<const IMM8: i32>(
29119    k: __mmask8,
29120    a: __m128i,
29121    b: __m128i,
29122) -> __m128i {
29123    unsafe {
29124        static_assert_uimm_bits!(IMM8, 8);
29125        let r = _mm_alignr_epi32::<IMM8>(a, b);
29126        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
29127    }
29128}
29129
/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst.
///
/// <div class="warning">Only lowest <strong>3 bits</strong> are used from the mask (shift at maximum by 56 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi64&expand=254)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Only the shift count modulo the lane count (8) matters.
        let imm8: i32 = IMM8 % 8;
        // In `simd_shuffle!(a, b, ...)` indices 0..=7 select lanes of `a` and
        // 8..=15 select lanes of `b`. A shift by `n` lanes yields b[n..8]
        // followed by a[0..n] (the concatenated a:b value shifted right).
        let r: i64x8 = match imm8 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
            1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
            2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
            3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
            4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
            5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
            6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
            7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
            // SAFETY: IMM8 is asserted non-negative (8-bit unsigned immediate)
            // above, so `IMM8 % 8` is always in 0..=7 and this arm is dead.
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}
29159
29160/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29161///
29162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi64&expand=255)
29163#[inline]
29164#[target_feature(enable = "avx512f")]
29165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29166#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29167#[rustc_legacy_const_generics(4)]
29168#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29169pub const fn _mm512_mask_alignr_epi64<const IMM8: i32>(
29170    src: __m512i,
29171    k: __mmask8,
29172    a: __m512i,
29173    b: __m512i,
29174) -> __m512i {
29175    unsafe {
29176        static_assert_uimm_bits!(IMM8, 8);
29177        let r = _mm512_alignr_epi64::<IMM8>(a, b);
29178        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
29179    }
29180}
29181
29182/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and stores the low 64 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29183///
29184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi64&expand=256)
29185#[inline]
29186#[target_feature(enable = "avx512f")]
29187#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29188#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29189#[rustc_legacy_const_generics(3)]
29190#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29191pub const fn _mm512_maskz_alignr_epi64<const IMM8: i32>(
29192    k: __mmask8,
29193    a: __m512i,
29194    b: __m512i,
29195) -> __m512i {
29196    unsafe {
29197        static_assert_uimm_bits!(IMM8, 8);
29198        let r = _mm512_alignr_epi64::<IMM8>(a, b);
29199        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
29200    }
29201}
29202
/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst.
///
/// <div class="warning">Only lowest <strong>2 bits</strong> are used from the mask (shift at maximum by 24 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi64&expand=251)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Only the shift count modulo the lane count (4) matters.
        let imm8: i32 = IMM8 % 4;
        // In `simd_shuffle!(a, b, ...)` indices 0..=3 select lanes of `a` and
        // 4..=7 select lanes of `b`. A shift by `n` lanes yields b[n..4]
        // followed by a[0..n] (the concatenated a:b value shifted right).
        let r: i64x4 = match imm8 {
            0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
            1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
            2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
            3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
            // SAFETY: IMM8 is asserted non-negative (8-bit unsigned immediate)
            // above, so `IMM8 % 4` is always in 0..=3 and this arm is dead.
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}
29228
29229/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29230///
29231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi64&expand=252)
29232#[inline]
29233#[target_feature(enable = "avx512f,avx512vl")]
29234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29235#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29236#[rustc_legacy_const_generics(4)]
29237#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29238pub const fn _mm256_mask_alignr_epi64<const IMM8: i32>(
29239    src: __m256i,
29240    k: __mmask8,
29241    a: __m256i,
29242    b: __m256i,
29243) -> __m256i {
29244    unsafe {
29245        static_assert_uimm_bits!(IMM8, 8);
29246        let r = _mm256_alignr_epi64::<IMM8>(a, b);
29247        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
29248    }
29249}
29250
29251/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29252///
29253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi64&expand=253)
29254#[inline]
29255#[target_feature(enable = "avx512f,avx512vl")]
29256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29257#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29258#[rustc_legacy_const_generics(3)]
29259#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29260pub const fn _mm256_maskz_alignr_epi64<const IMM8: i32>(
29261    k: __mmask8,
29262    a: __m256i,
29263    b: __m256i,
29264) -> __m256i {
29265    unsafe {
29266        static_assert_uimm_bits!(IMM8, 8);
29267        let r = _mm256_alignr_epi64::<IMM8>(a, b);
29268        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
29269    }
29270}
29271
/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst.
///
/// <div class="warning">Only lowest <strong>bit</strong> is used from the mask (shift at maximum by 8 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi64&expand=248)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignq
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Only the shift count modulo the lane count (2) matters.
        let imm8: i32 = IMM8 % 2;
        // In `simd_shuffle!(a, b, ...)` indices 0..=1 select lanes of `a` and
        // 2..=3 select lanes of `b`; shift 0 returns `b`, shift 1 returns
        // b[1] followed by a[0].
        let r: i64x2 = match imm8 {
            0 => simd_shuffle!(a, b, [2, 3]),
            1 => simd_shuffle!(a, b, [3, 0]),
            // SAFETY: IMM8 is asserted non-negative (8-bit unsigned immediate)
            // above, so `IMM8 % 2` is always 0 or 1 and this arm is dead.
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}
29295
29296/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29297///
29298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi64&expand=249)
29299#[inline]
29300#[target_feature(enable = "avx512f,avx512vl")]
29301#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29302#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29303#[rustc_legacy_const_generics(4)]
29304#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29305pub const fn _mm_mask_alignr_epi64<const IMM8: i32>(
29306    src: __m128i,
29307    k: __mmask8,
29308    a: __m128i,
29309    b: __m128i,
29310) -> __m128i {
29311    unsafe {
29312        static_assert_uimm_bits!(IMM8, 8);
29313        let r = _mm_alignr_epi64::<IMM8>(a, b);
29314        transmute(simd_select_bitmask(k, r.as_i64x2(), src.as_i64x2()))
29315    }
29316}
29317
29318/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29319///
29320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi64&expand=250)
29321#[inline]
29322#[target_feature(enable = "avx512f,avx512vl")]
29323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29324#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29325#[rustc_legacy_const_generics(3)]
29326#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29327pub const fn _mm_maskz_alignr_epi64<const IMM8: i32>(
29328    k: __mmask8,
29329    a: __m128i,
29330    b: __m128i,
29331) -> __m128i {
29332    unsafe {
29333        static_assert_uimm_bits!(IMM8, 8);
29334        let r = _mm_alignr_epi64::<IMM8>(a, b);
29335        transmute(simd_select_bitmask(k, r.as_i64x2(), i64x2::ZERO))
29336    }
29337}
29338
29339/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
29340///
29341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi32&expand=272)
29342#[inline]
29343#[target_feature(enable = "avx512f")]
29344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29345#[cfg_attr(test, assert_instr(vpandq))] //should be vpandd, but generate vpandq
29346#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29347pub const fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
29348    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
29349}
29350
29351/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29352///
29353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi32&expand=273)
29354#[inline]
29355#[target_feature(enable = "avx512f")]
29356#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29357#[cfg_attr(test, assert_instr(vpandd))]
29358#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29359pub const fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29360    unsafe {
29361        let and = _mm512_and_epi32(a, b).as_i32x16();
29362        transmute(simd_select_bitmask(k, and, src.as_i32x16()))
29363    }
29364}
29365
29366/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29367///
29368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi32&expand=274)
29369#[inline]
29370#[target_feature(enable = "avx512f")]
29371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29372#[cfg_attr(test, assert_instr(vpandd))]
29373#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29374pub const fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29375    unsafe {
29376        let and = _mm512_and_epi32(a, b).as_i32x16();
29377        transmute(simd_select_bitmask(k, and, i32x16::ZERO))
29378    }
29379}
29380
29381/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29382///
29383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi32&expand=270)
29384#[inline]
29385#[target_feature(enable = "avx512f,avx512vl")]
29386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29387#[cfg_attr(test, assert_instr(vpandd))]
29388#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29389pub const fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29390    unsafe {
29391        let and = simd_and(a.as_i32x8(), b.as_i32x8());
29392        transmute(simd_select_bitmask(k, and, src.as_i32x8()))
29393    }
29394}
29395
29396/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29397///
29398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi32&expand=271)
29399#[inline]
29400#[target_feature(enable = "avx512f,avx512vl")]
29401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29402#[cfg_attr(test, assert_instr(vpandd))]
29403#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29404pub const fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29405    unsafe {
29406        let and = simd_and(a.as_i32x8(), b.as_i32x8());
29407        transmute(simd_select_bitmask(k, and, i32x8::ZERO))
29408    }
29409}
29410
29411/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29412///
29413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi32&expand=268)
29414#[inline]
29415#[target_feature(enable = "avx512f,avx512vl")]
29416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29417#[cfg_attr(test, assert_instr(vpandd))]
29418#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29419pub const fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29420    unsafe {
29421        let and = simd_and(a.as_i32x4(), b.as_i32x4());
29422        transmute(simd_select_bitmask(k, and, src.as_i32x4()))
29423    }
29424}
29425
29426/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29427///
29428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi32&expand=269)
29429#[inline]
29430#[target_feature(enable = "avx512f,avx512vl")]
29431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29432#[cfg_attr(test, assert_instr(vpandd))]
29433#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29434pub const fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29435    unsafe {
29436        let and = simd_and(a.as_i32x4(), b.as_i32x4());
29437        transmute(simd_select_bitmask(k, and, i32x4::ZERO))
29438    }
29439}
29440
29441/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
29442///
29443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi64&expand=279)
29444#[inline]
29445#[target_feature(enable = "avx512f")]
29446#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29447#[cfg_attr(test, assert_instr(vpandq))]
29448#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29449pub const fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
29450    unsafe { transmute(simd_and(a.as_i64x8(), b.as_i64x8())) }
29451}
29452
29453/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29454///
29455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi64&expand=280)
29456#[inline]
29457#[target_feature(enable = "avx512f")]
29458#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29459#[cfg_attr(test, assert_instr(vpandq))]
29460#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29461pub const fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29462    unsafe {
29463        let and = _mm512_and_epi64(a, b).as_i64x8();
29464        transmute(simd_select_bitmask(k, and, src.as_i64x8()))
29465    }
29466}
29467
29468/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29469///
29470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi64&expand=281)
29471#[inline]
29472#[target_feature(enable = "avx512f")]
29473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29474#[cfg_attr(test, assert_instr(vpandq))]
29475#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29476pub const fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29477    unsafe {
29478        let and = _mm512_and_epi64(a, b).as_i64x8();
29479        transmute(simd_select_bitmask(k, and, i64x8::ZERO))
29480    }
29481}
29482
29483/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29484///
29485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi64&expand=277)
29486#[inline]
29487#[target_feature(enable = "avx512f,avx512vl")]
29488#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29489#[cfg_attr(test, assert_instr(vpandq))]
29490#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29491pub const fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29492    unsafe {
29493        let and = simd_and(a.as_i64x4(), b.as_i64x4());
29494        transmute(simd_select_bitmask(k, and, src.as_i64x4()))
29495    }
29496}
29497
29498/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29499///
29500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi64&expand=278)
29501#[inline]
29502#[target_feature(enable = "avx512f,avx512vl")]
29503#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29504#[cfg_attr(test, assert_instr(vpandq))]
29505#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29506pub const fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29507    unsafe {
29508        let and = simd_and(a.as_i64x4(), b.as_i64x4());
29509        transmute(simd_select_bitmask(k, and, i64x4::ZERO))
29510    }
29511}
29512
29513/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29514///
29515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi64&expand=275)
29516#[inline]
29517#[target_feature(enable = "avx512f,avx512vl")]
29518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29519#[cfg_attr(test, assert_instr(vpandq))]
29520#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29521pub const fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29522    unsafe {
29523        let and = simd_and(a.as_i64x2(), b.as_i64x2());
29524        transmute(simd_select_bitmask(k, and, src.as_i64x2()))
29525    }
29526}
29527
29528/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29529///
29530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi64&expand=276)
29531#[inline]
29532#[target_feature(enable = "avx512f,avx512vl")]
29533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29534#[cfg_attr(test, assert_instr(vpandq))]
29535#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29536pub const fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29537    unsafe {
29538        let and = simd_and(a.as_i64x2(), b.as_i64x2());
29539        transmute(simd_select_bitmask(k, and, i64x2::ZERO))
29540    }
29541}
29542
29543/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
29544///
29545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_si512&expand=302)
29546#[inline]
29547#[target_feature(enable = "avx512f")]
29548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29549#[cfg_attr(test, assert_instr(vpandq))]
29550#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29551pub const fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
29552    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
29553}
29554
29555/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
29556///
29557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi32&expand=4042)
29558#[inline]
29559#[target_feature(enable = "avx512f")]
29560#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29561#[cfg_attr(test, assert_instr(vporq))]
29562#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29563pub const fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
29564    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
29565}
29566
29567/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29568///
29569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi32&expand=4040)
29570#[inline]
29571#[target_feature(enable = "avx512f")]
29572#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29573#[cfg_attr(test, assert_instr(vpord))]
29574#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29575pub const fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29576    unsafe {
29577        let or = _mm512_or_epi32(a, b).as_i32x16();
29578        transmute(simd_select_bitmask(k, or, src.as_i32x16()))
29579    }
29580}
29581
29582/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29583///
29584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi32&expand=4041)
29585#[inline]
29586#[target_feature(enable = "avx512f")]
29587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29588#[cfg_attr(test, assert_instr(vpord))]
29589#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29590pub const fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29591    unsafe {
29592        let or = _mm512_or_epi32(a, b).as_i32x16();
29593        transmute(simd_select_bitmask(k, or, i32x16::ZERO))
29594    }
29595}
29596
29597/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
29598///
29599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi32&expand=4039)
29600#[inline]
29601#[target_feature(enable = "avx512f,avx512vl")]
29602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29603#[cfg_attr(test, assert_instr(vor))] //should be vpord
29604#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29605pub const fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i {
29606    unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
29607}
29608
29609/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29610///
29611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi32&expand=4037)
29612#[inline]
29613#[target_feature(enable = "avx512f,avx512vl")]
29614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29615#[cfg_attr(test, assert_instr(vpord))]
29616#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29617pub const fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29618    unsafe {
29619        let or = _mm256_or_epi32(a, b).as_i32x8();
29620        transmute(simd_select_bitmask(k, or, src.as_i32x8()))
29621    }
29622}
29623
29624/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29625///
29626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi32&expand=4038)
29627#[inline]
29628#[target_feature(enable = "avx512f,avx512vl")]
29629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29630#[cfg_attr(test, assert_instr(vpord))]
29631#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29632pub const fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29633    unsafe {
29634        let or = _mm256_or_epi32(a, b).as_i32x8();
29635        transmute(simd_select_bitmask(k, or, i32x8::ZERO))
29636    }
29637}
29638
29639/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
29640///
29641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi32&expand=4036)
29642#[inline]
29643#[target_feature(enable = "avx512f,avx512vl")]
29644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29645#[cfg_attr(test, assert_instr(vor))] //should be vpord
29646#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29647pub const fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
29648    unsafe { transmute(simd_or(a.as_i32x4(), b.as_i32x4())) }
29649}
29650
29651/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29652///
29653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi32&expand=4034)
29654#[inline]
29655#[target_feature(enable = "avx512f,avx512vl")]
29656#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29657#[cfg_attr(test, assert_instr(vpord))]
29658#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29659pub const fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29660    unsafe {
29661        let or = _mm_or_epi32(a, b).as_i32x4();
29662        transmute(simd_select_bitmask(k, or, src.as_i32x4()))
29663    }
29664}
29665
29666/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29667///
29668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi32&expand=4035)
29669#[inline]
29670#[target_feature(enable = "avx512f,avx512vl")]
29671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29672#[cfg_attr(test, assert_instr(vpord))]
29673#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29674pub const fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29675    unsafe {
29676        let or = _mm_or_epi32(a, b).as_i32x4();
29677        transmute(simd_select_bitmask(k, or, i32x4::ZERO))
29678    }
29679}
29680
/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi64&expand=4051)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vporq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_or(a.as_i64x8(), b.as_i64x8())) }
}
29692
29693/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29694///
29695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi64&expand=4049)
29696#[inline]
29697#[target_feature(enable = "avx512f")]
29698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29699#[cfg_attr(test, assert_instr(vporq))]
29700#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29701pub const fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29702    unsafe {
29703        let or = _mm512_or_epi64(a, b).as_i64x8();
29704        transmute(simd_select_bitmask(k, or, src.as_i64x8()))
29705    }
29706}
29707
29708/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29709///
29710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi64&expand=4050)
29711#[inline]
29712#[target_feature(enable = "avx512f")]
29713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29714#[cfg_attr(test, assert_instr(vporq))]
29715#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29716pub const fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29717    unsafe {
29718        let or = _mm512_or_epi64(a, b).as_i64x8();
29719        transmute(simd_select_bitmask(k, or, i64x8::ZERO))
29720    }
29721}
29722
/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi64&expand=4048)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vor))] //should be vporq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_or(a.as_i64x4(), b.as_i64x4())) }
}
29734
29735/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29736///
29737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi64&expand=4046)
29738#[inline]
29739#[target_feature(enable = "avx512f,avx512vl")]
29740#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29741#[cfg_attr(test, assert_instr(vporq))]
29742#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29743pub const fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29744    unsafe {
29745        let or = _mm256_or_epi64(a, b).as_i64x4();
29746        transmute(simd_select_bitmask(k, or, src.as_i64x4()))
29747    }
29748}
29749
29750/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29751///
29752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi64&expand=4047)
29753#[inline]
29754#[target_feature(enable = "avx512f,avx512vl")]
29755#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29756#[cfg_attr(test, assert_instr(vporq))]
29757#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29758pub const fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29759    unsafe {
29760        let or = _mm256_or_epi64(a, b).as_i64x4();
29761        transmute(simd_select_bitmask(k, or, i64x4::ZERO))
29762    }
29763}
29764
/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi64&expand=4045)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vor))] //should be vporq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_or(a.as_i64x2(), b.as_i64x2())) }
}
29776
29777/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29778///
29779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi64&expand=4043)
29780#[inline]
29781#[target_feature(enable = "avx512f,avx512vl")]
29782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29783#[cfg_attr(test, assert_instr(vporq))]
29784#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29785pub const fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29786    unsafe {
29787        let or = _mm_or_epi64(a, b).as_i64x2();
29788        transmute(simd_select_bitmask(k, or, src.as_i64x2()))
29789    }
29790}
29791
29792/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29793///
29794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi64&expand=4044)
29795#[inline]
29796#[target_feature(enable = "avx512f,avx512vl")]
29797#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29798#[cfg_attr(test, assert_instr(vporq))]
29799#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29800pub const fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29801    unsafe {
29802        let or = _mm_or_epi64(a, b).as_i64x2();
29803        transmute(simd_select_bitmask(k, or, i64x2::ZERO))
29804    }
29805}
29806
29807/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
29808///
29809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_si512&expand=4072)
29810#[inline]
29811#[target_feature(enable = "avx512f")]
29812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29813#[cfg_attr(test, assert_instr(vporq))]
29814#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29815pub const fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
29816    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
29817}
29818
29819/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
29820///
29821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi32&expand=6142)
29822#[inline]
29823#[target_feature(enable = "avx512f")]
29824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29825#[cfg_attr(test, assert_instr(vpxorq))] //should be vpxord
29826#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29827pub const fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
29828    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
29829}
29830
29831/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29832///
29833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi32&expand=6140)
29834#[inline]
29835#[target_feature(enable = "avx512f")]
29836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29837#[cfg_attr(test, assert_instr(vpxord))]
29838#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29839pub const fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29840    unsafe {
29841        let xor = _mm512_xor_epi32(a, b).as_i32x16();
29842        transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
29843    }
29844}
29845
29846/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29847///
29848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi32&expand=6141)
29849#[inline]
29850#[target_feature(enable = "avx512f")]
29851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29852#[cfg_attr(test, assert_instr(vpxord))]
29853#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29854pub const fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29855    unsafe {
29856        let xor = _mm512_xor_epi32(a, b).as_i32x16();
29857        transmute(simd_select_bitmask(k, xor, i32x16::ZERO))
29858    }
29859}
29860
29861/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
29862///
29863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi32&expand=6139)
29864#[inline]
29865#[target_feature(enable = "avx512f,avx512vl")]
29866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29867#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
29868#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29869pub const fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i {
29870    unsafe { transmute(simd_xor(a.as_i32x8(), b.as_i32x8())) }
29871}
29872
29873/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29874///
29875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi32&expand=6137)
29876#[inline]
29877#[target_feature(enable = "avx512f,avx512vl")]
29878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29879#[cfg_attr(test, assert_instr(vpxord))]
29880#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29881pub const fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29882    unsafe {
29883        let xor = _mm256_xor_epi32(a, b).as_i32x8();
29884        transmute(simd_select_bitmask(k, xor, src.as_i32x8()))
29885    }
29886}
29887
29888/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29889///
29890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi32&expand=6138)
29891#[inline]
29892#[target_feature(enable = "avx512f,avx512vl")]
29893#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29894#[cfg_attr(test, assert_instr(vpxord))]
29895#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29896pub const fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29897    unsafe {
29898        let xor = _mm256_xor_epi32(a, b).as_i32x8();
29899        transmute(simd_select_bitmask(k, xor, i32x8::ZERO))
29900    }
29901}
29902
29903/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
29904///
29905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi32&expand=6136)
29906#[inline]
29907#[target_feature(enable = "avx512f,avx512vl")]
29908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29909#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
29910#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29911pub const fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i {
29912    unsafe { transmute(simd_xor(a.as_i32x4(), b.as_i32x4())) }
29913}
29914
29915/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29916///
29917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi32&expand=6134)
29918#[inline]
29919#[target_feature(enable = "avx512f,avx512vl")]
29920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29921#[cfg_attr(test, assert_instr(vpxord))]
29922#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29923pub const fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29924    unsafe {
29925        let xor = _mm_xor_epi32(a, b).as_i32x4();
29926        transmute(simd_select_bitmask(k, xor, src.as_i32x4()))
29927    }
29928}
29929
29930/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29931///
29932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi32&expand=6135)
29933#[inline]
29934#[target_feature(enable = "avx512f,avx512vl")]
29935#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29936#[cfg_attr(test, assert_instr(vpxord))]
29937#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29938pub const fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29939    unsafe {
29940        let xor = _mm_xor_epi32(a, b).as_i32x4();
29941        transmute(simd_select_bitmask(k, xor, i32x4::ZERO))
29942    }
29943}
29944
29945/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
29946///
29947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi64&expand=6151)
29948#[inline]
29949#[target_feature(enable = "avx512f")]
29950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29951#[cfg_attr(test, assert_instr(vpxorq))]
29952#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29953pub const fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
29954    unsafe { transmute(simd_xor(a.as_i64x8(), b.as_i64x8())) }
29955}
29956
29957/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29958///
29959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi64&expand=6149)
29960#[inline]
29961#[target_feature(enable = "avx512f")]
29962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29963#[cfg_attr(test, assert_instr(vpxorq))]
29964#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29965pub const fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29966    unsafe {
29967        let xor = _mm512_xor_epi64(a, b).as_i64x8();
29968        transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
29969    }
29970}
29971
29972/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29973///
29974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi64&expand=6150)
29975#[inline]
29976#[target_feature(enable = "avx512f")]
29977#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29978#[cfg_attr(test, assert_instr(vpxorq))]
29979#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29980pub const fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29981    unsafe {
29982        let xor = _mm512_xor_epi64(a, b).as_i64x8();
29983        transmute(simd_select_bitmask(k, xor, i64x8::ZERO))
29984    }
29985}
29986
29987/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
29988///
29989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi64&expand=6148)
29990#[inline]
29991#[target_feature(enable = "avx512f,avx512vl")]
29992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29993#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
29994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29995pub const fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i {
29996    unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
29997}
29998
29999/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30000///
30001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi64&expand=6146)
30002#[inline]
30003#[target_feature(enable = "avx512f,avx512vl")]
30004#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30005#[cfg_attr(test, assert_instr(vpxorq))]
30006#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30007pub const fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
30008    unsafe {
30009        let xor = _mm256_xor_epi64(a, b).as_i64x4();
30010        transmute(simd_select_bitmask(k, xor, src.as_i64x4()))
30011    }
30012}
30013
30014/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30015///
30016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi64&expand=6147)
30017#[inline]
30018#[target_feature(enable = "avx512f,avx512vl")]
30019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30020#[cfg_attr(test, assert_instr(vpxorq))]
30021#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30022pub const fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
30023    unsafe {
30024        let xor = _mm256_xor_epi64(a, b).as_i64x4();
30025        transmute(simd_select_bitmask(k, xor, i64x4::ZERO))
30026    }
30027}
30028
30029/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
30030///
30031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi64&expand=6145)
30032#[inline]
30033#[target_feature(enable = "avx512f,avx512vl")]
30034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30035#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
30036#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30037pub const fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i {
30038    unsafe { transmute(simd_xor(a.as_i64x2(), b.as_i64x2())) }
30039}
30040
30041/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30042///
30043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi64&expand=6143)
30044#[inline]
30045#[target_feature(enable = "avx512f,avx512vl")]
30046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30047#[cfg_attr(test, assert_instr(vpxorq))]
30048#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30049pub const fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
30050    unsafe {
30051        let xor = _mm_xor_epi64(a, b).as_i64x2();
30052        transmute(simd_select_bitmask(k, xor, src.as_i64x2()))
30053    }
30054}
30055
30056/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30057///
30058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi64&expand=6144)
30059#[inline]
30060#[target_feature(enable = "avx512f,avx512vl")]
30061#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30062#[cfg_attr(test, assert_instr(vpxorq))]
30063#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30064pub const fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
30065    unsafe {
30066        let xor = _mm_xor_epi64(a, b).as_i64x2();
30067        transmute(simd_select_bitmask(k, xor, i64x2::ZERO))
30068    }
30069}
30070
30071/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
30072///
30073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_si512&expand=6172)
30074#[inline]
30075#[target_feature(enable = "avx512f")]
30076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30077#[cfg_attr(test, assert_instr(vpxorq))]
30078#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30079pub const fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
30080    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
30081}
30082
30083/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst.
30084///
30085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi32&expand=310)
30086#[inline]
30087#[target_feature(enable = "avx512f")]
30088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30089#[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd
30090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30091pub const fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
30092    _mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b)
30093}
30094
30095/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30096///
30097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi32&expand=311)
30098#[inline]
30099#[target_feature(enable = "avx512f")]
30100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30101#[cfg_attr(test, assert_instr(vpandnd))]
30102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30103pub const fn _mm512_mask_andnot_epi32(
30104    src: __m512i,
30105    k: __mmask16,
30106    a: __m512i,
30107    b: __m512i,
30108) -> __m512i {
30109    unsafe {
30110        let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
30111        transmute(simd_select_bitmask(k, andnot, src.as_i32x16()))
30112    }
30113}
30114
30115/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30116///
30117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi32&expand=312)
30118#[inline]
30119#[target_feature(enable = "avx512f")]
30120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30121#[cfg_attr(test, assert_instr(vpandnd))]
30122#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30123pub const fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
30124    unsafe {
30125        let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
30126        transmute(simd_select_bitmask(k, andnot, i32x16::ZERO))
30127    }
30128}
30129
30130/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30131///
30132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi32&expand=308)
30133#[inline]
30134#[target_feature(enable = "avx512f,avx512vl")]
30135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30136#[cfg_attr(test, assert_instr(vpandnd))]
30137#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30138pub const fn _mm256_mask_andnot_epi32(
30139    src: __m256i,
30140    k: __mmask8,
30141    a: __m256i,
30142    b: __m256i,
30143) -> __m256i {
30144    unsafe {
30145        let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
30146        let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
30147        transmute(simd_select_bitmask(k, andnot, src.as_i32x8()))
30148    }
30149}
30150
30151/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30152///
30153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi32&expand=309)
30154#[inline]
30155#[target_feature(enable = "avx512f,avx512vl")]
30156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30157#[cfg_attr(test, assert_instr(vpandnd))]
30158#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30159pub const fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
30160    unsafe {
30161        let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
30162        let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
30163        transmute(simd_select_bitmask(k, andnot, i32x8::ZERO))
30164    }
30165}
30166
30167/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30168///
30169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi32&expand=306)
30170#[inline]
30171#[target_feature(enable = "avx512f,avx512vl")]
30172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30173#[cfg_attr(test, assert_instr(vpandnd))]
30174#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30175pub const fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
30176    unsafe {
30177        let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
30178        let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
30179        transmute(simd_select_bitmask(k, andnot, src.as_i32x4()))
30180    }
30181}
30182
30183/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30184///
30185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi32&expand=307)
30186#[inline]
30187#[target_feature(enable = "avx512f,avx512vl")]
30188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30189#[cfg_attr(test, assert_instr(vpandnd))]
30190#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30191pub const fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
30192    unsafe {
30193        let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
30194        let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
30195        transmute(simd_select_bitmask(k, andnot, i32x4::ZERO))
30196    }
30197}
30198
/// Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in a and then AND with b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi64&expand=317)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
}
30210
/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi64&expand=318)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_andnot_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
        // Writemask: keep `andnot` in lanes whose bit in `k` is set, else copy from `src`.
        transmute(simd_select_bitmask(k, andnot, src.as_i64x8()))
    }
}
30230
/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi64&expand=319)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
        // Zeromask: lanes whose bit in `k` is clear become 0.
        transmute(simd_select_bitmask(k, andnot, i64x8::ZERO))
    }
}
30245
/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi64&expand=315)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_andnot_epi64(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        // NOT a == a XOR all-ones.
        let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
        let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
        // Writemask: keep `andnot` in lanes whose bit in `k` is set, else copy from `src`.
        transmute(simd_select_bitmask(k, andnot, src.as_i64x4()))
    }
}
30266
/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi64&expand=316)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // NOT a == a XOR all-ones.
        let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
        let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
        // Zeromask: lanes whose bit in `k` is clear become 0.
        transmute(simd_select_bitmask(k, andnot, i64x4::ZERO))
    }
}
30282
/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi64&expand=313)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // NOT a == a XOR all-ones.
        let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
        let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
        // Writemask: keep `andnot` in lanes whose bit in `k` is set, else copy from `src`.
        transmute(simd_select_bitmask(k, andnot, src.as_i64x2()))
    }
}
30298
/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi64&expand=314)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // NOT a == a XOR all-ones.
        let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
        let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
        // Zeromask: lanes whose bit in `k` is clear become 0.
        transmute(simd_select_bitmask(k, andnot, i64x2::ZERO))
    }
}
30314
/// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_si512&expand=340)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
    // Bitwise op, so lane width is irrelevant; reuse the 64-bit-lane helpers.
    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
}
30326
/// Convert 16-bit mask a into an integer value, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask16_u32)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _cvtmask16_u32(a: __mmask16) -> u32 {
    // Zero-extending integer cast; the mask is represented as a plain integer.
    a as u32
}
30337
/// Convert 32-bit integer value a to an 16-bit mask and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask16)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _cvtu32_mask16(a: u32) -> __mmask16 {
    // Truncating cast: only the low 16 bits of `a` are kept.
    a as __mmask16
}
30348
/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kand_mask16&expand=3212)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(and))] // generates a normal `and` instruction instead of `kandw`
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    a & b
}
30360
/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kand&expand=3210)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(and))] // generates a normal `and` instruction instead of `kandw`
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
    a & b
}
30372
/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kor_mask16&expand=3239)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(or))] // generates a normal `or` instruction instead of `korw`
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    a | b
}
30384
/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kor&expand=3237)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(or))] // generates a normal `or` instruction instead of `korw`
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
    a | b
}
30396
/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxor_mask16&expand=3291)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generates a normal `xor` instruction instead of `kxorw`
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    a ^ b
}
30408
/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxor&expand=3289)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generates a normal `xor` instruction instead of `kxorw`
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
    a ^ b
}
30420
30421/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
30422///
30423/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=knot_mask16&expand=3233)
30424#[inline]
30425#[target_feature(enable = "avx512f")]
30426#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30427#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30428pub const fn _knot_mask16(a: __mmask16) -> __mmask16 {
30429    a ^ 0b11111111_11111111
30430}
30431
30432/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
30433///
30434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_knot&expand=3231)
30435#[inline]
30436#[target_feature(enable = "avx512f")]
30437#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30438#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30439pub const fn _mm512_knot(a: __mmask16) -> __mmask16 {
30440    a ^ 0b11111111_11111111
30441}
30442
/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kandn_mask16&expand=3218)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(not))] // generates normal `not`/`and` code instead of `kandnw`
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_kand(_mm512_knot(a), b)
}
30454
/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kandn&expand=3216)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(not))] // generates normal `not`/`and` code instead of `kandnw`
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_kand(_mm512_knot(a), b)
}
30466
/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxnor_mask16&expand=3285)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generates normal `xor`/`not` code instead of `kxnorw`
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_knot(_mm512_kxor(a, b))
}
30478
/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxnor&expand=3283)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generates normal `xor`/`not` code instead of `kxnorw`
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_knot(_mm512_kxor(a, b))
}
30490
30491/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
30492/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
30493///
30494/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask16_u8)
30495#[inline]
30496#[target_feature(enable = "avx512f")]
30497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30498#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30499pub const unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
30500    let tmp = _kor_mask16(a, b);
30501    *all_ones = (tmp == 0xffff) as u8;
30502    (tmp == 0) as u8
30503}
30504
30505/// Compute the bitwise OR of 16-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
30506/// store 0 in dst.
30507///
30508/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask16_u8)
30509#[inline]
30510#[target_feature(enable = "avx512f")]
30511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30512#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30513pub const fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
30514    (_kor_mask16(a, b) == 0xffff) as u8
30515}
30516
30517/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
30518/// store 0 in dst.
30519///
30520/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask16_u8)
30521#[inline]
30522#[target_feature(enable = "avx512f")]
30523#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30524#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30525pub const fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
30526    (_kor_mask16(a, b) == 0) as u8
30527}
30528
30529/// Shift 16-bit mask a left by count bits while shifting in zeros, and store the result in dst.
30530///
30531/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask16)
30532#[inline]
30533#[target_feature(enable = "avx512f")]
30534#[rustc_legacy_const_generics(1)]
30535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30536#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30537pub const fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
30538    a.unbounded_shl(COUNT)
30539}
30540
30541/// Shift 16-bit mask a right by count bits while shifting in zeros, and store the result in dst.
30542///
30543/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask16)
30544#[inline]
30545#[target_feature(enable = "avx512f")]
30546#[rustc_legacy_const_generics(1)]
30547#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30549pub const fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
30550    a.unbounded_shr(COUNT)
30551}
30552
/// Load 16-bit mask from memory
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask16)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
    // Plain dereference; caller must guarantee `mem_addr` is valid and aligned.
    *mem_addr
}
30563
/// Store 16-bit mask to memory
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask16)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
    // Plain write; caller must guarantee `mem_addr` is valid and aligned.
    *mem_addr = a;
}
30574
/// Copy 16-bit mask a to k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_kmov&expand=3228)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generates a normal `mov` instead of `kmovw`
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kmov(a: __mmask16) -> __mmask16 {
    // Masks are plain integers here, so a copy is the identity function.
    a
}
30586
/// Converts integer mask into bitmask, storing the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_int2mask&expand=3189)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_int2mask(mask: i32) -> __mmask16 {
    // Truncating cast: only the low 16 bits of `mask` are kept.
    mask as u16
}
30597
/// Converts bit mask k1 into an integer value, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2int&expand=3544)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generates a normal `mov` instead of `kmovw`
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask2int(k1: __mmask16) -> i32 {
    // Zero-extending cast from u16, so the result is always non-negative.
    k1 as i32
}
30609
/// Unpack and interleave 8 bits from masks a and b, and store the 16-bit result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackb&expand=3280)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generates normal integer code instead of `kunpckbw`
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
    // Result: low byte of `a` in bits 15:8, low byte of `b` in bits 7:0.
    ((a & 0xff) << 8) | (b & 0xff)
}
30621
30622/// Performs bitwise OR between k1 and k2, storing the result in dst. CF flag is set if dst consists of all 1's.
30623///
30624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestc&expand=3247)
30625#[inline]
30626#[target_feature(enable = "avx512f")]
30627#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30628#[cfg_attr(test, assert_instr(cmp))] // generate normal and code instead of kortestw
30629#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30630pub const fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
30631    let r = (a | b) == 0b11111111_11111111;
30632    r as i32
30633}
30634
30635/// Performs bitwise OR between k1 and k2, storing the result in dst. ZF flag is set if dst is 0.
30636///
30637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestz)
30638#[inline]
30639#[target_feature(enable = "avx512f")]
30640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30641#[cfg_attr(test, assert_instr(xor))] // generate normal and code instead of kortestw
30642#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30643pub const fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
30644    let r = (a | b) == 0;
30645    r as i32
30646}
30647
/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi32_mask&expand=5890)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    // Per lane: mask bit = ((a AND b) != 0).
    let and = _mm512_and_epi32(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpneq_epi32_mask(and, zero)
}
30661
/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi32_mask&expand=5889)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // Per lane: mask bit = ((a AND b) != 0); lanes whose bit in `k` is clear yield 0.
    let and = _mm512_and_epi32(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi32_mask(k, and, zero)
}
30675
/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi32_mask&expand=5888)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Per lane: mask bit = ((a AND b) != 0).
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpneq_epi32_mask(and, zero)
}
30689
/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi32_mask&expand=5887)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Per lane: mask bit = ((a AND b) != 0); lanes whose bit in `k` is clear yield 0.
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi32_mask(k, and, zero)
}
30703
/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi32_mask&expand=5886)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Per lane: mask bit = ((a AND b) != 0).
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpneq_epi32_mask(and, zero)
}
30717
/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi32_mask&expand=5885)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Per lane: mask bit = ((a AND b) != 0); lanes whose bit in `k` is clear yield 0.
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpneq_epi32_mask(k, and, zero)
}
30731
/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi64_mask&expand=5896)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    // Per lane: mask bit = ((a AND b) != 0).
    let and = _mm512_and_epi64(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpneq_epi64_mask(and, zero)
}
30745
/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi64_mask&expand=5895)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Per lane: mask bit = ((a AND b) != 0); lanes whose bit in `k` is clear yield 0.
    let and = _mm512_and_epi64(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi64_mask(k, and, zero)
}
30759
/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi64_mask&expand=5894)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Per lane: mask bit = ((a AND b) != 0).
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpneq_epi64_mask(and, zero)
}
30773
/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi64_mask&expand=5893)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Per lane: mask bit = ((a AND b) != 0); lanes whose bit in `k` is clear yield 0.
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi64_mask(k, and, zero)
}
30787
/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi64_mask&expand=5892)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Per lane: mask bit = ((a AND b) != 0).
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpneq_epi64_mask(and, zero)
}
30801
30802/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
30803///
30804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi64_mask&expand=5891)
30805#[inline]
30806#[target_feature(enable = "avx512f,avx512vl")]
30807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30808#[cfg_attr(test, assert_instr(vptestmq))]
30809#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30810pub const fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30811    let and = _mm_and_si128(a, b);
30812    let zero = _mm_setzero_si128();
30813    _mm_mask_cmpneq_epi64_mask(k, and, zero)
30814}
30815
30816/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30817///
30818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi32_mask&expand=5921)
30819#[inline]
30820#[target_feature(enable = "avx512f")]
30821#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30822#[cfg_attr(test, assert_instr(vptestnmd))]
30823#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30824pub const fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30825    let and = _mm512_and_epi32(a, b);
30826    let zero = _mm512_setzero_si512();
30827    _mm512_cmpeq_epi32_mask(and, zero)
30828}
30829
30830/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30831///
30832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi32_mask&expand=5920)
30833#[inline]
30834#[target_feature(enable = "avx512f")]
30835#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30836#[cfg_attr(test, assert_instr(vptestnmd))]
30837#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30838pub const fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30839    let and = _mm512_and_epi32(a, b);
30840    let zero = _mm512_setzero_si512();
30841    _mm512_mask_cmpeq_epi32_mask(k, and, zero)
30842}
30843
30844/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30845///
30846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi32_mask&expand=5919)
30847#[inline]
30848#[target_feature(enable = "avx512f,avx512vl")]
30849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30850#[cfg_attr(test, assert_instr(vptestnmd))]
30851#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30852pub const fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
30853    let and = _mm256_and_si256(a, b);
30854    let zero = _mm256_setzero_si256();
30855    _mm256_cmpeq_epi32_mask(and, zero)
30856}
30857
30858/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30859///
30860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi32_mask&expand=5918)
30861#[inline]
30862#[target_feature(enable = "avx512f,avx512vl")]
30863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30864#[cfg_attr(test, assert_instr(vptestnmd))]
30865#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30866pub const fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30867    let and = _mm256_and_si256(a, b);
30868    let zero = _mm256_setzero_si256();
30869    _mm256_mask_cmpeq_epi32_mask(k, and, zero)
30870}
30871
30872/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30873///
30874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi32_mask&expand=5917)
30875#[inline]
30876#[target_feature(enable = "avx512f,avx512vl")]
30877#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30878#[cfg_attr(test, assert_instr(vptestnmd))]
30879#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30880pub const fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
30881    let and = _mm_and_si128(a, b);
30882    let zero = _mm_setzero_si128();
30883    _mm_cmpeq_epi32_mask(and, zero)
30884}
30885
30886/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30887///
30888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi32_mask&expand=5916)
30889#[inline]
30890#[target_feature(enable = "avx512f,avx512vl")]
30891#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30892#[cfg_attr(test, assert_instr(vptestnmd))]
30893#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30894pub const fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30895    let and = _mm_and_si128(a, b);
30896    let zero = _mm_setzero_si128();
30897    _mm_mask_cmpeq_epi32_mask(k, and, zero)
30898}
30899
30900/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30901///
30902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi64_mask&expand=5927)
30903#[inline]
30904#[target_feature(enable = "avx512f")]
30905#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30906#[cfg_attr(test, assert_instr(vptestnmq))]
30907#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30908pub const fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
30909    let and = _mm512_and_epi64(a, b);
30910    let zero = _mm512_setzero_si512();
30911    _mm512_cmpeq_epi64_mask(and, zero)
30912}
30913
30914/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30915///
30916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi64_mask&expand=5926)
30917#[inline]
30918#[target_feature(enable = "avx512f")]
30919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30920#[cfg_attr(test, assert_instr(vptestnmq))]
30921#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30922pub const fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30923    let and = _mm512_and_epi64(a, b);
30924    let zero = _mm512_setzero_si512();
30925    _mm512_mask_cmpeq_epi64_mask(k, and, zero)
30926}
30927
30928/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30929///
30930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi64_mask&expand=5925)
30931#[inline]
30932#[target_feature(enable = "avx512f,avx512vl")]
30933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30934#[cfg_attr(test, assert_instr(vptestnmq))]
30935#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30936pub const fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
30937    let and = _mm256_and_si256(a, b);
30938    let zero = _mm256_setzero_si256();
30939    _mm256_cmpeq_epi64_mask(and, zero)
30940}
30941
30942/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30943///
30944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi64_mask&expand=5924)
30945#[inline]
30946#[target_feature(enable = "avx512f,avx512vl")]
30947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30948#[cfg_attr(test, assert_instr(vptestnmq))]
30949#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30950pub const fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30951    let and = _mm256_and_si256(a, b);
30952    let zero = _mm256_setzero_si256();
30953    _mm256_mask_cmpeq_epi64_mask(k, and, zero)
30954}
30955
30956/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30957///
30958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi64_mask&expand=5923)
30959#[inline]
30960#[target_feature(enable = "avx512f,avx512vl")]
30961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30962#[cfg_attr(test, assert_instr(vptestnmq))]
30963#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30964pub const fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
30965    let and = _mm_and_si128(a, b);
30966    let zero = _mm_setzero_si128();
30967    _mm_cmpeq_epi64_mask(and, zero)
30968}
30969
30970/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30971///
30972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi64_mask&expand=5922)
30973#[inline]
30974#[target_feature(enable = "avx512f,avx512vl")]
30975#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30976#[cfg_attr(test, assert_instr(vptestnmq))]
30977#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30978pub const fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30979    let and = _mm_and_si128(a, b);
30980    let zero = _mm_setzero_si128();
30981    _mm_mask_cmpeq_epi64_mask(k, and, zero)
30982}
30983
/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovntps))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    // NOTE(review): `vps!` is a module-local macro that presumably expands to the
    // instruction with a `{p}`-based memory operand — confirm against its definition.
    crate::arch::asm!(
        vps!("vmovntps", ",{a}"),
        p = in(reg) mem_addr,
        a = in(zmm_reg) a,
        // No stack usage, and MOVNTPS does not modify arithmetic flags.
        options(nostack, preserves_flags),
    );
}
31010
/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovntpd))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    crate::arch::asm!(
        vps!("vmovntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(zmm_reg) a,
        // No stack usage, and MOVNTPD does not modify arithmetic flags.
        options(nostack, preserves_flags),
    );
}
31037
/// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovntdq))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_si512(mem_addr: *mut __m512i, a: __m512i) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    crate::arch::asm!(
        vps!("vmovntdq", ",{a}"),
        p = in(reg) mem_addr,
        a = in(zmm_reg) a,
        // No stack usage, and MOVNTDQ does not modify arithmetic flags.
        options(nostack, preserves_flags),
    );
}
31064
/// Load 512-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
/// must be aligned on a 64-byte boundary or a general-protection exception may be generated. To
/// minimize caching, the data is flagged as non-temporal (unlikely to be used again soon)
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_load_si512)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_stream_load_si512(mem_addr: *const __m512i) -> __m512i {
    let dst: __m512i;
    // NOTE(review): `vpl!` is a module-local macro that presumably appends the
    // `{p}`-based memory operand to the instruction — confirm against its definition.
    crate::arch::asm!(
        vpl!("vmovntdqa {a}"),
        a = out(zmm_reg) dst,
        p = in(reg) mem_addr,
        // `pure` + `readonly`: the asm only reads `*mem_addr` and has no other
        // side effects, so the compiler may CSE/eliminate duplicate loads.
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
31083
/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied
/// values. `e0` is placed in the highest element and `e15` in the lowest (see
/// `_mm512_setr_ps` for the reversed, lowest-first ordering).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_ps&expand=4931)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set_ps(
    e0: f32,
    e1: f32,
    e2: f32,
    e3: f32,
    e4: f32,
    e5: f32,
    e6: f32,
    e7: f32,
    e8: f32,
    e9: f32,
    e10: f32,
    e11: f32,
    e12: f32,
    e13: f32,
    e14: f32,
    e15: f32,
) -> __m512 {
    // Delegate with the arguments reversed so `e15` lands in element 0.
    _mm512_setr_ps(
        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
    )
}
31113
/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied
/// values in reverse order: `e0` is placed in the lowest element and `e15` in the highest.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_ps&expand=5008)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setr_ps(
    e0: f32,
    e1: f32,
    e2: f32,
    e3: f32,
    e4: f32,
    e5: f32,
    e6: f32,
    e7: f32,
    e8: f32,
    e9: f32,
    e10: f32,
    e11: f32,
    e12: f32,
    e13: f32,
    e14: f32,
    e15: f32,
) -> __m512 {
    unsafe {
        // f32x16::new assigns its arguments to elements 0..15 in order.
        let r = f32x16::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
        );
        transmute(r)
    }
}
31147
31148/// Broadcast 64-bit float `a` to all elements of `dst`.
31149///
31150/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_pd&expand=4975)
31151#[inline]
31152#[target_feature(enable = "avx512f")]
31153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31154#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31155pub const fn _mm512_set1_pd(a: f64) -> __m512d {
31156    unsafe { transmute(f64x8::splat(a)) }
31157}
31158
31159/// Broadcast 32-bit float `a` to all elements of `dst`.
31160///
31161/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_ps&expand=4981)
31162#[inline]
31163#[target_feature(enable = "avx512f")]
31164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31165#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31166pub const fn _mm512_set1_ps(a: f32) -> __m512 {
31167    unsafe { transmute(f32x16::splat(a)) }
31168}
31169
31170/// Sets packed 32-bit integers in `dst` with the supplied values.
31171///
31172/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi32&expand=4908)
31173#[inline]
31174#[target_feature(enable = "avx512f")]
31175#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31176#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31177pub const fn _mm512_set_epi32(
31178    e15: i32,
31179    e14: i32,
31180    e13: i32,
31181    e12: i32,
31182    e11: i32,
31183    e10: i32,
31184    e9: i32,
31185    e8: i32,
31186    e7: i32,
31187    e6: i32,
31188    e5: i32,
31189    e4: i32,
31190    e3: i32,
31191    e2: i32,
31192    e1: i32,
31193    e0: i32,
31194) -> __m512i {
31195    _mm512_setr_epi32(
31196        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
31197    )
31198}
31199
31200/// Broadcast 8-bit integer a to all elements of dst.
31201///
31202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi8&expand=4972)
31203#[inline]
31204#[target_feature(enable = "avx512f")]
31205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31206#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31207pub const fn _mm512_set1_epi8(a: i8) -> __m512i {
31208    unsafe { transmute(i8x64::splat(a)) }
31209}
31210
31211/// Broadcast the low packed 16-bit integer from a to all elements of dst.
31212///
31213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi16&expand=4944)
31214#[inline]
31215#[target_feature(enable = "avx512f")]
31216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31217#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31218pub const fn _mm512_set1_epi16(a: i16) -> __m512i {
31219    unsafe { transmute(i16x32::splat(a)) }
31220}
31221
31222/// Broadcast 32-bit integer `a` to all elements of `dst`.
31223///
31224/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi32)
31225#[inline]
31226#[target_feature(enable = "avx512f")]
31227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31228#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31229pub const fn _mm512_set1_epi32(a: i32) -> __m512i {
31230    unsafe { transmute(i32x16::splat(a)) }
31231}
31232
31233/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31234///
31235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi32&expand=4951)
31236#[inline]
31237#[target_feature(enable = "avx512f")]
31238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31239#[cfg_attr(test, assert_instr(vpbroadcastd))]
31240#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31241pub const fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
31242    unsafe {
31243        let r = _mm512_set1_epi32(a).as_i32x16();
31244        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
31245    }
31246}
31247
31248/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31249///
31250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi32&expand=4952)
31251#[inline]
31252#[target_feature(enable = "avx512f")]
31253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31254#[cfg_attr(test, assert_instr(vpbroadcastd))]
31255#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31256pub const fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
31257    unsafe {
31258        let r = _mm512_set1_epi32(a).as_i32x16();
31259        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
31260    }
31261}
31262
31263/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31264///
31265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi32&expand=4948)
31266#[inline]
31267#[target_feature(enable = "avx512f,avx512vl")]
31268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31269#[cfg_attr(test, assert_instr(vpbroadcastd))]
31270#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31271pub const fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i {
31272    unsafe {
31273        let r = _mm256_set1_epi32(a).as_i32x8();
31274        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
31275    }
31276}
31277
31278/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31279///
31280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi32&expand=4949)
31281#[inline]
31282#[target_feature(enable = "avx512f,avx512vl")]
31283#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31284#[cfg_attr(test, assert_instr(vpbroadcastd))]
31285#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31286pub const fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i {
31287    unsafe {
31288        let r = _mm256_set1_epi32(a).as_i32x8();
31289        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
31290    }
31291}
31292
31293/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31294///
31295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi32&expand=4945)
31296#[inline]
31297#[target_feature(enable = "avx512f,avx512vl")]
31298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31299#[cfg_attr(test, assert_instr(vpbroadcastd))]
31300#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31301pub const fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i {
31302    unsafe {
31303        let r = _mm_set1_epi32(a).as_i32x4();
31304        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
31305    }
31306}
31307
31308/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31309///
31310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi32&expand=4946)
31311#[inline]
31312#[target_feature(enable = "avx512f,avx512vl")]
31313#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31314#[cfg_attr(test, assert_instr(vpbroadcastd))]
31315#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31316pub const fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i {
31317    unsafe {
31318        let r = _mm_set1_epi32(a).as_i32x4();
31319        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
31320    }
31321}
31322
31323/// Broadcast 64-bit integer `a` to all elements of `dst`.
31324///
31325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi64&expand=4961)
31326#[inline]
31327#[target_feature(enable = "avx512f")]
31328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31329#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31330pub const fn _mm512_set1_epi64(a: i64) -> __m512i {
31331    unsafe { transmute(i64x8::splat(a)) }
31332}
31333
31334/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31335///
31336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi64&expand=4959)
31337#[inline]
31338#[target_feature(enable = "avx512f")]
31339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31340#[cfg_attr(test, assert_instr(vpbroadcastq))]
31341#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31342pub const fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
31343    unsafe {
31344        let r = _mm512_set1_epi64(a).as_i64x8();
31345        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
31346    }
31347}
31348
31349/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31350///
31351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi64&expand=4960)
31352#[inline]
31353#[target_feature(enable = "avx512f")]
31354#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31355#[cfg_attr(test, assert_instr(vpbroadcastq))]
31356#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31357pub const fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
31358    unsafe {
31359        let r = _mm512_set1_epi64(a).as_i64x8();
31360        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
31361    }
31362}
31363
31364/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31365///
31366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi64&expand=4957)
31367#[inline]
31368#[target_feature(enable = "avx512f,avx512vl")]
31369#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31370#[cfg_attr(test, assert_instr(vpbroadcastq))]
31371#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31372pub const fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i {
31373    unsafe {
31374        let r = _mm256_set1_epi64x(a).as_i64x4();
31375        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
31376    }
31377}
31378
31379/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31380///
31381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi64&expand=4958)
31382#[inline]
31383#[target_feature(enable = "avx512f,avx512vl")]
31384#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31385#[cfg_attr(test, assert_instr(vpbroadcastq))]
31386#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31387pub const fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i {
31388    unsafe {
31389        let r = _mm256_set1_epi64x(a).as_i64x4();
31390        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
31391    }
31392}
31393
31394/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31395///
31396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi64&expand=4954)
31397#[inline]
31398#[target_feature(enable = "avx512f,avx512vl")]
31399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31400#[cfg_attr(test, assert_instr(vpbroadcastq))]
31401#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31402pub const fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i {
31403    unsafe {
31404        let r = _mm_set1_epi64x(a).as_i64x2();
31405        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
31406    }
31407}
31408
31409/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31410///
31411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi64&expand=4955)
31412#[inline]
31413#[target_feature(enable = "avx512f,avx512vl")]
31414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31415#[cfg_attr(test, assert_instr(vpbroadcastq))]
31416#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31417pub const fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i {
31418    unsafe {
31419        let r = _mm_set1_epi64x(a).as_i64x2();
31420        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
31421    }
31422}
31423
/// Set packed 64-bit integers in dst with the repeated 4 element sequence.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi64&expand=4983)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
    // _mm512_set_epi64 takes elements highest-first, so the resulting lanes
    // from lowest to highest are: a, b, c, d, a, b, c, d.
    _mm512_set_epi64(d, c, b, a, d, c, b, a)
}
31434
31435/// Set packed 64-bit integers in dst with the repeated 4 element sequence in reverse order.
31436///
31437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi64&expand=5010)
31438#[inline]
31439#[target_feature(enable = "avx512f")]
31440#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31441#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31442pub const fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
31443    _mm512_set_epi64(a, b, c, d, a, b, c, d)
31444}
31445
/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_ps_mask&expand=1074)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    // `_CMP_LT_OS`: less-than, ordered, signaling.
    _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b)
}
31456
/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_ps_mask&expand=1075)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    // `_CMP_LT_OS` predicate; result bits whose `k1` bit is 0 are zeroed.
    _mm512_mask_cmp_ps_mask::<_CMP_LT_OS>(k1, a, b)
}
31467
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_ps_mask&expand=1154)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    // `_CMP_NLT_US`: not-less-than, unordered, signaling.
    _mm512_cmp_ps_mask::<_CMP_NLT_US>(a, b)
}
31478
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_ps_mask&expand=1155)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    // `_CMP_NLT_US` predicate; result bits whose `k1` bit is 0 are zeroed.
    _mm512_mask_cmp_ps_mask::<_CMP_NLT_US>(k1, a, b)
}
31489
/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_ps_mask&expand=1013)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    // `_CMP_LE_OS`: less-than-or-equal, ordered, signaling.
    _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b)
}
31500
/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_ps_mask&expand=1014)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    // `_CMP_LE_OS` predicate; result bits whose `k1` bit is 0 are zeroed.
    _mm512_mask_cmp_ps_mask::<_CMP_LE_OS>(k1, a, b)
}
31511
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_ps_mask&expand=1146)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    // `_CMP_NLE_US`: not-less-than-or-equal, unordered, signaling.
    _mm512_cmp_ps_mask::<_CMP_NLE_US>(a, b)
}
31522
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_ps_mask&expand=1147)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    // `_CMP_NLE_US` predicate; result bits whose `k1` bit is 0 are zeroed.
    _mm512_mask_cmp_ps_mask::<_CMP_NLE_US>(k1, a, b)
}
31533
/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_ps_mask&expand=828)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    // `_CMP_EQ_OQ`: equal, ordered, quiet.
    _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(a, b)
}
31544
/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_ps_mask&expand=829)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    // `_CMP_EQ_OQ` predicate; result bits whose `k1` bit is 0 are zeroed.
    _mm512_mask_cmp_ps_mask::<_CMP_EQ_OQ>(k1, a, b)
}
31555
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_ps_mask&expand=1130)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    // `_CMP_NEQ_UQ`: not-equal, unordered, quiet.
    _mm512_cmp_ps_mask::<_CMP_NEQ_UQ>(a, b)
}
31566
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_ps_mask&expand=1131)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    // `_CMP_NEQ_UQ` predicate; result bits whose `k1` bit is 0 are zeroed.
    _mm512_mask_cmp_ps_mask::<_CMP_NEQ_UQ>(k1, a, b)
}
31577
/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_ps_mask&expand=749)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm512_cmp_ps_mask<const IMM8: i32>(a: __m512, b: __m512) -> __mmask16 {
    unsafe {
        // The predicate is a 5-bit immediate (0..=31).
        static_assert_uimm_bits!(IMM8, 5);
        // All-ones writemask: compare every lane.
        let neg_one = -1;
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        // `_MM_FROUND_CUR_DIRECTION`: use the current rounding mode (no SAE override).
        let r = vcmpps(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
        // Reinterpret the signed intrinsic result as the unsigned `__mmask16`.
        r.cast_unsigned()
    }
}
31596
/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_ps_mask&expand=750)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    unsafe {
        // The predicate is a 5-bit immediate (0..=31).
        static_assert_uimm_bits!(IMM8, 5);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        // `k1` is passed as the writemask: result bits whose `k1` bit is 0 are zeroed.
        let r = vcmpps(a, b, IMM8, k1 as i16, _MM_FROUND_CUR_DIRECTION);
        // Reinterpret the signed intrinsic result as the unsigned `__mmask16`.
        r.cast_unsigned()
    }
}
31614
/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps_mask&expand=747)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm256_cmp_ps_mask<const IMM8: i32>(a: __m256, b: __m256) -> __mmask8 {
    unsafe {
        // The predicate is a 5-bit immediate (0..=31).
        static_assert_uimm_bits!(IMM8, 5);
        // All-ones writemask: compare every lane.
        let neg_one = -1;
        let a = a.as_f32x8();
        let b = b.as_f32x8();
        // 256-bit variant takes no rounding argument.
        let r = vcmpps256(a, b, IMM8, neg_one);
        // Reinterpret the signed intrinsic result as the unsigned `__mmask8`.
        r.cast_unsigned()
    }
}
31633
/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_ps_mask&expand=748)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm256_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256, b: __m256) -> __mmask8 {
    unsafe {
        // The predicate is a 5-bit immediate (0..=31).
        static_assert_uimm_bits!(IMM8, 5);
        let a = a.as_f32x8();
        let b = b.as_f32x8();
        // `k1` is passed as the writemask: result bits whose `k1` bit is 0 are zeroed.
        let r = vcmpps256(a, b, IMM8, k1 as i8);
        // Reinterpret the signed intrinsic result as the unsigned `__mmask8`.
        r.cast_unsigned()
    }
}
31651
/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps_mask&expand=745)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm_cmp_ps_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
    unsafe {
        // The predicate is a 5-bit immediate (0..=31).
        static_assert_uimm_bits!(IMM8, 5);
        // All-ones writemask: compare every lane.
        let neg_one = -1;
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        // 128-bit variant takes no rounding argument.
        let r = vcmpps128(a, b, IMM8, neg_one);
        // Reinterpret the signed intrinsic result as the unsigned `__mmask8`.
        r.cast_unsigned()
    }
}
31670
/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ps_mask&expand=746)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
    unsafe {
        // The predicate is a 5-bit immediate (0..=31).
        static_assert_uimm_bits!(IMM8, 5);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        // `k1` is passed as the writemask: result bits whose `k1` bit is 0 are zeroed.
        let r = vcmpps128(a, b, IMM8, k1 as i8);
        // Reinterpret the signed intrinsic result as the unsigned `__mmask8`.
        r.cast_unsigned()
    }
}
31688
/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_ps_mask&expand=753)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm512_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
    a: __m512,
    b: __m512,
) -> __mmask16 {
    unsafe {
        // The predicate is a 5-bit immediate (0..=31); SAE must be a valid
        // suppress-all-exceptions encoding.
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_mantissas_sae!(SAE);
        // All-ones writemask: compare every lane.
        let neg_one = -1;
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vcmpps(a, b, IMM5, neg_one, SAE);
        // Reinterpret the signed intrinsic result as the unsigned `__mmask16`.
        r.cast_unsigned()
    }
}
31712
31713/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
31714/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
31715///
31716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_ps_mask&expand=754)
31717#[inline]
31718#[target_feature(enable = "avx512f")]
31719#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31720#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
31721#[rustc_legacy_const_generics(3, 4)]
31722pub fn _mm512_mask_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
31723    m: __mmask16,
31724    a: __m512,
31725    b: __m512,
31726) -> __mmask16 {
31727    unsafe {
31728        static_assert_uimm_bits!(IMM5, 5);
31729        static_assert_mantissas_sae!(SAE);
31730        let a = a.as_f32x16();
31731        let b = b.as_f32x16();
31732        let r = vcmpps(a, b, IMM5, m as i16, SAE);
31733        r.cast_unsigned()
31734    }
31735}
31736
/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_ps_mask&expand=1162)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    // `_CMP_ORD_Q`: ordered (neither operand is NaN), quiet.
    _mm512_cmp_ps_mask::<_CMP_ORD_Q>(a, b)
}
31747
/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_ps_mask&expand=1163)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    // `_CMP_ORD_Q` predicate; result bits whose `k1` bit is 0 are zeroed.
    _mm512_mask_cmp_ps_mask::<_CMP_ORD_Q>(k1, a, b)
}
31758
/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_ps_mask&expand=1170)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    // `_CMP_UNORD_Q`: unordered (either operand is NaN), quiet.
    _mm512_cmp_ps_mask::<_CMP_UNORD_Q>(a, b)
}
31769
/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_ps_mask&expand=1171)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    // `_CMP_UNORD_Q` predicate; result bits whose `k1` bit is 0 are zeroed.
    _mm512_mask_cmp_ps_mask::<_CMP_UNORD_Q>(k1, a, b)
}
31780
/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_pd_mask&expand=1071)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    // `_CMP_LT_OS`: less-than, ordered, signaling.
    _mm512_cmp_pd_mask::<_CMP_LT_OS>(a, b)
}
31791
/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_pd_mask&expand=1072)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    // `_CMP_LT_OS` predicate; result bits whose `k1` bit is 0 are zeroed.
    _mm512_mask_cmp_pd_mask::<_CMP_LT_OS>(k1, a, b)
}
31802
/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_pd_mask&expand=1151)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    // `_CMP_NLT_US`: not-less-than, unordered, signaling.
    _mm512_cmp_pd_mask::<_CMP_NLT_US>(a, b)
}
31813
31814/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31815///
31816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_pd_mask&expand=1152)
31817#[inline]
31818#[target_feature(enable = "avx512f")]
31819#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31820#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31821pub fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
31822    _mm512_mask_cmp_pd_mask::<_CMP_NLT_US>(m, a, b)
31823}
31824
/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_pd_mask&expand=1010)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    // `_CMP_LE_OS`: less-than-or-equal, ordered, signaling.
    _mm512_cmp_pd_mask::<_CMP_LE_OS>(a, b)
}
31835
/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_pd_mask&expand=1011)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    // `_CMP_LE_OS` predicate; result bits whose `k1` bit is 0 are zeroed.
    _mm512_mask_cmp_pd_mask::<_CMP_LE_OS>(k1, a, b)
}
31846
/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_pd_mask&expand=1143)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    // `_CMP_NLE_US`: not-less-than-or-equal, unordered, signaling.
    _mm512_cmp_pd_mask::<_CMP_NLE_US>(a, b)
}
31857
/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_pd_mask&expand=1144)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    // `_CMP_NLE_US` predicate; result bits whose `k1` bit is 0 are zeroed.
    _mm512_mask_cmp_pd_mask::<_CMP_NLE_US>(k1, a, b)
}
31868
/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_pd_mask&expand=822)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    // `_CMP_EQ_OQ`: equal, ordered, quiet.
    _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b)
}
31879
/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_pd_mask&expand=823)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    // `_CMP_EQ_OQ` predicate; result bits whose `k1` bit is 0 are zeroed.
    _mm512_mask_cmp_pd_mask::<_CMP_EQ_OQ>(k1, a, b)
}
31890
/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_pd_mask&expand=1127)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    // `_CMP_NEQ_UQ`: not-equal, unordered, quiet.
    _mm512_cmp_pd_mask::<_CMP_NEQ_UQ>(a, b)
}
31901
/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_pd_mask&expand=1128)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    // `_CMP_NEQ_UQ` predicate; result bits whose `k1` bit is 0 are zeroed.
    _mm512_mask_cmp_pd_mask::<_CMP_NEQ_UQ>(k1, a, b)
}
31912
/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_pd_mask&expand=741)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm512_cmp_pd_mask<const IMM8: i32>(a: __m512d, b: __m512d) -> __mmask8 {
    unsafe {
        // The predicate is a 5-bit immediate (0..=31).
        static_assert_uimm_bits!(IMM8, 5);
        // All-ones writemask: compare every lane.
        let neg_one = -1;
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        // `_MM_FROUND_CUR_DIRECTION`: use the current rounding mode (no SAE override).
        let r = vcmppd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
        // Reinterpret the signed intrinsic result as the unsigned `__mmask8`.
        r.cast_unsigned()
    }
}
31931
/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_pd_mask&expand=742)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    unsafe {
        // The predicate is a 5-bit immediate (0..=31).
        static_assert_uimm_bits!(IMM8, 5);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        // `k1` is passed as the writemask: result bits whose `k1` bit is 0 are zeroed.
        let r = vcmppd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
        // Reinterpret the signed intrinsic result as the unsigned `__mmask8`.
        r.cast_unsigned()
    }
}
31949
/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd_mask&expand=739)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm256_cmp_pd_mask<const IMM8: i32>(a: __m256d, b: __m256d) -> __mmask8 {
    unsafe {
        // The predicate is a 5-bit immediate (0..=31).
        static_assert_uimm_bits!(IMM8, 5);
        // All-ones writemask: compare every lane.
        let neg_one = -1;
        let a = a.as_f64x4();
        let b = b.as_f64x4();
        // 256-bit variant takes no rounding argument.
        let r = vcmppd256(a, b, IMM8, neg_one);
        // Reinterpret the signed intrinsic result as the unsigned `__mmask8`.
        r.cast_unsigned()
    }
}
31968
/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_pd_mask&expand=740)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm256_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d, b: __m256d) -> __mmask8 {
    unsafe {
        // The predicate is a 5-bit immediate (0..=31).
        static_assert_uimm_bits!(IMM8, 5);
        let a = a.as_f64x4();
        let b = b.as_f64x4();
        // `k1` is passed as the writemask: result bits whose `k1` bit is 0 are zeroed.
        let r = vcmppd256(a, b, IMM8, k1 as i8);
        // Reinterpret the signed intrinsic result as the unsigned `__mmask8`.
        r.cast_unsigned()
    }
}
31986
/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd_mask&expand=737)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm_cmp_pd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
    unsafe {
        // The predicate is a 5-bit immediate (0..=31).
        static_assert_uimm_bits!(IMM8, 5);
        // All-ones writemask: compare every lane.
        let neg_one = -1;
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        // 128-bit variant takes no rounding argument.
        let r = vcmppd128(a, b, IMM8, neg_one);
        // Reinterpret the signed intrinsic result as the unsigned `__mmask8`.
        r.cast_unsigned()
    }
}
32005
32006/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32007///
32008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_pd_mask&expand=738)
32009#[inline]
32010#[target_feature(enable = "avx512f,avx512vl")]
32011#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32012#[rustc_legacy_const_generics(3)]
32013#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
32014pub fn _mm_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
32015    unsafe {
32016        static_assert_uimm_bits!(IMM8, 5);
32017        let a = a.as_f64x2();
32018        let b = b.as_f64x2();
32019        let r = vcmppd128(a, b, IMM8, k1 as i8);
32020        r.cast_unsigned()
32021    }
32022}
32023
32024/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
32025/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32026///
32027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_pd_mask&expand=751)
32028#[inline]
32029#[target_feature(enable = "avx512f")]
32030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32031#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32032#[rustc_legacy_const_generics(2, 3)]
32033pub fn _mm512_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
32034    a: __m512d,
32035    b: __m512d,
32036) -> __mmask8 {
32037    unsafe {
32038        static_assert_uimm_bits!(IMM5, 5);
32039        static_assert_mantissas_sae!(SAE);
32040        let neg_one = -1;
32041        let a = a.as_f64x8();
32042        let b = b.as_f64x8();
32043        let r = vcmppd(a, b, IMM5, neg_one, SAE);
32044        r.cast_unsigned()
32045    }
32046}
32047
32048/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
32049/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32050///
32051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_pd_mask&expand=752)
32052#[inline]
32053#[target_feature(enable = "avx512f")]
32054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32055#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32056#[rustc_legacy_const_generics(3, 4)]
32057pub fn _mm512_mask_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
32058    k1: __mmask8,
32059    a: __m512d,
32060    b: __m512d,
32061) -> __mmask8 {
32062    unsafe {
32063        static_assert_uimm_bits!(IMM5, 5);
32064        static_assert_mantissas_sae!(SAE);
32065        let a = a.as_f64x8();
32066        let b = b.as_f64x8();
32067        let r = vcmppd(a, b, IMM5, k1 as i8, SAE);
32068        r.cast_unsigned()
32069    }
32070}
32071
/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_pd_mask&expand=1159)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    // _CMP_ORD_Q: "ordered, quiet" predicate — true when neither lane is NaN.
    _mm512_cmp_pd_mask::<_CMP_ORD_Q>(a, b)
}
32082
/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_pd_mask&expand=1160)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    // Ordered-quiet predicate with `k1` applied as a zeromask (see doc above).
    _mm512_mask_cmp_pd_mask::<_CMP_ORD_Q>(k1, a, b)
}
32093
/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_pd_mask&expand=1167)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    // _CMP_UNORD_Q: "unordered, quiet" predicate — true when either lane is NaN.
    _mm512_cmp_pd_mask::<_CMP_UNORD_Q>(a, b)
}
32104
/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_pd_mask&expand=1168)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    // Unordered-quiet predicate with `k1` applied as a zeromask (see doc above).
    _mm512_mask_cmp_pd_mask::<_CMP_UNORD_Q>(k1, a, b)
}
32115
32116/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
32117///
32118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss_mask&expand=763)
32119#[inline]
32120#[target_feature(enable = "avx512f")]
32121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32122#[rustc_legacy_const_generics(2)]
32123#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
32124pub fn _mm_cmp_ss_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
32125    unsafe {
32126        static_assert_uimm_bits!(IMM8, 5);
32127        let neg_one = -1;
32128        let r = vcmpss(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
32129        r.cast_unsigned()
32130    }
32131}
32132
32133/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
32134///
32135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ss_mask&expand=764)
32136#[inline]
32137#[target_feature(enable = "avx512f")]
32138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32139#[rustc_legacy_const_generics(3)]
32140#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
32141pub fn _mm_mask_cmp_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
32142    unsafe {
32143        static_assert_uimm_bits!(IMM8, 5);
32144        let r = vcmpss(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
32145        r.cast_unsigned()
32146    }
32147}
32148
32149/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
32150/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32151///
32152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_ss_mask&expand=757)
32153#[inline]
32154#[target_feature(enable = "avx512f")]
32155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32156#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32157#[rustc_legacy_const_generics(2, 3)]
32158pub fn _mm_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> __mmask8 {
32159    unsafe {
32160        static_assert_uimm_bits!(IMM5, 5);
32161        static_assert_mantissas_sae!(SAE);
32162        let neg_one = -1;
32163        let r = vcmpss(a, b, IMM5, neg_one, SAE);
32164        r.cast_unsigned()
32165    }
32166}
32167
32168/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not seti).\
32169/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32170///
32171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_ss_mask&expand=758)
32172#[inline]
32173#[target_feature(enable = "avx512f")]
32174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32175#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32176#[rustc_legacy_const_generics(3, 4)]
32177pub fn _mm_mask_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(
32178    k1: __mmask8,
32179    a: __m128,
32180    b: __m128,
32181) -> __mmask8 {
32182    unsafe {
32183        static_assert_uimm_bits!(IMM5, 5);
32184        static_assert_mantissas_sae!(SAE);
32185        let r = vcmpss(a, b, IMM5, k1 as i8, SAE);
32186        r.cast_unsigned()
32187    }
32188}
32189
32190/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
32191///
32192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd_mask&expand=760)
32193#[inline]
32194#[target_feature(enable = "avx512f")]
32195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32196#[rustc_legacy_const_generics(2)]
32197#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
32198pub fn _mm_cmp_sd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
32199    unsafe {
32200        static_assert_uimm_bits!(IMM8, 5);
32201        let neg_one = -1;
32202        let r = vcmpsd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
32203        r.cast_unsigned()
32204    }
32205}
32206
32207/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
32208///
32209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_sd_mask&expand=761)
32210#[inline]
32211#[target_feature(enable = "avx512f")]
32212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32213#[rustc_legacy_const_generics(3)]
32214#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
32215pub fn _mm_mask_cmp_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
32216    unsafe {
32217        static_assert_uimm_bits!(IMM8, 5);
32218        let r = vcmpsd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
32219        r.cast_unsigned()
32220    }
32221}
32222
32223/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
32224/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32225///
32226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_sd_mask&expand=755)
32227#[inline]
32228#[target_feature(enable = "avx512f")]
32229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32230#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32231#[rustc_legacy_const_generics(2, 3)]
32232pub fn _mm_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __mmask8 {
32233    unsafe {
32234        static_assert_uimm_bits!(IMM5, 5);
32235        static_assert_mantissas_sae!(SAE);
32236        let neg_one = -1;
32237        let r = vcmpsd(a, b, IMM5, neg_one, SAE);
32238        r.cast_unsigned()
32239    }
32240}
32241
32242/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
32243/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32244///
32245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_sd_mask&expand=756)
32246#[inline]
32247#[target_feature(enable = "avx512f")]
32248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32249#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32250#[rustc_legacy_const_generics(3, 4)]
32251pub fn _mm_mask_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(
32252    k1: __mmask8,
32253    a: __m128d,
32254    b: __m128d,
32255) -> __mmask8 {
32256    unsafe {
32257        static_assert_uimm_bits!(IMM5, 5);
32258        static_assert_mantissas_sae!(SAE);
32259        let r = vcmpsd(a, b, IMM5, k1 as i8, SAE);
32260        r.cast_unsigned()
32261    }
32262}
32263
32264/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
32265///
32266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu32_mask&expand=1056)
32267#[inline]
32268#[target_feature(enable = "avx512f")]
32269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32270#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32271#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32272pub const fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32273    unsafe { simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16())) }
32274}
32275
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu32_mask&expand=1057)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // Generic predicate compare with LT; `k1` acts as a zeromask (see doc above).
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
}
32287
32288/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
32289///
32290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu32_mask&expand=1054)
32291#[inline]
32292#[target_feature(enable = "avx512f,avx512vl")]
32293#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32294#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32295#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32296pub const fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32297    unsafe { simd_bitmask::<u32x8, _>(simd_lt(a.as_u32x8(), b.as_u32x8())) }
32298}
32299
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu32_mask&expand=1055)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Generic predicate compare with LT; `k1` acts as a zeromask (see doc above).
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
}
32311
32312/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
32313///
32314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu32_mask&expand=1052)
32315#[inline]
32316#[target_feature(enable = "avx512f,avx512vl")]
32317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32318#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32319#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32320pub const fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32321    unsafe { simd_bitmask::<u32x4, _>(simd_lt(a.as_u32x4(), b.as_u32x4())) }
32322}
32323
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu32_mask&expand=1053)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Generic predicate compare with LT; `k1` acts as a zeromask (see doc above).
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
}
32335
32336/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
32337///
32338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu32_mask&expand=933)
32339#[inline]
32340#[target_feature(enable = "avx512f")]
32341#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32342#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32343#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32344pub const fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32345    unsafe { simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16())) }
32346}
32347
/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu32_mask&expand=934)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // NLE (not-less-or-equal) is greater-than; `k1` acts as a zeromask.
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
32359
32360/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
32361///
32362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu32_mask&expand=931)
32363#[inline]
32364#[target_feature(enable = "avx512f,avx512vl")]
32365#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32366#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32367#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32368pub const fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32369    unsafe { simd_bitmask::<u32x8, _>(simd_gt(a.as_u32x8(), b.as_u32x8())) }
32370}
32371
/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu32_mask&expand=932)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // NLE (not-less-or-equal) is greater-than; `k1` acts as a zeromask.
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
32383
32384/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
32385///
32386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu32_mask&expand=929)
32387#[inline]
32388#[target_feature(enable = "avx512f,avx512vl")]
32389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32390#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32391#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32392pub const fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32393    unsafe { simd_bitmask::<u32x4, _>(simd_gt(a.as_u32x4(), b.as_u32x4())) }
32394}
32395
/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu32_mask&expand=930)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // NLE (not-less-or-equal) is greater-than; `k1` acts as a zeromask.
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
32407
32408/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32409///
32410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu32_mask&expand=995)
32411#[inline]
32412#[target_feature(enable = "avx512f")]
32413#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32414#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32415#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32416pub const fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32417    unsafe { simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16())) }
32418}
32419
/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu32_mask&expand=996)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // Generic predicate compare with LE; `k1` acts as a zeromask (see doc above).
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
}
32431
32432/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32433///
32434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu32_mask&expand=993)
32435#[inline]
32436#[target_feature(enable = "avx512f,avx512vl")]
32437#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32438#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32439#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32440pub const fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32441    unsafe { simd_bitmask::<u32x8, _>(simd_le(a.as_u32x8(), b.as_u32x8())) }
32442}
32443
/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu32_mask&expand=994)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Generic predicate compare with LE; `k1` acts as a zeromask (see doc above).
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
}
32455
32456/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32457///
32458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu32_mask&expand=991)
32459#[inline]
32460#[target_feature(enable = "avx512f,avx512vl")]
32461#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32462#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32463#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32464pub const fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32465    unsafe { simd_bitmask::<u32x4, _>(simd_le(a.as_u32x4(), b.as_u32x4())) }
32466}
32467
/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu32_mask&expand=992)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Generic predicate compare with LE; `k1` acts as a zeromask (see doc above).
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
}
32479
32480/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32481///
32482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu32_mask&expand=873)
32483#[inline]
32484#[target_feature(enable = "avx512f")]
32485#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32486#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32487#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32488pub const fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32489    unsafe { simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16())) }
32490}
32491
/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu32_mask&expand=874)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // NLT (not-less-than) is greater-than-or-equal; `k1` acts as a zeromask.
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
32503
32504/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32505///
32506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu32_mask&expand=871)
32507#[inline]
32508#[target_feature(enable = "avx512f,avx512vl")]
32509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32510#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32511#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32512pub const fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32513    unsafe { simd_bitmask::<u32x8, _>(simd_ge(a.as_u32x8(), b.as_u32x8())) }
32514}
32515
/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu32_mask&expand=872)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // NLT (not-less-than) is greater-than-or-equal; `k1` acts as a zeromask.
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
32527
32528/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32529///
32530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu32_mask&expand=869)
32531#[inline]
32532#[target_feature(enable = "avx512f,avx512vl")]
32533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32534#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32535#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32536pub const fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32537    unsafe { simd_bitmask::<u32x4, _>(simd_ge(a.as_u32x4(), b.as_u32x4())) }
32538}
32539
/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu32_mask&expand=870)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // NLT (not-less-than) is greater-than-or-equal; `k1` acts as a zeromask.
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
32551
32552/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
32553///
32554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu32_mask&expand=807)
32555#[inline]
32556#[target_feature(enable = "avx512f")]
32557#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32558#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32559#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32560pub const fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32561    unsafe { simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16())) }
32562}
32563
/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu32_mask&expand=808)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // Delegates to the generic masked compare with the EQ predicate.
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
32575
32576/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
32577///
32578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu32_mask&expand=805)
32579#[inline]
32580#[target_feature(enable = "avx512f,avx512vl")]
32581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32582#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32583#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32584pub const fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32585    unsafe { simd_bitmask::<u32x8, _>(simd_eq(a.as_u32x8(), b.as_u32x8())) }
32586}
32587
/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu32_mask&expand=806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegates to the generic masked compare with the EQ predicate.
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
32599
32600/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
32601///
32602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu32_mask&expand=803)
32603#[inline]
32604#[target_feature(enable = "avx512f,avx512vl")]
32605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32606#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32607#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32608pub const fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32609    unsafe { simd_bitmask::<u32x4, _>(simd_eq(a.as_u32x4(), b.as_u32x4())) }
32610}
32611
/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu32_mask&expand=804)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates to the generic masked compare with the EQ predicate.
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
32623
32624/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
32625///
32626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu32_mask&expand=1112)
32627#[inline]
32628#[target_feature(enable = "avx512f")]
32629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32630#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32631#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32632pub const fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32633    unsafe { simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16())) }
32634}
32635
/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu32_mask&expand=1113)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // Delegates to the generic masked compare with the NE predicate.
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
}
32647
32648/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
32649///
32650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu32_mask&expand=1110)
32651#[inline]
32652#[target_feature(enable = "avx512f,avx512vl")]
32653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32654#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32655#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32656pub const fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32657    unsafe { simd_bitmask::<u32x8, _>(simd_ne(a.as_u32x8(), b.as_u32x8())) }
32658}
32659
/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu32_mask&expand=1111)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegates to the generic masked compare with the NE predicate.
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
}
32671
32672/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
32673///
32674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu32_mask&expand=1108)
32675#[inline]
32676#[target_feature(enable = "avx512f,avx512vl")]
32677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32678#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32679#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32680pub const fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32681    unsafe { simd_bitmask::<u32x4, _>(simd_ne(a.as_u32x4(), b.as_u32x4())) }
32682}
32683
/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu32_mask&expand=1109)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates to the generic masked compare with the NE predicate.
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
}
32695
32696/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32697///
32698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu32_mask&expand=721)
32699#[inline]
32700#[target_feature(enable = "avx512f")]
32701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32702#[rustc_legacy_const_generics(2)]
32703#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32704#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32705pub const fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
32706    a: __m512i,
32707    b: __m512i,
32708) -> __mmask16 {
32709    unsafe {
32710        static_assert_uimm_bits!(IMM3, 3);
32711        let a = a.as_u32x16();
32712        let b = b.as_u32x16();
32713        let r = match IMM3 {
32714            0 => simd_eq(a, b),
32715            1 => simd_lt(a, b),
32716            2 => simd_le(a, b),
32717            3 => i32x16::ZERO,
32718            4 => simd_ne(a, b),
32719            5 => simd_ge(a, b),
32720            6 => simd_gt(a, b),
32721            _ => i32x16::splat(-1),
32722        };
32723        simd_bitmask(r)
32724    }
32725}
32726
32727/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32728///
32729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu32_mask&expand=722)
32730#[inline]
32731#[target_feature(enable = "avx512f")]
32732#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32733#[rustc_legacy_const_generics(3)]
32734#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32735#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32736pub const fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
32737    k1: __mmask16,
32738    a: __m512i,
32739    b: __m512i,
32740) -> __mmask16 {
32741    unsafe {
32742        static_assert_uimm_bits!(IMM3, 3);
32743        let a = a.as_u32x16();
32744        let b = b.as_u32x16();
32745        let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
32746        let r = match IMM3 {
32747            0 => simd_and(k1, simd_eq(a, b)),
32748            1 => simd_and(k1, simd_lt(a, b)),
32749            2 => simd_and(k1, simd_le(a, b)),
32750            3 => i32x16::ZERO,
32751            4 => simd_and(k1, simd_ne(a, b)),
32752            5 => simd_and(k1, simd_ge(a, b)),
32753            6 => simd_and(k1, simd_gt(a, b)),
32754            _ => k1,
32755        };
32756        simd_bitmask(r)
32757    }
32758}
32759
32760/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32761///
32762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu32_mask&expand=719)
32763#[inline]
32764#[target_feature(enable = "avx512f,avx512vl")]
32765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32766#[rustc_legacy_const_generics(2)]
32767#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32768#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32769pub const fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
32770    a: __m256i,
32771    b: __m256i,
32772) -> __mmask8 {
32773    unsafe {
32774        static_assert_uimm_bits!(IMM3, 3);
32775        let a = a.as_u32x8();
32776        let b = b.as_u32x8();
32777        let r = match IMM3 {
32778            0 => simd_eq(a, b),
32779            1 => simd_lt(a, b),
32780            2 => simd_le(a, b),
32781            3 => i32x8::ZERO,
32782            4 => simd_ne(a, b),
32783            5 => simd_ge(a, b),
32784            6 => simd_gt(a, b),
32785            _ => i32x8::splat(-1),
32786        };
32787        simd_bitmask(r)
32788    }
32789}
32790
32791/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32792///
32793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu32_mask&expand=720)
32794#[inline]
32795#[target_feature(enable = "avx512f,avx512vl")]
32796#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32797#[rustc_legacy_const_generics(3)]
32798#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32799#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32800pub const fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
32801    k1: __mmask8,
32802    a: __m256i,
32803    b: __m256i,
32804) -> __mmask8 {
32805    unsafe {
32806        static_assert_uimm_bits!(IMM3, 3);
32807        let a = a.as_u32x8();
32808        let b = b.as_u32x8();
32809        let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
32810        let r = match IMM3 {
32811            0 => simd_and(k1, simd_eq(a, b)),
32812            1 => simd_and(k1, simd_lt(a, b)),
32813            2 => simd_and(k1, simd_le(a, b)),
32814            3 => i32x8::ZERO,
32815            4 => simd_and(k1, simd_ne(a, b)),
32816            5 => simd_and(k1, simd_ge(a, b)),
32817            6 => simd_and(k1, simd_gt(a, b)),
32818            _ => k1,
32819        };
32820        simd_bitmask(r)
32821    }
32822}
32823
32824/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32825///
32826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu32_mask&expand=717)
32827#[inline]
32828#[target_feature(enable = "avx512f,avx512vl")]
32829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32830#[rustc_legacy_const_generics(2)]
32831#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32832#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32833pub const fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
32834    unsafe {
32835        static_assert_uimm_bits!(IMM3, 3);
32836        let a = a.as_u32x4();
32837        let b = b.as_u32x4();
32838        let r = match IMM3 {
32839            0 => simd_eq(a, b),
32840            1 => simd_lt(a, b),
32841            2 => simd_le(a, b),
32842            3 => i32x4::ZERO,
32843            4 => simd_ne(a, b),
32844            5 => simd_ge(a, b),
32845            6 => simd_gt(a, b),
32846            _ => i32x4::splat(-1),
32847        };
32848        simd_bitmask(r)
32849    }
32850}
32851
32852/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32853///
32854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu32_mask&expand=718)
32855#[inline]
32856#[target_feature(enable = "avx512f,avx512vl")]
32857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32858#[rustc_legacy_const_generics(3)]
32859#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32860#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32861pub const fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
32862    k1: __mmask8,
32863    a: __m128i,
32864    b: __m128i,
32865) -> __mmask8 {
32866    unsafe {
32867        static_assert_uimm_bits!(IMM3, 3);
32868        let a = a.as_u32x4();
32869        let b = b.as_u32x4();
32870        let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
32871        let r = match IMM3 {
32872            0 => simd_and(k1, simd_eq(a, b)),
32873            1 => simd_and(k1, simd_lt(a, b)),
32874            2 => simd_and(k1, simd_le(a, b)),
32875            3 => i32x4::ZERO,
32876            4 => simd_and(k1, simd_ne(a, b)),
32877            5 => simd_and(k1, simd_ge(a, b)),
32878            6 => simd_and(k1, simd_gt(a, b)),
32879            _ => k1,
32880        };
32881        simd_bitmask(r)
32882    }
32883}
32884
32885/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
32886///
32887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi32_mask&expand=1029)
32888#[inline]
32889#[target_feature(enable = "avx512f")]
32890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32891#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32892#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32893pub const fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32894    unsafe { simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) }
32895}
32896
/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi32_mask&expand=1031)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // Delegates to the generic masked compare with the LT predicate.
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
}
32908
32909/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
32910///
32911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi32_mask&expand=1027)
32912#[inline]
32913#[target_feature(enable = "avx512f,avx512vl")]
32914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32915#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32916#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32917pub const fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32918    unsafe { simd_bitmask::<i32x8, _>(simd_lt(a.as_i32x8(), b.as_i32x8())) }
32919}
32920
/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi32_mask&expand=1028)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegates to the generic masked compare with the LT predicate.
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
}
32932
32933/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
32934///
32935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32_mask&expand=1025)
32936#[inline]
32937#[target_feature(enable = "avx512f,avx512vl")]
32938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32939#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32940#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32941pub const fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32942    unsafe { simd_bitmask::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
32943}
32944
/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi32_mask&expand=1026)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates to the generic masked compare with the LT predicate.
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
}
32956
32957/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
32958///
32959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi32_mask&expand=905)
32960#[inline]
32961#[target_feature(enable = "avx512f")]
32962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32963#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32964#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32965pub const fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32966    unsafe { simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16())) }
32967}
32968
/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi32_mask&expand=906)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // `>` is expressed via the NLE ("not less than or equal") predicate of the generic masked compare.
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
32980
32981/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
32982///
32983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32_mask&expand=903)
32984#[inline]
32985#[target_feature(enable = "avx512f,avx512vl")]
32986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32987#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32988#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32989pub const fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32990    unsafe { simd_bitmask::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
32991}
32992
/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi32_mask&expand=904)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // `>` is expressed via the NLE ("not less than or equal") predicate of the generic masked compare.
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
33004
33005/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
33006///
33007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32_mask&expand=901)
33008#[inline]
33009#[target_feature(enable = "avx512f,avx512vl")]
33010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33011#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33012#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33013pub const fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33014    unsafe { simd_bitmask::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
33015}
33016
/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi32_mask&expand=902)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // `>` is expressed via the NLE ("not less than or equal") predicate of the generic masked compare.
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
33028
33029/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33030///
33031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi32_mask&expand=971)
33032#[inline]
33033#[target_feature(enable = "avx512f")]
33034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33035#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33036#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33037pub const fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
33038    unsafe { simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16())) }
33039}
33040
/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi32_mask&expand=972)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // Delegates to the generic masked compare with the LE predicate.
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
}
33052
33053/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33054///
33055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi32_mask&expand=969)
33056#[inline]
33057#[target_feature(enable = "avx512f,avx512vl")]
33058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33059#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33060#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33061pub const fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
33062    unsafe { simd_bitmask::<i32x8, _>(simd_le(a.as_i32x8(), b.as_i32x8())) }
33063}
33064
/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi32_mask&expand=970)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegates to the generic masked compare with the LE predicate.
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
}
33076
33077/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33078///
33079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi32_mask&expand=967)
33080#[inline]
33081#[target_feature(enable = "avx512f,avx512vl")]
33082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33083#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33084#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33085pub const fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33086    unsafe { simd_bitmask::<i32x4, _>(simd_le(a.as_i32x4(), b.as_i32x4())) }
33087}
33088
/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi32_mask&expand=968)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates to the generic masked compare with the LE predicate.
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
}
33100
33101/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33102///
33103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi32_mask&expand=849)
33104#[inline]
33105#[target_feature(enable = "avx512f")]
33106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33107#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33108#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33109pub const fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
33110    unsafe { simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16())) }
33111}
33112
/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi32_mask&expand=850)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // NLT ("not less than") is the Intel predicate encoding of >=; the callee
    // ANDs the comparison result with `k1` before packing the bitmask.
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
33124
33125/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33126///
33127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi32_mask&expand=847)
33128#[inline]
33129#[target_feature(enable = "avx512f,avx512vl")]
33130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33131#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33132#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33133pub const fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
33134    unsafe { simd_bitmask::<i32x8, _>(simd_ge(a.as_i32x8(), b.as_i32x8())) }
33135}
33136
/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi32_mask&expand=848)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // NLT ("not less than") is the Intel predicate encoding of >=; the callee
    // ANDs the comparison result with `k1` before packing the bitmask.
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
33148
33149/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33150///
33151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi32_mask&expand=845)
33152#[inline]
33153#[target_feature(enable = "avx512f,avx512vl")]
33154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33155#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33156#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33157pub const fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33158    unsafe { simd_bitmask::<i32x4, _>(simd_ge(a.as_i32x4(), b.as_i32x4())) }
33159}
33160
/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi32_mask&expand=846)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // NLT ("not less than") is the Intel predicate encoding of >=; the callee
    // ANDs the comparison result with `k1` before packing the bitmask.
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
33172
33173/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
33174///
33175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi32_mask&expand=779)
33176#[inline]
33177#[target_feature(enable = "avx512f")]
33178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33179#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33180#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33181pub const fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
33182    unsafe { simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16())) }
33183}
33184
/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi32_mask&expand=780)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // Delegate to the generic predicate intrinsic with the EQ predicate;
    // it ANDs the comparison result with `k1` before packing the bitmask.
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
33196
33197/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
33198///
33199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32_mask&expand=777)
33200#[inline]
33201#[target_feature(enable = "avx512f,avx512vl")]
33202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33203#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33204#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33205pub const fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
33206    unsafe { simd_bitmask::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
33207}
33208
/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi32_mask&expand=778)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate to the generic predicate intrinsic with the EQ predicate;
    // it ANDs the comparison result with `k1` before packing the bitmask.
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
33220
33221/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
33222///
33223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32_mask&expand=775)
33224#[inline]
33225#[target_feature(enable = "avx512f,avx512vl")]
33226#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33227#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33228#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33229pub const fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33230    unsafe { simd_bitmask::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
33231}
33232
/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi32_mask&expand=776)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic predicate intrinsic with the EQ predicate;
    // it ANDs the comparison result with `k1` before packing the bitmask.
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
33244
33245/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
33246///
33247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi32_mask&expand=1088)
33248#[inline]
33249#[target_feature(enable = "avx512f")]
33250#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33251#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33252#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33253pub const fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
33254    unsafe { simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16())) }
33255}
33256
/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi32_mask&expand=1089)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // Delegate to the generic predicate intrinsic with the NE predicate;
    // it ANDs the comparison result with `k1` before packing the bitmask.
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
}
33268
33269/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
33270///
33271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi32_mask&expand=1086)
33272#[inline]
33273#[target_feature(enable = "avx512f,avx512vl")]
33274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33275#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33276#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33277pub const fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
33278    unsafe { simd_bitmask::<i32x8, _>(simd_ne(a.as_i32x8(), b.as_i32x8())) }
33279}
33280
/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi32_mask&expand=1087)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate to the generic predicate intrinsic with the NE predicate;
    // it ANDs the comparison result with `k1` before packing the bitmask.
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
}
33292
33293/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
33294///
33295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi32_mask&expand=1084)
33296#[inline]
33297#[target_feature(enable = "avx512f,avx512vl")]
33298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33299#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33300#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33301pub const fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33302    unsafe { simd_bitmask::<i32x4, _>(simd_ne(a.as_i32x4(), b.as_i32x4())) }
33303}
33304
/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi32_mask&expand=1085)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic predicate intrinsic with the NE predicate;
    // it ANDs the comparison result with `k1` before packing the bitmask.
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
}
33316
/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi32_mask&expand=697)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
    a: __m512i,
    b: __m512i,
) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        // Map the 3-bit predicate to a lane-wise comparison; each arm yields
        // an all-ones/all-zeros mask per lane.
        let r = match IMM3 {
            0 => simd_eq(a, b), // _MM_CMPINT_EQ
            1 => simd_lt(a, b), // _MM_CMPINT_LT
            2 => simd_le(a, b), // _MM_CMPINT_LE
            3 => i32x16::ZERO, // _MM_CMPINT_FALSE: all bits clear
            4 => simd_ne(a, b), // _MM_CMPINT_NE
            5 => simd_ge(a, b), // _MM_CMPINT_NLT (>=)
            6 => simd_gt(a, b), // _MM_CMPINT_NLE (>)
            _ => i32x16::splat(-1), // _MM_CMPINT_TRUE (7): all bits set
        };
        // Pack one bit per lane into the 16-bit result mask.
        simd_bitmask(r)
    }
}
33347
/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi32_mask&expand=698)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        // Expand the bitmask `k1` into an all-ones/all-zeros lane mask so it
        // can be AND-ed with each lane-wise comparison result (zeromask).
        let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)), // _MM_CMPINT_EQ
            1 => simd_and(k1, simd_lt(a, b)), // _MM_CMPINT_LT
            2 => simd_and(k1, simd_le(a, b)), // _MM_CMPINT_LE
            3 => i32x16::ZERO, // _MM_CMPINT_FALSE: all bits clear
            4 => simd_and(k1, simd_ne(a, b)), // _MM_CMPINT_NE
            5 => simd_and(k1, simd_ge(a, b)), // _MM_CMPINT_NLT (>=)
            6 => simd_and(k1, simd_gt(a, b)), // _MM_CMPINT_NLE (>)
            _ => k1, // _MM_CMPINT_TRUE (7): result is just k1
        };
        // Pack one bit per lane into the 16-bit result mask.
        simd_bitmask(r)
    }
}
33380
/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi32_mask&expand=695)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        // Map the 3-bit predicate to a lane-wise comparison; each arm yields
        // an all-ones/all-zeros mask per lane.
        let r = match IMM3 {
            0 => simd_eq(a, b), // _MM_CMPINT_EQ
            1 => simd_lt(a, b), // _MM_CMPINT_LT
            2 => simd_le(a, b), // _MM_CMPINT_LE
            3 => i32x8::ZERO, // _MM_CMPINT_FALSE: all bits clear
            4 => simd_ne(a, b), // _MM_CMPINT_NE
            5 => simd_ge(a, b), // _MM_CMPINT_NLT (>=)
            6 => simd_gt(a, b), // _MM_CMPINT_NLE (>)
            _ => i32x8::splat(-1), // _MM_CMPINT_TRUE (7): all bits set
        };
        // Pack one bit per lane into the 8-bit result mask.
        simd_bitmask(r)
    }
}
33411
/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi32_mask&expand=696)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        // Expand the bitmask `k1` into an all-ones/all-zeros lane mask so it
        // can be AND-ed with each lane-wise comparison result (zeromask).
        let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)), // _MM_CMPINT_EQ
            1 => simd_and(k1, simd_lt(a, b)), // _MM_CMPINT_LT
            2 => simd_and(k1, simd_le(a, b)), // _MM_CMPINT_LE
            3 => i32x8::ZERO, // _MM_CMPINT_FALSE: all bits clear
            4 => simd_and(k1, simd_ne(a, b)), // _MM_CMPINT_NE
            5 => simd_and(k1, simd_ge(a, b)), // _MM_CMPINT_NLT (>=)
            6 => simd_and(k1, simd_gt(a, b)), // _MM_CMPINT_NLE (>)
            _ => k1, // _MM_CMPINT_TRUE (7): result is just k1
        };
        // Pack one bit per lane into the 8-bit result mask.
        simd_bitmask(r)
    }
}
33444
/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi32_mask&expand=693)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        // Map the 3-bit predicate to a lane-wise comparison; each arm yields
        // an all-ones/all-zeros mask per lane.
        let r = match IMM3 {
            0 => simd_eq(a, b), // _MM_CMPINT_EQ
            1 => simd_lt(a, b), // _MM_CMPINT_LT
            2 => simd_le(a, b), // _MM_CMPINT_LE
            3 => i32x4::ZERO, // _MM_CMPINT_FALSE: all bits clear
            4 => simd_ne(a, b), // _MM_CMPINT_NE
            5 => simd_ge(a, b), // _MM_CMPINT_NLT (>=)
            6 => simd_gt(a, b), // _MM_CMPINT_NLE (>)
            _ => i32x4::splat(-1), // _MM_CMPINT_TRUE (7): all bits set
        };
        // Pack one bit per lane into the low 4 bits of the result mask.
        simd_bitmask(r)
    }
}
33472
/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi32_mask&expand=694)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        // Expand the bitmask `k1` into an all-ones/all-zeros lane mask so it
        // can be AND-ed with each lane-wise comparison result (zeromask).
        let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)), // _MM_CMPINT_EQ
            1 => simd_and(k1, simd_lt(a, b)), // _MM_CMPINT_LT
            2 => simd_and(k1, simd_le(a, b)), // _MM_CMPINT_LE
            3 => i32x4::ZERO, // _MM_CMPINT_FALSE: all bits clear
            4 => simd_and(k1, simd_ne(a, b)), // _MM_CMPINT_NE
            5 => simd_and(k1, simd_ge(a, b)), // _MM_CMPINT_NLT (>=)
            6 => simd_and(k1, simd_gt(a, b)), // _MM_CMPINT_NLE (>)
            _ => k1, // _MM_CMPINT_TRUE (7): result is just k1
        };
        // Pack one bit per lane into the low 4 bits of the result mask.
        simd_bitmask(r)
    }
}
33505
33506/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
33507///
33508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu64_mask&expand=1062)
33509#[inline]
33510#[target_feature(enable = "avx512f")]
33511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33512#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33513#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33514pub const fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33515    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) }
33516}
33517
/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu64_mask&expand=1063)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegate to the generic unsigned predicate intrinsic with LT; the callee
    // ANDs the comparison result with `k1` before packing the bitmask.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
33529
33530/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
33531///
33532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu64_mask&expand=1060)
33533#[inline]
33534#[target_feature(enable = "avx512f,avx512vl")]
33535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33536#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33537#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33538pub const fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33539    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_u64x4(), b.as_u64x4())) }
33540}
33541
/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu64_mask&expand=1061)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate to the generic unsigned predicate intrinsic with LT; the callee
    // ANDs the comparison result with `k1` before packing the bitmask.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
33553
33554/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
33555///
33556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu64_mask&expand=1058)
33557#[inline]
33558#[target_feature(enable = "avx512f,avx512vl")]
33559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33560#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33561#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33562pub const fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33563    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_u64x2(), b.as_u64x2())) }
33564}
33565
/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu64_mask&expand=1059)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic unsigned predicate intrinsic with LT; the callee
    // ANDs the comparison result with `k1` before packing the bitmask.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
33577
33578/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
33579///
33580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu64_mask&expand=939)
33581#[inline]
33582#[target_feature(enable = "avx512f")]
33583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33584#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33585#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33586pub const fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33587    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) }
33588}
33589
/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu64_mask&expand=940)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // NLE ("not less or equal") is the Intel predicate encoding of >; the
    // callee ANDs the comparison result with `k1` before packing the bitmask.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
33601
33602/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
33603///
33604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu64_mask&expand=937)
33605#[inline]
33606#[target_feature(enable = "avx512f,avx512vl")]
33607#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33608#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33609#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33610pub const fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33611    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_u64x4(), b.as_u64x4())) }
33612}
33613
/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu64_mask&expand=938)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // NLE ("not less or equal") is the Intel predicate encoding of >; the
    // callee ANDs the comparison result with `k1` before packing the bitmask.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
33625
33626/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
33627///
33628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu64_mask&expand=935)
33629#[inline]
33630#[target_feature(enable = "avx512f,avx512vl")]
33631#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33632#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33633#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33634pub const fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33635    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_u64x2(), b.as_u64x2())) }
33636}
33637
/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu64_mask&expand=936)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // NLE ("not less or equal") is the Intel predicate encoding of >; the
    // callee ANDs the comparison result with `k1` before packing the bitmask.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
33649
33650/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33651///
33652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu64_mask&expand=1001)
33653#[inline]
33654#[target_feature(enable = "avx512f")]
33655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33656#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33657#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33658pub const fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33659    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8())) }
33660}
33661
/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu64_mask&expand=1002)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegate to the generic unsigned predicate intrinsic with LE; the callee
    // ANDs the comparison result with `k1` before packing the bitmask.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
33673
/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu64_mask&expand=999)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise unsigned `<=`; `simd_bitmask` packs the 4 lane results into
    // the low bits of the returned mask (upper 4 bits are 0).
    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_u64x4(), b.as_u64x4())) }
}
33685
/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu64_mask&expand=1000)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegates to the generic compare with the LE predicate; bits of the
    // result are zeroed where `k1` is 0.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
33697
/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu64_mask&expand=997)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise unsigned `<=`; `simd_bitmask` packs the 2 lane results into
    // the low bits of the returned mask (upper 6 bits are 0).
    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_u64x2(), b.as_u64x2())) }
}
33709
/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu64_mask&expand=998)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates to the generic compare with the LE predicate; bits of the
    // result are zeroed where `k1` is 0.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
33721
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu64_mask&expand=879)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    // Lane-wise unsigned `>=`; `simd_bitmask` packs the 8 lane results into
    // the returned mask (bit i corresponds to lane i).
    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8())) }
}
33733
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu64_mask&expand=880)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegates to the generic compare with the NLT (not-less-than, i.e.
    // unsigned `>=`) predicate; bits of the result are zeroed where `k1` is 0.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
33745
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu64_mask&expand=877)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise unsigned `>=`; `simd_bitmask` packs the 4 lane results into
    // the low bits of the returned mask (upper 4 bits are 0).
    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_u64x4(), b.as_u64x4())) }
}
33757
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu64_mask&expand=878)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegates to the generic compare with the NLT (not-less-than, i.e.
    // unsigned `>=`) predicate; bits of the result are zeroed where `k1` is 0.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
33769
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu64_mask&expand=875)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise unsigned `>=`; `simd_bitmask` packs the 2 lane results into
    // the low bits of the returned mask (upper 6 bits are 0).
    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_u64x2(), b.as_u64x2())) }
}
33781
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu64_mask&expand=876)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates to the generic compare with the NLT (not-less-than, i.e.
    // unsigned `>=`) predicate; bits of the result are zeroed where `k1` is 0.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
33793
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu64_mask&expand=813)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    // Lane-wise equality; `simd_bitmask` packs the 8 lane results into the
    // returned mask (bit i corresponds to lane i).
    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8())) }
}
33805
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu64_mask&expand=814)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegates to the generic compare with the EQ predicate; bits of the
    // result are zeroed where `k1` is 0.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
33817
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu64_mask&expand=811)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise equality; `simd_bitmask` packs the 4 lane results into the
    // low bits of the returned mask (upper 4 bits are 0).
    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_u64x4(), b.as_u64x4())) }
}
33829
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu64_mask&expand=812)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegates to the generic compare with the EQ predicate; bits of the
    // result are zeroed where `k1` is 0.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
33841
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu64_mask&expand=809)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise equality; `simd_bitmask` packs the 2 lane results into the
    // low bits of the returned mask (upper 6 bits are 0).
    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_u64x2(), b.as_u64x2())) }
}
33853
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu64_mask&expand=810)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates to the generic compare with the EQ predicate; bits of the
    // result are zeroed where `k1` is 0.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
33865
/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu64_mask&expand=1118)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    // Lane-wise inequality; `simd_bitmask` packs the 8 lane results into the
    // returned mask (bit i corresponds to lane i).
    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8())) }
}
33877
/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu64_mask&expand=1119)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegates to the generic compare with the NE predicate; bits of the
    // result are zeroed where `k1` is 0.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
33889
/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu64_mask&expand=1116)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise inequality; `simd_bitmask` packs the 4 lane results into the
    // low bits of the returned mask (upper 4 bits are 0).
    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_u64x4(), b.as_u64x4())) }
}
33901
/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu64_mask&expand=1117)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegates to the generic compare with the NE predicate; bits of the
    // result are zeroed where `k1` is 0.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
33913
/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu64_mask&expand=1114)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise inequality; `simd_bitmask` packs the 2 lane results into the
    // low bits of the returned mask (upper 6 bits are 0).
    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_u64x2(), b.as_u64x2())) }
}
33925
/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu64_mask&expand=1115)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates to the generic compare with the NE predicate; bits of the
    // result are zeroed where `k1` is 0.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
33937
/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu64_mask&expand=727)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
    a: __m512i,
    b: __m512i,
) -> __mmask8 {
    unsafe {
        // Only the low 3 bits of the predicate are meaningful.
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u64x8();
        let b = b.as_u64x8();
        // IMM3 follows the Intel _MM_CMPINT_* encoding:
        // 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE.
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x8::splat(-1),
        };
        // Pack the per-lane vector mask into an 8-bit scalar mask.
        simd_bitmask(r)
    }
}
33968
/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu64_mask&expand=728)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __mmask8 {
    unsafe {
        // Only the low 3 bits of the predicate are meaningful.
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u64x8();
        let b = b.as_u64x8();
        // Expand the scalar bitmask `k1` into a vector mask (all-ones lanes
        // where the bit is set) so it can be ANDed with the compare result.
        let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
        // IMM3 follows the Intel _MM_CMPINT_* encoding:
        // 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE.
        // FALSE yields all-zero regardless of `k1`; TRUE yields `k1` itself.
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        // Pack the per-lane vector mask into an 8-bit scalar mask.
        simd_bitmask(r)
    }
}
34001
/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu64_mask&expand=725)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        // Only the low 3 bits of the predicate are meaningful.
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u64x4();
        let b = b.as_u64x4();
        // IMM3 follows the Intel _MM_CMPINT_* encoding:
        // 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE.
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x4::splat(-1),
        };
        // Pack the 4 lane results into the low bits of the scalar mask.
        simd_bitmask(r)
    }
}
34032
/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu64_mask&expand=726)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        // Only the low 3 bits of the predicate are meaningful.
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u64x4();
        let b = b.as_u64x4();
        // Expand the scalar bitmask `k1` into a vector mask (all-ones lanes
        // where the bit is set) so it can be ANDed with the compare result.
        let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
        // IMM3 follows the Intel _MM_CMPINT_* encoding:
        // 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE.
        // FALSE yields all-zero regardless of `k1`; TRUE yields `k1` itself.
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        // Pack the 4 lane results into the low bits of the scalar mask.
        simd_bitmask(r)
    }
}
34065
/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu64_mask&expand=723)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        // Only the low 3 bits of the predicate are meaningful.
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u64x2();
        let b = b.as_u64x2();
        // IMM3 follows the Intel _MM_CMPINT_* encoding:
        // 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE.
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x2::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x2::splat(-1),
        };
        // Pack the 2 lane results into the low bits of the scalar mask.
        simd_bitmask(r)
    }
}
34093
/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu64_mask&expand=724)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    unsafe {
        // Only the low 3 bits of the predicate are meaningful.
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u64x2();
        let b = b.as_u64x2();
        // Expand the scalar bitmask `k1` into a vector mask (all-ones lanes
        // where the bit is set) so it can be ANDed with the compare result.
        let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
        // IMM3 follows the Intel _MM_CMPINT_* encoding:
        // 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE.
        // FALSE yields all-zero regardless of `k1`; TRUE yields `k1` itself.
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x2::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        // Pack the 2 lane results into the low bits of the scalar mask.
        simd_bitmask(r)
    }
}
34126
/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi64_mask&expand=1037)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    // Lane-wise signed `<` (note the i64x8 view); `simd_bitmask` packs the
    // 8 lane results into the returned mask (bit i corresponds to lane i).
    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8())) }
}
34138
/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi64_mask&expand=1038)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegates to the generic signed compare with the LT predicate; bits of
    // the result are zeroed where `k1` is 0.
    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
34150
/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi64_mask&expand=1035)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise signed `<`; `simd_bitmask` packs the 4 lane results into the
    // low bits of the returned mask (upper 4 bits are 0).
    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_i64x4(), b.as_i64x4())) }
}
34162
/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi64_mask&expand=1036)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegates to the generic signed compare with the LT predicate; bits of
    // the result are zeroed where `k1` is 0.
    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
34174
/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi64_mask&expand=1033)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise signed `<`; `simd_bitmask` packs the 2 lane results into the
    // low bits of the returned mask (upper 6 bits are 0).
    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_i64x2(), b.as_i64x2())) }
}
34186
/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi64_mask&expand=1034)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates to the generic signed compare with the LT predicate; bits of
    // the result are zeroed where `k1` is 0.
    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
34198
/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi64_mask&expand=913)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    // Lane-wise signed `>` (note the i64x8 view); `simd_bitmask` packs the
    // 8 lane results into the returned mask (bit i corresponds to lane i).
    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) }
}
34210
/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi64_mask&expand=914)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegates to the generic signed compare with the NLE (not-less-or-equal,
    // i.e. signed `>`) predicate; bits of the result are zeroed where `k1` is 0.
    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
34222
/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64_mask&expand=911)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise signed `>`; `simd_bitmask` packs the 4 lane results into the
    // low bits of the returned mask (upper 4 bits are 0).
    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
}
34234
/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi64_mask&expand=912)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Greater-than is the NLE ("not less-or-equal") predicate; lanes whose
    // k1 bit is clear come out as 0 in the result mask.
    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
34246
/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64_mask&expand=909)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise signed `>` on the two i64 lanes, packed into the low 2 bits
    // of the mask by `simd_bitmask`.
    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_i64x2(), b.as_i64x2())) }
}
34258
/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi64_mask&expand=910)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Greater-than is the NLE ("not less-or-equal") predicate; lanes whose
    // k1 bit is clear come out as 0 in the result mask.
    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
34270
/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi64_mask&expand=977)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    // Lane-wise signed `<=` yields an all-ones/all-zeros i64 per lane;
    // `simd_bitmask` packs those lanes into one bit each of the mask.
    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8())) }
}
34282
/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi64_mask&expand=978)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegates with the LE predicate; lanes whose k1 bit is clear come out
    // as 0 in the result mask.
    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
34294
/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi64_mask&expand=975)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise signed `<=` on the four i64 lanes, packed into the low 4 bits
    // of the mask by `simd_bitmask`.
    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_i64x4(), b.as_i64x4())) }
}
34306
/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi64_mask&expand=976)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegates with the LE predicate; lanes whose k1 bit is clear come out
    // as 0 in the result mask.
    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
34318
/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi64_mask&expand=973)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise signed `<=` on the two i64 lanes, packed into the low 2 bits
    // of the mask by `simd_bitmask`.
    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_i64x2(), b.as_i64x2())) }
}
34330
/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi64_mask&expand=974)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates with the LE predicate; lanes whose k1 bit is clear come out
    // as 0 in the result mask.
    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
34342
/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi64_mask&expand=855)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    // Lane-wise signed `>=` yields an all-ones/all-zeros i64 per lane;
    // `simd_bitmask` packs those lanes into one bit each of the mask.
    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8())) }
}
34354
/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi64_mask&expand=856)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Greater-or-equal is the NLT ("not less-than") predicate; lanes whose
    // k1 bit is clear come out as 0 in the result mask.
    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
34366
/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi64_mask&expand=853)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise signed `>=` on the four i64 lanes, packed into the low 4 bits
    // of the mask by `simd_bitmask`.
    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_i64x4(), b.as_i64x4())) }
}
34378
/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi64_mask&expand=854)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Greater-or-equal is the NLT ("not less-than") predicate; lanes whose
    // k1 bit is clear come out as 0 in the result mask.
    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
34390
/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi64_mask&expand=851)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise signed `>=` on the two i64 lanes, packed into the low 2 bits
    // of the mask by `simd_bitmask`.
    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_i64x2(), b.as_i64x2())) }
}
34402
/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi64_mask&expand=852)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Greater-or-equal is the NLT ("not less-than") predicate; lanes whose
    // k1 bit is clear come out as 0 in the result mask.
    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
34414
/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi64_mask&expand=787)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    // Lane-wise `==` (signedness-agnostic) yields an all-ones/all-zeros i64
    // per lane; `simd_bitmask` packs those lanes into one bit each.
    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8())) }
}
34426
/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi64_mask&expand=788)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegates with the EQ predicate; lanes whose k1 bit is clear come out
    // as 0 in the result mask.
    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
34438
/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64_mask&expand=785)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise `==` on the four i64 lanes, packed into the low 4 bits of the
    // mask by `simd_bitmask`.
    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
}
34450
/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi64_mask&expand=786)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegates with the EQ predicate; lanes whose k1 bit is clear come out
    // as 0 in the result mask.
    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
34462
/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64_mask&expand=783)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise `==` on the two i64 lanes, packed into the low 2 bits of the
    // mask by `simd_bitmask`.
    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_i64x2(), b.as_i64x2())) }
}
34474
/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi64_mask&expand=784)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates with the EQ predicate; lanes whose k1 bit is clear come out
    // as 0 in the result mask.
    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
34486
/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi64_mask&expand=1094)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    // Lane-wise `!=` yields an all-ones/all-zeros i64 per lane;
    // `simd_bitmask` packs those lanes into one bit each of the mask.
    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8())) }
}
34498
/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi64_mask&expand=1095)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegates with the NE predicate; lanes whose k1 bit is clear come out
    // as 0 in the result mask.
    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
34510
/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi64_mask&expand=1092)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise `!=` on the four i64 lanes, packed into the low 4 bits of the
    // mask by `simd_bitmask`.
    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_i64x4(), b.as_i64x4())) }
}
34522
/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi64_mask&expand=1093)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegates with the NE predicate; lanes whose k1 bit is clear come out
    // as 0 in the result mask.
    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
34534
/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi64_mask&expand=1090)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise `!=` on the two i64 lanes, packed into the low 2 bits of the
    // mask by `simd_bitmask`.
    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_i64x2(), b.as_i64x2())) }
}
34546
/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi64_mask&expand=1091)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates with the NE predicate; lanes whose k1 bit is clear come out
    // as 0 in the result mask.
    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
34558
/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi64_mask&expand=703)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
    a: __m512i,
    b: __m512i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        // _MM_CMPINT_* encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (all lanes 0),
        // 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (all lanes set).
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x8::splat(-1),
        };
        // Pack the per-lane all-ones/all-zeros results into one bit each.
        simd_bitmask(r)
    }
}
34589
/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi64_mask&expand=704)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        // Expand the bit mask into a lane mask (-1 active / 0 inactive) so it
        // can be ANDed with the per-lane comparison results.
        let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
        // _MM_CMPINT_* encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE,
        // 5=NLT (>=), 6=NLE (>), 7=TRUE (just the k1 lanes).
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        // Pack the per-lane all-ones/all-zeros results into one bit each.
        simd_bitmask(r)
    }
}
34622
/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi64_mask&expand=701)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        // _MM_CMPINT_* encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (all lanes 0),
        // 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (all lanes set).
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x4::splat(-1),
        };
        // Pack the per-lane all-ones/all-zeros results into one bit each.
        simd_bitmask(r)
    }
}
34653
/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi64_mask&expand=702)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        // Expand the bit mask into a lane mask (-1 active / 0 inactive) so it
        // can be ANDed with the per-lane comparison results.
        let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
        // _MM_CMPINT_* encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE,
        // 5=NLT (>=), 6=NLE (>), 7=TRUE (just the k1 lanes).
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        // Pack the per-lane all-ones/all-zeros results into one bit each.
        simd_bitmask(r)
    }
}
34686
/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi64_mask&expand=699)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x2();
        let b = b.as_i64x2();
        // _MM_CMPINT_* encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (all lanes 0),
        // 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (all lanes set).
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x2::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x2::splat(-1),
        };
        // Pack the per-lane all-ones/all-zeros results into one bit each.
        simd_bitmask(r)
    }
}
34714
/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi64_mask&expand=700)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x2();
        let b = b.as_i64x2();
        // Expand the bit mask into a lane mask (-1 active / 0 inactive) so it
        // can be ANDed with the per-lane comparison results.
        let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
        // _MM_CMPINT_* encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE,
        // 5=NLT (>=), 6=NLE (>), 7=TRUE (just the k1 lanes).
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x2::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        // Pack the per-lane all-ones/all-zeros results into one bit each.
        simd_bitmask(r)
    }
}
34747
/// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi32&expand=4556)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
    // Ordered (left-to-right) sum of the 16 i32 lanes, starting from 0.
    unsafe { simd_reduce_add_ordered(a.as_i32x16(), 0) }
}
34758
/// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi32&expand=4555)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
    // Inactive lanes are replaced with 0 (the additive identity) before the
    // ordered sum, so they contribute nothing.
    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO), 0) }
}
34769
/// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi64&expand=4558)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
    // Ordered (left-to-right) sum of the 8 i64 lanes, starting from 0.
    unsafe { simd_reduce_add_ordered(a.as_i64x8(), 0) }
}
34780
/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi64&expand=4557)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
    // Inactive lanes are replaced with 0 (the additive identity) before the
    // ordered sum, so they contribute nothing.
    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO), 0) }
}
34791
/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_ps&expand=4562)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_add_ps(a: __m512) -> f32 {
    unsafe {
        // Tree reduction: 512 -> 256 -> 128 bits, then horizontally add the
        // remaining four lanes. Rounding therefore follows this pairwise
        // order, not a strict left-to-right accumulation.
        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
        let a = _mm256_add_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a = _mm_add_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        // Add lanes [2,3] onto [0,1], then sum the final pair as scalars.
        let a = _mm_add_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        simd_extract!(a, 0, f32) + simd_extract!(a, 1, f32)
    }
}
34811
34812/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
34813///
34814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_ps&expand=4561)
34815#[inline]
34816#[target_feature(enable = "avx512f")]
34817#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34818#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34819pub const fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
34820    unsafe { _mm512_reduce_add_ps(simd_select_bitmask(k, a, _mm512_setzero_ps())) }
34821}
34822
34823/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
34824///
34825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_pd&expand=4560)
34826#[inline]
34827#[target_feature(enable = "avx512f")]
34828#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34829#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34830pub const fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
34831    unsafe {
34832        let a = _mm256_add_pd(
34833            _mm512_extractf64x4_pd::<0>(a),
34834            _mm512_extractf64x4_pd::<1>(a),
34835        );
34836        let a = _mm_add_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
34837        simd_extract!(a, 0, f64) + simd_extract!(a, 1, f64)
34838    }
34839}
34840
34841/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
34842///
34843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_pd&expand=4559)
34844#[inline]
34845#[target_feature(enable = "avx512f")]
34846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34847#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34848pub const fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
34849    unsafe { _mm512_reduce_add_pd(simd_select_bitmask(k, a, _mm512_setzero_pd())) }
34850}
34851
34852/// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a.
34853///
34854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi32&expand=4600)
34855#[inline]
34856#[target_feature(enable = "avx512f")]
34857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34858#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34859pub const fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
34860    unsafe { simd_reduce_mul_ordered(a.as_i32x16(), 1) }
34861}
34862
34863/// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
34864///
34865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi32&expand=4599)
34866#[inline]
34867#[target_feature(enable = "avx512f")]
34868#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34869#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34870pub const fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
34871    unsafe {
34872        simd_reduce_mul_ordered(
34873            simd_select_bitmask(k, a.as_i32x16(), _mm512_set1_epi32(1).as_i32x16()),
34874            1,
34875        )
34876    }
34877}
34878
34879/// Reduce the packed 64-bit integers in a by multiplication. Returns the product of all elements in a.
34880///
34881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi64&expand=4602)
34882#[inline]
34883#[target_feature(enable = "avx512f")]
34884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34885#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34886pub const fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
34887    unsafe { simd_reduce_mul_ordered(a.as_i64x8(), 1) }
34888}
34889
34890/// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
34891///
34892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi64&expand=4601)
34893#[inline]
34894#[target_feature(enable = "avx512f")]
34895#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34896#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34897pub const fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
34898    unsafe {
34899        simd_reduce_mul_ordered(
34900            simd_select_bitmask(k, a.as_i64x8(), _mm512_set1_epi64(1).as_i64x8()),
34901            1,
34902        )
34903    }
34904}
34905
34906/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
34907///
34908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_ps&expand=4606)
34909#[inline]
34910#[target_feature(enable = "avx512f")]
34911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34912#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34913pub const fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
34914    unsafe {
34915        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
34916        let a = _mm256_mul_ps(
34917            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
34918            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
34919        );
34920        let a = _mm_mul_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
34921        let a = _mm_mul_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
34922        simd_extract!(a, 0, f32) * simd_extract!(a, 1, f32)
34923    }
34924}
34925
34926/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
34927///
34928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_ps&expand=4605)
34929#[inline]
34930#[target_feature(enable = "avx512f")]
34931#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34932#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34933pub const fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
34934    unsafe { _mm512_reduce_mul_ps(simd_select_bitmask(k, a, _mm512_set1_ps(1.))) }
34935}
34936
34937/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
34938///
34939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_pd&expand=4604)
34940#[inline]
34941#[target_feature(enable = "avx512f")]
34942#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34943#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34944pub const fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
34945    unsafe {
34946        let a = _mm256_mul_pd(
34947            _mm512_extractf64x4_pd::<0>(a),
34948            _mm512_extractf64x4_pd::<1>(a),
34949        );
34950        let a = _mm_mul_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
34951        simd_extract!(a, 0, f64) * simd_extract!(a, 1, f64)
34952    }
34953}
34954
34955/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
34956///
34957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_pd&expand=4603)
34958#[inline]
34959#[target_feature(enable = "avx512f")]
34960#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34961#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34962pub const fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
34963    unsafe { _mm512_reduce_mul_pd(simd_select_bitmask(k, a, _mm512_set1_pd(1.))) }
34964}
34965
34966/// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a.
34967///
34968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi32&expand=4576)
34969#[inline]
34970#[target_feature(enable = "avx512f")]
34971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34972#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34973pub const fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
34974    unsafe { simd_reduce_max(a.as_i32x16()) }
34975}
34976
34977/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
34978///
34979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi32&expand=4575)
34980#[inline]
34981#[target_feature(enable = "avx512f")]
34982#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34983#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34984pub const fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
34985    unsafe {
34986        simd_reduce_max(simd_select_bitmask(
34987            k,
34988            a.as_i32x16(),
34989            i32x16::splat(i32::MIN),
34990        ))
34991    }
34992}
34993
34994/// Reduce the packed signed 64-bit integers in a by maximum. Returns the maximum of all elements in a.
34995///
34996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi64&expand=4578)
34997#[inline]
34998#[target_feature(enable = "avx512f")]
34999#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35000#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35001pub const fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
35002    unsafe { simd_reduce_max(a.as_i64x8()) }
35003}
35004
35005/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
35006///
35007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi64&expand=4577)
35008#[inline]
35009#[target_feature(enable = "avx512f")]
35010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35011#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35012pub const fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
35013    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MIN))) }
35014}
35015
35016/// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a.
35017///
35018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu32&expand=4580)
35019#[inline]
35020#[target_feature(enable = "avx512f")]
35021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35022#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35023pub const fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
35024    unsafe { simd_reduce_max(a.as_u32x16()) }