core/stdarch/crates/core_arch/src/x86/avx512vbmi2.rs

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_expandloadu_epi16(
    src: __m512i,
    k: __mmask32,
    mem_addr: *const i16,
) -> __m512i {
    transmute(expandloadw_512(mem_addr, src.as_i16x32(), k))
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_expandloadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
    _mm512_mask_expandloadu_epi16(_mm512_setzero_si512(), k, mem_addr)
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_expandloadu_epi16(
    src: __m256i,
    k: __mmask16,
    mem_addr: *const i16,
) -> __m256i {
    transmute(expandloadw_256(mem_addr, src.as_i16x16(), k))
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_expandloadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
    _mm256_mask_expandloadu_epi16(_mm256_setzero_si256(), k, mem_addr)
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_expandloadu_epi16(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i16,
) -> __m128i {
    transmute(expandloadw_128(mem_addr, src.as_i16x8(), k))
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_expandloadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
    _mm_mask_expandloadu_epi16(_mm_setzero_si128(), k, mem_addr)
}

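// A minimal usage sketch (the helper name is illustrative, not part of this module's API):
// pull the next `k.count_ones()` values from the front of `src` and scatter them into the
// lanes of `fill` whose mask bit is set; the other lanes keep the value from `fill`. The
// caller must guarantee `avx512vbmi2` support and that `src` holds at least that many
// readable elements.
#[target_feature(enable = "avx512vbmi2")]
unsafe fn _example_take_epi16(fill: __m512i, k: __mmask32, src: &[i16]) -> __m512i {
    debug_assert!(src.len() >= k.count_ones() as usize);
    _mm512_mask_expandloadu_epi16(fill, k, src.as_ptr())
}
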
/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_expandloadu_epi8(
    src: __m512i,
    k: __mmask64,
    mem_addr: *const i8,
) -> __m512i {
    transmute(expandloadb_512(mem_addr, src.as_i8x64(), k))
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_expandloadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
    _mm512_mask_expandloadu_epi8(_mm512_setzero_si512(), k, mem_addr)
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_expandloadu_epi8(
    src: __m256i,
    k: __mmask32,
    mem_addr: *const i8,
) -> __m256i {
    transmute(expandloadb_256(mem_addr, src.as_i8x32(), k))
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_expandloadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
    _mm256_mask_expandloadu_epi8(_mm256_setzero_si256(), k, mem_addr)
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_expandloadu_epi8(
    src: __m128i,
    k: __mmask16,
    mem_addr: *const i8,
) -> __m128i {
    transmute(expandloadb_128(mem_addr, src.as_i8x16(), k))
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_expandloadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
    _mm_mask_expandloadu_epi8(_mm_setzero_si128(), k, mem_addr)
}

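// A minimal usage sketch (illustrative helper, not part of this module): the zero-fill
// byte variant. Only the first `k.count_ones()` bytes of `src` are read; inactive lanes
// of the result are zeroed. Assumes `avx512vbmi2` and `avx512vl` are available.
#[target_feature(enable = "avx512vbmi2,avx512vl")]
unsafe fn _example_take_epi8_zeroed(k: __mmask16, src: &[i8]) -> __m128i {
    debug_assert!(src.len() >= k.count_ones() as usize);
    _mm_maskz_expandloadu_epi8(k, src.as_ptr())
}
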
/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm512_mask_compressstoreu_epi16(base_addr: *mut i16, k: __mmask32, a: __m512i) {
    vcompressstorew(base_addr as *mut _, a.as_i16x32(), k)
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm256_mask_compressstoreu_epi16(base_addr: *mut i16, k: __mmask16, a: __m256i) {
    vcompressstorew256(base_addr as *mut _, a.as_i16x16(), k)
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm_mask_compressstoreu_epi16(base_addr: *mut i16, k: __mmask8, a: __m128i) {
    vcompressstorew128(base_addr as *mut _, a.as_i16x8(), k)
}

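// A minimal usage sketch (illustrative helper, not part of this module): write the 16-bit
// lanes of `a` selected by `k` back to back at the start of `dst`. Exactly
// `k.count_ones()` elements are stored, so `dst` must provide at least that much
// writable space.
#[target_feature(enable = "avx512vbmi2")]
unsafe fn _example_pack_epi16(dst: &mut [i16], k: __mmask32, a: __m512i) {
    debug_assert!(dst.len() >= k.count_ones() as usize);
    _mm512_mask_compressstoreu_epi16(dst.as_mut_ptr(), k, a)
}
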
/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm512_mask_compressstoreu_epi8(base_addr: *mut i8, k: __mmask64, a: __m512i) {
    vcompressstoreb(base_addr, a.as_i8x64(), k)
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm256_mask_compressstoreu_epi8(base_addr: *mut i8, k: __mmask32, a: __m256i) {
    vcompressstoreb256(base_addr, a.as_i8x32(), k)
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm_mask_compressstoreu_epi8(base_addr: *mut i8, k: __mmask16, a: __m128i) {
    vcompressstoreb128(base_addr, a.as_i8x16(), k)
}

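// A minimal usage sketch (illustrative helper, not part of this module): the 256-bit byte
// variant; the active bytes of `a` land contiguously at the front of `dst`.
#[target_feature(enable = "avx512vbmi2,avx512vl")]
unsafe fn _example_pack_epi8(dst: &mut [i8], k: __mmask32, a: __m256i) {
    debug_assert!(dst.len() >= k.count_ones() as usize);
    _mm256_mask_compressstoreu_epi8(dst.as_mut_ptr(), k, a)
}
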
/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi16&expand=1192)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm512_mask_compress_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressw(a.as_i16x32(), src.as_i16x32(), k)) }
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi16&expand=1193)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm512_maskz_compress_epi16(k: __mmask32, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressw(a.as_i16x32(), i16x32::ZERO, k)) }
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi16&expand=1190)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm256_mask_compress_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressw256(a.as_i16x16(), src.as_i16x16(), k)) }
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi16&expand=1191)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm256_maskz_compress_epi16(k: __mmask16, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressw256(a.as_i16x16(), i16x16::ZERO, k)) }
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi16&expand=1188)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm_mask_compress_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressw128(a.as_i16x8(), src.as_i16x8(), k)) }
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi16&expand=1189)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm_maskz_compress_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressw128(a.as_i16x8(), i16x8::ZERO, k)) }
}

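// A minimal usage sketch (illustrative helper, not part of this module): in-register
// compaction. With the alternating mask below, the odd-numbered 16-bit lanes of `a` are
// packed into the low half of the result and the remaining lanes are zeroed.
#[target_feature(enable = "avx512vbmi2")]
fn _example_keep_odd_lanes_epi16(a: __m512i) -> __m512i {
    _mm512_maskz_compress_epi16(0b1010_1010_1010_1010_1010_1010_1010_1010, a)
}
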
/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi8&expand=1210)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm512_mask_compress_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressb(a.as_i8x64(), src.as_i8x64(), k)) }
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi8&expand=1211)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm512_maskz_compress_epi8(k: __mmask64, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressb(a.as_i8x64(), i8x64::ZERO, k)) }
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi8&expand=1208)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm256_mask_compress_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressb256(a.as_i8x32(), src.as_i8x32(), k)) }
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi8&expand=1209)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm256_maskz_compress_epi8(k: __mmask32, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressb256(a.as_i8x32(), i8x32::ZERO, k)) }
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi8&expand=1206)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm_mask_compress_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressb128(a.as_i8x16(), src.as_i8x16(), k)) }
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi8&expand=1207)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm_maskz_compress_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressb128(a.as_i8x16(), i8x16::ZERO, k)) }
}

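// A minimal usage sketch (illustrative helper, not part of this module): the write-through
// byte variant. The bytes of `a` selected by `k` are packed to the low end of the result;
// lanes past the count of selected bytes are taken from `src`.
#[target_feature(enable = "avx512vbmi2,avx512vl")]
fn _example_compact_bytes(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    _mm_mask_compress_epi8(src, k, a)
}
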
/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi16&expand=2310)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm512_mask_expand_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandw(a.as_i16x32(), src.as_i16x32(), k)) }
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi16&expand=2311)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm512_maskz_expand_epi16(k: __mmask32, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandw(a.as_i16x32(), i16x32::ZERO, k)) }
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi16&expand=2308)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm256_mask_expand_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandw256(a.as_i16x16(), src.as_i16x16(), k)) }
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi16&expand=2309)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm256_maskz_expand_epi16(k: __mmask16, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandw256(a.as_i16x16(), i16x16::ZERO, k)) }
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi16&expand=2306)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm_mask_expand_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandw128(a.as_i16x8(), src.as_i16x8(), k)) }
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi16&expand=2307)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm_maskz_expand_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandw128(a.as_i16x8(), i16x8::ZERO, k)) }
}

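// A minimal usage sketch (illustrative helper, not part of this module): the in-register
// inverse of compress. The low `k.count_ones()` lanes of `a` are spread out to the lane
// positions whose mask bit is set; the other lanes are zeroed.
#[target_feature(enable = "avx512vbmi2")]
fn _example_spread_epi16(k: __mmask32, a: __m512i) -> __m512i {
    _mm512_maskz_expand_epi16(k, a)
}
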
/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi8&expand=2328)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm512_mask_expand_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandb(a.as_i8x64(), src.as_i8x64(), k)) }
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi8&expand=2329)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm512_maskz_expand_epi8(k: __mmask64, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandb(a.as_i8x64(), i8x64::ZERO, k)) }
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi8&expand=2326)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm256_mask_expand_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandb256(a.as_i8x32(), src.as_i8x32(), k)) }
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi8&expand=2327)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm256_maskz_expand_epi8(k: __mmask32, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandb256(a.as_i8x32(), i8x32::ZERO, k)) }
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi8&expand=2324)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm_mask_expand_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandb128(a.as_i8x16(), src.as_i8x16(), k)) }
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi8&expand=2325)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm_maskz_expand_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandb128(a.as_i8x16(), i8x16::ZERO, k)) }
}

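// A minimal usage sketch (illustrative helper, not part of this module): the writemask
// byte variant; lanes whose mask bit is clear keep the corresponding byte of `src`.
#[target_feature(enable = "avx512vbmi2,avx512vl")]
fn _example_spread_epi8_into(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    _mm256_mask_expand_epi8(src, k, a)
}
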
/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi64&expand=5087)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_i64x8(),
            b.as_i64x8(),
            simd_and(c.as_i64x8(), i64x8::splat(63)),
        ))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi64&expand=5085)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm512_mask_shldv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi64(a, b, c).as_i64x8();
        transmute(simd_select_bitmask(k, shf, a.as_i64x8()))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi64&expand=5086)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi64(a, b, c).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi64&expand=5084)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_i64x4(),
            b.as_i64x4(),
            simd_and(c.as_i64x4(), i64x4::splat(63)),
        ))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi64&expand=5082)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm256_mask_shldv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi64(a, b, c).as_i64x4();
        transmute(simd_select_bitmask(k, shf, a.as_i64x4()))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi64&expand=5083)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi64(a, b, c).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi64&expand=5081)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_i64x2(),
            b.as_i64x2(),
            simd_and(c.as_i64x2(), i64x2::splat(63)),
        ))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi64&expand=5079)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm_mask_shldv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi64(a, b, c).as_i64x2();
        transmute(simd_select_bitmask(k, shf, a.as_i64x2()))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi64&expand=5080)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi64(a, b, c).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}

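// A minimal usage sketch (illustrative helper, not part of this module): a funnel left
// shift with both inputs equal is a per-lane rotate. Each 64-bit lane of `a` is rotated
// left by the amount in the matching lane of `amounts`; only the low 6 bits of each
// amount are used.
#[target_feature(enable = "avx512vbmi2")]
fn _example_rotate_left_epi64(a: __m512i, amounts: __m512i) -> __m512i {
    _mm512_shldv_epi64(a, a, amounts)
}
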
/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi32&expand=5078)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_i32x16(),
            b.as_i32x16(),
            simd_and(c.as_i32x16(), i32x16::splat(31)),
        ))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi32&expand=5076)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm512_mask_shldv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi32(a, b, c).as_i32x16();
        transmute(simd_select_bitmask(k, shf, a.as_i32x16()))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi32&expand=5077)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm512_maskz_shldv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi32(a, b, c).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi32&expand=5075)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_i32x8(),
            b.as_i32x8(),
            simd_and(c.as_i32x8(), i32x8::splat(31)),
        ))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi32&expand=5073)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm256_mask_shldv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi32(a, b, c).as_i32x8();
        transmute(simd_select_bitmask(k, shf, a.as_i32x8()))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi32&expand=5074)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi32(a, b, c).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi32&expand=5072)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_i32x4(),
            b.as_i32x4(),
            simd_and(c.as_i32x4(), i32x4::splat(31)),
        ))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi32&expand=5070)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm_mask_shldv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi32(a, b, c).as_i32x4();
        transmute(simd_select_bitmask(k, shf, a.as_i32x4()))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi32&expand=5071)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi32(a, b, c).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}

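// A minimal usage sketch (illustrative helper, not part of this module): merging two bit
// streams. Each 32-bit lane of the result is the matching lane of `hi` shifted left by the
// amount in `n`, with the vacated low bits filled from the top bits of the matching lane
// of `lo`.
#[target_feature(enable = "avx512vbmi2,avx512vl")]
fn _example_funnel_shift_left_epi32(hi: __m256i, lo: __m256i, n: __m256i) -> __m256i {
    _mm256_shldv_epi32(hi, lo, n)
}
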
/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi16&expand=5069)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_i16x32(),
            b.as_i16x32(),
            simd_and(c.as_i16x32(), i16x32::splat(15)),
        ))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi16&expand=5067)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm512_mask_shldv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi16(a, b, c).as_i16x32();
        transmute(simd_select_bitmask(k, shf, a.as_i16x32()))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi16&expand=5068)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm512_maskz_shldv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi16(a, b, c).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi16&expand=5066)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_i16x16(),
            b.as_i16x16(),
            simd_and(c.as_i16x16(), i16x16::splat(15)),
        ))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi16&expand=5064)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm256_mask_shldv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi16(a, b, c).as_i16x16();
        transmute(simd_select_bitmask(k, shf, a.as_i16x16()))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi16&expand=5065)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm256_maskz_shldv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi16(a, b, c).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi16&expand=5063)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_i16x8(),
            b.as_i16x8(),
            simd_and(c.as_i16x8(), i16x8::splat(15)),
        ))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi16&expand=5061)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm_mask_shldv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi16(a, b, c).as_i16x8();
        transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi16&expand=5062)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi16(a, b, c).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

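// A minimal usage sketch (illustrative helper, not part of this module): a per-lane
// 16-bit rotate built from the word funnel shift, with the shift count reduced modulo 16
// in each lane.
#[target_feature(enable = "avx512vbmi2,avx512vl")]
fn _example_rotate_left_epi16(a: __m128i, amounts: __m128i) -> __m128i {
    _mm_shldv_epi16(a, a, amounts)
}
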
/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi64&expand=5141)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        transmute(simd_funnel_shr(
            b.as_i64x8(),
            a.as_i64x8(),
            simd_and(c.as_i64x8(), i64x8::splat(63)),
        ))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi64&expand=5139)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub fn _mm512_mask_shrdv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shrdv_epi64(a, b, c).as_i64x8();
        transmute(simd_select_bitmask(k, shf, a.as_i64x8()))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi64&expand=5140)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shrdv_epi64(a, b, c).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi64&expand=5138)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        transmute(simd_funnel_shr(
            b.as_i64x4(),
            a.as_i64x4(),
            simd_and(c.as_i64x4(), i64x4::splat(63)),
        ))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi64&expand=5136)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub fn _mm256_mask_shrdv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shrdv_epi64(a, b, c).as_i64x4();
        transmute(simd_select_bitmask(k, shf, a.as_i64x4()))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi64&expand=5137)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shrdv_epi64(a, b, c).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi64&expand=5135)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        transmute(simd_funnel_shr(
            b.as_i64x2(),
            a.as_i64x2(),
            simd_and(c.as_i64x2(), i64x2::splat(63)),
        ))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi64&expand=5133)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub fn _mm_mask_shrdv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shrdv_epi64(a, b, c).as_i64x2();
        transmute(simd_select_bitmask(k, shf, a.as_i64x2()))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi64&expand=5134)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shrdv_epi64(a, b, c).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}

1035/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
1036///
1037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi32&expand=5132)
1038#[inline]
1039#[target_feature(enable = "avx512vbmi2")]
1040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1041#[cfg_attr(test, assert_instr(vpshrdvd))]
1042pub fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
1043    unsafe {
1044        transmute(simd_funnel_shr(
1045            b.as_i32x16(),
1046            a.as_i32x16(),
1047            simd_and(c.as_i32x16(), i32x16::splat(31)),
1048        ))
1049    }
1050}
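
// NOTE: Purely illustrative sketch; the per-lane shift count is reduced modulo 32 before use:
//
//     let a = _mm512_set1_epi32(0xF0);
//     let b = _mm512_set1_epi32(0);
//     let c = _mm512_set1_epi32(36);                // 36 & 31 == 4
//     let r = _mm512_shrdv_epi32(a, b, c);          // each lane == 0xF0 >> 4 == 0xF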
1051
1052/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1053///
1054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi32&expand=5130)
1055#[inline]
1056#[target_feature(enable = "avx512vbmi2")]
1057#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1058#[cfg_attr(test, assert_instr(vpshrdvd))]
1059pub fn _mm512_mask_shrdv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
1060    unsafe {
1061        let shf = _mm512_shrdv_epi32(a, b, c).as_i32x16();
1062        transmute(simd_select_bitmask(k, shf, a.as_i32x16()))
1063    }
1064}
1065
1066/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1067///
1068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi32&expand=5131)
1069#[inline]
1070#[target_feature(enable = "avx512vbmi2")]
1071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1072#[cfg_attr(test, assert_instr(vpshrdvd))]
1073pub fn _mm512_maskz_shrdv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
1074    unsafe {
1075        let shf = _mm512_shrdv_epi32(a, b, c).as_i32x16();
1076        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
1077    }
1078}
1079
1080/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
1081///
1082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi32&expand=5129)
1083#[inline]
1084#[target_feature(enable = "avx512vbmi2,avx512vl")]
1085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1086#[cfg_attr(test, assert_instr(vpshrdvd))]
1087pub fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
1088    unsafe {
1089        transmute(simd_funnel_shr(
1090            b.as_i32x8(),
1091            a.as_i32x8(),
1092            simd_and(c.as_i32x8(), i32x8::splat(31)),
1093        ))
1094    }
1095}
1096
1097/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1098///
1099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi32&expand=5127)
1100#[inline]
1101#[target_feature(enable = "avx512vbmi2,avx512vl")]
1102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1103#[cfg_attr(test, assert_instr(vpshrdvd))]
1104pub fn _mm256_mask_shrdv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
1105    unsafe {
1106        let shf = _mm256_shrdv_epi32(a, b, c).as_i32x8();
1107        transmute(simd_select_bitmask(k, shf, a.as_i32x8()))
1108    }
1109}
1110
1111/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1112///
1113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi32&expand=5128)
1114#[inline]
1115#[target_feature(enable = "avx512vbmi2,avx512vl")]
1116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1117#[cfg_attr(test, assert_instr(vpshrdvd))]
1118pub fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
1119    unsafe {
1120        let shf = _mm256_shrdv_epi32(a, b, c).as_i32x8();
1121        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
1122    }
1123}
1124
1125/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
1126///
1127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi32&expand=5126)
1128#[inline]
1129#[target_feature(enable = "avx512vbmi2,avx512vl")]
1130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1131#[cfg_attr(test, assert_instr(vpshrdvd))]
1132pub fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1133    unsafe {
1134        transmute(simd_funnel_shr(
1135            b.as_i32x4(),
1136            a.as_i32x4(),
1137            simd_and(c.as_i32x4(), i32x4::splat(31)),
1138        ))
1139    }
1140}
1141
1142/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1143///
1144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi32&expand=5124)
1145#[inline]
1146#[target_feature(enable = "avx512vbmi2,avx512vl")]
1147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1148#[cfg_attr(test, assert_instr(vpshrdvd))]
1149pub fn _mm_mask_shrdv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
1150    unsafe {
1151        let shf = _mm_shrdv_epi32(a, b, c).as_i32x4();
1152        transmute(simd_select_bitmask(k, shf, a.as_i32x4()))
1153    }
1154}
1155
1156/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1157///
1158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi32&expand=5125)
1159#[inline]
1160#[target_feature(enable = "avx512vbmi2,avx512vl")]
1161#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1162#[cfg_attr(test, assert_instr(vpshrdvd))]
1163pub fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1164    unsafe {
1165        let shf = _mm_shrdv_epi32(a, b, c).as_i32x4();
1166        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
1167    }
1168}
1169
1170/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
1171///
1172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi16&expand=5123)
1173#[inline]
1174#[target_feature(enable = "avx512vbmi2")]
1175#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1176#[cfg_attr(test, assert_instr(vpshrdvw))]
1177pub fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
1178    unsafe {
1179        transmute(simd_funnel_shr(
1180            b.as_i16x32(),
1181            a.as_i16x32(),
1182            simd_and(c.as_i16x32(), i16x32::splat(15)),
1183        ))
1184    }
1185}
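
// NOTE: Purely illustrative sketch; passing the same vector as both `a` and `b` turns the
// funnel shift into a per-lane rotate right:
//
//     let a = _mm512_set1_epi16(0x1234);
//     let c = _mm512_set1_epi16(4);
//     let r = _mm512_shrdv_epi16(a, a, c);          // each lane == 0x4123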
1186
1187/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1188///
1189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi16&expand=5121)
1190#[inline]
1191#[target_feature(enable = "avx512vbmi2")]
1192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1193#[cfg_attr(test, assert_instr(vpshrdvw))]
1194pub fn _mm512_mask_shrdv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
1195    unsafe {
1196        let shf = _mm512_shrdv_epi16(a, b, c).as_i16x32();
1197        transmute(simd_select_bitmask(k, shf, a.as_i16x32()))
1198    }
1199}
1200
1201/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1202///
1203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi16&expand=5122)
1204#[inline]
1205#[target_feature(enable = "avx512vbmi2")]
1206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1207#[cfg_attr(test, assert_instr(vpshrdvw))]
1208pub fn _mm512_maskz_shrdv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
1209    unsafe {
1210        let shf = _mm512_shrdv_epi16(a, b, c).as_i16x32();
1211        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
1212    }
1213}
1214
1215/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
1216///
1217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi16&expand=5120)
1218#[inline]
1219#[target_feature(enable = "avx512vbmi2,avx512vl")]
1220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1221#[cfg_attr(test, assert_instr(vpshrdvw))]
1222pub fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
1223    unsafe {
1224        transmute(simd_funnel_shr(
1225            b.as_i16x16(),
1226            a.as_i16x16(),
1227            simd_and(c.as_i16x16(), i16x16::splat(15)),
1228        ))
1229    }
1230}
1231
1232/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1233///
1234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi16&expand=5118)
1235#[inline]
1236#[target_feature(enable = "avx512vbmi2,avx512vl")]
1237#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1238#[cfg_attr(test, assert_instr(vpshrdvw))]
1239pub fn _mm256_mask_shrdv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
1240    unsafe {
1241        let shf = _mm256_shrdv_epi16(a, b, c).as_i16x16();
1242        transmute(simd_select_bitmask(k, shf, a.as_i16x16()))
1243    }
1244}
1245
1246/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1247///
1248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi16&expand=5119)
1249#[inline]
1250#[target_feature(enable = "avx512vbmi2,avx512vl")]
1251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1252#[cfg_attr(test, assert_instr(vpshrdvw))]
1253pub fn _mm256_maskz_shrdv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
1254    unsafe {
1255        let shf = _mm256_shrdv_epi16(a, b, c).as_i16x16();
1256        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
1257    }
1258}
1259
1260/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
1261///
1262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi16&expand=5117)
1263#[inline]
1264#[target_feature(enable = "avx512vbmi2,avx512vl")]
1265#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1266#[cfg_attr(test, assert_instr(vpshrdvw))]
1267pub fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1268    unsafe {
1269        transmute(simd_funnel_shr(
1270            b.as_i16x8(),
1271            a.as_i16x8(),
1272            simd_and(c.as_i16x8(), i16x8::splat(15)),
1273        ))
1274    }
1275}
1276
1277/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1278///
1279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi16&expand=5115)
1280#[inline]
1281#[target_feature(enable = "avx512vbmi2,avx512vl")]
1282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1283#[cfg_attr(test, assert_instr(vpshrdvw))]
1284pub fn _mm_mask_shrdv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
1285    unsafe {
1286        let shf = _mm_shrdv_epi16(a, b, c).as_i16x8();
1287        transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
1288    }
1289}
1290
1291/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1292///
1293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi16&expand=5116)
1294#[inline]
1295#[target_feature(enable = "avx512vbmi2,avx512vl")]
1296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1297#[cfg_attr(test, assert_instr(vpshrdvw))]
1298pub fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1299    unsafe {
1300        let shf = _mm_shrdv_epi16(a, b, c).as_i16x8();
1301        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
1302    }
1303}
1304
1305/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
1306///
1307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi64&expand=5060)
1308#[inline]
1309#[target_feature(enable = "avx512vbmi2")]
1310#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1311#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1312#[rustc_legacy_const_generics(2)]
1313pub fn _mm512_shldi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1314    static_assert_uimm_bits!(IMM8, 8);
1315    _mm512_shldv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64))
1316}
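
// NOTE: Purely illustrative sketch. The immediate form is implemented on top of the variable
// form with a broadcast count, so IMM8 is likewise reduced modulo 64:
//
//     let a = _mm512_set1_epi64(1);                 // high half of each 128-bit pair
//     let b = _mm512_set1_epi64(0);                 // low half of each 128-bit pair
//     let r = _mm512_shldi_epi64::<2>(a, b);        // upper 64 bits of (1:0) << 2 == 4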
1317
1318/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1319///
1320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi64&expand=5058)
1321#[inline]
1322#[target_feature(enable = "avx512vbmi2")]
1323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1324#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1325#[rustc_legacy_const_generics(4)]
1326pub fn _mm512_mask_shldi_epi64<const IMM8: i32>(
1327    src: __m512i,
1328    k: __mmask8,
1329    a: __m512i,
1330    b: __m512i,
1331) -> __m512i {
1332    unsafe {
1333        static_assert_uimm_bits!(IMM8, 8);
1334        let shf = _mm512_shldi_epi64::<IMM8>(a, b).as_i64x8();
1335        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
1336    }
1337}
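
// NOTE: Purely illustrative sketch. Unlike the masked shldv/shrdv forms, the masked
// immediate forms take an explicit `src` that supplies the inactive lanes:
//
//     let src = _mm512_set1_epi64(-1);
//     let a = _mm512_set1_epi64(1);
//     let b = _mm512_set1_epi64(0);
//     let r = _mm512_mask_shldi_epi64::<2>(src, 0b0101_0101, a, b);
//     // even lanes == 4, odd lanes keep src == -1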
1338
1339/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1340///
1341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi64&expand=5059)
1342#[inline]
1343#[target_feature(enable = "avx512vbmi2")]
1344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1345#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1346#[rustc_legacy_const_generics(3)]
1347pub fn _mm512_maskz_shldi_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1348    unsafe {
1349        static_assert_uimm_bits!(IMM8, 8);
1350        let shf = _mm512_shldi_epi64::<IMM8>(a, b).as_i64x8();
1351        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
1352    }
1353}
1354
1355/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
1356///
1357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi64&expand=5057)
1358#[inline]
1359#[target_feature(enable = "avx512vbmi2,avx512vl")]
1360#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1361#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1362#[rustc_legacy_const_generics(2)]
1363pub fn _mm256_shldi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1364    static_assert_uimm_bits!(IMM8, 8);
1365    _mm256_shldv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64))
1366}
1367
1368/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1369///
1370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi64&expand=5055)
1371#[inline]
1372#[target_feature(enable = "avx512vbmi2,avx512vl")]
1373#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1374#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1375#[rustc_legacy_const_generics(4)]
1376pub fn _mm256_mask_shldi_epi64<const IMM8: i32>(
1377    src: __m256i,
1378    k: __mmask8,
1379    a: __m256i,
1380    b: __m256i,
1381) -> __m256i {
1382    unsafe {
1383        static_assert_uimm_bits!(IMM8, 8);
1384        let shf = _mm256_shldi_epi64::<IMM8>(a, b).as_i64x4();
1385        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
1386    }
1387}
1388
1389/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1390///
1391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi64&expand=5056)
1392#[inline]
1393#[target_feature(enable = "avx512vbmi2,avx512vl")]
1394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1395#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1396#[rustc_legacy_const_generics(3)]
1397pub fn _mm256_maskz_shldi_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1398    unsafe {
1399        static_assert_uimm_bits!(IMM8, 8);
1400        let shf = _mm256_shldi_epi64::<IMM8>(a, b).as_i64x4();
1401        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
1402    }
1403}
1404
1405/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
1406///
1407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi64&expand=5054)
1408#[inline]
1409#[target_feature(enable = "avx512vbmi2,avx512vl")]
1410#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1411#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1412#[rustc_legacy_const_generics(2)]
1413pub fn _mm_shldi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
1414    static_assert_uimm_bits!(IMM8, 8);
1415    _mm_shldv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64))
1416}
1417
1418/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1419///
1420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi64&expand=5052)
1421#[inline]
1422#[target_feature(enable = "avx512vbmi2,avx512vl")]
1423#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1424#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1425#[rustc_legacy_const_generics(4)]
1426pub fn _mm_mask_shldi_epi64<const IMM8: i32>(
1427    src: __m128i,
1428    k: __mmask8,
1429    a: __m128i,
1430    b: __m128i,
1431) -> __m128i {
1432    unsafe {
1433        static_assert_uimm_bits!(IMM8, 8);
1434        let shf = _mm_shldi_epi64::<IMM8>(a, b).as_i64x2();
1435        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
1436    }
1437}
1438
1439/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1440///
1441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi64&expand=5053)
1442#[inline]
1443#[target_feature(enable = "avx512vbmi2,avx512vl")]
1444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1445#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1446#[rustc_legacy_const_generics(3)]
1447pub fn _mm_maskz_shldi_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1448    unsafe {
1449        static_assert_uimm_bits!(IMM8, 8);
1450        let shf = _mm_shldi_epi64::<IMM8>(a, b).as_i64x2();
1451        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
1452    }
1453}
1454
1455/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
1456///
1457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi32&expand=5051)
1458#[inline]
1459#[target_feature(enable = "avx512vbmi2")]
1460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1461#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1462#[rustc_legacy_const_generics(2)]
1463pub fn _mm512_shldi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1464    static_assert_uimm_bits!(IMM8, 8);
1465    _mm512_shldv_epi32(a, b, _mm512_set1_epi32(IMM8))
1466}
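
// NOTE: Purely illustrative sketch; shifting the 64-bit pair (a:b) left and keeping the
// upper 32 bits pulls the top bits of `b` into the bottom of the result:
//
//     let a = _mm512_set1_epi32(0);
//     let b = _mm512_set1_epi32(i32::MIN);          // only the sign bit set
//     let r = _mm512_shldi_epi32::<1>(a, b);        // each lane == 1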
1467
1468/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1469///
1470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi32&expand=5049)
1471#[inline]
1472#[target_feature(enable = "avx512vbmi2")]
1473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1474#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1475#[rustc_legacy_const_generics(4)]
1476pub fn _mm512_mask_shldi_epi32<const IMM8: i32>(
1477    src: __m512i,
1478    k: __mmask16,
1479    a: __m512i,
1480    b: __m512i,
1481) -> __m512i {
1482    unsafe {
1483        static_assert_uimm_bits!(IMM8, 8);
1484        let shf = _mm512_shldi_epi32::<IMM8>(a, b).as_i32x16();
1485        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
1486    }
1487}
1488
1489/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1490///
1491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi32&expand=5050)
1492#[inline]
1493#[target_feature(enable = "avx512vbmi2")]
1494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1495#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1496#[rustc_legacy_const_generics(3)]
1497pub fn _mm512_maskz_shldi_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1498    unsafe {
1499        static_assert_uimm_bits!(IMM8, 8);
1500        let shf = _mm512_shldi_epi32::<IMM8>(a, b).as_i32x16();
1501        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
1502    }
1503}
1504
1505/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
1506///
1507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi32&expand=5048)
1508#[inline]
1509#[target_feature(enable = "avx512vbmi2,avx512vl")]
1510#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1511#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1512#[rustc_legacy_const_generics(2)]
1513pub fn _mm256_shldi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1514    static_assert_uimm_bits!(IMM8, 8);
1515    _mm256_shldv_epi32(a, b, _mm256_set1_epi32(IMM8))
1516}
1517
1518/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1519///
1520/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi32&expand=5046)
1521#[inline]
1522#[target_feature(enable = "avx512vbmi2,avx512vl")]
1523#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1524#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1525#[rustc_legacy_const_generics(4)]
1526pub fn _mm256_mask_shldi_epi32<const IMM8: i32>(
1527    src: __m256i,
1528    k: __mmask8,
1529    a: __m256i,
1530    b: __m256i,
1531) -> __m256i {
1532    unsafe {
1533        static_assert_uimm_bits!(IMM8, 8);
1534        let shf = _mm256_shldi_epi32::<IMM8>(a, b).as_i32x8();
1535        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
1536    }
1537}
1538
1539/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1540///
1541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi32&expand=5047)
1542#[inline]
1543#[target_feature(enable = "avx512vbmi2,avx512vl")]
1544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1545#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1546#[rustc_legacy_const_generics(3)]
1547pub fn _mm256_maskz_shldi_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1548    unsafe {
1549        static_assert_uimm_bits!(IMM8, 8);
1550        let shf = _mm256_shldi_epi32::<IMM8>(a, b).as_i32x8();
1551        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
1552    }
1553}
1554
1555/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
1556///
1557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi32&expand=5045)
1558#[inline]
1559#[target_feature(enable = "avx512vbmi2,avx512vl")]
1560#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1561#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1562#[rustc_legacy_const_generics(2)]
1563pub fn _mm_shldi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
1564    static_assert_uimm_bits!(IMM8, 8);
1565    _mm_shldv_epi32(a, b, _mm_set1_epi32(IMM8))
1566}
1567
1568/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1569///
1570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi32&expand=5043)
1571#[inline]
1572#[target_feature(enable = "avx512vbmi2,avx512vl")]
1573#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1574#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1575#[rustc_legacy_const_generics(4)]
1576pub fn _mm_mask_shldi_epi32<const IMM8: i32>(
1577    src: __m128i,
1578    k: __mmask8,
1579    a: __m128i,
1580    b: __m128i,
1581) -> __m128i {
1582    unsafe {
1583        static_assert_uimm_bits!(IMM8, 8);
1584        let shf = _mm_shldi_epi32::<IMM8>(a, b).as_i32x4();
1585        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
1586    }
1587}
1588
1589/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1590///
1591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi32&expand=5044)
1592#[inline]
1593#[target_feature(enable = "avx512vbmi2,avx512vl")]
1594#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1595#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1596#[rustc_legacy_const_generics(3)]
1597pub fn _mm_maskz_shldi_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1598    unsafe {
1599        static_assert_uimm_bits!(IMM8, 8);
1600        let shf = _mm_shldi_epi32::<IMM8>(a, b).as_i32x4();
1601        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
1602    }
1603}
1604
1605/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
1606///
1607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi16&expand=5042)
1608#[inline]
1609#[target_feature(enable = "avx512vbmi2")]
1610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1611#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1612#[rustc_legacy_const_generics(2)]
1613pub fn _mm512_shldi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1614    static_assert_uimm_bits!(IMM8, 8);
1615    _mm512_shldv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16))
1616}
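
// NOTE: Purely illustrative sketch; as with the variable forms, passing the same vector
// twice gives a constant per-lane rotate, here a rotate left by IMM8:
//
//     let a = _mm512_set1_epi16(0x1234);
//     let r = _mm512_shldi_epi16::<4>(a, a);        // each lane == 0x2341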
1617
1618/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1619///
1620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi16&expand=5040)
1621#[inline]
1622#[target_feature(enable = "avx512vbmi2")]
1623#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1624#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1625#[rustc_legacy_const_generics(4)]
1626pub fn _mm512_mask_shldi_epi16<const IMM8: i32>(
1627    src: __m512i,
1628    k: __mmask32,
1629    a: __m512i,
1630    b: __m512i,
1631) -> __m512i {
1632    unsafe {
1633        static_assert_uimm_bits!(IMM8, 8);
1634        let shf = _mm512_shldi_epi16::<IMM8>(a, b).as_i16x32();
1635        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
1636    }
1637}
1638
1639/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1640///
1641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi16&expand=5041)
1642#[inline]
1643#[target_feature(enable = "avx512vbmi2")]
1644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1645#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1646#[rustc_legacy_const_generics(3)]
1647pub fn _mm512_maskz_shldi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1648    unsafe {
1649        static_assert_uimm_bits!(IMM8, 8);
1650        let shf = _mm512_shldi_epi16::<IMM8>(a, b).as_i16x32();
1651        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
1652    }
1653}
1654
1655/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
1656///
1657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi16&expand=5039)
1658#[inline]
1659#[target_feature(enable = "avx512vbmi2,avx512vl")]
1660#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1661#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1662#[rustc_legacy_const_generics(2)]
1663pub fn _mm256_shldi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1664    static_assert_uimm_bits!(IMM8, 8);
1665    _mm256_shldv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16))
1666}
1667
1668/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1669///
1670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi16&expand=5037)
1671#[inline]
1672#[target_feature(enable = "avx512vbmi2,avx512vl")]
1673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1674#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1675#[rustc_legacy_const_generics(4)]
1676pub fn _mm256_mask_shldi_epi16<const IMM8: i32>(
1677    src: __m256i,
1678    k: __mmask16,
1679    a: __m256i,
1680    b: __m256i,
1681) -> __m256i {
1682    unsafe {
1683        static_assert_uimm_bits!(IMM8, 8);
1684        let shf = _mm256_shldi_epi16::<IMM8>(a, b).as_i16x16();
1685        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
1686    }
1687}
1688
1689/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1690///
1691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi16&expand=5038)
1692#[inline]
1693#[target_feature(enable = "avx512vbmi2,avx512vl")]
1694#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1695#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1696#[rustc_legacy_const_generics(3)]
1697pub fn _mm256_maskz_shldi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1698    unsafe {
1699        static_assert_uimm_bits!(IMM8, 8);
1700        let shf = _mm256_shldi_epi16::<IMM8>(a, b).as_i16x16();
1701        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
1702    }
1703}
1704
1705/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
1706///
1707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi16&expand=5036)
1708#[inline]
1709#[target_feature(enable = "avx512vbmi2,avx512vl")]
1710#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1711#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1712#[rustc_legacy_const_generics(2)]
1713pub fn _mm_shldi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
1714    static_assert_uimm_bits!(IMM8, 8);
1715    _mm_shldv_epi16(a, b, _mm_set1_epi16(IMM8 as i16))
1716}
1717
1718/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1719///
1720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi16&expand=5034)
1721#[inline]
1722#[target_feature(enable = "avx512vbmi2,avx512vl")]
1723#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1724#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1725#[rustc_legacy_const_generics(4)]
1726pub fn _mm_mask_shldi_epi16<const IMM8: i32>(
1727    src: __m128i,
1728    k: __mmask8,
1729    a: __m128i,
1730    b: __m128i,
1731) -> __m128i {
1732    unsafe {
1733        static_assert_uimm_bits!(IMM8, 8);
1734        let shf = _mm_shldi_epi16::<IMM8>(a, b).as_i16x8();
1735        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
1736    }
1737}
1738
1739/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1740///
1741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi16&expand=5035)
1742#[inline]
1743#[target_feature(enable = "avx512vbmi2,avx512vl")]
1744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1745#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1746#[rustc_legacy_const_generics(3)]
1747pub fn _mm_maskz_shldi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1748    unsafe {
1749        static_assert_uimm_bits!(IMM8, 8);
1750        let shf = _mm_shldi_epi16::<IMM8>(a, b).as_i16x8();
1751        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
1752    }
1753}
1754
1755/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
1756///
1757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi64&expand=5114)
1758#[inline]
1759#[target_feature(enable = "avx512vbmi2")]
1760#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1761#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1762#[rustc_legacy_const_generics(2)]
1763pub fn _mm512_shrdi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1764    static_assert_uimm_bits!(IMM8, 8);
1765    _mm512_shrdv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64))
1766}
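
// NOTE: Purely illustrative sketch; the same funnel shift right as the shrdv intrinsics,
// but with a constant count broadcast from IMM8:
//
//     let a = _mm512_set1_epi64(8);                 // low half of each 128-bit pair
//     let b = _mm512_set1_epi64(0);                 // high half of each 128-bit pair
//     let r = _mm512_shrdi_epi64::<3>(a, b);        // each lane == 8 >> 3 == 1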
1767
1768/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1769///
1770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi64&expand=5112)
1771#[inline]
1772#[target_feature(enable = "avx512vbmi2")]
1773#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1774#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1775#[rustc_legacy_const_generics(4)]
1776pub fn _mm512_mask_shrdi_epi64<const IMM8: i32>(
1777    src: __m512i,
1778    k: __mmask8,
1779    a: __m512i,
1780    b: __m512i,
1781) -> __m512i {
1782    unsafe {
1783        static_assert_uimm_bits!(IMM8, 8);
1784        let shf = _mm512_shrdi_epi64::<IMM8>(a, b).as_i64x8();
1785        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
1786    }
1787}
1788
1789/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1790///
1791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi64&expand=5113)
1792#[inline]
1793#[target_feature(enable = "avx512vbmi2")]
1794#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1795#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 255))] //should be vpshrdq
1796#[rustc_legacy_const_generics(3)]
1797pub fn _mm512_maskz_shrdi_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1798    unsafe {
1799        static_assert_uimm_bits!(IMM8, 8);
1800        let shf = _mm512_shrdi_epi64::<IMM8>(a, b).as_i64x8();
1801        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
1802    }
1803}
1804
1805/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
1806///
1807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi64&expand=5111)
1808#[inline]
1809#[target_feature(enable = "avx512vbmi2,avx512vl")]
1810#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1811#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1812#[rustc_legacy_const_generics(2)]
1813pub fn _mm256_shrdi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1814    static_assert_uimm_bits!(IMM8, 8);
1815    _mm256_shrdv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64))
1816}
1817
1818/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1819///
1820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi64&expand=5109)
1821#[inline]
1822#[target_feature(enable = "avx512vbmi2,avx512vl")]
1823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1824#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1825#[rustc_legacy_const_generics(4)]
1826pub fn _mm256_mask_shrdi_epi64<const IMM8: i32>(
1827    src: __m256i,
1828    k: __mmask8,
1829    a: __m256i,
1830    b: __m256i,
1831) -> __m256i {
1832    unsafe {
1833        static_assert_uimm_bits!(IMM8, 8);
1834        let shf = _mm256_shrdi_epi64::<IMM8>(a, b).as_i64x4();
1835        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
1836    }
1837}
1838
1839/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1840///
1841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi64&expand=5110)
1842#[inline]
1843#[target_feature(enable = "avx512vbmi2,avx512vl")]
1844#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1845#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1846#[rustc_legacy_const_generics(3)]
1847pub fn _mm256_maskz_shrdi_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1848    unsafe {
1849        static_assert_uimm_bits!(IMM8, 8);
1850        let shf = _mm256_shrdi_epi64::<IMM8>(a, b).as_i64x4();
1851        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
1852    }
1853}
1854
1855/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
1856///
1857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi64&expand=5108)
1858#[inline]
1859#[target_feature(enable = "avx512vbmi2,avx512vl")]
1860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1861#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1862#[rustc_legacy_const_generics(2)]
1863pub fn _mm_shrdi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
1864    static_assert_uimm_bits!(IMM8, 8);
1865    _mm_shrdv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64))
1866}
1867
1868/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1869///
1870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi64&expand=5106)
1871#[inline]
1872#[target_feature(enable = "avx512vbmi2,avx512vl")]
1873#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1874#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1875#[rustc_legacy_const_generics(4)]
1876pub fn _mm_mask_shrdi_epi64<const IMM8: i32>(
1877    src: __m128i,
1878    k: __mmask8,
1879    a: __m128i,
1880    b: __m128i,
1881) -> __m128i {
1882    unsafe {
1883        static_assert_uimm_bits!(IMM8, 8);
1884        let shf = _mm_shrdi_epi64::<IMM8>(a, b).as_i64x2();
1885        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
1886    }
1887}
1888
1889/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1890///
1891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi64&expand=5107)
1892#[inline]
1893#[target_feature(enable = "avx512vbmi2,avx512vl")]
1894#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1895#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1896#[rustc_legacy_const_generics(3)]
1897pub fn _mm_maskz_shrdi_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1898    unsafe {
1899        static_assert_uimm_bits!(IMM8, 8);
1900        let shf = _mm_shrdi_epi64::<IMM8>(a, b).as_i64x2();
1901        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
1902    }
1903}
1904
1905/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
1906///
1907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi32&expand=5105)
1908#[inline]
1909#[target_feature(enable = "avx512vbmi2")]
1910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1911#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
1912#[rustc_legacy_const_generics(2)]
1913pub fn _mm512_shrdi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1914    static_assert_uimm_bits!(IMM8, 8);
1915    _mm512_shrdv_epi32(a, b, _mm512_set1_epi32(IMM8))
1916}
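
// NOTE: Purely illustrative sketch; with the same vector in both operands this is a
// constant per-lane rotate right:
//
//     let a = _mm512_set1_epi32(1);
//     let r = _mm512_shrdi_epi32::<1>(a, a);        // each lane == 1 rotated right by 1 (only the top bit set)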
1917
1918/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1919///
1920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi32&expand=5103)
1921#[inline]
1922#[target_feature(enable = "avx512vbmi2")]
1923#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1924#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
1925#[rustc_legacy_const_generics(4)]
1926pub fn _mm512_mask_shrdi_epi32<const IMM8: i32>(
1927    src: __m512i,
1928    k: __mmask16,
1929    a: __m512i,
1930    b: __m512i,
1931) -> __m512i {
1932    unsafe {
1933        static_assert_uimm_bits!(IMM8, 8);
1934        let shf = _mm512_shrdi_epi32::<IMM8>(a, b).as_i32x16();
1935        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
1936    }
1937}
1938
1939/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1940///
1941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi32&expand=5104)
1942#[inline]
1943#[target_feature(enable = "avx512vbmi2")]
1944#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1945#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
1946#[rustc_legacy_const_generics(3)]
1947pub fn _mm512_maskz_shrdi_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1948    unsafe {
1949        static_assert_uimm_bits!(IMM8, 8);
1950        let shf = _mm512_shrdi_epi32::<IMM8>(a, b).as_i32x16();
1951        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
1952    }
1953}
1954
1955/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
1956///
1957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi32&expand=5102)
1958#[inline]
1959#[target_feature(enable = "avx512vbmi2,avx512vl")]
1960#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1961#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
1962#[rustc_legacy_const_generics(2)]
1963pub fn _mm256_shrdi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1964    static_assert_uimm_bits!(IMM8, 8);
1965    _mm256_shrdv_epi32(a, b, _mm256_set1_epi32(IMM8))
1966}
1967
1968/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1969///
1970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi32&expand=5100)
1971#[inline]
1972#[target_feature(enable = "avx512vbmi2,avx512vl")]
1973#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1974#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
1975#[rustc_legacy_const_generics(4)]
1976pub fn _mm256_mask_shrdi_epi32<const IMM8: i32>(
1977    src: __m256i,
1978    k: __mmask8,
1979    a: __m256i,
1980    b: __m256i,
1981) -> __m256i {
1982    unsafe {
1983        static_assert_uimm_bits!(IMM8, 8);
1984        let shf = _mm256_shrdi_epi32::<IMM8>(a, b).as_i32x8();
1985        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
1986    }
1987}
1988
1989/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1990///
1991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi32&expand=5101)
1992#[inline]
1993#[target_feature(enable = "avx512vbmi2,avx512vl")]
1994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1995#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
1996#[rustc_legacy_const_generics(3)]
1997pub fn _mm256_maskz_shrdi_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1998    unsafe {
1999        static_assert_uimm_bits!(IMM8, 8);
2000        let shf = _mm256_shrdi_epi32::<IMM8>(a, b).as_i32x8();
2001        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
2002    }
2003}
2004
2005/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
2006///
2007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi32&expand=5099)
2008#[inline]
2009#[target_feature(enable = "avx512vbmi2,avx512vl")]
2010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2011#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2012#[rustc_legacy_const_generics(2)]
2013pub fn _mm_shrdi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
2014    static_assert_uimm_bits!(IMM8, 8);
2015    _mm_shrdv_epi32(a, b, _mm_set1_epi32(IMM8))
2016}
2017
2018/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2019///
2020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi32&expand=5097)
2021#[inline]
2022#[target_feature(enable = "avx512vbmi2,avx512vl")]
2023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2024#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2025#[rustc_legacy_const_generics(4)]
2026pub fn _mm_mask_shrdi_epi32<const IMM8: i32>(
2027    src: __m128i,
2028    k: __mmask8,
2029    a: __m128i,
2030    b: __m128i,
2031) -> __m128i {
2032    unsafe {
2033        static_assert_uimm_bits!(IMM8, 8);
2034        let shf = _mm_shrdi_epi32::<IMM8>(a, b).as_i32x4();
2035        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
2036    }
2037}
2038
2039/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2040///
2041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi32&expand=5098)
2042#[inline]
2043#[target_feature(enable = "avx512vbmi2,avx512vl")]
2044#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2045#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
2046#[rustc_legacy_const_generics(3)]
2047pub fn _mm_maskz_shrdi_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2048    unsafe {
2049        static_assert_uimm_bits!(IMM8, 8);
2050        let shf = _mm_shrdi_epi32::<IMM8>(a, b).as_i32x4();
2051        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
2052    }
2053}
2054
2055/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
2056///
2057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi16&expand=5096)
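///
/// A scalar sketch of one 16-bit lane (the `shrd_lane16` helper below is
/// illustrative only): `b` supplies the upper 16 bits, `a` the lower 16 bits, and
/// the shift count is the immediate reduced modulo 16.
///
/// ```
/// // Model of one 16-bit lane of the funnel shift right.
/// fn shrd_lane16(a: u16, b: u16, imm8: u32) -> u16 {
///     let concat = ((b as u32) << 16) | (a as u32);
///     (concat >> (imm8 & 15)) as u16
/// }
/// assert_eq!(shrd_lane16(2, 8, 1), 1);
/// ```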
2058#[inline]
2059#[target_feature(enable = "avx512vbmi2")]
2060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2061#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2062#[rustc_legacy_const_generics(2)]
2063pub fn _mm512_shrdi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
2064    static_assert_uimm_bits!(IMM8, 8);
2065    _mm512_shrdv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16))
2066}
2067
2068/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2069///
2070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi16&expand=5094)
2071#[inline]
2072#[target_feature(enable = "avx512vbmi2")]
2073#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2074#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2075#[rustc_legacy_const_generics(4)]
2076pub fn _mm512_mask_shrdi_epi16<const IMM8: i32>(
2077    src: __m512i,
2078    k: __mmask32,
2079    a: __m512i,
2080    b: __m512i,
2081) -> __m512i {
2082    unsafe {
2083        static_assert_uimm_bits!(IMM8, 8);
2084        let shf = _mm512_shrdi_epi16::<IMM8>(a, b).as_i16x32();
2085        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
2086    }
2087}
2088
2089/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2090///
2091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi16&expand=5095)
2092#[inline]
2093#[target_feature(enable = "avx512vbmi2")]
2094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2095#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2096#[rustc_legacy_const_generics(3)]
2097pub fn _mm512_maskz_shrdi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2098    unsafe {
2099        static_assert_uimm_bits!(IMM8, 8);
2100        let shf = _mm512_shrdi_epi16::<IMM8>(a, b).as_i16x32();
2101        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
2102    }
2103}
2104
2105/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
2106///
2107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi16&expand=5093)
2108#[inline]
2109#[target_feature(enable = "avx512vbmi2,avx512vl")]
2110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2111#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2112#[rustc_legacy_const_generics(2)]
2113pub fn _mm256_shrdi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2114    static_assert_uimm_bits!(IMM8, 8);
2115    _mm256_shrdv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16))
2116}
2117
2118/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2119///
2120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi16&expand=5091)
2121#[inline]
2122#[target_feature(enable = "avx512vbmi2,avx512vl")]
2123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2124#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2125#[rustc_legacy_const_generics(4)]
2126pub fn _mm256_mask_shrdi_epi16<const IMM8: i32>(
2127    src: __m256i,
2128    k: __mmask16,
2129    a: __m256i,
2130    b: __m256i,
2131) -> __m256i {
2132    unsafe {
2133        static_assert_uimm_bits!(IMM8, 8);
2134        let shf = _mm256_shrdi_epi16::<IMM8>(a, b).as_i16x16();
2135        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
2136    }
2137}
2138
2139/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2140///
2141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi16&expand=5092)
2142#[inline]
2143#[target_feature(enable = "avx512vbmi2,avx512vl")]
2144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2145#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2146#[rustc_legacy_const_generics(3)]
2147pub fn _mm256_maskz_shrdi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2148    unsafe {
2149        static_assert_uimm_bits!(IMM8, 8);
2150        let shf = _mm256_shrdi_epi16::<IMM8>(a, b).as_i16x16();
2151        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
2152    }
2153}
2154
2155/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
2156///
2157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi16&expand=5090)
2158#[inline]
2159#[target_feature(enable = "avx512vbmi2,avx512vl")]
2160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2161#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2162#[rustc_legacy_const_generics(2)]
2163pub fn _mm_shrdi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
2164    static_assert_uimm_bits!(IMM8, 8);
2165    _mm_shrdv_epi16(a, b, _mm_set1_epi16(IMM8 as i16))
2166}
2167
2168/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2169///
2170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi16&expand=5088)
2171#[inline]
2172#[target_feature(enable = "avx512vbmi2,avx512vl")]
2173#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2174#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2175#[rustc_legacy_const_generics(4)]
2176pub fn _mm_mask_shrdi_epi16<const IMM8: i32>(
2177    src: __m128i,
2178    k: __mmask8,
2179    a: __m128i,
2180    b: __m128i,
2181) -> __m128i {
2182    unsafe {
2183        static_assert_uimm_bits!(IMM8, 8);
2184        let shf = _mm_shrdi_epi16::<IMM8>(a, b).as_i16x8();
2185        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
2186    }
2187}
2188
2189/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2190///
2191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi16&expand=5089)
2192#[inline]
2193#[target_feature(enable = "avx512vbmi2,avx512vl")]
2194#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2195#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2196#[rustc_legacy_const_generics(3)]
2197pub fn _mm_maskz_shrdi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2198    unsafe {
2199        static_assert_uimm_bits!(IMM8, 8);
2200        let shf = _mm_shrdi_epi16::<IMM8>(a, b).as_i16x8();
2201        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
2202    }
2203}
2204
2205#[allow(improper_ctypes)]
2206unsafe extern "C" {
2207    #[link_name = "llvm.x86.avx512.mask.compress.store.w.512"]
2208    fn vcompressstorew(mem: *mut i8, data: i16x32, mask: u32);
2209    #[link_name = "llvm.x86.avx512.mask.compress.store.w.256"]
2210    fn vcompressstorew256(mem: *mut i8, data: i16x16, mask: u16);
2211    #[link_name = "llvm.x86.avx512.mask.compress.store.w.128"]
2212    fn vcompressstorew128(mem: *mut i8, data: i16x8, mask: u8);
2213
2214    #[link_name = "llvm.x86.avx512.mask.compress.store.b.512"]
2215    fn vcompressstoreb(mem: *mut i8, data: i8x64, mask: u64);
2216    #[link_name = "llvm.x86.avx512.mask.compress.store.b.256"]
2217    fn vcompressstoreb256(mem: *mut i8, data: i8x32, mask: u32);
2218    #[link_name = "llvm.x86.avx512.mask.compress.store.b.128"]
2219    fn vcompressstoreb128(mem: *mut i8, data: i8x16, mask: u16);
2220
2221    #[link_name = "llvm.x86.avx512.mask.compress.w.512"]
2222    fn vpcompressw(a: i16x32, src: i16x32, mask: u32) -> i16x32;
2223    #[link_name = "llvm.x86.avx512.mask.compress.w.256"]
2224    fn vpcompressw256(a: i16x16, src: i16x16, mask: u16) -> i16x16;
2225    #[link_name = "llvm.x86.avx512.mask.compress.w.128"]
2226    fn vpcompressw128(a: i16x8, src: i16x8, mask: u8) -> i16x8;
2227
2228    #[link_name = "llvm.x86.avx512.mask.compress.b.512"]
2229    fn vpcompressb(a: i8x64, src: i8x64, mask: u64) -> i8x64;
2230    #[link_name = "llvm.x86.avx512.mask.compress.b.256"]
2231    fn vpcompressb256(a: i8x32, src: i8x32, mask: u32) -> i8x32;
2232    #[link_name = "llvm.x86.avx512.mask.compress.b.128"]
2233    fn vpcompressb128(a: i8x16, src: i8x16, mask: u16) -> i8x16;
2234
2235    #[link_name = "llvm.x86.avx512.mask.expand.w.512"]
2236    fn vpexpandw(a: i16x32, src: i16x32, mask: u32) -> i16x32;
2237    #[link_name = "llvm.x86.avx512.mask.expand.w.256"]
2238    fn vpexpandw256(a: i16x16, src: i16x16, mask: u16) -> i16x16;
2239    #[link_name = "llvm.x86.avx512.mask.expand.w.128"]
2240    fn vpexpandw128(a: i16x8, src: i16x8, mask: u8) -> i16x8;
2241
2242    #[link_name = "llvm.x86.avx512.mask.expand.b.512"]
2243    fn vpexpandb(a: i8x64, src: i8x64, mask: u64) -> i8x64;
2244    #[link_name = "llvm.x86.avx512.mask.expand.b.256"]
2245    fn vpexpandb256(a: i8x32, src: i8x32, mask: u32) -> i8x32;
2246    #[link_name = "llvm.x86.avx512.mask.expand.b.128"]
2247    fn vpexpandb128(a: i8x16, src: i8x16, mask: u16) -> i8x16;
2248
2249    #[link_name = "llvm.x86.avx512.mask.expand.load.b.128"]
2250    fn expandloadb_128(mem_addr: *const i8, a: i8x16, mask: u16) -> i8x16;
2251    #[link_name = "llvm.x86.avx512.mask.expand.load.w.128"]
2252    fn expandloadw_128(mem_addr: *const i16, a: i16x8, mask: u8) -> i16x8;
2253    #[link_name = "llvm.x86.avx512.mask.expand.load.b.256"]
2254    fn expandloadb_256(mem_addr: *const i8, a: i8x32, mask: u32) -> i8x32;
2255    #[link_name = "llvm.x86.avx512.mask.expand.load.w.256"]
2256    fn expandloadw_256(mem_addr: *const i16, a: i16x16, mask: u16) -> i16x16;
2257    #[link_name = "llvm.x86.avx512.mask.expand.load.b.512"]
2258    fn expandloadb_512(mem_addr: *const i8, a: i8x64, mask: u64) -> i8x64;
2259    #[link_name = "llvm.x86.avx512.mask.expand.load.w.512"]
2260    fn expandloadw_512(mem_addr: *const i16, a: i16x32, mask: u32) -> i16x32;
2261}
2262
2263#[cfg(test)]
2264mod tests {
2265
2266    use stdarch_test::simd_test;
2267
2268    use crate::core_arch::x86::*;
2269    use crate::hint::black_box;
2270
2271    #[simd_test(enable = "avx512vbmi2")]
2272    unsafe fn test_mm512_mask_compress_epi16() {
2273        let src = _mm512_set1_epi16(200);
2274        #[rustfmt::skip]
2275        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2276                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2277        let r = _mm512_mask_compress_epi16(src, 0b01010101_01010101_01010101_01010101, a);
2278        #[rustfmt::skip]
2279        let e = _mm512_set_epi16(
2280            200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200,
2281            1,   3,   5,   7,   9,   11,  13,  15,  17,  19,  21,  23,  25,  27,  29,  31,
2282        );
2283        assert_eq_m512i(r, e);
2284    }
2285
2286    #[simd_test(enable = "avx512vbmi2")]
2287    unsafe fn test_mm512_maskz_compress_epi16() {
2288        #[rustfmt::skip]
2289        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2290                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2291        let r = _mm512_maskz_compress_epi16(0b01010101_01010101_01010101_01010101, a);
2292        #[rustfmt::skip]
2293        let e = _mm512_set_epi16(
2294            0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
2295            1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2296        );
2297        assert_eq_m512i(r, e);
2298    }
2299
2300    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2301    unsafe fn test_mm256_mask_compress_epi16() {
2302        let src = _mm256_set1_epi16(200);
2303        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2304        let r = _mm256_mask_compress_epi16(src, 0b01010101_01010101, a);
2305        let e = _mm256_set_epi16(
2306            200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
2307        );
2308        assert_eq_m256i(r, e);
2309    }
2310
2311    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2312    unsafe fn test_mm256_maskz_compress_epi16() {
2313        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2314        let r = _mm256_maskz_compress_epi16(0b01010101_01010101, a);
2315        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
2316        assert_eq_m256i(r, e);
2317    }
2318
2319    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2320    unsafe fn test_mm_mask_compress_epi16() {
2321        let src = _mm_set1_epi16(200);
2322        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2323        let r = _mm_mask_compress_epi16(src, 0b01010101, a);
2324        let e = _mm_set_epi16(200, 200, 200, 200, 1, 3, 5, 7);
2325        assert_eq_m128i(r, e);
2326    }
2327
2328    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2329    unsafe fn test_mm_maskz_compress_epi16() {
2330        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2331        let r = _mm_maskz_compress_epi16(0b01010101, a);
2332        let e = _mm_set_epi16(0, 0, 0, 0, 1, 3, 5, 7);
2333        assert_eq_m128i(r, e);
2334    }
2335
2336    #[simd_test(enable = "avx512vbmi2")]
2337    unsafe fn test_mm512_mask_compress_epi8() {
2338        let src = _mm512_set1_epi8(100);
2339        #[rustfmt::skip]
2340        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2341                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2342                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2343                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2344        let r = _mm512_mask_compress_epi8(
2345            src,
2346            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2347            a,
2348        );
2349        #[rustfmt::skip]
2350        let e = _mm512_set_epi8(
2351            100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
2352            100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
2353            1,   3,   5,   7,   9,   11,  13,  15,  17,  19,  21,  23,  25,  27,  29,  31,
2354            33,  35,  37,  39,  41,  43,  45,  47,  49,  51,  53,  55,  57,  59,  61,  63,
2355        );
2356        assert_eq_m512i(r, e);
2357    }
2358
2359    #[simd_test(enable = "avx512vbmi2")]
2360    unsafe fn test_mm512_maskz_compress_epi8() {
2361        #[rustfmt::skip]
2362        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2363                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2364                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2365                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2366        let r = _mm512_maskz_compress_epi8(
2367            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2368            a,
2369        );
2370        #[rustfmt::skip]
2371        let e = _mm512_set_epi8(
2372            0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
2373            0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
2374            1,  3,  5,  7,  9,  11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2375            33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
2376        );
2377        assert_eq_m512i(r, e);
2378    }
2379
2380    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2381    unsafe fn test_mm256_mask_compress_epi8() {
2382        let src = _mm256_set1_epi8(100);
2383        #[rustfmt::skip]
2384        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2385                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2386        let r = _mm256_mask_compress_epi8(src, 0b01010101_01010101_01010101_01010101, a);
2387        #[rustfmt::skip]
2388        let e = _mm256_set_epi8(
2389            100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
2390            1,   3,   5,   7,   9,   11,  13,  15,  17,  19,  21,  23,  25,  27,  29,  31,
2391        );
2392        assert_eq_m256i(r, e);
2393    }
2394
2395    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2396    unsafe fn test_mm256_maskz_compress_epi8() {
2397        #[rustfmt::skip]
2398        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2399                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2400        let r = _mm256_maskz_compress_epi8(0b01010101_01010101_01010101_01010101, a);
2401        #[rustfmt::skip]
2402        let e = _mm256_set_epi8(
2403            0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
2404            1,  3,  5,  7,  9,  11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2405        );
2406        assert_eq_m256i(r, e);
2407    }
2408
2409    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2410    unsafe fn test_mm_mask_compress_epi8() {
2411        let src = _mm_set1_epi8(100);
2412        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2413        let r = _mm_mask_compress_epi8(src, 0b01010101_01010101, a);
2414        let e = _mm_set_epi8(
2415            100, 100, 100, 100, 100, 100, 100, 100, 1, 3, 5, 7, 9, 11, 13, 15,
2416        );
2417        assert_eq_m128i(r, e);
2418    }
2419
2420    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2421    unsafe fn test_mm_maskz_compress_epi8() {
2422        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2423        let r = _mm_maskz_compress_epi8(0b01010101_01010101, a);
2424        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
2425        assert_eq_m128i(r, e);
2426    }
2427
2428    #[simd_test(enable = "avx512vbmi2")]
2429    unsafe fn test_mm512_mask_expand_epi16() {
2430        let src = _mm512_set1_epi16(200);
2431        #[rustfmt::skip]
2432        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2433                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2434        let r = _mm512_mask_expand_epi16(src, 0b01010101_01010101_01010101_01010101, a);
2435        #[rustfmt::skip]
2436        let e = _mm512_set_epi16(
2437            200, 16, 200, 17, 200, 18, 200, 19, 200, 20, 200, 21, 200, 22, 200, 23,
2438            200, 24, 200, 25, 200, 26, 200, 27, 200, 28, 200, 29, 200, 30, 200, 31,
2439        );
2440        assert_eq_m512i(r, e);
2441    }
2442
2443    #[simd_test(enable = "avx512vbmi2")]
2444    unsafe fn test_mm512_maskz_expand_epi16() {
2445        #[rustfmt::skip]
2446        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2447                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2448        let r = _mm512_maskz_expand_epi16(0b01010101_01010101_01010101_01010101, a);
2449        #[rustfmt::skip]
2450        let e = _mm512_set_epi16(0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23,
2451                                 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31);
2452        assert_eq_m512i(r, e);
2453    }
2454
2455    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2456    unsafe fn test_mm256_mask_expand_epi16() {
2457        let src = _mm256_set1_epi16(200);
2458        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2459        let r = _mm256_mask_expand_epi16(src, 0b01010101_01010101, a);
2460        let e = _mm256_set_epi16(
2461            200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
2462        );
2463        assert_eq_m256i(r, e);
2464    }
2465
2466    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2467    unsafe fn test_mm256_maskz_expand_epi16() {
2468        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2469        let r = _mm256_maskz_expand_epi16(0b01010101_01010101, a);
2470        let e = _mm256_set_epi16(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
2471        assert_eq_m256i(r, e);
2472    }
2473
2474    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2475    unsafe fn test_mm_mask_expand_epi16() {
2476        let src = _mm_set1_epi16(200);
2477        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2478        let r = _mm_mask_expand_epi16(src, 0b01010101, a);
2479        let e = _mm_set_epi16(200, 4, 200, 5, 200, 6, 200, 7);
2480        assert_eq_m128i(r, e);
2481    }
2482
2483    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2484    unsafe fn test_mm_maskz_expand_epi16() {
2485        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2486        let r = _mm_maskz_expand_epi16(0b01010101, a);
2487        let e = _mm_set_epi16(0, 4, 0, 5, 0, 6, 0, 7);
2488        assert_eq_m128i(r, e);
2489    }
2490
2491    #[simd_test(enable = "avx512vbmi2")]
2492    unsafe fn test_mm512_mask_expand_epi8() {
2493        let src = _mm512_set1_epi8(100);
2494        #[rustfmt::skip]
2495        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2496                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2497                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2498                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2499        let r = _mm512_mask_expand_epi8(
2500            src,
2501            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2502            a,
2503        );
2504        #[rustfmt::skip]
2505        let e = _mm512_set_epi8(
2506            100, 32, 100, 33, 100, 34, 100, 35, 100, 36, 100, 37, 100, 38, 100, 39,
2507            100, 40, 100, 41, 100, 42, 100, 43, 100, 44, 100, 45, 100, 46, 100, 47,
2508            100, 48, 100, 49, 100, 50, 100, 51, 100, 52, 100, 53, 100, 54, 100, 55,
2509            100, 56, 100, 57, 100, 58, 100, 59, 100, 60, 100, 61, 100, 62, 100, 63,
2510        );
2511        assert_eq_m512i(r, e);
2512    }
2513
2514    #[simd_test(enable = "avx512vbmi2")]
2515    unsafe fn test_mm512_maskz_expand_epi8() {
2516        #[rustfmt::skip]
2517        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2518                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2519                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2520                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2521        let r = _mm512_maskz_expand_epi8(
2522            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2523            a,
2524        );
2525        #[rustfmt::skip]
2526        let e = _mm512_set_epi8(
2527            0, 32, 0, 33, 0, 34, 0, 35, 0, 36, 0, 37, 0, 38, 0, 39,
2528            0, 40, 0, 41, 0, 42, 0, 43, 0, 44, 0, 45, 0, 46, 0, 47,
2529            0, 48, 0, 49, 0, 50, 0, 51, 0, 52, 0, 53, 0, 54, 0, 55,
2530            0, 56, 0, 57, 0, 58, 0, 59, 0, 60, 0, 61, 0, 62, 0, 63,
2531        );
2532        assert_eq_m512i(r, e);
2533    }
2534
2535    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2536    unsafe fn test_mm256_mask_expand_epi8() {
2537        let src = _mm256_set1_epi8(100);
2538        #[rustfmt::skip]
2539        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2540                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2541        let r = _mm256_mask_expand_epi8(src, 0b01010101_01010101_01010101_01010101, a);
2542        #[rustfmt::skip]
2543        let e = _mm256_set_epi8(
2544            100, 16, 100, 17, 100, 18, 100, 19, 100, 20, 100, 21, 100, 22, 100, 23,
2545            100, 24, 100, 25, 100, 26, 100, 27, 100, 28, 100, 29, 100, 30, 100, 31,
2546        );
2547        assert_eq_m256i(r, e);
2548    }
2549
2550    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2551    unsafe fn test_mm256_maskz_expand_epi8() {
2552        #[rustfmt::skip]
2553        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2554                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2555        let r = _mm256_maskz_expand_epi8(0b01010101_01010101_01010101_01010101, a);
2556        #[rustfmt::skip]
2557        let e = _mm256_set_epi8(
2558            0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23,
2559            0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31,
2560        );
2561        assert_eq_m256i(r, e);
2562    }
2563
2564    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2565    unsafe fn test_mm_mask_expand_epi8() {
2566        let src = _mm_set1_epi8(100);
2567        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2568        let r = _mm_mask_expand_epi8(src, 0b01010101_01010101, a);
2569        let e = _mm_set_epi8(
2570            100, 8, 100, 9, 100, 10, 100, 11, 100, 12, 100, 13, 100, 14, 100, 15,
2571        );
2572        assert_eq_m128i(r, e);
2573    }
2574
2575    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2576    unsafe fn test_mm_maskz_expand_epi8() {
2577        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2578        let r = _mm_maskz_expand_epi8(0b01010101_01010101, a);
2579        let e = _mm_set_epi8(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
2580        assert_eq_m128i(r, e);
2581    }
2582
2583    #[simd_test(enable = "avx512vbmi2")]
2584    unsafe fn test_mm512_shldv_epi64() {
2585        let a = _mm512_set1_epi64(1);
2586        let b = _mm512_set1_epi64(1 << 63);
2587        let c = _mm512_set1_epi64(2);
2588        let r = _mm512_shldv_epi64(a, b, c);
2589        let e = _mm512_set1_epi64(6);
2590        assert_eq_m512i(r, e);
2591    }
2592
2593    #[simd_test(enable = "avx512vbmi2")]
2594    unsafe fn test_mm512_mask_shldv_epi64() {
2595        let a = _mm512_set1_epi64(1);
2596        let b = _mm512_set1_epi64(1 << 63);
2597        let c = _mm512_set1_epi64(2);
2598        let r = _mm512_mask_shldv_epi64(a, 0, b, c);
2599        assert_eq_m512i(r, a);
2600        let r = _mm512_mask_shldv_epi64(a, 0b11111111, b, c);
2601        let e = _mm512_set1_epi64(6);
2602        assert_eq_m512i(r, e);
2603    }
2604
2605    #[simd_test(enable = "avx512vbmi2")]
2606    unsafe fn test_mm512_maskz_shldv_epi64() {
2607        let a = _mm512_set1_epi64(1);
2608        let b = _mm512_set1_epi64(1 << 63);
2609        let c = _mm512_set1_epi64(2);
2610        let r = _mm512_maskz_shldv_epi64(0, a, b, c);
2611        assert_eq_m512i(r, _mm512_setzero_si512());
2612        let r = _mm512_maskz_shldv_epi64(0b11111111, a, b, c);
2613        let e = _mm512_set1_epi64(6);
2614        assert_eq_m512i(r, e);
2615    }
2616
2617    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2618    unsafe fn test_mm256_shldv_epi64() {
2619        let a = _mm256_set1_epi64x(1);
2620        let b = _mm256_set1_epi64x(1 << 63);
2621        let c = _mm256_set1_epi64x(2);
2622        let r = _mm256_shldv_epi64(a, b, c);
2623        let e = _mm256_set1_epi64x(6);
2624        assert_eq_m256i(r, e);
2625    }
2626
2627    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2628    unsafe fn test_mm256_mask_shldv_epi64() {
2629        let a = _mm256_set1_epi64x(1);
2630        let b = _mm256_set1_epi64x(1 << 63);
2631        let c = _mm256_set1_epi64x(2);
2632        let r = _mm256_mask_shldv_epi64(a, 0, b, c);
2633        assert_eq_m256i(r, a);
2634        let r = _mm256_mask_shldv_epi64(a, 0b00001111, b, c);
2635        let e = _mm256_set1_epi64x(6);
2636        assert_eq_m256i(r, e);
2637    }
2638
2639    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2640    unsafe fn test_mm256_maskz_shldv_epi64() {
2641        let a = _mm256_set1_epi64x(1);
2642        let b = _mm256_set1_epi64x(1 << 63);
2643        let c = _mm256_set1_epi64x(2);
2644        let r = _mm256_maskz_shldv_epi64(0, a, b, c);
2645        assert_eq_m256i(r, _mm256_setzero_si256());
2646        let r = _mm256_maskz_shldv_epi64(0b00001111, a, b, c);
2647        let e = _mm256_set1_epi64x(6);
2648        assert_eq_m256i(r, e);
2649    }
2650
2651    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2652    unsafe fn test_mm_shldv_epi64() {
2653        let a = _mm_set1_epi64x(1);
2654        let b = _mm_set1_epi64x(1 << 63);
2655        let c = _mm_set1_epi64x(2);
2656        let r = _mm_shldv_epi64(a, b, c);
2657        let e = _mm_set1_epi64x(6);
2658        assert_eq_m128i(r, e);
2659    }
2660
2661    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2662    unsafe fn test_mm_mask_shldv_epi64() {
2663        let a = _mm_set1_epi64x(1);
2664        let b = _mm_set1_epi64x(1 << 63);
2665        let c = _mm_set1_epi64x(2);
2666        let r = _mm_mask_shldv_epi64(a, 0, b, c);
2667        assert_eq_m128i(r, a);
2668        let r = _mm_mask_shldv_epi64(a, 0b00000011, b, c);
2669        let e = _mm_set1_epi64x(6);
2670        assert_eq_m128i(r, e);
2671    }
2672
2673    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2674    unsafe fn test_mm_maskz_shldv_epi64() {
2675        let a = _mm_set1_epi64x(1);
2676        let b = _mm_set1_epi64x(1 << 63);
2677        let c = _mm_set1_epi64x(2);
2678        let r = _mm_maskz_shldv_epi64(0, a, b, c);
2679        assert_eq_m128i(r, _mm_setzero_si128());
2680        let r = _mm_maskz_shldv_epi64(0b00000011, a, b, c);
2681        let e = _mm_set1_epi64x(6);
2682        assert_eq_m128i(r, e);
2683    }
2684
2685    #[simd_test(enable = "avx512vbmi2")]
2686    unsafe fn test_mm512_shldv_epi32() {
2687        let a = _mm512_set1_epi32(1);
2688        let b = _mm512_set1_epi32(1 << 31);
2689        let c = _mm512_set1_epi32(2);
2690        let r = _mm512_shldv_epi32(a, b, c);
2691        let e = _mm512_set1_epi32(6);
2692        assert_eq_m512i(r, e);
2693    }
2694
2695    #[simd_test(enable = "avx512vbmi2")]
2696    unsafe fn test_mm512_mask_shldv_epi32() {
2697        let a = _mm512_set1_epi32(1);
2698        let b = _mm512_set1_epi32(1 << 31);
2699        let c = _mm512_set1_epi32(2);
2700        let r = _mm512_mask_shldv_epi32(a, 0, b, c);
2701        assert_eq_m512i(r, a);
2702        let r = _mm512_mask_shldv_epi32(a, 0b11111111_11111111, b, c);
2703        let e = _mm512_set1_epi32(6);
2704        assert_eq_m512i(r, e);
2705    }
2706
2707    #[simd_test(enable = "avx512vbmi2")]
2708    unsafe fn test_mm512_maskz_shldv_epi32() {
2709        let a = _mm512_set1_epi32(1);
2710        let b = _mm512_set1_epi32(1 << 31);
2711        let c = _mm512_set1_epi32(2);
2712        let r = _mm512_maskz_shldv_epi32(0, a, b, c);
2713        assert_eq_m512i(r, _mm512_setzero_si512());
2714        let r = _mm512_maskz_shldv_epi32(0b11111111_11111111, a, b, c);
2715        let e = _mm512_set1_epi32(6);
2716        assert_eq_m512i(r, e);
2717    }
2718
2719    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2720    unsafe fn test_mm256_shldv_epi32() {
2721        let a = _mm256_set1_epi32(1);
2722        let b = _mm256_set1_epi32(1 << 31);
2723        let c = _mm256_set1_epi32(2);
2724        let r = _mm256_shldv_epi32(a, b, c);
2725        let e = _mm256_set1_epi32(6);
2726        assert_eq_m256i(r, e);
2727    }
2728
2729    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2730    unsafe fn test_mm256_mask_shldv_epi32() {
2731        let a = _mm256_set1_epi32(1);
2732        let b = _mm256_set1_epi32(1 << 31);
2733        let c = _mm256_set1_epi32(2);
2734        let r = _mm256_mask_shldv_epi32(a, 0, b, c);
2735        assert_eq_m256i(r, a);
2736        let r = _mm256_mask_shldv_epi32(a, 0b11111111, b, c);
2737        let e = _mm256_set1_epi32(6);
2738        assert_eq_m256i(r, e);
2739    }
2740
2741    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2742    unsafe fn test_mm256_maskz_shldv_epi32() {
2743        let a = _mm256_set1_epi32(1);
2744        let b = _mm256_set1_epi32(1 << 31);
2745        let c = _mm256_set1_epi32(2);
2746        let r = _mm256_maskz_shldv_epi32(0, a, b, c);
2747        assert_eq_m256i(r, _mm256_setzero_si256());
2748        let r = _mm256_maskz_shldv_epi32(0b11111111, a, b, c);
2749        let e = _mm256_set1_epi32(6);
2750        assert_eq_m256i(r, e);
2751    }
2752
2753    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2754    unsafe fn test_mm_shldv_epi32() {
2755        let a = _mm_set1_epi32(1);
2756        let b = _mm_set1_epi32(1 << 31);
2757        let c = _mm_set1_epi32(2);
2758        let r = _mm_shldv_epi32(a, b, c);
2759        let e = _mm_set1_epi32(6);
2760        assert_eq_m128i(r, e);
2761    }
2762
2763    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2764    unsafe fn test_mm_mask_shldv_epi32() {
2765        let a = _mm_set1_epi32(1);
2766        let b = _mm_set1_epi32(1 << 31);
2767        let c = _mm_set1_epi32(2);
2768        let r = _mm_mask_shldv_epi32(a, 0, b, c);
2769        assert_eq_m128i(r, a);
2770        let r = _mm_mask_shldv_epi32(a, 0b00001111, b, c);
2771        let e = _mm_set1_epi32(6);
2772        assert_eq_m128i(r, e);
2773    }
2774
2775    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2776    unsafe fn test_mm_maskz_shldv_epi32() {
2777        let a = _mm_set1_epi32(1);
2778        let b = _mm_set1_epi32(1 << 31);
2779        let c = _mm_set1_epi32(2);
2780        let r = _mm_maskz_shldv_epi32(0, a, b, c);
2781        assert_eq_m128i(r, _mm_setzero_si128());
2782        let r = _mm_maskz_shldv_epi32(0b00001111, a, b, c);
2783        let e = _mm_set1_epi32(6);
2784        assert_eq_m128i(r, e);
2785    }
2786
2787    #[simd_test(enable = "avx512vbmi2")]
2788    unsafe fn test_mm512_shldv_epi16() {
2789        let a = _mm512_set1_epi16(1);
2790        let b = _mm512_set1_epi16(1 << 15);
2791        let c = _mm512_set1_epi16(2);
2792        let r = _mm512_shldv_epi16(a, b, c);
2793        let e = _mm512_set1_epi16(6);
2794        assert_eq_m512i(r, e);
2795    }
2796
2797    #[simd_test(enable = "avx512vbmi2")]
2798    unsafe fn test_mm512_mask_shldv_epi16() {
2799        let a = _mm512_set1_epi16(1);
2800        let b = _mm512_set1_epi16(1 << 15);
2801        let c = _mm512_set1_epi16(2);
2802        let r = _mm512_mask_shldv_epi16(a, 0, b, c);
2803        assert_eq_m512i(r, a);
2804        let r = _mm512_mask_shldv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c);
2805        let e = _mm512_set1_epi16(6);
2806        assert_eq_m512i(r, e);
2807    }
2808
2809    #[simd_test(enable = "avx512vbmi2")]
2810    unsafe fn test_mm512_maskz_shldv_epi16() {
2811        let a = _mm512_set1_epi16(1);
2812        let b = _mm512_set1_epi16(1 << 15);
2813        let c = _mm512_set1_epi16(2);
2814        let r = _mm512_maskz_shldv_epi16(0, a, b, c);
2815        assert_eq_m512i(r, _mm512_setzero_si512());
2816        let r = _mm512_maskz_shldv_epi16(0b11111111_11111111_11111111_11111111, a, b, c);
2817        let e = _mm512_set1_epi16(6);
2818        assert_eq_m512i(r, e);
2819    }
2820
2821    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2822    unsafe fn test_mm256_shldv_epi16() {
2823        let a = _mm256_set1_epi16(1);
2824        let b = _mm256_set1_epi16(1 << 15);
2825        let c = _mm256_set1_epi16(2);
2826        let r = _mm256_shldv_epi16(a, b, c);
2827        let e = _mm256_set1_epi16(6);
2828        assert_eq_m256i(r, e);
2829    }
2830
2831    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2832    unsafe fn test_mm256_mask_shldv_epi16() {
2833        let a = _mm256_set1_epi16(1);
2834        let b = _mm256_set1_epi16(1 << 15);
2835        let c = _mm256_set1_epi16(2);
2836        let r = _mm256_mask_shldv_epi16(a, 0, b, c);
2837        assert_eq_m256i(r, a);
2838        let r = _mm256_mask_shldv_epi16(a, 0b11111111_11111111, b, c);
2839        let e = _mm256_set1_epi16(6);
2840        assert_eq_m256i(r, e);
2841    }
2842
2843    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2844    unsafe fn test_mm256_maskz_shldv_epi16() {
2845        let a = _mm256_set1_epi16(1);
2846        let b = _mm256_set1_epi16(1 << 15);
2847        let c = _mm256_set1_epi16(2);
2848        let r = _mm256_maskz_shldv_epi16(0, a, b, c);
2849        assert_eq_m256i(r, _mm256_setzero_si256());
2850        let r = _mm256_maskz_shldv_epi16(0b11111111_11111111, a, b, c);
2851        let e = _mm256_set1_epi16(6);
2852        assert_eq_m256i(r, e);
2853    }
2854
2855    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2856    unsafe fn test_mm_shldv_epi16() {
2857        let a = _mm_set1_epi16(1);
2858        let b = _mm_set1_epi16(1 << 15);
2859        let c = _mm_set1_epi16(2);
2860        let r = _mm_shldv_epi16(a, b, c);
2861        let e = _mm_set1_epi16(6);
2862        assert_eq_m128i(r, e);
2863    }
2864
2865    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2866    unsafe fn test_mm_mask_shldv_epi16() {
2867        let a = _mm_set1_epi16(1);
2868        let b = _mm_set1_epi16(1 << 15);
2869        let c = _mm_set1_epi16(2);
2870        let r = _mm_mask_shldv_epi16(a, 0, b, c);
2871        assert_eq_m128i(r, a);
2872        let r = _mm_mask_shldv_epi16(a, 0b11111111, b, c);
2873        let e = _mm_set1_epi16(6);
2874        assert_eq_m128i(r, e);
2875    }
2876
2877    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2878    unsafe fn test_mm_maskz_shldv_epi16() {
2879        let a = _mm_set1_epi16(1);
2880        let b = _mm_set1_epi16(1 << 15);
2881        let c = _mm_set1_epi16(2);
2882        let r = _mm_maskz_shldv_epi16(0, a, b, c);
2883        assert_eq_m128i(r, _mm_setzero_si128());
2884        let r = _mm_maskz_shldv_epi16(0b11111111, a, b, c);
2885        let e = _mm_set1_epi16(6);
2886        assert_eq_m128i(r, e);
2887    }
2888
2889    #[simd_test(enable = "avx512vbmi2")]
2890    unsafe fn test_mm512_shrdv_epi64() {
2891        let a = _mm512_set1_epi64(2);
2892        let b = _mm512_set1_epi64(8);
2893        let c = _mm512_set1_epi64(1);
2894        let r = _mm512_shrdv_epi64(a, b, c);
2895        let e = _mm512_set1_epi64(1);
2896        assert_eq_m512i(r, e);
2897    }
2898
2899    #[simd_test(enable = "avx512vbmi2")]
2900    unsafe fn test_mm512_mask_shrdv_epi64() {
2901        let a = _mm512_set1_epi64(2);
2902        let b = _mm512_set1_epi64(8);
2903        let c = _mm512_set1_epi64(1);
2904        let r = _mm512_mask_shrdv_epi64(a, 0, b, c);
2905        assert_eq_m512i(r, a);
2906        let r = _mm512_mask_shrdv_epi64(a, 0b11111111, b, c);
2907        let e = _mm512_set1_epi64(1);
2908        assert_eq_m512i(r, e);
2909    }
2910
2911    #[simd_test(enable = "avx512vbmi2")]
2912    unsafe fn test_mm512_maskz_shrdv_epi64() {
2913        let a = _mm512_set1_epi64(2);
2914        let b = _mm512_set1_epi64(8);
2915        let c = _mm512_set1_epi64(1);
2916        let r = _mm512_maskz_shrdv_epi64(0, a, b, c);
2917        assert_eq_m512i(r, _mm512_setzero_si512());
2918        let r = _mm512_maskz_shrdv_epi64(0b11111111, a, b, c);
2919        let e = _mm512_set1_epi64(1);
2920        assert_eq_m512i(r, e);
2921    }
2922
2923    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2924    unsafe fn test_mm256_shrdv_epi64() {
2925        let a = _mm256_set1_epi64x(2);
2926        let b = _mm256_set1_epi64x(8);
2927        let c = _mm256_set1_epi64x(1);
2928        let r = _mm256_shrdv_epi64(a, b, c);
2929        let e = _mm256_set1_epi64x(1);
2930        assert_eq_m256i(r, e);
2931    }
2932
2933    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2934    unsafe fn test_mm256_mask_shrdv_epi64() {
2935        let a = _mm256_set1_epi64x(2);
2936        let b = _mm256_set1_epi64x(8);
2937        let c = _mm256_set1_epi64x(1);
2938        let r = _mm256_mask_shrdv_epi64(a, 0, b, c);
2939        assert_eq_m256i(r, a);
2940        let r = _mm256_mask_shrdv_epi64(a, 0b00001111, b, c);
2941        let e = _mm256_set1_epi64x(1);
2942        assert_eq_m256i(r, e);
2943    }
2944
2945    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2946    unsafe fn test_mm256_maskz_shrdv_epi64() {
2947        let a = _mm256_set1_epi64x(2);
2948        let b = _mm256_set1_epi64x(8);
2949        let c = _mm256_set1_epi64x(1);
2950        let r = _mm256_maskz_shrdv_epi64(0, a, b, c);
2951        assert_eq_m256i(r, _mm256_setzero_si256());
2952        let r = _mm256_maskz_shrdv_epi64(0b00001111, a, b, c);
2953        let e = _mm256_set1_epi64x(1);
2954        assert_eq_m256i(r, e);
2955    }
2956
2957    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2958    unsafe fn test_mm_shrdv_epi64() {
2959        let a = _mm_set1_epi64x(2);
2960        let b = _mm_set1_epi64x(8);
2961        let c = _mm_set1_epi64x(1);
2962        let r = _mm_shrdv_epi64(a, b, c);
2963        let e = _mm_set1_epi64x(1);
2964        assert_eq_m128i(r, e);
2965    }
2966
2967    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2968    unsafe fn test_mm_mask_shrdv_epi64() {
2969        let a = _mm_set1_epi64x(2);
2970        let b = _mm_set1_epi64x(8);
2971        let c = _mm_set1_epi64x(1);
2972        let r = _mm_mask_shrdv_epi64(a, 0, b, c);
2973        assert_eq_m128i(r, a);
2974        let r = _mm_mask_shrdv_epi64(a, 0b00000011, b, c);
2975        let e = _mm_set1_epi64x(1);
2976        assert_eq_m128i(r, e);
2977    }
2978
2979    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2980    unsafe fn test_mm_maskz_shrdv_epi64() {
2981        let a = _mm_set1_epi64x(2);
2982        let b = _mm_set1_epi64x(8);
2983        let c = _mm_set1_epi64x(1);
2984        let r = _mm_maskz_shrdv_epi64(0, a, b, c);
2985        assert_eq_m128i(r, _mm_setzero_si128());
2986        let r = _mm_maskz_shrdv_epi64(0b00000011, a, b, c);
2987        let e = _mm_set1_epi64x(1);
2988        assert_eq_m128i(r, e);
2989    }
2990
2991    #[simd_test(enable = "avx512vbmi2")]
2992    unsafe fn test_mm512_shrdv_epi32() {
2993        let a = _mm512_set1_epi32(2);
2994        let b = _mm512_set1_epi32(8);
2995        let c = _mm512_set1_epi32(1);
2996        let r = _mm512_shrdv_epi32(a, b, c);
2997        let e = _mm512_set1_epi32(1);
2998        assert_eq_m512i(r, e);
2999    }
3000
3001    #[simd_test(enable = "avx512vbmi2")]
3002    unsafe fn test_mm512_mask_shrdv_epi32() {
3003        let a = _mm512_set1_epi32(2);
3004        let b = _mm512_set1_epi32(8);
3005        let c = _mm512_set1_epi32(1);
3006        let r = _mm512_mask_shrdv_epi32(a, 0, b, c);
3007        assert_eq_m512i(r, a);
3008        let r = _mm512_mask_shrdv_epi32(a, 0b11111111_11111111, b, c);
3009        let e = _mm512_set1_epi32(1);
3010        assert_eq_m512i(r, e);
3011    }
3012
3013    #[simd_test(enable = "avx512vbmi2")]
3014    unsafe fn test_mm512_maskz_shrdv_epi32() {
3015        let a = _mm512_set1_epi32(2);
3016        let b = _mm512_set1_epi32(8);
3017        let c = _mm512_set1_epi32(1);
3018        let r = _mm512_maskz_shrdv_epi32(0, a, b, c);
3019        assert_eq_m512i(r, _mm512_setzero_si512());
3020        let r = _mm512_maskz_shrdv_epi32(0b11111111_11111111, a, b, c);
3021        let e = _mm512_set1_epi32(1);
3022        assert_eq_m512i(r, e);
3023    }
3024
3025    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3026    unsafe fn test_mm256_shrdv_epi32() {
3027        let a = _mm256_set1_epi32(2);
3028        let b = _mm256_set1_epi32(8);
3029        let c = _mm256_set1_epi32(1);
3030        let r = _mm256_shrdv_epi32(a, b, c);
3031        let e = _mm256_set1_epi32(1);
3032        assert_eq_m256i(r, e);
3033    }
3034
3035    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3036    unsafe fn test_mm256_mask_shrdv_epi32() {
3037        let a = _mm256_set1_epi32(2);
3038        let b = _mm256_set1_epi32(8);
3039        let c = _mm256_set1_epi32(1);
3040        let r = _mm256_mask_shrdv_epi32(a, 0, b, c);
3041        assert_eq_m256i(r, a);
3042        let r = _mm256_mask_shrdv_epi32(a, 0b11111111, b, c);
3043        let e = _mm256_set1_epi32(1);
3044        assert_eq_m256i(r, e);
3045    }
3046
3047    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3048    unsafe fn test_mm256_maskz_shrdv_epi32() {
3049        let a = _mm256_set1_epi32(2);
3050        let b = _mm256_set1_epi32(8);
3051        let c = _mm256_set1_epi32(1);
3052        let r = _mm256_maskz_shrdv_epi32(0, a, b, c);
3053        assert_eq_m256i(r, _mm256_setzero_si256());
3054        let r = _mm256_maskz_shrdv_epi32(0b11111111, a, b, c);
3055        let e = _mm256_set1_epi32(1);
3056        assert_eq_m256i(r, e);
3057    }
3058
3059    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3060    unsafe fn test_mm_shrdv_epi32() {
3061        let a = _mm_set1_epi32(2);
3062        let b = _mm_set1_epi32(8);
3063        let c = _mm_set1_epi32(1);
3064        let r = _mm_shrdv_epi32(a, b, c);
3065        let e = _mm_set1_epi32(1);
3066        assert_eq_m128i(r, e);
3067    }
3068
3069    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3070    unsafe fn test_mm_mask_shrdv_epi32() {
3071        let a = _mm_set1_epi32(2);
3072        let b = _mm_set1_epi32(8);
3073        let c = _mm_set1_epi32(1);
3074        let r = _mm_mask_shrdv_epi32(a, 0, b, c);
3075        assert_eq_m128i(r, a);
3076        let r = _mm_mask_shrdv_epi32(a, 0b00001111, b, c);
3077        let e = _mm_set1_epi32(1);
3078        assert_eq_m128i(r, e);
3079    }
3080
3081    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3082    unsafe fn test_mm_maskz_shrdv_epi32() {
3083        let a = _mm_set1_epi32(2);
3084        let b = _mm_set1_epi32(8);
3085        let c = _mm_set1_epi32(1);
3086        let r = _mm_maskz_shrdv_epi32(0, a, b, c);
3087        assert_eq_m128i(r, _mm_setzero_si128());
3088        let r = _mm_maskz_shrdv_epi32(0b00001111, a, b, c);
3089        let e = _mm_set1_epi32(1);
3090        assert_eq_m128i(r, e);
3091    }
3092
3093    #[simd_test(enable = "avx512vbmi2")]
3094    unsafe fn test_mm512_shrdv_epi16() {
3095        let a = _mm512_set1_epi16(2);
3096        let b = _mm512_set1_epi16(8);
3097        let c = _mm512_set1_epi16(1);
3098        let r = _mm512_shrdv_epi16(a, b, c);
3099        let e = _mm512_set1_epi16(1);
3100        assert_eq_m512i(r, e);
3101    }
3102
3103    #[simd_test(enable = "avx512vbmi2")]
3104    unsafe fn test_mm512_mask_shrdv_epi16() {
3105        let a = _mm512_set1_epi16(2);
3106        let b = _mm512_set1_epi16(8);
3107        let c = _mm512_set1_epi16(1);
3108        let r = _mm512_mask_shrdv_epi16(a, 0, b, c);
3109        assert_eq_m512i(r, a);
3110        let r = _mm512_mask_shrdv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c);
3111        let e = _mm512_set1_epi16(1);
3112        assert_eq_m512i(r, e);
3113    }
3114
3115    #[simd_test(enable = "avx512vbmi2")]
3116    unsafe fn test_mm512_maskz_shrdv_epi16() {
3117        let a = _mm512_set1_epi16(2);
3118        let b = _mm512_set1_epi16(8);
3119        let c = _mm512_set1_epi16(1);
3120        let r = _mm512_maskz_shrdv_epi16(0, a, b, c);
3121        assert_eq_m512i(r, _mm512_setzero_si512());
3122        let r = _mm512_maskz_shrdv_epi16(0b11111111_11111111_11111111_11111111, a, b, c);
3123        let e = _mm512_set1_epi16(1);
3124        assert_eq_m512i(r, e);
3125    }
3126
3127    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3128    unsafe fn test_mm256_shrdv_epi16() {
3129        let a = _mm256_set1_epi16(2);
3130        let b = _mm256_set1_epi16(8);
3131        let c = _mm256_set1_epi16(1);
3132        let r = _mm256_shrdv_epi16(a, b, c);
3133        let e = _mm256_set1_epi16(1);
3134        assert_eq_m256i(r, e);
3135    }
3136
3137    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3138    unsafe fn test_mm256_mask_shrdv_epi16() {
3139        let a = _mm256_set1_epi16(2);
3140        let b = _mm256_set1_epi16(8);
3141        let c = _mm256_set1_epi16(1);
3142        let r = _mm256_mask_shrdv_epi16(a, 0, b, c);
3143        assert_eq_m256i(r, a);
3144        let r = _mm256_mask_shrdv_epi16(a, 0b11111111_11111111, b, c);
3145        let e = _mm256_set1_epi16(1);
3146        assert_eq_m256i(r, e);
3147    }
3148
3149    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3150    unsafe fn test_mm256_maskz_shrdv_epi16() {
3151        let a = _mm256_set1_epi16(2);
3152        let b = _mm256_set1_epi16(8);
3153        let c = _mm256_set1_epi16(1);
3154        let r = _mm256_maskz_shrdv_epi16(0, a, b, c);
3155        assert_eq_m256i(r, _mm256_setzero_si256());
3156        let r = _mm256_maskz_shrdv_epi16(0b11111111_11111111, a, b, c);
3157        let e = _mm256_set1_epi16(1);
3158        assert_eq_m256i(r, e);
3159    }
3160
3161    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3162    unsafe fn test_mm_shrdv_epi16() {
3163        let a = _mm_set1_epi16(2);
3164        let b = _mm_set1_epi16(8);
3165        let c = _mm_set1_epi16(1);
3166        let r = _mm_shrdv_epi16(a, b, c);
3167        let e = _mm_set1_epi16(1);
3168        assert_eq_m128i(r, e);
3169    }
3170
3171    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3172    unsafe fn test_mm_mask_shrdv_epi16() {
3173        let a = _mm_set1_epi16(2);
3174        let b = _mm_set1_epi16(8);
3175        let c = _mm_set1_epi16(1);
3176        let r = _mm_mask_shrdv_epi16(a, 0, b, c);
3177        assert_eq_m128i(r, a);
3178        let r = _mm_mask_shrdv_epi16(a, 0b11111111, b, c);
3179        let e = _mm_set1_epi16(1);
3180        assert_eq_m128i(r, e);
3181    }
3182
3183    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3184    unsafe fn test_mm_maskz_shrdv_epi16() {
3185        let a = _mm_set1_epi16(2);
3186        let b = _mm_set1_epi16(8);
3187        let c = _mm_set1_epi16(1);
3188        let r = _mm_maskz_shrdv_epi16(0, a, b, c);
3189        assert_eq_m128i(r, _mm_setzero_si128());
3190        let r = _mm_maskz_shrdv_epi16(0b11111111, a, b, c);
3191        let e = _mm_set1_epi16(1);
3192        assert_eq_m128i(r, e);
3193    }
3194
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shldi_epi64() {
        let a = _mm512_set1_epi64(1);
        let b = _mm512_set1_epi64(1 << 63);
        let r = _mm512_shldi_epi64::<2>(a, b);
        let e = _mm512_set1_epi64(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shldi_epi64() {
        let a = _mm512_set1_epi64(1);
        let b = _mm512_set1_epi64(1 << 63);
        let r = _mm512_mask_shldi_epi64::<2>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shldi_epi64::<2>(a, 0b11111111, a, b);
        let e = _mm512_set1_epi64(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shldi_epi64() {
        let a = _mm512_set1_epi64(1);
        let b = _mm512_set1_epi64(1 << 63);
        let r = _mm512_maskz_shldi_epi64::<2>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shldi_epi64::<2>(0b11111111, a, b);
        let e = _mm512_set1_epi64(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shldi_epi64() {
        let a = _mm256_set1_epi64x(1);
        let b = _mm256_set1_epi64x(1 << 63);
        let r = _mm256_shldi_epi64::<2>(a, b);
        let e = _mm256_set1_epi64x(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shldi_epi64() {
        let a = _mm256_set1_epi64x(1);
        let b = _mm256_set1_epi64x(1 << 63);
        let r = _mm256_mask_shldi_epi64::<2>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shldi_epi64::<2>(a, 0b00001111, a, b);
        let e = _mm256_set1_epi64x(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shldi_epi64() {
        let a = _mm256_set1_epi64x(1);
        let b = _mm256_set1_epi64x(1 << 63);
        let r = _mm256_maskz_shldi_epi64::<2>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shldi_epi64::<2>(0b00001111, a, b);
        let e = _mm256_set1_epi64x(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shldi_epi64() {
        let a = _mm_set1_epi64x(1);
        let b = _mm_set1_epi64x(1 << 63);
        let r = _mm_shldi_epi64::<2>(a, b);
        let e = _mm_set1_epi64x(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shldi_epi64() {
        let a = _mm_set1_epi64x(1);
        let b = _mm_set1_epi64x(1 << 63);
        let r = _mm_mask_shldi_epi64::<2>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shldi_epi64::<2>(a, 0b00000011, a, b);
        let e = _mm_set1_epi64x(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shldi_epi64() {
        let a = _mm_set1_epi64x(1);
        let b = _mm_set1_epi64x(1 << 63);
        let r = _mm_maskz_shldi_epi64::<2>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shldi_epi64::<2>(0b00000011, a, b);
        let e = _mm_set1_epi64x(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shldi_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(1 << 31);
        let r = _mm512_shldi_epi32::<2>(a, b);
        let e = _mm512_set1_epi32(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shldi_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(1 << 31);
        let r = _mm512_mask_shldi_epi32::<2>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shldi_epi32::<2>(a, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shldi_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(1 << 31);
        let r = _mm512_maskz_shldi_epi32::<2>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shldi_epi32::<2>(0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shldi_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(1 << 31);
        let r = _mm256_shldi_epi32::<2>(a, b);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shldi_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(1 << 31);
        let r = _mm256_mask_shldi_epi32::<2>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shldi_epi32::<2>(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shldi_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(1 << 31);
        let r = _mm256_maskz_shldi_epi32::<2>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shldi_epi32::<2>(0b11111111, a, b);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shldi_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(1 << 31);
        let r = _mm_shldi_epi32::<2>(a, b);
        let e = _mm_set1_epi32(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shldi_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(1 << 31);
        let r = _mm_mask_shldi_epi32::<2>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shldi_epi32::<2>(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shldi_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(1 << 31);
        let r = _mm_maskz_shldi_epi32::<2>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shldi_epi32::<2>(0b00001111, a, b);
        let e = _mm_set1_epi32(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shldi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let r = _mm512_shldi_epi16::<2>(a, b);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shldi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let r = _mm512_mask_shldi_epi16::<2>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shldi_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shldi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let r = _mm512_maskz_shldi_epi16::<2>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shldi_epi16::<2>(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shldi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let r = _mm256_shldi_epi16::<2>(a, b);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shldi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let r = _mm256_mask_shldi_epi16::<2>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shldi_epi16::<2>(a, 0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shldi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let r = _mm256_maskz_shldi_epi16::<2>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shldi_epi16::<2>(0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shldi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let r = _mm_shldi_epi16::<2>(a, b);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shldi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let r = _mm_mask_shldi_epi16::<2>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shldi_epi16::<2>(a, 0b11111111, a, b);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shldi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let r = _mm_maskz_shldi_epi16::<2>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shldi_epi16::<2>(0b11111111, a, b);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

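    // The shrdi tests below exercise VPSHRD{Q,D,W}: each lane concatenates b (upper half)
    // with a (lower half), shifts the double-width value right by IMM8, and keeps the lower
    // half. With IMM8 = 1 the lane becomes (a >> 1) with b's lowest bit shifted into the
    // lane's top bit; since a = 2 and b = 8 (low bit 0), every lane yields 1.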
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdi_epi64() {
        let a = _mm512_set1_epi64(2);
        let b = _mm512_set1_epi64(8);
        let r = _mm512_shrdi_epi64::<1>(a, b);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdi_epi64() {
        let a = _mm512_set1_epi64(2);
        let b = _mm512_set1_epi64(8);
        let r = _mm512_mask_shrdi_epi64::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdi_epi64::<1>(a, 0b11111111, a, b);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdi_epi64() {
        let a = _mm512_set1_epi64(2);
        let b = _mm512_set1_epi64(8);
        let r = _mm512_maskz_shrdi_epi64::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdi_epi64::<1>(0b11111111, a, b);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdi_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(8);
        let r = _mm256_shrdi_epi64::<1>(a, b);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdi_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(8);
        let r = _mm256_mask_shrdi_epi64::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdi_epi64::<1>(a, 0b00001111, a, b);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdi_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(8);
        let r = _mm256_maskz_shrdi_epi64::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdi_epi64::<1>(0b00001111, a, b);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdi_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(8);
        let r = _mm_shrdi_epi64::<1>(a, b);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdi_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(8);
        let r = _mm_mask_shrdi_epi64::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdi_epi64::<1>(a, 0b00000011, a, b);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdi_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(8);
        let r = _mm_maskz_shrdi_epi64::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdi_epi64::<1>(0b00000011, a, b);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdi_epi32() {
        let a = _mm512_set1_epi32(2);
        let b = _mm512_set1_epi32(8);
        let r = _mm512_shrdi_epi32::<1>(a, b);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdi_epi32() {
        let a = _mm512_set1_epi32(2);
        let b = _mm512_set1_epi32(8);
        let r = _mm512_mask_shrdi_epi32::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdi_epi32::<1>(a, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdi_epi32() {
        let a = _mm512_set1_epi32(2);
        let b = _mm512_set1_epi32(8);
        let r = _mm512_maskz_shrdi_epi32::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdi_epi32::<1>(0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdi_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(8);
        let r = _mm256_shrdi_epi32::<1>(a, b);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdi_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(8);
        let r = _mm256_mask_shrdi_epi32::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdi_epi32::<1>(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdi_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(8);
        let r = _mm256_maskz_shrdi_epi32::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdi_epi32::<1>(0b11111111, a, b);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdi_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(8);
        let r = _mm_shrdi_epi32::<1>(a, b);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdi_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(8);
        let r = _mm_mask_shrdi_epi32::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdi_epi32::<1>(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdi_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(8);
        let r = _mm_maskz_shrdi_epi32::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdi_epi32::<1>(0b00001111, a, b);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdi_epi16() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(8);
        let r = _mm512_shrdi_epi16::<1>(a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdi_epi16() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(8);
        let r = _mm512_mask_shrdi_epi16::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdi_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdi_epi16() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(8);
        let r = _mm512_maskz_shrdi_epi16::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdi_epi16::<1>(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdi_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(8);
        let r = _mm256_shrdi_epi16::<1>(a, b);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdi_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(8);
        let r = _mm256_mask_shrdi_epi16::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdi_epi16::<1>(a, 0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdi_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(8);
        let r = _mm256_maskz_shrdi_epi16::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdi_epi16::<1>(0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdi_epi16() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(8);
        let r = _mm_shrdi_epi16::<1>(a, b);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdi_epi16() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(8);
        let r = _mm_mask_shrdi_epi16::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdi_epi16::<1>(a, 0b11111111, a, b);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdi_epi16() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(8);
        let r = _mm_maskz_shrdi_epi16::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdi_epi16::<1>(0b11111111, a, b);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

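    // The expandloadu tests read one contiguous element from memory for every set bit in the
    // mask (lowest bit first) and place it in the corresponding lane; lanes with a clear bit
    // keep src (mask variant) or become zero (maskz variant). The expected vectors are built
    // with the _mm*_set_* helpers, which list the highest lane first.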
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_expandloadu_epi16() {
        let src = _mm512_set1_epi16(42);
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm512_mask_expandloadu_epi16(src, m, black_box(p));
        let e = _mm512_set_epi16(
            16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
            42, 42, 42, 42, 42, 4, 3, 2, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_expandloadu_epi16() {
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm512_maskz_expandloadu_epi16(m, black_box(p));
        let e = _mm512_set_epi16(
            16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
            0, 4, 3, 2, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi16() {
        let src = _mm256_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_mask_expandloadu_epi16(src, m, black_box(p));
        let e = _mm256_set_epi16(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_maskz_expandloadu_epi16(m, black_box(p));
        let e = _mm256_set_epi16(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi16() {
        let src = _mm_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_epi16(src, m, black_box(p));
        let e = _mm_set_epi16(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_epi16(m, black_box(p));
        let e = _mm_set_epi16(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

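    // The epi8 variants below follow the same pattern with byte lanes; the 512-bit forms take
    // a 64-bit mask, so their expected vectors track 64 lanes.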
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_expandloadu_epi8() {
        let src = _mm512_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
        let r = _mm512_mask_expandloadu_epi8(src, m, black_box(p));
        let e = _mm512_set_epi8(
            32, 31, 30, 42, 29, 42, 42, 42, 28, 27, 42, 42, 26, 42, 25, 42, 24, 23, 22, 21, 42, 42,
            42, 42, 42, 42, 42, 42, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 42, 42, 42, 42,
            42, 42, 42, 42, 8, 42, 7, 42, 6, 42, 5, 42, 42, 4, 42, 3, 42, 2, 42, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_expandloadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
        let r = _mm512_maskz_expandloadu_epi8(m, black_box(p));
        let e = _mm512_set_epi8(
            32, 31, 30, 0, 29, 0, 0, 0, 28, 27, 0, 0, 26, 0, 25, 0, 24, 23, 22, 21, 0, 0, 0, 0, 0,
            0, 0, 0, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0,
            7, 0, 6, 0, 5, 0, 0, 4, 0, 3, 0, 2, 0, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi8() {
        let src = _mm256_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm256_mask_expandloadu_epi8(src, m, black_box(p));
        let e = _mm256_set_epi8(
            16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
            42, 42, 42, 42, 42, 4, 3, 2, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm256_maskz_expandloadu_epi8(m, black_box(p));
        let e = _mm256_set_epi8(
            16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
            0, 4, 3, 2, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi8() {
        let src = _mm_set1_epi8(42);
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_mask_expandloadu_epi8(src, m, black_box(p));
        let e = _mm_set_epi8(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi8() {
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_maskz_expandloadu_epi8(m, black_box(p));
        let e = _mm_set_epi8(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
        assert_eq_m128i(r, e);
    }

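    // The compressstoreu tests exercise the inverse operation: lanes whose mask bit is set are
    // written contiguously to memory, lowest lane first, and memory beyond the number of
    // active lanes is left untouched (the trailing zeros come from the zero-initialized buffer).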
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_compressstoreu_epi16() {
        let a = _mm512_set_epi16(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
            10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        );
        let mut r = [0_i16; 32];
        _mm512_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i16; 32]);
        _mm512_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000_11001010_11111111_00000000, a);
        assert_eq!(
            &r,
            &[
                9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0
            ]
        );
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_compressstoreu_epi16() {
        let a = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let mut r = [0_i16; 16];
        _mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i16; 16]);
        _mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000_11001010, a);
        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_compressstoreu_epi16() {
        let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
        let mut r = [0_i16; 8];
        _mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i16; 8]);
        _mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000, a);
        assert_eq!(&r, &[5, 6, 7, 8, 0, 0, 0, 0]);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_compressstoreu_epi8() {
        let a = _mm512_set_epi8(
            64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43,
            42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21,
            20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        );
        let mut r = [0_i8; 64];
        _mm512_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i8; 64]);
        _mm512_mask_compressstoreu_epi8(
            r.as_mut_ptr(),
            0b11110000_11001010_11111111_00000000_10101010_01010101_11110000_00001111,
            a,
        );
        assert_eq!(
            &r,
            &[
                1, 2, 3, 4, 13, 14, 15, 16, 17, 19, 21, 23, 26, 28, 30, 32, 41, 42, 43, 44, 45, 46,
                47, 48, 50, 52, 55, 56, 61, 62, 63, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
            ]
        );
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_compressstoreu_epi8() {
        let a = _mm256_set_epi8(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
            10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        );
        let mut r = [0_i8; 32];
        _mm256_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i8; 32]);
        _mm256_mask_compressstoreu_epi8(r.as_mut_ptr(), 0b11110000_11001010_11111111_00000000, a);
        assert_eq!(
            &r,
            &[
                9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0
            ]
        );
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_compressstoreu_epi8() {
        let a = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let mut r = [0_i8; 16];
        _mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i8; 16]);
        _mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0b11110000_11001010, a);
        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
    }
}